#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from an empty workspace so objects left over from a previous section
# cannot leak into this run. `all.names = TRUE` also removes hidden objects
# (names beginning with "."); the earlier `all=t` passed the transpose
# function `t` instead of the logical TRUE, so hidden objects were kept.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

# Base name of the section being processed; it is reused further down to
# build the output file names and the report title.
filename <- "Section_7" # !!!Update filename
# Helper script that is sourced below.
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

#'# Setup data, functions and create dictionary for dataset review
# NOTE(review): `mydata` and the recode/report helpers used later are not
# created anywhere in this script, so they presumably come from this sourced
# file -- confirm.
source(file = functions_vers)

#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

#'# Direct PII: variables to be removed
# !!!No Direct PII

#'# Direct PII-team: Encode field team names
# !!!No Direct PII-team

#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!No Small Locations

#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Top-code every listed variable at the floor of its 99.5th percentile.
# The percentile is computed on observed values only: NAs are dropped and the
# 999999 code (passed to top_recode() as the missing value) is excluded.
top_coded_vars <- c(
  "eh_s7q3", "eh_s7q4", "eh_s7q9", "eh_s7q10", "eh_s7q11",
  "eh_s7q13", "eh_s7q15", "eh_s7q28", "eh_s7q34"
)

for (top_coded_var in top_coded_vars) {
  observed_values <- na.exclude(mydata[[top_coded_var]])
  percentile_99.5 <- floor(
    quantile(observed_values[observed_values != 999999], probs = c(0.995))
  )
  # top_recode() is not defined in this script (presumably it comes from the
  # sourced helper file); its return value replaces mydata, so each variable
  # is recoded on top of the previous result.
  mydata <- top_recode(
    variable = top_coded_var,
    break_point = percentile_99.5,
    missing = 999999
  )
}

#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Categorical indirect identifiers to tabulate before recoding.
indirect_PII <- "eh_s7q16"
capture_tables(indirect_PII)

# Break points and value labels handed to ordinal_recode() for eh_s7q16.
# NOTE(review): there are eight break points and eight labels (codes 1-8), so
# presumably each break point opens one recoded category -- confirm against
# ordinal_recode() in the helper file.
break_inkind <- c(-999, -998, 1, 2, 3, 4, 5, 99)
labels_inkind <- c(
  "Refused to answer" = 1,
  "Don't know" = 2,
  "Sari Sari business/equipment" = 3,
  "Prepared-Food business/equipment (rice, vegetables, fish, etc)" = 4,
  "Fishing business/equipment" = 5,
  "Merienda/ streetfood business/equipment" = 6,
  "Other" = 7,
  "Other business/assets - specify" = 8
)
mydata <- ordinal_recode(
  variable = "eh_s7q16",
  break_points = break_inkind,
  missing = 999999,
  value_labels = labels_inkind
)

#'# Matching and crosstabulations: Run automated PII check 
# !!!Insufficient demographic data

#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 

# Free-text variables passed to report_open() for review; the row-level
# redactions that follow in this script target these same four columns.
open_ends <- c("eh_s7q17", "eh_s7q26", "eh_s7q33", "eh_s7q36")

report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# eh_s7q17: every listed row has its verbatim replaced by the same generic
# placeholder. Rows are addressed by position, so this relies on mydata
# keeping the row order it had when the verbatims were reviewed.
mydata$eh_s7q17[c(
  157, 260, 265, 281, 330, 429, 493, 563,
  592, 664, 720, 774, 824, 913, 1043, 1061,
  1096, 1255, 1262, 1269, 1296, 1371, 1373, 1407,
  1414, 1423, 1511, 2103, 2141, 2143, 2153, 2160
)] <- "[Other business/assets]"

# eh_s7q26: two positional redactions; the n-th replacement text goes to the
# n-th row index.
mydata$eh_s7q26[c(1134, 2141)] <- c(
  "The dole descided to give them groceries worth [amount redacted], instead they requested rice or pig feeds",
  "[language]"
)

# eh_s7q33: lookup of row position (element name) -> redacted verbatim.
# Rows are addressed by position, so this relies on mydata keeping the row
# order it had when the verbatims were reviewed.
redactions_q33 <- c(
  "78" = "From mayor in [location]",
  "109" = "Senior Citizen -[amount]",
  "285" = "Philhealth - [amount]",
  "291" = "[language]",
  "295" = "[language]",
  "317" = "Shelter assistance - [amount]",
  "328" = "[language]",
  "345" = "Redcross ( shelter assistance ) [amount]",
  "364" = "[language]",
  "368" = "[amount] pesos",
  "374" = "[language]",
  "385" = "Philhealth - [amount]",
  "403" = "Green Ladies (Government of [name])",
  "430" = "Philhealth - [amount]",
  "433" = "[language]",
  "443" = "Cash gift [amount]",
  "467" = "From SSS, [amount] pension for senior citizen every 3 months.",
  "500" = "Given by governor [name]",
  "557" = "Philhealth [amount]",
  "609" = "From municipal of [name]",
  "674" = "[amount]kilos of rice",
  "703" = "[language]",
  "738" = "[language]",
  "814" = "[amount]",
  "858" = "[language]",
  "886" = "[language]",
  "944" = "Dswd [amount] Donation [amount]",
  "1017" = "Gift cheque worth [amount]",
  "1049" = "[language]",
  "1084" = "Senior Citizen pension [amount]",
  "1207" = "Christmast gift of president duterte worth 2 [amount]",
  "1254" = "Christmas gift Grocery worth [amount] from Gov. [name], and [name]",
  "1257" = "Given [amount]k rice of Mayor",
  "1307" = "From governor [name]([amount]) and christmas gift fr. Brgy.([amount])",
  "1345" = "[amount] from other benefits other than dswd.",
  "1368" = "[amount]",
  "1534" = "DSWD totally damaged houses -[amount] 5pcs galvanized sim [amount]",
  "1615" = "River of life NGO - P[amount] DSWD housing - P[amount] DSWD relief goods - P[amount]",
  "1855" = "PhilHealth for [name]",
  "1866" = "Medicine [amount] from barangay center",
  "1899" = "Philhealth [amount]",
  "2001" = "Philhealth [amount] for hospital bills",
  "2065" = "[name] [amount] and [amount]for house renovation",
  "2075" = "[language]",
  "2097" = "[language]",
  "2098" = "[language]",
  "2132" = "Relief [amount]p Senior citizen [amount]",
  "2148" = "[amount] kilos of rice plus grocery items from the governor",
  "2149" = "[amount] kilos of rice and grocery package from the barangay, could be worth [amount] pesos",
  "2174" = "[amount] peso worth of relief from the governor",
  "2193" = "Philhealth [amount]",
  "2273" = "BANHI: [name]' Sponsorship"
)
# Names carry the row numbers; they are stripped from the values so only the
# text is written into the column.
mydata$eh_s7q33[as.integer(names(redactions_q33))] <- unname(redactions_q33)

# eh_s7q36: lookup of row position (element name) -> redacted verbatim.
# Rows are addressed by position, so this relies on mydata keeping the row
# order it had when the verbatims were reviewed.
redactions_q36 <- c(
  "275" = "As per respondent, they already paid the whole [amount] in SLP  and DSWD, and now they claiming the savings [amount]but the DSWD dont yet give back that said amount",
  "313" = "Dole give them [amount] worthof items then [amount] cash",
  "1051" = "The DOLE give [amount] for goat and [amount] for the rice",
  "1277" = "Every 2 months the amount of money received is [amount] but the mother of the respondent is the direct beneficiary of 4P's. The amount of money declared here is the amount received by the children for the past 12 months.",
  "1305" = "She receive [amount] every 2 months for her 4ps.",
  "1329" = "The respondent get careless because  of the activity of her child [name] and she don't have idea how much the income,and repeat the 1st activity twice and she just realize that she just start her business last week",
  "1516" = "She received benefits from owwa because she is ex overseas worker([name])"
)
# Names carry the row numbers; they are stripped from the values so only the
# text is written into the column.
mydata$eh_s7q36[as.integer(names(redactions_q36))] <- unname(redactions_q36)

#'# GPS data: Displace
# !!!No GPS data

#'# Save processed data in Stata and SPSS format

# Write the processed data twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS), into the current working directory.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the section name; the inline `r title_var`
# expression in the YAML block just below picks it up.
title_var <- paste("DOL-ILAB SDC -", filename, sep = " ")
#'---
#'  title: `r title_var`
#'---
