#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from an empty workspace so objects left over from a previously run
# section cannot leak into this one. `all.names = TRUE` is spelled out in full:
# the earlier `all=t` passed base R's transpose function `t` instead of a
# logical and relied on partial matching of the argument name, so hidden
# (dot-prefixed) objects were not reliably cleared.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

# Base name of this section's dataset; reused later in this script to name the
# exported files and to build the report title.
filename <- "Section_5" # !!!Update filename
# Path of the helper script that is source()d below.
functions_vers <-  "functions_1.8.R" # !!!Update helper functions file

#'# Setup data, functions and create dictionary for dataset review
# NOTE(review): `mydata` and the helpers called later in this script
# (top_recode, capture_tables, report_open) are not defined in this file, so
# they are presumably created by the sourced script - confirm.
source (functions_vers)
#'
#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 



#'# Direct PII: variables to be removed

# NOTE(review): despite the section heading, no column is dropped here; the
# only action is passing household_id through zap_labels() and storing the
# result back in place.
# zap_labels() is not defined in this file - presumably it comes from a package
# attached by the sourced helper script (haven?); confirm which one.
mydata$household_id <- zap_labels(mydata$household_id)

#'# Direct PII-team: Encode field team names
# !!!No Direct PII - team


#'# Small locations: Encode locations with pop <100,000 using random large numbers
# !!!No Small locations


#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Top code high income to the 99.5 percentile.
# Every variable listed below gets the same treatment: the break point is the
# floored 99.5th percentile of its values after dropping NA and the 999999
# code, and that break point is handed to top_recode() together with the code.
missing_code <- 999999
top_code_vars <- c(
  "m_s5q4", "m_s5q5", "m_s5q9", "m_s5q10", "m_s5q12",
  "m_s5q13", "m_s5q15", "m_s5q17", "m_s5q19"
)

for (top_code_var in top_code_vars) {
  reported <- na.exclude(mydata[[top_code_var]])
  reported <- reported[reported != missing_code]
  percentile_99.5 <- floor(quantile(reported, probs = c(0.995)))
  mydata <- top_recode(
    variable = top_code_var,
    break_point = percentile_99.5,
    missing = missing_code
  )
}


#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variables of this section flagged as indirect PII; the whole vector is
# handed to capture_tables() (helper defined outside this file).
indirect_PII <- c(
  "m_s5q2", "m_s5q3", "m_s5q6", "m_s5q8", "m_s5q11",
  "m_s5q14", "m_s5q16", "m_s5q18", "m_s5q21"
)

capture_tables(indirect_PII)

# Recode those with very specific values. 
# !!!No very specific values


#'# Matching and crosstabulations: Run automated PII check 


#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
# Free-text variables of this section, passed to report_open() (helper defined
# outside this file) for the verbatim review.
open_ends <- c("m_s5q20", "m_s5q20_other", "m_endnote5")

report_open(list_open_ends = open_ends)


# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Row numbers of m_s5q20 picked out in the verbatim review; every one of them
# is overwritten with the generic answer "Other".
s5q20_redact_rows <- c(122, 425, 654, 755, 791, 847, 1003, 1730)
mydata$m_s5q20[s5q20_redact_rows] <- "Other"

# Redacted replacements for m_endnote5, one assignment per row number taken
# from the verbatim review. Names, small locations and untranslated text are
# replaced by bracketed placeholders; the rest of each note is kept verbatim.
#
# Rows are listed in ascending order and each row appears exactly once. Rows
# 1075 and 1764 used to be set to "[Tagalog]" first and then overwritten
# further down; only the value that actually ended up in the data is kept.
#
# NOTE(review): rows 303, 380 and 854 use the placeholder "[Tagalog]" while
# rows 1035 and 1764 use "[language]" - confirm whether one placeholder should
# be used throughout (left unchanged here so the output data is not altered).
# NOTE(review): the edits are keyed on row position, so they assume mydata is
# in the same row order as when "verbatims.csv" was reviewed.
mydata$m_endnote5[41] <- "4Ps benefits were received by [name] and he'll be the one to distribute it to his siblings who's studying in high school. So the respondent does not have an idea about the amount."
mydata$m_endnote5[63] <- "Only one student is beneficiary of 4Ps, that's why they only received a small amount. While the items from DOLE were left to [name 1] (other DOLE benefeciary)by [name 2] when sh's gone to Manila to worked. And [name 3] is the one who gets the money of the sold items."
mydata$m_endnote5[94] <- "[name] also conducted training regarding livelihood"
mydata$m_endnote5[126] <- "The responses pertaining to 4Ps benefits were  answered by respondent's wife, [name]"
mydata$m_endnote5[165] <- "The ATM is in his daughter [name] living in [small location]. He dont know how much they received in 4Ps."
mydata$m_endnote5[185] <- "Respondent is not a 4Ps member but her 3 step children ([name], [name] and [name]) received a cash benefits from the 4Ps because they were covered from their aunt who was a member of 4Ps. She dont know how much did they received because of some family conflict between her and the aunt."
mydata$m_endnote5[214] <- "Beforethe respondent's father-in-law died, [name], got hospitalized. And the bill amounting to 4000 had been paid by his Senior Citizen membership axcording to the respondent. While the respondent had attended 4 times to a DoLE meeting and until now she's waiting for the 10000 worth of grocery items."
mydata$m_endnote5[303] <- "[Tagalog]"
mydata$m_endnote5[330] <- "He is 4ps beneficiary but he did not get the pay out for the past months because he said he is far from Brgy [small location]"
mydata$m_endnote5[352] <- "In this section their family recieved benefits from 4P's, SLP she used to buy 2 piglets and groceries or relief goods from Local Government Unit of [small location], from Brgy. [small location] and  UNICEF."
mydata$m_endnote5[380] <- "[Tagalog]"
mydata$m_endnote5[854] <- "[Tagalog]"
mydata$m_endnote5[1035] <- "Street vendor [language]"
mydata$m_endnote5[1067] <- "They received a grocery last Decmber 2016 from the [small location] local government."
mydata$m_endnote5[1075] <- "[name] receive educ asst at [small location] municipality"
mydata$m_endnote5[1124] <- "[name] from 4Ps, household also received a 4pcs. Of plywood from [small location] local gov't. But respondent does not know its amount."
mydata$m_endnote5[1220] <- "Household received 10000(in kind) from DOLE, 9800 from 4Ps and another 15000 from [small location] government as educational assistance for [name] and [name], and another 5000 from [small location] brgy. Government. In total, they received a 39800 of benefits for the last 12 months."
mydata$m_endnote5[1382] <- "[name] from 4Ps, [name] also receiving 5000 as an educational assistance for her study."
mydata$m_endnote5[1468] <- "Up [small location] have more help in the household than dept of agriculture."
mydata$m_endnote5[1477] <- "[small location]"
mydata$m_endnote5[1479] <- "DOLE conducted seminar on how  to manage business which held at the municipal of [small location]"
mydata$m_endnote5[1514] <- "DOLE conducted seminars on how to manage a sari sari store held at Municpality of [small location].  They claimed lum sum in SSS and the monthly pension will start on 2018, monthly pension is 3,337"
mydata$m_endnote5[1727] <- "The respondents of barangay [small location] received a motorboat amounted to 35,000 from DOLE.the motorboat leader has a policy for the members,.. One person for one week."
mydata$m_endnote5[1764] <- "[language]"
mydata$m_endnote5[2239] <- "4Ps Scholarship [name]"


#'# GPS data: Displace
# !!!No GPS data


#'# Save processed data in Stata and SPSS format

# Both exports share the stem "<filename>_PU"; only the extension differs.
public_use_stem <- paste0(filename, "_PU")
haven::write_dta(mydata, paste0(public_use_stem, ".dta"))
haven::write_sav(mydata, paste0(public_use_stem, ".sav"))

# Add report title dynamically
# Report title, picked up by the inline `r title_var` in the YAML block below.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)
#'---
#'  title: `r title_var`
#'---
