#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from an empty workspace so objects left over from a previous run
# cannot leak into this one. The argument is spelled out as
# `all.names = TRUE`: a bare `t` is the base transpose function, not a logical,
# so dot-prefixed (hidden) objects were not being listed for removal.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

# !!!Update filename
# Reused further down as the prefix of the exported data files and in the
# report title.
filename <- "Section_4"

# !!!Update helper functions file
functions_vers <- "functions_1.8.R"

#'# Setup data, functions and create dictionary for dataset review
# Run the helper script named above. The helpers and the `mydata` object used
# by the rest of this script are not defined in this file, so presumably they
# come from it - confirm.
source(file = functions_vers)

#'
#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location (>100,000)
# Weight: weightVar
# Household ID:  hhId, 
# Open-ends: Review responses for any sensitive information, redact as necessary 

#'# Direct PII: variables to be removed

# Pass household_id through zap_labels() and store the result back in the same
# column. zap_labels() is not defined in this file; presumably it is
# haven::zap_labels(), which strips value labels - confirm.
# NOTE(review): the section heading mentions variables "to be removed", but
# this statement drops no column - confirm that no direct-PII variable needs
# removing from this section.
mydata$household_id <- zap_labels(mydata$household_id)

#'# Direct PII-team: Encode field team names
# !!!No Direct PII - team


#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!No Small locations


#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values
# !!!No Indirect PII - Ordinal


#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Names of the indirect-PII variables: items 1 and 3 through 9 of the
# "m_s4q" series (item 2 is intentionally not in the list).
indirect_PII <- paste0("m_s4q", c(1, 3:9))

# Hand the indirect-PII variable names to capture_tables(), a helper that is
# defined outside this file.
capture_tables(indirect_PII)

# Recode those with very specific values. 
# !!!No very specific values. 


#'# Matching and crosstabulations: Run automated PII check 
# !!!Insufficient demographic data


#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
# Names of the free-text (open-end) variables reviewed in this section.
open_ends <- c("m_s4q1_other", "m_endnote4")

# Pass the open-end variable names to report_open(), a helper that is defined
# outside this file.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 
# !!! Redacted, as it contains sensitive information and some information is in Tagalog.
# Redaction of m_s4q1_other: each list entry is named by the replacement text
# and holds the row positions that receive that text. Row positions are
# hard-coded, so they are only valid for the row order of the data as loaded.
s4q1_other_redactions <- list(
  "[Agricultural, forestry and fishery labourer]" = c(
    29, 38, 46, 55, 102, 1054, 1313
  ),
  "[Labourer in mining, construction, manufacturing and transport]" = c(
    53, 1078, 1083, 1136, 1172, 1190, 1315, 1344, 1976
  ),
  "[Cleaner and helper]" = 100,
  "[Sales worker]" = c(
    188, 1076, 1125, 1224, 1916, 1917, 1940, 2078
  ),
  "[Personal service worker]" = 1467,
  "[[language]]" = c(873, 1310),
  "[]" = 1316,
  "[Other]" = c(
    69, 77, 193, 241, 475, 689, 721, 822, 865, 916, 978, 1026,
    1089, 1093, 1225, 1233, 1235, 1269, 1395, 1431, 1464,
    1582, 1609, 1625, 1648, 1736, 1778, 1875, 1914,
    2007, 2123, 2137, 2188, 2211, 2274, 2281
  )
)

# Overwrite all rows that share a replacement text in one assignment.
for (redaction_text in names(s4q1_other_redactions)) {
  mydata$m_s4q1_other[s4q1_other_redactions[[redaction_text]]] <- redaction_text
}

# Drop the temporaries so the workspace holds the same objects as before.
rm(s4q1_other_redactions, redaction_text)

# Redaction of m_endnote4: each statement overwrites one row of the free-text
# end note with a version in which identifying details (personal names,
# typhoon names, small locations, amounts) are replaced by bracketed
# placeholders. Row positions are hard-coded, so they are only valid for the
# row order of the data as loaded.
mydata$m_endnote4[18] <- "Even they are a 4Ps beneficiary, they don't have a cashcard. Also, they mainly get their income in [name] selling 'kakanin' in the market."
mydata$m_endnote4[27] <- "[name] was confinced at the hospital for dehydration"
mydata$m_endnote4[69] <- "The parents of the [name] siblings is regularly sending money to the household twice a month."
mydata$m_endnote4[76] <- "[name] got hospitalized for 4days because of diarrhea just this May 2017."
mydata$m_endnote4[87] <- "Before sDq12 has been answered, respondent's cp got low battery so she need to charged it for 45 minutes before we proceed. [name] had been hospitalized last October when he had a bicycle accident."
mydata$m_endnote4[156] <- "[name] got hospitalized last July 2016 because of meningitis for 25days."
mydata$m_endnote4[219] <- "[name] got hospitalized for 4 days last July 2016 because of asthma."
# Both mentions of the husband's name are masked; the second sentence
# previously still contained his first name.
mydata$m_endnote4[231] <- "Respondent's husband, [name] got hospitalized because of leptospirosis, UTI and sepsis last April for 20 days. But until now, [name] is not yet fully recovered."
mydata$m_endnote4[236] <- "[name] got hospitalized due to asthma infection last May 2017."
mydata$m_endnote4[260] <- "Her kitchen is damage  by typhoon [name of the typhoon] Her livelihood is derived from the salary of her children who has working in Manila. She is too old to work."
mydata$m_endnote4[286] <- "[name] died last year April 2016"
mydata$m_endnote4[332] <- "They are affected by typhoon [name of typhoon] last October 2016 all roof are lumipad and thier plants are destroy."
mydata$m_endnote4[333] <- "Their house was totally damage by typhoon [name of the typhoon] last October and also affected their farm."
mydata$m_endnote4[337] <- "Their most source of income is the wage of his son [name]"
mydata$m_endnote4[349] <- "Affected by typhoon [name of the typhoon]"
mydata$m_endnote4[352] <- "Typoon [name of the typhoon] affected thier farm and some part of thier house."
mydata$m_endnote4[362] <- "All roofs flown  because of typhoon [name of the typhoon]"
mydata$m_endnote4[366] <- "Displacement because of typhoon [name of the typhoon]"
mydata$m_endnote4[462] <- "[name] hospitalized because of dengue."
mydata$m_endnote4[907] <- "The respo and the baseline respo [name] do not have an ATM account even if they receive benefits from 4Ps. Payouts are then given on a traditional basis such as calling [name] and personally handing him the cash. When asked why this is so, the reso explained that before they used to have an ATM which was coursed through her deceased mother's account."
mydata$m_endnote4[1064] <- "It was last October 2016 when [name] died. Respondent wasn't sure of the cause of death."
mydata$m_endnote4[1067] <- "Last April, they were forced to leave their house due to an earthquake. His 'fish cages' business had a lost of [amount of money] due to ever feeds, all the fishes died."
mydata$m_endnote4[1096] <- "They transferred to a new location- [small location] near the cemetery called [small location]"
mydata$m_endnote4[1137] <- "[name] DIED JUNE, 2016"
mydata$m_endnote4[1173] <- "[language], laundry to other household stop april this year"
mydata$m_endnote4[1220] <- "When there's no class meaning no worked in canteen, [name] do cooking for an income, she also do laundry to other household. Respondent also mentioned that they were given a 3years time to leaved their residency because it's a government property."
mydata$m_endnote4[1283] <- "The household's land were not owned, they were just giving [amount]of rice to the owner every after harvest. [name] was the one who was hospitalized last November."


#'# GPS data: Displace
# !!!No GPS data


#'# Save processed data in Stata and SPSS format

# Export the processed data twice under the "<filename>_PU" stem: once as a
# Stata file (.dta) and once as an SPSS file (.sav).
haven::write_dta(data = mydata, path = sprintf("%s_PU.dta", filename))
haven::write_sav(data = mydata, path = sprintf("%s_PU.sav", filename))

# Add report title dynamically
# Report title: fixed project prefix and the section file name, joined by
# " - "; the YAML block that follows reads this variable.
title_var <- paste("DOL-ILAB SDC", filename, sep = " - ")
#'---
#'  title: `r title_var`
#'---
