#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from a clean workspace. The flag must be the logical TRUE spelled out
# in full: `t` is base R's transpose function, not a logical value, and `T`
# can be reassigned. `all.names = TRUE` also clears dot-prefixed objects.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

filename <- "Section_0" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

#'# Setup data, functions and create dictionary for dataset review
# Run the helper script. The helpers called further down (encode_location,
# capture_tables, report_open) and the `mydata` object are not defined in this
# script, so they are presumably created by this call -- TODO confirm.
source(functions_vers)

#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 


#'# Direct PII: variables to be removed

# Overwrite household_id with the result of zap_labels() on itself.
# zap_labels() is not defined in this script (presumably haven::zap_labels,
# attached by the sourced helper file -- TODO confirm).
# NOTE(review): the column is kept, not dropped, even though this section is
# titled "variables to be removed" -- confirm that is intended.
mydata$household_id <- zap_labels(mydata$household_id)
  
#'# Direct PII-team: Encode field team names
# !!!No Direct PII - team


#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variables flagged as small (<100,000 population) locations.
locvars <- c("m_s0q4", "m_s0q11", "m_s0q12")

# encode_location() is an external helper; whatever it returns replaces
# `mydata`. 999999 is handed over as its `missing` argument.
mydata <- encode_location(variables = locvars, missing = 999999)


#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values
# !!!No Indirect PII - Ordinal


#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Categorical indirect identifiers to tabulate; add further names as needed.
indirect_PII <- c(
  "m_s0q21"
)

# capture_tables() is an external helper that receives the variable names.
capture_tables(indirect_PII)

# Recode those with very specific values. 
# !!!No specific values


#'# Matching and crosstabulations: Run automated PII check 
# !!!Insufficient demographic data


#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
# Free-text variables whose responses need a manual read-through.
open_ends <- c("m_s0q18", "m_endnote0")

# report_open() is an external helper that receives the open-end names.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 


# !!!Redacted, as it contains sensitive information and some information is in Tagalog.
# Overwrite the flagged m_s0q18 responses with redacted text. Bracketed tokens
# such as [city] or [name] stand in for the removed detail and must be
# well-formed; spelling in the respondents' own wording is left as recorded.
# NOTE(review): rows are addressed by position, so this assumes `mydata` is
# still in the row order used when the verbatims were reviewed -- confirm.
mydata$m_s0q18[1] <- "Shes working abroad in [city]"
mydata$m_s0q18[237] <- "[situation]since November last year"
mydata$m_s0q18[269] <- "Went to [city] to take care of her sick daughter"
mydata$m_s0q18[295] <- "Went to a Health Center in [city] with her sick child"
mydata$m_s0q18[944] <- "At [municipality] and no definite time of arrival"
mydata$m_s0q18[988] <- "He rad [name]' work related"
mydata$m_s0q18[1062] <- "[place], christening"
mydata$m_s0q18[1091] <- "Worke abroad -[date]"
mydata$m_s0q18[1138] <- "[Tagalog]"
mydata$m_s0q18[1278] <- "Abroad [country]"
mydata$m_s0q18[1433] <- "[name] didnt know where is [name]"
mydata$m_s0q18[1435] <- "[name] told that he is busy and lots of work to be done for thw whole week. He designate his wife to be the respondent since she is knowledgeable for answering the questions."
mydata$m_s0q18[1466] <- "Baseline respondent is in the hospital , in no definite date of back home, husband of [name] is in mountain with no definite time of arrival, the only adult is 20 yrs old child."
mydata$m_s0q18[1489] <- "She brought her husband to a hospital due to [illness]"

# Overwrite the flagged m_endnote0 interviewer notes with redacted text.
# Bracketed tokens such as [name] or [address] stand in for the removed detail
# and must be well-formed; spelling in the notes themselves is left as recorded.
# NOTE(review): rows are addressed by position, so this assumes `mydata` is
# still in the row order used when the verbatims were reviewed -- confirm.
mydata$m_endnote0[11] <- "[name] not [name]"
mydata$m_endnote0[36] <- "Baseline Respondent last name is [last name] since she's already married. She said that she mistakenly gave her maiden name to the baseline surveyor."
mydata$m_endnote0[39] <- "Interview conducted in a waiting shed near [Highschool], because respondent is attending a meeting in the highschool. Respondent's last name is [last name] not [last name]."
mydata$m_endnote0[63] <- "Baseline respondent, [name], is busy working in [city], and no definite time of coming home. She left last June 2. [name]'s children were left to their grandmother's care."
mydata$m_endnote0[126] <- "Respondent's house is too far from the hiway and has a muddy road going there, so we had to rent a serviced tricycle from the [market] to his house and vise versa."
mydata$m_endnote0[128] <- "Baseline respondent moved out with his son [name] and gone with another woman."
mydata$m_endnote0[145] <- "She understand and speak Tagalog because she is from [city]"
mydata$m_endnote0[148] <- "Respondent is currently at her another house in brgy. [address]. I need to be accompanied by the tricycle driver to passed a steep woods and a toe-watered-river."
mydata$m_endnote0[149] <- "I also had to crossed a river to reached the respondent's house. According to the respondent, during rainy season, when the river overflows, they have to took a longer route in brgy. [address] in order for them to reach the highway."
mydata$m_endnote0[151] <- "Contact number is owned by [name], aunt and neighbor of the respondent because they don't have any."
mydata$m_endnote0[156] <- "Respondent is currently at her shop at [site], in her Aunt [name]'s frontyard. So, the interview needs to be conducted here at her [shop]."
mydata$m_endnote0[239] <- "Baseline respondent everydays goes to work from [time] because its raining season which means a lot of work in the farm. Contact numbers were owned by [name], their daughter who adopted by Aunt/neighbor."
mydata$m_endnote0[270] <- "[name] was currently at work and the wife is not sure of what time he'll be back. So I decided to interview his wife."
mydata$m_endnote0[350] <- "Her name is [name]NOT [name]."
mydata$m_endnote0[486] <- "[name]"
mydata$m_endnote0[580] <- "The baseline respondent name is [name]."
mydata$m_endnote0[681] <- "[name]"
mydata$m_endnote0[722] <- "Brgy. [name of barangay] not [name]"
mydata$m_endnote0[822] <- "[name]"
mydata$m_endnote0[829] <- "The respondent surname is [surname] (n is enye) not [surname]"
mydata$m_endnote0[830] <- "The correct spelling of the respondent name is [name]"
mydata$m_endnote0[844] <- "The correct spelling of the respondent is [name]"
mydata$m_endnote0[850] <- "[address]"
mydata$m_endnote0[866] <- "[name]"
mydata$m_endnote0[896] <- "[name] is hr right name"
mydata$m_endnote0[917] <- "[name]"
mydata$m_endnote0[922] <- "[Tagalog]"
mydata$m_endnote0[949] <- "Correct spelling ([name])"
mydata$m_endnote0[959] <- "[site]"
mydata$m_endnote0[975] <- "She is [native group], its hard for us to communicate because she don't know mucb of tagalog"
mydata$m_endnote0[987] <- "[name] is dead due to [illness]"
mydata$m_endnote0[1020] <- "[name] true spelling"
mydata$m_endnote0[1026] <- "[address]"
mydata$m_endnote0[1027] <- "With her [person]"
mydata$m_endnote0[1030] <- "[Tagalog]"
mydata$m_endnote0[1037] <- "The correct spelling of the respondent's name is [name]"
mydata$m_endnote0[1067] <- "[name] moved to brgy. [address] because her youngest daughter can not longer leave  here becausr of the hot surroundings and no elctricity. Also the school of their children were way far from here."
mydata$m_endnote0[1076] <- "[name] the baseline respondent is at work and the husband is available and knowledgeable the economic activity ofthe family"
mydata$m_endnote0[1119] <- "House is owned by [name] who is known in the sitio and not the respondents name."
mydata$m_endnote0[1147] <- "[name] moved out to another place"
mydata$m_endnote0[1175] <- "Basesline respondent not available because of work in [site]"
mydata$m_endnote0[1231] <- "[name] the wife of [name] says that she is the person who participated on the baseline survey last year not her husband."
mydata$m_endnote0[1243] <- "[name]"
mydata$m_endnote0[1275] <- "[name] is in manila and only the husband is available"
mydata$m_endnote0[1285] <- "[Tagalog]"
mydata$m_endnote0[1306] <- "[name] died on [date]"
mydata$m_endnote0[1332] <- "[Tagalog]"
mydata$m_endnote0[1379] <- "Interview conducted at Councilor [name]'s front yard, because respondent is working there."
mydata$m_endnote0[1386] <- "Interview conducted with my observer [names]"
mydata$m_endnote0[1404] <- "His youngest daughter just died [date]"
mydata$m_endnote0[1406] <- "Respondent is available with observe of [names]"
mydata$m_endnote0[1441] <- "Baseline respondent is available she moved out to [municipality]"
mydata$m_endnote0[1452] <- "[name] is the respondents name  be specific because there is 2 other [name]"
mydata$m_endnote0[1459] <- "The house is located near the respondent [name]'s house, before the [name] farm"
mydata$m_endnote0[1472] <- "Interview conducted outside their[address] because her house is far."
mydata$m_endnote0[1481] <- "Her surname is [surname], [address] is her sitio address."
mydata$m_endnote0[1486] <- "Interview conducted at [address] in his tricycle because he's driving it untik 6pm, but we're still alone and far from other people."
mydata$m_endnote0[1490] <- "Respondent's name is [name], not [name]."
mydata$m_endnote0[1518] <- "Respondent is moved to another barangay([barangay name]) because they experience fire"
mydata$m_endnote0[1549] <- "Their previous address is in Brgy. [barangay name]. Now they migrated there whole family in [address]"
mydata$m_endnote0[1565] <- "Their previous address is in [barangay].Now they migrated in Brgy. [address]."
mydata$m_endnote0[1608] <- "Correction of spelling of surname. It's [surname] not [surname]"
mydata$m_endnote0[1633] <- "The contact no.is care of [name]"
mydata$m_endnote0[1660] <- "The main Respondent([name])is not available always at work late to go home.I interview is his wife([name]).."
mydata$m_endnote0[1699] <- "Together with the S.F.O(observed by [name])"
mydata$m_endnote0[1786] <- "Correction of the surname. Its [surname] not [surname]."
mydata$m_endnote0[1847] <- "[name] is not available..shes in manila thats why her husband is my respondent"
mydata$m_endnote0[1894] <- "The contact no.is care of [name]"
mydata$m_endnote0[1923] <- "Daughter of [name]"
mydata$m_endnote0[1987] <- "Her name is [name] "
mydata$m_endnote0[2036] <- "His name is [name]"
mydata$m_endnote0[2085] <- "[name] is always going home from work very late(around [hours])in the eve"
mydata$m_endnote0[2161] <- "The contanct # is care off [name] grand child of respondent."
mydata$m_endnote0[2196] <- "I interviewed the respondent in her new house in barangay [barangay]. They have transferred there because their house in [site] was down due to typhoon but she reconstructing it."


#'# GPS data: Displace
# !!!No GPS data


#'# Save processed data in Stata and SPSS format

# Write the processed data once per output format. Paths are relative to the
# working directory and follow the "<filename>_PU.<ext>" pattern.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the file name; it is picked up by the inline
# `r title_var` expression in the YAML block that follows.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)
#'---
#'  title: `r title_var`
#'---
