#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#' theme: united
#' ---

# Start from an empty workspace so earlier session objects cannot leak into
# this run. `all.names = TRUE` also removes dot-prefixed (hidden) objects.
# The previous call passed `t` (base R's transpose function, or whatever
# object named `t` happened to exist) where the logical TRUE was intended,
# and relied on partial matching of `all` to `all.names`.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

# Base name used for the output files and the report title.
# !!!Update filename
filename <- "Section_1"
# Script that is sourced below; the helper functions called throughout this
# file are not defined here, so presumably they come from it.
# !!!Update helper functions file
functions_vers <- "functions_1.8.R"

#'# Setup data, functions and create dictionary for dataset review
source(file = functions_vers)

# NOTE(review): bottom_recode() is defined outside this file; presumably it
# bottom-codes m_s1q28 at 1935 using -999 as the missing code -- confirm
# against the helper file.
mydata <- bottom_recode("m_s1q28", 1935, -999)

#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

#'# Direct PII: variables to be removed
# !!!No Direct PII 

#'# Direct PII-team: Encode field team names
# !!!No Direct PII - team

#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variables handed to encode_location() (defined outside this file).
locvars <- "m_s1q15"
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)


#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variables flagged as indirect PII; the order below is the order in which
# they are passed to capture_tables() (defined outside this file).
indirect_PII <- c(
  "m_s1q27", "m_s1q29", "m_s1q34", "m_s1q38",
  "m_s1q42", "m_s1q43", "m_s1q44", "m_s1q45",
  "m_s1q48", "m_s1q49", "m_s1q46", "m_s1q47",
  "m_s1q50", "m_s1q51", "m_s1q52", "m_s1q53",
  "m_s1q54", "m_s1q55", "m_s1q56", "m_s1q57",
  "m_s1q64", "m_s1q65", "m_s1q65_two",
  "m_s1q66", "m_s1q67", "m_s1q67_two"
)

capture_tables(indirect_PII)

# Recode those with very specific values. 

# Break points and value labels passed to ordinal_recode() for m_s1q34.
# NOTE(review): despite the `_edu` suffix, the labels describe marital status;
# the names are kept unchanged in case the helper file refers to them.
# NOTE(review): there are 9 break points and 9 labels -- confirm against
# ordinal_recode() that this is the pairing it expects.
break_edu <- c(-999, -998, 1, 2, 5, 6, 7, 8, 99)
labels_edu <- c(
  "Refused to answer"              = 1,
  "Don't know"                     = 2,
  "Married Living with Spouse"     = 3,
  "Married Not Living with Spouse" = 4,
  "Divorced/Annulled or Separated" = 5,
  "Widow/Widower"                  = 6,
  "Not married but committed"      = 7,
  "Single"                         = 8,
  "Other"                          = 9
)
mydata <- ordinal_recode(
  variable = "m_s1q34",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

# !!! Removed, as it contains sensitive information and there is another variable that contains this information at a more aggregated level.
# Drop both "_two" variables in a single column subset.
mydata <- mydata[!(names(mydata) %in% c("m_s1q65_two", "m_s1q67_two"))]

#'# Matching and crosstabulations: Run automated PII check 
# !!!Insufficient demographic data


#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
# Open-ended variables handed to report_open() (defined outside this file).
open_ends <- c(
  "m_s1q10",
  "m_s1q14_other", "m_s1q16",
  "m_s1q34_other", "m_s1q36_other",
  "m_s1q65_other", "m_s1q67_other"
)

report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Manual redaction of m_s1q10 verbatims. Each statement overwrites one response,
# addressed by its row position in `mydata`, with a copy in which identifying
# details are replaced by placeholders such as [name], [small location],
# [specific disease] or [language].
# NOTE(review): the row numbers are hard-coded, so they are only valid while
# `mydata` keeps the row order these positions were identified against
# -- confirm after any change to the input data.
# Rows 3847 and 4283 previously still contained a personal name in the
# replacement text; both are now replaced by [name].
mydata$m_s1q10[1] <- "Shes work abroad in [small location]"
mydata$m_s1q10[142] <- "Transfer to his mother house in [small location]"
mydata$m_s1q10[341] <- "She is now in Australia for treatment of [specific disease]"
mydata$m_s1q10[346] <- "With his live in partner in [small location] Ilocos Sur"
mydata$m_s1q10[563] <- "Moved to house of mother [name]"
mydata$m_s1q10[565] <- "Moved to house of mother name [name]"
mydata$m_s1q10[670] <- "Moved out to [small location],treatment for [specific disease]"
mydata$m_s1q10[765] <- "Accompanied by [name]"
mydata$m_s1q10[1246] <- "Son of [name] and [name]"
mydata$m_s1q10[1249] <- "Son of [name] and [name]"
mydata$m_s1q10[1250] <- "Son of [name] and [name]"
mydata$m_s1q10[1405] <- "With his mother [name] in Manila"
mydata$m_s1q10[1478] <- "With her mother [name]"
mydata$m_s1q10[1550] <- "Since [name] were at the age of 1year old,her Aunt [name] took her and been cared until now."
mydata$m_s1q10[1638] <- "[name] workjnb"
mydata$m_s1q10[1730] <- "[name] roden at [name]"
mydata$m_s1q10[1742] <- "[name] was [name] and [name]s son"
mydata$m_s1q10[1747] <- "At the other province living with her husband [name]"
mydata$m_s1q10[1749] <- "At the other province living with his wife [name]"
mydata$m_s1q10[1933] <- "[name] is living with her mother ([name]) in [small location] Rizal."
mydata$m_s1q10[2074] <- "[name] is sick and now at [small location] more 1 month and  staying until recover from sickness."
mydata$m_s1q10[2667] <- "She was the daughter of [name] and [name]"
mydata$m_s1q10[2670] <- "Living in the other house having their own family, she is [name] wife"
mydata$m_s1q10[2671] <- "Living in another house having his own family, [name] was the husband of [name] They are not married"
mydata$m_s1q10[2722] <- "[name] was the son of [name] and [name], they were together"
mydata$m_s1q10[3083] <- "[name]s child"
mydata$m_s1q10[3124] <- "[name] was adopted by the other family since she was  5 months old, During the baseline interview she is still living with them."
mydata$m_s1q10[3394] <- "[language]"
mydata$m_s1q10[3427] <- "Moved for other household (the same household with [name])"
mydata$m_s1q10[3429] <- "Moved for other household (the same household with [name])"
mydata$m_s1q10[3664] <- "She and her son and daughter goes to her husband in [small location], her husband is working there."
mydata$m_s1q10[3672] <- "[name] was the daughter of [name] an [name]"
mydata$m_s1q10[3674] <- "Is the son of [name] and [name]"
mydata$m_s1q10[3789] <- "Respondent said that [name]s mother adapted [name] six months ago til now, [name] is expected to become a permanent member of the respondent's mother household."
mydata$m_s1q10[3847] <- "[name] was the garandchild of the respondent, since [name] is 1 year old the resondent is the one who took care of her , then now shes at her mother since june, of year 2016"
mydata$m_s1q10[3870] <- "[name] is [name]s brother, Now he is living in the other household, based on the respondent he is not working because he is senior citizen already."
mydata$m_s1q10[4025] <- "Live with parents [name]"
mydata$m_s1q10[4027] <- "Live with his parents [name]"
mydata$m_s1q10[4136] <- "Work in [small location]"
mydata$m_s1q10[4138] <- "Live separately with his family. Husband of [name]"
mydata$m_s1q10[4141] <- "Transfered and live  in [small location]"
mydata$m_s1q10[4143] <- "Transfered and live in [small location]"
mydata$m_s1q10[4155] <- "[name] is living with a girlfriend (lesbian)"
mydata$m_s1q10[4258] <- "[name]"
mydata$m_s1q10[4283] <- "[name] is living with her husband and soon to be married"
mydata$m_s1q10[4288] <- "[name] is [name]s wife"
mydata$m_s1q10[4290] <- "[name] was [name]s wife"
mydata$m_s1q10[5112] <- "[name] is together with [name], They are husband and wife."
mydata$m_s1q10[5839] <- "Her father and [name]  go was migrated already"
mydata$m_s1q10[5892] <- "[name] is the wife of [name] they are together"
mydata$m_s1q10[5981] <- "[name] was with [name]"
mydata$m_s1q10[5987] <- "[language]"
mydata$m_s1q10[5988] <- "Daughter of [name]"
mydata$m_s1q10[5989] <- "Independent [language]"
mydata$m_s1q10[6149] <- "Live with his parents in [small location]"
mydata$m_s1q10[6317] <- "Got separated with [name]"
mydata$m_s1q10[6389] <- "[name] ran away"
mydata$m_s1q10[6390] <- "Vacation in [small location] since april"
mydata$m_s1q10[6396] <- "[name] is the wife of [name] and they lived in the same household"
mydata$m_s1q10[6397] <- "[name] was the wife of [name] and they lived in the same house"
mydata$m_s1q10[6399] <- "[name] is the son of [name] and [name] and they  are together"
mydata$m_s1q10[6607] <- "Moved with her husband [name]"
mydata$m_s1q10[6612] <- "Daughter of [name] and [name]"
mydata$m_s1q10[6660] <- "[name] took care of [name] while her mother was in abroad, and now [name] is in his mother costudy."
mydata$m_s1q10[6694] <- "Former livein partner of [name]"
mydata$m_s1q10[6708] <- "Live sepately with [name]"
mydata$m_s1q10[6711] <- "Back to their own house in Surigao del Sur with his father ([name])"
mydata$m_s1q10[6716] <- "Back to their own house in Surigao with his father  ([name])"
mydata$m_s1q10[6768] <- "Separated with [name]. Has his own family"
mydata$m_s1q10[6770] <- "Wife of [name]"
mydata$m_s1q10[6954] <- "Transfer to [name]"
mydata$m_s1q10[7032] <- "Want to be with her mother, [name], and to look for a job."
mydata$m_s1q10[7039] <- "Also, to be with her mother, [name]"
mydata$m_s1q10[7040] <- "To be with his grandmother/guardian, [name], because his mother passed away aft3r giving birth to him. His father is [name]"
mydata$m_s1q10[7156] <- "[language]"
mydata$m_s1q10[7276] <- "[name] took [name] with her."
mydata$m_s1q10[7277] <- "[name] took [name] with her."
mydata$m_s1q10[7287] <- "Live together with his dad [name]"
mydata$m_s1q10[7499] <- "[language]"
mydata$m_s1q10[7504] <- "Son of [name] moved household"
mydata$m_s1q10[8069] <- "She broke up with [name]"
mydata$m_s1q10[8153] <- "[language]"
mydata$m_s1q10[8889] <- "Her husband live in brgy. [small location]"
mydata$m_s1q10[8900] <- "Husband of [name]  they live with [name] parents"
mydata$m_s1q10[9214] <- "Visit her children to her 1st husband in [small location]"
mydata$m_s1q10[9432] <- "[name] is child of [name] and [name] and he move to other location because the work of his father."
mydata$m_s1q10[9433] <- "[name] is child of [name] and [name] and he move to other location because the work of his father."
mydata$m_s1q10[9795] <- "She Stayed with her father in [small location]"
mydata$m_s1q10[9796] <- "He worked in [small location]"
mydata$m_s1q10[9967] <- "Caretaker"
mydata$m_s1q10[9981] <- "Move out to [small location]"
mydata$m_s1q10[10940] <- "Hd went home to his hometown in [small location], negros oriental"
mydata$m_s1q10[11026] <- "[name] bring his younger brother([name]) in Manila to See doctor for his follow up medications."
mydata$m_s1q10[11138] <- "[language] sa other relative"
mydata$m_s1q10[12050] <- "Daugther of [name] shes living with her mother in [small location]"
mydata$m_s1q10[12192] <- "[name] stays with his parents"
mydata$m_s1q10[12228] <- "[name] live with her mother again"
mydata$m_s1q10[12439] <- "[language]"
mydata$m_s1q10[12643] <- "[language]"
mydata$m_s1q10[12790] <- "[name](granchild)hes staying already to his own mother ."
mydata$m_s1q10[12795] <- "Hes staying already to his mother..([name] is granchild of the respo"
mydata$m_s1q10[12836] <- "[name] transfer to another household"
mydata$m_s1q10[12838] <- "[name] transfer to another household"
mydata$m_s1q10[12839] <- "He returns back to his mother([name])"
mydata$m_s1q10[12906] <- "[name] was fetch by her auntie and brought to laguna."
mydata$m_s1q10[12941] <- "They have there on house in the Barangay [small location]"
mydata$m_s1q10[13133] <- "Fallow her husband [name]"
mydata$m_s1q10[13230] <- "[small location]"
mydata$m_s1q10[13280] <- "Son of [name] and [name]"
mydata$m_s1q10[13284] <- "Daughter of [name] and [name]"
mydata$m_s1q10[13857] <- "[language]"
mydata$m_s1q10[13974] <- "[language]"
mydata$m_s1q10[15542] <- "[name] & [name] baby"
mydata$m_s1q10[15544] <- "She's with [name]"
mydata$m_s1q10[15546] <- "With her husband [name]"

# Overwrite the four m_s1q14_other responses at these row positions with the
# same placeholder in one vectorised assignment.
mydata$m_s1q14_other[c(1524, 2357, 7892, 12219)] <- "[small location]"

# !!! Removed, as these variables contain sensitive information and there is
# another variable that contains this information at a more aggregated level.
mydata <- mydata[
  !(names(mydata) %in% c("m_s1q16", "m_s1q65_other", "m_s1q67_other"))
]


#'# GPS data: Displace
# !!!No GPS data

#'# Save processed data in Stata and SPSS format

# Write the processed data twice: once via haven::write_dta() (Stata) and once
# via haven::write_sav() (SPSS), both named "<filename>_PU".
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Add report title dynamically
title_var <- paste0(
  "DOL-ILAB SDC - ",
  filename
)
#'---
#'  title: `r title_var`
#'---
