#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from a clean workspace, including hidden (dot-prefixed) objects.
# The flag is spelled out as `all.names = TRUE`: a lowercase `t` refers to
# base::t() (the transpose function), which R does not treat as TRUE, so
# hidden objects would otherwise be left behind.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

# Section identifier; it is also the stem of the output file names and of the
# report title built at the end of this script.
filename <- "Section_3" # !!!Update filename
# Helper script to load; presumably it defines capture_tables() and
# report_open() and creates `mydata` -- confirm in the helper file.
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

#'# Setup data, functions and create dictionary for dataset review
source(file = functions_vers)

#'
#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

#'# Direct PII: variables to be removed
# !!!No Direct PII 


#'# Direct PII-team: Encode field team names
# !!!No Direct PII - team


#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!No Small locations


#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values
# !!!No indirect PII - Ordinal

#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variable names are built from their question numbers. Questions 18 and 20
# are left out here; they appear in the open-ends list further down.
indirect_PII <- paste0("ec_s3q", c(1:17, 19L, 21:31))

# capture_tables() is not defined in this file (presumably it comes from the
# sourced helper script and tabulates each listed variable for review --
# TODO confirm against the helper file).
capture_tables (indirect_PII)

# Recode those with very specific values. 
# !!!No very specific values. Other variables are critical for analysis.

#'# Matching and crosstabulations: Run automated PII check 
# !!!Insufficient demographic data

#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
# Open-end variables, built from their question numbers.
open_ends <- paste0("ec_s3q", c(18L, 20L, 35L, 45L))

# report_open() is not defined in this file (presumably it comes from the
# sourced helper script and exports the responses of the listed variables for
# manual review -- TODO confirm against the helper file).
report_open (list_open_ends = open_ends)


# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 
# Apply the redactions identified during the verbatim review. Row numbers are
# positions in `mydata`, so the data must still be in the row order it had
# when the verbatims were reviewed.
mydata <- local({
  redacted <- mydata

  # Rows whose whole response is replaced by the "[language]" placeholder,
  # keyed by open-end variable.
  whole_response_rows <- list(
    ec_s3q20 = c(305, 1225, 1746, 2816),
    ec_s3q35 = c(
      232, 426, 464, 467, 474, 555, 595, 597, 607, 654,
      734, 757, 766, 787, 837, 842, 898, 930, 976, 996,
      1015, 1016, 1060, 1071, 1074, 1075, 1082, 1135, 1138, 1189,
      1221, 1227, 1233, 1268, 1335, 1398, 1418, 1451, 1495, 1580,
      1593, 1705, 1779, 1788, 1800, 1810, 1950, 1967, 1982, 2150,
      2369, 2481, 2529, 2683, 2684, 2728, 2743, 2926, 3021, 3123,
      3178, 3179, 3212
    ),
    ec_s3q45 = 2629
  )
  for (var in names(whole_response_rows)) {
    redacted[[var]][whole_response_rows[[var]]] <- "[language]"
  }

  # Rows where only part of the response is masked; the replacement text is
  # specific to each cell.
  partial_edits <- list(
    list(var = "ec_s3q20", row = 75,
         text = "Ojt Municipal Office of [municipality]"),
    list(var = "ec_s3q20", row = 267,
         text = "[name] house"),
    list(var = "ec_s3q20", row = 1004,
         text = "Collecting in [municipality] national high school"),
    list(var = "ec_s3q20", row = 1440,
         text = "In the Pumping Area of their place in [municipality]"),
    list(var = "ec_s3q20", row = 1742,
         text = "[language] school [language]"),
    list(var = "ec_s3q20", row = 2761,
         text = "[municipality]"),
    list(var = "ec_s3q20", row = 2847,
         text = "[language] deliver [language]"),
    list(var = "ec_s3q35", row = 903,
         text = "required sa school [language] (On the job training)"),
    list(var = "ec_s3q35", row = 1787,
         text = "Family business [language]")
  )
  for (edit in partial_edits) {
    redacted[[edit$var]][edit$row] <- edit$text
  }

  redacted
})



#'# GPS data: Displace
# !!!No GPS data


#'# Save processed data in Stata and SPSS format

# Export the processed data twice: once as a Stata file, once as an SPSS file.
# Both names are the section identifier followed by the "_PU" suffix.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Report title derived from the section identifier; it is referenced by the
# inline `r title_var` expression in the YAML block that follows.
title_var <- paste0("DOL-ILAB SDC - ", filename)
#'---
#'  title: `r title_var`
#'---
