#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from a clean workspace, including hidden (dot-prefixed) objects.
# `TRUE` is spelled out in full: lower-case `t` is base R's transpose
# function, not a logical, and `all.names` is named explicitly instead of
# relying on partial argument matching.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

# Stem of the section being processed; reused below for the output file
# names and the report title.
filename       <- "Section_6"       # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

#'# Setup data, functions and create dictionary for dataset review
# `mydata` and the helpers called below (top_recode, capture_tables,
# report_open) are not defined in this file; presumably they come from the
# sourced helper file -- confirm against it.
source(file = functions_vers)

#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 


#'# Direct PII: variables to be removed
# !!!No Direct PII

#'# Direct PII-team: Encode field team names
# !!!No Direct PII-team

#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!No small locations

#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Value excluded from the percentile computation and handed to top_recode()
# as its `missing` argument.
missing_code <- -998

# Questions 71, 72 and 76 are repeated for roster rows 1-25, ordered
# q71_1, q72_1, q76_1, q71_2, ...; the remaining questions are single items.
roster_vars <- paste0(
  "eh_s6q", rep(c(71, 72, 76), times = 25),
  "_", rep(seq_len(25), each = 3)
)
single_vars <- paste0("eh_s6q", c(78, 79, 80, 82, 84, 85))

# Not top-coded: only one response
skip_top_code <- c("eh_s6q76_23", "eh_s6q76_24", "eh_s6q72_25")

# setdiff() keeps the order of its first argument, so variables are processed
# roster row by roster row, followed by the single items.
top_code_vars <- setdiff(c(roster_vars, single_vars), skip_top_code)

# Break point per variable: the 99.5th percentile of the non-NA values other
# than `missing_code`, rounded down. Every break point is computed from the
# data as it stands before any variable is recoded.
pctile_99.5 <- lapply(top_code_vars, function(var_name) {
  valid <- na.exclude(mydata[[var_name]])
  floor(quantile(valid[valid != missing_code], probs = c(0.995)))
})
names(pctile_99.5) <- top_code_vars

# top_recode() is called with the variable name only and its result replaces
# `mydata`, so the calls are made one variable at a time, in order.
for (var_name in top_code_vars) {
  mydata <- top_recode(
    variable = var_name,
    break_point = pctile_99.5[[var_name]],
    missing = missing_code
  )
}

#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variables flagged as indirect PII. Roster questions are repeated once per
# roster row and listed question-within-row (e.g. q5_1, q6_1, q5_2, q6_2, ...).
indirect_PII <- local({
  # Names for `questions`, interleaved across roster rows 1..n_rows.
  roster <- function(questions, n_rows) {
    paste0(
      "eh_s6q", rep(questions, times = n_rows),
      "_", rep(seq_len(n_rows), each = length(questions))
    )
  }

  c(
    "eh_s6q3_1",  roster(c(5, 6), 25),
    "eh_s6q9_1",  roster(c(11, 12), 7),
    "eh_s6q15_1", roster(c(17, 18), 8),
    "eh_s6q21_1", roster(23, 8),
    "eh_s6q27_1", roster(29, 8),
    "eh_s6q33_1", roster(35, 5),
    "eh_s6q39_1", roster(41, 6),
    "eh_s6q45_1", roster(47, 7),
    "eh_s6q50_1",
    "eh_s6q55_1", roster(c(56, 69, 75), 25)
  )
})
# capture_tables() is not defined in this file (presumably it records tables
# for the flagged variables -- confirm in the helper file). It is called
# before any of those variables is dropped.
capture_tables(indirect_PII)

# Subset of the flagged variables that is removed from the data outright.
remove_vars <- c(
  "eh_s6q3_1", "eh_s6q9_1", "eh_s6q15_1", "eh_s6q21_1",
  "eh_s6q27_1", "eh_s6q33_1", "eh_s6q39_1", "eh_s6q45_1"
)

# Keep every column whose name is not listed in remove_vars.
mydata <- mydata[!(names(mydata) %in% remove_vars)]

#'# Matching and crosstabulations: Run automated PII check 
# !!! Insufficient demographic data


#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
# Open-ended variables, listed question-within-row for each roster
# (e.g. q2_1, q4_1, q2_2, q4_2, ...), followed by q51 for rows 1-25.
open_ends <- local({
  # Names for `questions`, interleaved across roster rows 1..n_rows.
  roster <- function(questions, n_rows) {
    paste0(
      "eh_s6q", rep(questions, times = n_rows),
      "_", rep(seq_len(n_rows), each = length(questions))
    )
  }

  c(
    roster(c(2, 4), 25),
    roster(c(8, 10), 7),
    roster(c(14, 16), 8),
    roster(c(20, 22), 8),
    roster(c(26, 28), 8),
    roster(c(32, 34), 5),
    roster(c(38, 40), 6),
    roster(c(44, 46), 7),
    roster(51, 25)
  )
})

# report_open() is not defined in this file; it is handed the full list of
# open-ended variables before they are dropped.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv" and note which variables, and which row numbers,
# need to be deleted or redacted.

# Every open-ended variable is removed from the data before it is saved.
mydata <- mydata[!(names(mydata) %in% open_ends)]

#'# GPS data: Displace
# !!!No GPS data


#'# Save processed data in Stata and SPSS format

# Both exports share the stem "<filename>_PU" and differ only in extension.
haven::write_dta(data = mydata, path = paste0(filename, "_PU", ".dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU", ".sav"))

# Report title built from the section filename; it is picked up by the inline
# `r title_var` expression in the YAML block below.
title_var <- paste0("DOL-ILAB SDC - ", filename)
#'---
#'  title: `r title_var`
#'---
