#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from an empty workspace, including hidden (dot-prefixed) objects.
# The previous call passed `t` -- base R's transpose function, not a logical --
# as the (partially matched) `all.names` argument, so it did not reliably
# request hidden objects. Spell the argument out and use TRUE.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

# !!!Update filename: stem reused further down to name the saved output
# files and the report title
filename <- "Section_8"
# !!!Update helper functions file: the script that is sourced just below
functions_vers <- "functions_1.8.R"

#'# Setup data, functions and create dictionary for dataset review
source(file = functions_vers)

#'
#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 


#'# Direct PII: variables to be removed
# !!!No Direct PII

#'# Direct PII-team: Encode field team names
# !!!No Direct PII-team

#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!No small locations

#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 


# Top code high income to the 99.5 percentile.
#
# The same two steps used to be copy-pasted once per variable; they are now
# applied in a loop over the variable names (same variables, same order:
# all "_1" items first, then all "_2" items).
top_code_items <- c(8, 9, 12, 13, 14, 15, 16, 17, 20, 21, 23)
top_code_vars <- c(
  paste0("s8q", top_code_items, "_1"),
  paste0("s8q", top_code_items, "_2")
)

for (top_code_var in top_code_vars) {
  # Break point: 99.5th percentile of the observed values, rounded down,
  # computed after dropping NAs and the 999999 code that is also passed to
  # top_recode() as `missing`.
  observed <- na.exclude(mydata[[top_code_var]])
  percentile_99.5 <- floor(
    quantile(observed[observed != 999999], probs = c(0.995))
  )
  mydata <- top_recode(
    variable = top_code_var,
    break_point = percentile_99.5,
    missing = 999999
  )
}

#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variables handed to capture_tables() for review (helper not defined in this
# script -- presumably provided by the sourced functions file; confirm there)
indirect_PII <- c("s8q1", "s8q2_1", "s8q2_2")
capture_tables(indirect_PII)

# Recode those with very specific values. 

# Break points passed to ordinal_recode() for the occupation variables
break_ocup <- c(
  -999, -888, 1, 7, 9, 20, 21, 24, 27, 30, 31, 34, 36, 37, 39, 40, 43, 44
)

# Value labels for recoded values 1-18. Several values deliberately share the
# same label text. Two labels previously carried a trailing space
# ("Elementary occupations " and "Craft and related trades workers "), which
# made them distinct label strings from their untrimmed twins; they are
# trimmed here so each group has a single spelling.
labels_ocup <- c(
  "No Response" = 1,
  "Other: Specify" = 2,
  "Other" = 3,
  "Elementary occupations" = 4,
  "Other" = 5,
  "Service and sales workers" = 6,
  "Service and sales workers" = 7,
  "Elementary occupations" = 8,
  "Craft and related trades workers" = 9,
  "Craft and related trades workers" = 10,
  "Elementary occupations" = 11,
  "Elementary occupations" = 12,
  "Craft and related trades workers" = 13,
  "Service and sales workers" = 14,
  "Craft and related trades workers" = 15,
  "Elementary occupations" = 16,
  "Craft and related trades workers" = 17,
  "Craft and related trades workers" = 18
)
# Apply the same break points and value labels to both occupation variables
for (occupation_var in c("s8q2_1", "s8q2_2")) {
  mydata <- ordinal_recode(
    variable = occupation_var,
    break_points = break_ocup,
    missing = 999999,
    value_labels = labels_ocup
  )
}

#'# Matching and crosstabulations: Run automated PII check 
# !!! Insufficient demographic data

#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
# Name stems that repeat with the suffixes _1, _2 and _3, in listing order
open_end_stems <- c(
  "s8q2_other", "s8q2whynoresponse",
  "s8q3whynoresponse", "s8q4whynoresponse",
  "s8q5whynoresponse", "s8q5awhynoresponse",
  "s8q6whynoresponse",
  "s8q7_other", "s8q7whynoresponse",
  "s8q8whynoresponse", "s8q9whynoresponse",
  "s8q10_other", "s8q10whynoresponse",
  "s8q11whynoresponse", "s8q12whynoresponse",
  "s8q13whynoresponse", "s8q14whynoresponse",
  "s8q15whynoresponse", "s8q16whynoresponse",
  "s8q17whynoresponse",
  "s8q18",
  "s8q19_other", "s8q19whynoresponse",
  "s8q20whynoresponse", "s8q21whynoresponse",
  "s8q22whynoresponse", "s8q23whynoresponse"
)

# One unsuffixed variable first, then every stem with _1, then _2, then _3
open_ends <- c(
  "s8q1whynoresponse",
  unlist(lapply(
    c("_1", "_2", "_3"),
    function(suffix) paste0(open_end_stems, suffix)
  ))
)

report_open(list_open_ends = open_ends)


# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number

# Replace the listed rows of s8q2_other_1 with a broad occupation group.
# Each list entry maps the replacement text to the row numbers (positions in
# mydata) that receive it; entries are applied in the order written.
#
# Changes relative to the former one-assignment-per-row listing, none of which
# alter the resulting data:
#   * row 1551 was listed twice under "Service and sales workers"; it now
#     appears once.
#   * NOTE(review): rows 1188 and 1704 were first set to
#     "Skilled agricultural, forestry and fishery workers" and then immediately
#     overwritten with "Elementary occupations". Only the value that survived
#     ("Elementary occupations") is assigned here. Confirm against
#     verbatims.csv whether one of the two groups was meant for other rows.
occupation_rows_1 <- list(
  "Managers" = c(
    215, 281, 322, 460, 480, 492, 497, 520, 541, 546,
    553, 557, 573, 593, 627, 704, 735, 859, 869, 937,
    944, 966, 1013, 1019, 1021, 1022, 1152, 1171, 1228, 1408,
    1414, 1553, 1638, 1649, 1888, 1890, 1926, 1928, 1935, 1958,
    2015, 2028, 2066, 2145, 2171, 2177, 2226, 2265, 2279, 2281,
    577, 623, 1696, 2031, 2278
  ),
  "Craft and related trades workers" = c(
    800, 1150, 1293, 1296, 1302, 1543, 1565, 1599, 1604, 1606,
    1610, 1697, 1699, 1707, 1711, 1770, 1783, 1785, 1789, 1819,
    1166, 1862, 2201
  ),
  "Service and sales workers" = c(
    176, 192, 519, 526, 604, 645, 751, 752, 925, 928,
    929, 1125, 1127, 1157, 1199, 1200, 1206, 1278, 1309, 1315,
    1347, 1356, 1455, 1541, 1551, 1571, 1583, 1655, 1661, 1708,
    1722, 1733, 1804, 1807, 1875, 1880, 1920, 1925, 1931, 2022,
    2024, 2058, 2062, 2143, 2167, 2190, 2229, 2238, 2244, 2270,
    1527, 1530, 1539, 1540, 1566, 1587, 1588, 1595, 1626, 1629,
    1631, 1846, 1885, 1917, 2063, 2072, 2105, 2166, 2182
  ),
  "Plant and machine operators and assemblers" = c(1765, 1780, 1946, 2006),
  "Elementary occupations" = c(1188, 1704)
)

for (occupation_label in names(occupation_rows_1)) {
  mydata$s8q2_other_1[occupation_rows_1[[occupation_label]]] <- occupation_label
}

# Rows of s8q2whynoresponse_1 replaced with a broad occupation group
mydata$s8q2whynoresponse_1[c(1536, 1544)] <- "Service and sales workers"


# Rows of s8q2_other_2 replaced with a broad occupation group: each entry maps
# the replacement text to the row numbers (positions in mydata) receiving it
occupation_rows_2 <- list(
  "Service and sales workers" = c(
    101, 520, 1166, 1471, 1543, 1560, 1567, 1592, 1617, 1637,
    1638, 1921, 2012, 2145, 2161, 2167, 2174, 2165, 2087
  ),
  "Managers" = c(577, 1158, 1324, 2171),
  "Craft and related trades workers" = c(1231, 1295, 1336),
  "Elementary occupations" = c(1391, 1866),
  "Skilled agricultural, forestry and fishery workers" = c(1931),
  "Plant and machine operators and assemblers" = c(1722)
)

for (occupation_label in names(occupation_rows_2)) {
  mydata$s8q2_other_2[occupation_rows_2[[occupation_label]]] <- occupation_label
}

# Row-level redactions of the remaining open-ended answers.
# Outer names are the variables; within each variable the element name is the
# row number (position in mydata) and the element value is the text that
# replaces the answer in that row.
redacted_text <- list(
  s8q7_other_1 = c(
    "541" = "[Wholesale and retail trade]",
    "798" = "Income of spouse from [Wholesale and retail trade]",
    "929" = "Earnings from [Wholesale and retail trade]",
    "1048" = "Earnings from [Wholesale and retail trade]",
    "1195" = "From own child [name redacted]",
    "1233" = "[Service and sales workers]",
    "1586" = "[Technicians and associate professionals]",
    "1862" = "[language]",
    "2036" = "[repair of motor vehicles and motorcycles]"
  ),
  s8q10_other_1 = c(
    "541" = "Income from [Agriculture, forestry and fishing]",
    "929" = "Sales from the [Transportation and storage]",
    "1195" = "[Wholesale and retail trade]",
    "1233" = "Loans from [Service and sales workers]"
  ),
  s8q18_1 = c(
    "541" = "Charcoal [amount redacted] per month",
    "929" = "[amount redacted]",
    "1195" = "[amount redacted]"
  ),
  s8q19_other_1 = c(
    "432" = "Everyday sales from [Wholesale and retail trade]",
    "541" = "Sales from the [Wholesale and retail trade]",
    "627" = "Loan from friend, personal and [Wholesale and retail trade]",
    "859" = "[Wholesale and retail trade] sales",
    "926" = "Daily sales from [Wholesale and retail trade]."
  ),
  s8q19whynoresponse_1 = c(
    "1372" = "[language]"
  ),
  s8q21whynoresponse_1 = c(
    "257" = "Operating for [number redacted] days only, [amount redacted]",
    "1389" = "[amount redacted]"
  ),
  s8q22whynoresponse_1 = c(
    "322" = "The store started for 1 week only"
  ),
  s8q23whynoresponse_1 = c(
    "865" = "Breakeven only, but sometimes she can save [amount redacted] but not always"
  ),
  s8q2whynoresponse_2 = c(
    "859" = "[Manager]",
    "1541" = "[Transportation and storage]"
  ),
  s8q7_other_2 = c(
    "1560" = "[language]"
  ),
  s8q10_other_2 = c(
    "1543" = "Workmate - [Service and sales worker]"
  )
)

for (redacted_var in names(redacted_text)) {
  replacements <- redacted_text[[redacted_var]]
  target_rows <- as.integer(names(replacements))
  mydata[[redacted_var]][target_rows] <- unname(replacements)
}

#'# GPS data: Displace
# !!!No GPS data


#'# Save processed data in Stata and SPSS format

# Both output files share the stem "<filename>_PU"; write the processed data
# once as a Stata .dta file and once as an SPSS .sav file
output_stem <- paste0(filename, "_PU")
haven::write_dta(mydata, paste0(output_stem, ".dta"))
haven::write_sav(mydata, paste0(output_stem, ".sav"))

# Report title, picked up by the inline `r title_var` in the YAML lines below
title_var <- paste("DOL-ILAB SDC -", filename)
#'---
#'  title: `r title_var`
#'---
