# Start from an empty workspace: remove every object, including hidden
# (dot-prefixed) ones that a plain `ls()` does not list.
# The flag has to be the logical `TRUE`; a bare `t` is base R's transpose
# function, not a logical value, so it cannot switch `all.names` on.
rm(list = ls(all.names = TRUE))

Set up filenames

# Base name of the dataset under review; it is reused further down to build
# the output file names and the report title.
# !!! Update filename for each dataset.
filename <- "Section_C"

# Name of the helper-functions script that gets sourced below.
# !!! Update helper functions file when its version changes.
functions_vers <- "functions_1.8.R"

Set up data and functions, and create the dictionary for dataset review

# Run the helper script whose file name is stored in `functions_vers`.
# NOTE(review): `mydata` and `report_open()` are used later in this script but
# never defined in it, so they presumably come from this helper file — confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!No Direct PII 

Direct PII-team: Encode field team names

# !!!No Direct PII - team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!No Small locations

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# !!!No Indirect PII - Ordinal

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!No Indirect PII - Categorical

# Recode those with very specific values. 

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here:
# Column names of the free-text (open-ended) variables to review; add one
# entry per variable.
open_ends <- c(
  "c_consent_why"
)

# Hand the open-end variable names to the helper `report_open()`.
# NOTE(review): `report_open()` is not defined in this script; presumably it
# produces the "verbatims.csv" file mentioned below — confirm.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# !!!Redacted, as it contains a lot of sensitive information and some
# information is in Tagalog.
# Lookup of redacted responses: each name is a row number of `mydata` and each
# value is the text that replaces that row's `c_consent_why` entry. Names,
# places and non-English text appear as bracketed placeholders.
# NOTE(review): the row numbers are positional, so they only match the row
# order of the dataset as it was when reviewed — confirm after any re-sort or
# re-export of the input data.
mydata$c_consent_why <- local({
  redactions <- c(
    "2" = "She's on vacation in [location]",
    "35" = "I was not able to interview  [name] because she is working.",
    "91" = "[name] is on a vacation in [location]",
    "126" = "She is not able to be interview because she is with her sister at [location]",
    "236" = "[name] is on vacation.",
    "285" = "Currently in [location] Will come back before the start of classes.",
    "286" = "Currently in [location]",
    "290" = "She is not available because she is in vacation at [location], far away from their house.",
    "297" = "He is now in [location]",
    "302" = "On a Vacation in [location].",
    "303" = "He is not available. He is working at [location] as construction worker.",
    "347" = "Attending class in [location].",
    "384" = "Not available working in [location]",
    "409" = "He is not available because he is on vacation at [location]",
    "448" = "Currently in [location] with her Aunt.",
    "451" = "Currently in [location]",
    "454" = "On vacation in her Aunt's house in [location].",
    "460" = "Currently in [location] with her sibling. Will return when enrollment starts.",
    "532" = "His not available because he is in vacation at [location]",
    "569" = "The child is not available because she working at [location] as waitress.",
    "572" = "Not available because he is working in [location] as a butcher.",
    "652" = "The child is not available, he is working at [location]",
    "665" = "For vacation with her grandmother and auntie in [location]",
    "688" = "Working in [location], he is going home every week ends",
    "988" = "He's not around at time of visit. He's in [location] and do not know when he will come back.",
    "1020" = "Respondent was in [location], he works as bakery helper",
    "1057" = "He is not around at time of visit. He went to [location] for a summer job for their school needs this coming school days. He will be back next month.",
    "1097" = "Respo was on vacation at [location]",
    "1109" = "The interview for this child was conducted by another field officer ([name]).",
    "1149" = "Respondent is busy working somewhere in [small location], in a restaurant",
    "1332" = "She is not available at the time of 2 visits. She went to [small location] for a vacation because she has rheumatic heart disease. It is required by her physician to rest.",
    "1542" = "He is in [small location] since last week",
    "1608" = "[Tagalog]",
    "2078" = "Working in [location]. Last weekend he is her but he is coming back on march 17",
    "2299" = "The respondent only comes home every three months because she is working at [location]",
    "2318" = "The child is not available, because he is working at [location]. The child just go home as he want, the guardian of the child dont know when he arrive at home.",
    "2381" = "She's with her aunt in [location] vacation for one week",
    "2421" = "[name] is mentally ill.",
    "2495" = "He is not around at time of visit. He went out for fishing and will be arrived this afternoon and do not know the time..... he interviewed by [name] last march 10.",
    "2519" = "Shes not available at the time of interview, she was working at [small location] as housemaid to help family income. Shes avilable for interview by May 2016.",
    "2722" = "Shes working  now in [location]. She just started yesterday",
    "2724" = "Shes now working in [location]. she Is going home once a month.",
    "2859" = "Not available working in [location]",
    "2869" = "Just travelled to [location] for vacation",
    "2899" = "She is in [location] for vacation. She will be 18 tomorrow and will be back by May.",
    "2932" = "Working in [location] will be back on may",
    "2952" = "Not part of the Hh [name] in separate house",
    "3113" = "The child is in [location] for vacation",
    "3124" = "Child is in [location]. Will only be back in May.",
    "3132" = "Having her vacation in [location]",
    "3155" = "Shes in [location] having her vacation",
    "3213" = "[location]",
    "3381" = "[name] is sick (flu)",
    "3470" = "She is not staying in the house anymore because she lives in the house of [name] her aunt to sustain her studies",
    "3500" = "The child is on vacation in [location] province at the time of visit and will be back by april",
    "3513" = "The child respondent is leaving for [location] for a job",
    "3520" = "[location] refused to be interview",
    "3754" = "Working in [location]",
    "3803" = "In [location] for vacation. No definite time of arrival.",
    "3806" = "He is in [location], working. No definite time of arrival",
    "3944" = "Not available. Busy doing other activities in [location]",
    "3953" = "Working in [location]",
    "3958" = "She is in [location] for vacation.",
    "4122" = "[name] is not part of the household roster. She now lives in [location]",
    "4158" = "[name] is currently on vacation in [location] and will be back on april.",
    "4159" = "[name] is currently on vacation in [location] and will be back on april.",
    "4197" = "not available , her mom said that she worked in [location] and she will be back home at 7pm",
    "4206" = "In [location] for vacation",
    "4382" = "Child refuse he was just from the hospital because of [name] still not feeling well.",
    "4511" = "In [location] and will be back after 2 months"
  )

  # Overwrite all flagged rows in one vectorised assignment; the lookup names
  # carry the row positions.
  responses <- mydata$c_consent_why
  responses[as.integer(names(redactions))] <- unname(redactions)
  responses
})

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed dataset next to the script, once as a Stata file and
# once as an SPSS file. Both names are `filename` plus the "_PU" suffix
# (presumably "public use" — confirm).
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Report title, built from the dataset name so it follows `filename`.
title_var <- paste0("DOL-ILAB SDC - ", filename)