# Start from a clean workspace: remove every object in the current
# environment. `all.names = TRUE` is spelled out in full so that hidden
# (dot-prefixed) objects are removed too; the lowercase `t` is the transpose
# function, not a logical, and must not be used here.
rm(list = ls(all.names = TRUE))

Set up filenames

# Base name of the dataset being processed; reused below to build the output
# file names and the report title.
# !!!Update filename
filename <- "Section_16"

# Helper-functions script that is sourced in the next step.
# !!!Update helper functions file
functions_vers <- "functions_1.8.R"

Set up data and functions, and create dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): `mydata` and `encode_location()` are used later but never
# defined in this script -- presumably this sourced file provides them; confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (population <100,000)
# Large Location: Location (population >100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# Columns flagged as direct PII; they are dropped from the dataset outright.
# !!! Removed, as it contains sensitive information and there is another
# variable that contains this information at a more aggregated level.
direct_pii <- c("eh_s16q4", "eh_s16q6", "eh_s16q32")

# Keep only the columns whose names have no match in `direct_pii`.
mydata <- mydata[is.na(match(names(mydata), direct_pii))]

Direct PII-team: Encode field team names

# !!!No Direct PII-team

Small locations: Encode locations with pop <100,000 using random large numbers

# Municipality-of-residence variables whose values are replaced by codes.
locvars <- c("eh_s16q13", "eh_s16q27")

# `encode_location()` is a project helper that is not defined in this script.
# It is not handed `mydata` explicitly, so it presumably reads the global
# object -- TODO confirm against the helper functions file.
mydata <- encode_location(missing = 999999, variables = locvars)
## [1] "Frequency table before encoding"
## eh_s16q13. Q1059: ${m_s12q6}'s municipality of residence:  Munisipyo ng tirahan ni ${m_s1
##                      Malinao                       Abucay                    Mariveles                    Calabanga 
##                            1                            4                            8                            1 
##              Jose Panganiban                         Labo                      Magarao                    Naga City 
##                            1                            1                            2                            1 
##                      Pasacao     General Emilio Aguinaldo                        Jones                    Pagsanjan 
##                            1                            5                            1                            2 
##                         Anda                         Bani                     Bugallon                   Candelaria 
##                            1                            2                            1                            1 
##                     Sampaloc                        Tanay Other municipality - specify                         <NA> 
##                            1                            2                           18                         2234 
## [1] "Frequency table after encoding"
## eh_s16q13. Q1059: ${m_s12q6}'s municipality of residence:  Munisipyo ng tirahan ni ${m_s1
##  841  842  843  844  845  846  847  848  849  850  851  852  853  854  855  856  857  858  859 <NA> 
##    1    1    2    1    5    1    1    1    2    1    1    1    1    2    2    1   18    4    8 2234 
## [1] "Frequency table before encoding"
## eh_s16q27. Q1075: ${m_s12q17}'s municipality of residence:  Munisipyo ng tirahan ni ${m_s
##                      Malinao                       Abucay                    Mariveles                    Calabanga 
##                            1                            2                            4                            2 
##                         Labo                    Naga City                      Pasacao     General Emilio Aguinaldo 
##                            1                            2                            2                            2 
##                        Jones                    Pagsanjan                         Anda                     Sampaloc 
##                            4                            1                            2                            1 
##                    Jala-Jala                        Tanay Other municipality - specify                         <NA> 
##                            1                            1                           17                         2245 
## [1] "Frequency table after encoding"
## eh_s16q27. Q1075: ${m_s12q17}'s municipality of residence:  Munisipyo ng tirahan ni ${m_s
##  235  236  237  238  239  240  241  242  243  244  245  246  247  248  249 <NA> 
##    4    1    2    1    2    1    2    4    2    2    1   17    1    1    2 2245
# Small-area variables that are dropped rather than encoded.
# !!! Removed, as it contains sensitive location information
small_area <- c("eh_s16q14", "eh_s16q28")

# Keep only the columns whose names have no match in `small_area`.
mydata <- mydata[is.na(match(names(mydata), small_area))]

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# !!!No Indirect PII - Ordinal

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!No Indirect PII - Categorical

Matching and crosstabulations: Run automated PII check

# !!! Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!!No Open-ends

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Export the processed dataset twice: once as a Stata file (.dta) and once as
# an SPSS file (.sav). Both are named after `filename` with a "_PU" suffix.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the dataset name so it follows `filename`.
title_var <- paste0(
  "DOL-ILAB SDC - ",
  filename
)