# Start from a clean workspace, including hidden (dot-prefixed) objects.
# The original passed `t` (the base transpose function) where the logical
# TRUE was intended, and relied on partial matching of `all` to `all.names`;
# both are spelled out explicitly here.
rm(list = ls(all.names = TRUE))
filename <- "A_t2_Child_Availability" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
# Run the helper functions file named above. A relative path is resolved
# against the current working directory.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
#!!!Save flagged dictionary in .csv format, add "DatasetReview" to name and continue processing data with subset of flagged variables
# !!!Include any Direct PII variables
#!!!No Direct PII
# !!!No Direct PII-team
# !!!No small locations
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Remove the day and month date-of-birth columns, one at a time.
# NOTE(review): `mydata` is not created in the visible part of this script;
# presumably it is loaded by the sourced helper file -- TODO confirm.
# A column is kept when its name has no match in `dropvars`.
dropvars <- "t2_q2_dob_dd"
mydata <- mydata[is.na(match(names(mydata), dropvars))]
dropvars <- "t2_q2_dob_mm"
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!!Include relevant variables in the list below (Indirect PII - Categorical,
# and Ordinal if not processed yet).
indirect_PII <- c(
  "t2_a1_child_at_home", "t2_a2_child_not_home",
  "t2_q1_age", "t2_q2_dob_yy",
  "bl_t1_t2", "bl_t1_t2_ad1"
)
# `capture_tables` is not defined in this script; presumably it comes from the
# sourced helper functions file -- TODO confirm what it reports.
capture_tables(indirect_PII)
# Recode those with very specific values.
# !!!Insufficient demographic information
#!!!No Open-ends
#!!!NO GPS variables
# Write the processed data next to the script as "<filename>_PU.dta" (Stata)
# and "<filename>_PU.sav" (SPSS).
haven::write_dta(
  data = mydata,
  path = sprintf("%s_PU.dta", filename)
)
haven::write_sav(
  data = mydata,
  path = sprintf("%s_PU.sav", filename)
)
# Build the report title from the dataset file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)