# Start from a clean workspace so objects left over from a previous section
# cannot leak into this run. `all.names = TRUE` also removes dot-prefixed
# objects. The argument must be the logical TRUE: a bare `t` is the base
# transpose function, not a logical value.
rm(list = ls(all.names = TRUE))
filename <- "Section_2" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file
# Load the helper functions file named above.
# NOTE(review): capture_tables(), ordinal_recode(), report_open() and `mydata`
# are presumably provided by this file -- confirm.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!No Direct PII
# !!!No Direct PII-team
# !!!No Small locations
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Indirect-PII candidates in this section: questions c_s2q3 through c_s2q6.
indirect_PII <- paste0("c_s2q", 3:6)
# NOTE(review): capture_tables() is defined outside this file; presumably it
# reports frequency tables for the listed variables -- confirm.
capture_tables(indirect_PII)
# Recode those with very specific values.
# Collapse c_s2q4 (earliest grade offered) into broader bands. Per the
# "recoded" table printed below, the break points form left-closed intervals
# [a, b), and everything from 17 upward goes into the last band.
# NOTE(review): code 96 ("Other: Specify") also falls into the last band and
# is therefore labelled "1st Year Vocational training or associates degree or
# more" -- confirm this is intended.
break_edu <- c(-999, 0, 1, 2, 10, 17)
labels_edu <- stats::setNames(
  as.numeric(1:6),
  c(
    "No Response",
    "Pre-Kinder",
    "Kinder",
    "1st-6th Grade",
    "7th Grade-High School Graduate",
    "1st Year Vocational training or associates degree or more"
  )
)
mydata <- ordinal_recode(
  variable = "c_s2q4",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## c_s2q4. What is the earliest grade a person like yourself can study in this school? Ano
## Pre-Kinder Kinder
## 442 1764
## 1st Grade 2nd Grade
## 64 2
## 4th Grade 5th Grade
## 3 3
## 6th Grade 7th Grade
## 37 1883
## 8th Grade 9th Grade
## 4 2
## 10th Grade 11th Grade
## 3 2
## 12th Grade High School Graduate
## 11 14
## 1st Year Vocational training or associates degree Vocational training or associates degree graduate
## 6 1
## 1st year of college Other: Specify
## 58 4
## <NA>
## 3
## recoded
## [-999,0) [0,1) [1,2) [2,10) [10,17) [17,1e+06)
## 0 0 442 0 0 0 0
## 1 0 0 1764 0 0 0
## 3 0 0 0 64 0 0
## 4 0 0 0 2 0 0
## 6 0 0 0 3 0 0
## 7 0 0 0 3 0 0
## 9 0 0 0 37 0 0
## 10 0 0 0 0 1883 0
## 11 0 0 0 0 4 0
## 12 0 0 0 0 2 0
## 13 0 0 0 0 3 0
## 14 0 0 0 0 2 0
## 15 0 0 0 0 11 0
## 16 0 0 0 0 14 0
## 17 0 0 0 0 0 6
## 19 0 0 0 0 0 1
## 20 0 0 0 0 0 58
## 96 0 0 0 0 0 4
## [1] "Frequency table after encoding"
## c_s2q4. What is the earliest grade a person like yourself can study in this school? Ano
## Pre-Kinder
## 442
## Kinder
## 1764
## 1st-6th Grade
## 109
## 7th Grade-High School Graduate
## 1919
## 1st Year Vocational training or associates degree or more
## 69
## <NA>
## 3
## [1] "Inspect value labels and relabel as necessary"
## No Response
## 1
## Pre-Kinder
## 2
## Kinder
## 3
## 1st-6th Grade
## 4
## 7th Grade-High School Graduate
## 5
## 1st Year Vocational training or associates degree or more
## 6
# Collapse c_s2q5 (last grade offered) into broader bands. Per the "recoded"
# table printed below, the break points form left-closed intervals [a, b),
# and everything from 17 upward goes into the last band.
# NOTE(review): code 96 ("Other: Specify") also falls into the last band and
# is therefore labelled "1st Year Vocational training or associates degree or
# more" -- confirm this is intended.
break_edu <- c(-999, 0, 10, 14, 17)
labels_edu <- stats::setNames(
  as.numeric(1:5),
  c(
    "No Response",
    "Pre-Kinder to 6th Grade",
    "7th-10th Grade",
    "11th Grade-High School Graduate",
    "1st Year Vocational training or associates degree or more"
  )
)
mydata <- ordinal_recode(
  variable = "c_s2q5",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## c_s2q5. What is the last grade a person like yourself can study in this school? Ano ang
## Pre-Kinder Kinder
## 5 8
## 3rd Grade 4th Grade
## 1 13
## 5th Grade 6th Grade
## 4 2075
## 7th Grade 8th Grade
## 19 2
## 9th Grade 10th Grade
## 17 1143
## 11th Grade 12th Grade
## 19 861
## High School Graduate 1st Year Vocational training or associates degree
## 32 2
## 2nd Year Vocational training or associates degree Vocational training or associates degree graduate
## 3 3
## 2nd year of college 4th year of college or higher
## 1 63
## College graduate Education beyond college
## 11 13
## Other: Specify <NA>
## 7 4
## recoded
## [-999,0) [0,10) [10,14) [14,17) [17,1e+06)
## 0 0 5 0 0 0
## 1 0 8 0 0 0
## 5 0 1 0 0 0
## 6 0 13 0 0 0
## 7 0 4 0 0 0
## 9 0 2075 0 0 0
## 10 0 0 19 0 0
## 11 0 0 2 0 0
## 12 0 0 17 0 0
## 13 0 0 1143 0 0
## 14 0 0 0 19 0
## 15 0 0 0 861 0
## 16 0 0 0 32 0
## 17 0 0 0 0 2
## 18 0 0 0 0 3
## 19 0 0 0 0 3
## 21 0 0 0 0 1
## 23 0 0 0 0 63
## 24 0 0 0 0 11
## 25 0 0 0 0 13
## 96 0 0 0 0 7
## [1] "Frequency table after encoding"
## c_s2q5. What is the last grade a person like yourself can study in this school? Ano ang
## Pre-Kinder to 6th Grade
## 2106
## 7th-10th Grade
## 1181
## 11th Grade-High School Graduate
## 912
## 1st Year Vocational training or associates degree or more
## 103
## <NA>
## 4
## [1] "Inspect value labels and relabel as necessary"
## No Response
## 1
## Pre-Kinder to 6th Grade
## 2
## 7th-10th Grade
## 3
## 11th Grade-High School Graduate
## 4
## 1st Year Vocational training or associates degree or more
## 5
# !Insufficient information
# !!! Identify open-end variables here:
# Listed one question per row; the order of the original list is preserved.
open_ends <- c(
  "c_s2q1_other", "c_s2q1noresponse",
  "c_s2q3_other", "c_s2q3noresponse",
  "c_s2q4_other", "c_s2q4noresponse",
  "c_s2q5_other", "c_s2q5noresponse",
  "c_s2q6noresponse",
  "c_s2q7_other", "c_s2q7noresponse",
  "c_s2q8noresponse",
  "c_s2q9noresponse"
)
# NOTE(review): report_open() is defined outside this file; presumably it
# produces the "verbatims.csv" reviewed in the next step -- confirm.
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and
# their row number. The indices below are row positions in mydata, grouped by
# variable; each listed row is overwritten with the replacement text.
mydata$c_s2q1_other[c(412, 1101, 1170, 1338, 2005, 2206, 3136, 3976)] <- "Other"
mydata$c_s2q3noresponse[1910] <-
  "[name] does not know if his school is private or public school."
mydata$c_s2q4_other[c(14, 1987, 3136, 3976)] <- "Other"
mydata$c_s2q5_other[c(14, 1023, 1090, 1987, 3136, 3206, 3976)] <- "Other"
mydata$c_s2q8noresponse[1239] <- "[Tagalog]"
# !!!No GPS data
# Write the processed data in Stata (.dta) and SPSS (.sav) formats, using the
# section file name with a "_PU" suffix.
haven::write_dta(mydata, sprintf("%s_PU.dta", filename))
haven::write_sav(mydata, sprintf("%s_PU.sav", filename))
# Add report title dynamically
title_var <- sprintf("DOL-ILAB SDC - %s", filename)