# Start from an empty workspace so objects left over from earlier runs cannot
# influence this script. `all.names = TRUE` also removes dot-prefixed objects.
# (The logical must be spelled TRUE: a bare `t` resolves to base R's transpose
# function, not to a logical value.)
rm(list = ls(all.names = TRUE))

# Base name of the dataset being processed; reused further down to build the
# output file names and the report title.
filename <- "B_t2_schooling_relabelled" # !!!Update filename

# Helper script that is sourced below.
# NOTE(review): `mydata`, `labelled()` and `capture_tables()` are used later but
# never defined in this script -- presumably this helper file provides them;
# confirm.
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!Include any Direct PII variables
# Columns listed in `dropvars` are removed from `mydata`; every other column is
# kept in its original order. Names that do not occur in `mydata` are ignored.
dropvars <- c("t2_b3_b_schl_name_17_18")
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!!No Direct PII-team
# !!!No small locations
# Recode school schedules into groups
# Distribution of the raw start times before recoding. In the captured output
# below the first count (227) has no header: it belongs to the blank value.
table(mydata$t2_b6_hr_schl_attend_from)
##
## 1:00:00 PM 10:00:00 AM 10:15:00 AM 12:00:00 PM 12:30:00 PM 3:30:00 PM
## 227 1 38 1 39 32 1
## 7:00:00 AM 7:30:00 AM 8:00:00 AM 8:18:00 AM 8:30:00 AM 8:35:00 AM 8:45:00 AM
## 30 18 31 1 18 1 2
## 9:00:00 AM 9:01:00 AM 9:05:00 AM 9:10:00 AM 9:13:00 AM 9:15:00 AM 9:20:00 AM
## 777 1 1 5 1 18 7
## 9:25:00 AM 9:29:00 AM 9:30:00 AM 9:32:00 AM 9:35:00 AM 9:40:00 AM 9:45:00 AM
## 3 1 1150 1 26 19 9

# Attach value labels for every raw start time, then collapse each raw time
# into its time band. Blank values are not in the lookup and stay untouched.
mydata$t2_b6_hr_schl_attend_from <- local({
  # Lookup: raw start time -> time band. The entry order is also the order of
  # the value labels attached below, so do not re-sort it.
  band_for <- c(
    "1:00:00 PM"  = "12:01:00-4:00:00 PM",
    "10:00:00 AM" = "9:01:00-10:00:00 AM",
    "10:15:00 AM" = "10:01:00-11:00:00 AM",
    "12:00:00 PM" = "11:01:00 AM-12:00:00 PM",
    "12:30:00 PM" = "12:01:00-4:00:00 PM",
    "3:30:00 PM"  = "12:01:00-4:00:00 PM",
    "7:00:00 AM"  = "6:01:00-7:00:00 AM",
    "7:30:00 AM"  = "7:01:00-8:00:00 AM",
    "8:00:00 AM"  = "7:01:00-8:00:00 AM",
    "8:18:00 AM"  = "8:01:00-9:00:00 AM",
    "8:30:00 AM"  = "8:01:00-9:00:00 AM",
    "8:35:00 AM"  = "8:01:00-9:00:00 AM",
    "8:45:00 AM"  = "8:01:00-9:00:00 AM",
    "9:00:00 AM"  = "8:01:00-9:00:00 AM",
    "9:01:00 AM"  = "9:01:00-10:00:00 AM",
    "9:05:00 AM"  = "9:01:00-10:00:00 AM",
    "9:10:00 AM"  = "9:01:00-10:00:00 AM",
    "9:13:00 AM"  = "9:01:00-10:00:00 AM",
    "9:15:00 AM"  = "9:01:00-10:00:00 AM",
    "9:20:00 AM"  = "9:01:00-10:00:00 AM",
    "9:25:00 AM"  = "9:01:00-10:00:00 AM",
    "9:29:00 AM"  = "9:01:00-10:00:00 AM",
    "9:30:00 AM"  = "9:01:00-10:00:00 AM",
    "9:32:00 AM"  = "9:01:00-10:00:00 AM",
    "9:35:00 AM"  = "9:01:00-10:00:00 AM",
    "9:40:00 AM"  = "9:01:00-10:00:00 AM",
    "9:45:00 AM"  = "9:01:00-10:00:00 AM"
  )
  raw_times <- names(band_for)

  # Each raw time is labelled with itself (label text == value).
  times <- labelled(
    mydata$t2_b6_hr_schl_attend_from,
    setNames(raw_times, raw_times)
  )

  # No band string equals any raw time, so the replacements are independent
  # of one another and the iteration order does not affect the result.
  for (raw in raw_times) {
    times[times == raw] <- band_for[[raw]]
  }
  times
})

# Distribution after recoding (the first count, 227, is again the blank value).
table(mydata$t2_b6_hr_schl_attend_from)
##
## 10:01:00-11:00:00 AM 11:01:00 AM-12:00:00 PM
## 227 1 39
## 12:01:00-4:00:00 PM 6:01:00-7:00:00 AM 7:01:00-8:00:00 AM
## 34 30 49
## 8:01:00-9:00:00 AM 9:01:00-10:00:00 AM
## 799 1280
# Recode school end times into groups
# Distribution of the raw end times before recoding. In the captured output
# below the first count (227) has no header: it belongs to the blank value.
table(mydata$t2_b6_hr_schl_attend_to)
##
## 1:00:00 PM 1:30:00 PM 1:40:00 PM 10:40:00 AM 11:00:00 AM 12:00:00 PM
## 227 13 16 8 1 1 12
## 12:30:00 PM 2:00:00 PM 2:10:00 PM 2:25:00 PM 2:30:00 PM 2:40:00 PM 3:00:00 PM
## 25 38 1 1 7 2 271
## 3:10:00 PM 3:15:00 PM 3:16:00 PM 3:20:00 PM 3:24:00 PM 3:25:00 PM 3:27:00 PM
## 14 10 1 6 1 2 1
## 3:30:00 PM 3:35:00 PM 3:40:00 PM 3:45:00 PM 3:50:00 PM 4:00:00 PM 4:10:00 PM
## 819 14 425 80 1 358 1
## 4:15:00 PM 4:16:00 PM 4:20:00 PM 4:25:00 PM 4:30:00 PM 4:38:00 PM 4:40:00 PM
## 1 1 2 1 19 1 6
## 4:52:00 PM 5:00:00 PM 5:30:00 PM 6:00:00 PM 9:30:00 AM
## 1 25 36 9 1

# Attach value labels for every raw end time, then collapse each raw time into
# its time band. Blank values are not in the lookup and stay untouched.
mydata$t2_b6_hr_schl_attend_to <- local({
  # Lookup: raw end time -> time band, in chronological order. Every raw time
  # appears exactly once here; the former statement-per-value version recoded
  # "3:10:00 PM" twice. The entry order is also the order of the value labels
  # attached below.
  band_for <- c(
    "9:30:00 AM"  = "9:01:00 AM-12:00:00 PM",
    "10:40:00 AM" = "9:01:00 AM-12:00:00 PM",
    "11:00:00 AM" = "9:01:00 AM-12:00:00 PM",
    "12:00:00 PM" = "9:01:00 AM-12:00:00 PM",
    "12:30:00 PM" = "12:01:00-1:00:00 PM",
    "1:00:00 PM"  = "12:01:00-1:00:00 PM",
    "1:30:00 PM"  = "1:01:00-2:00:00 PM",
    "1:40:00 PM"  = "1:01:00-2:00:00 PM",
    "2:00:00 PM"  = "1:01:00-2:00:00 PM",
    "2:10:00 PM"  = "2:01:00-3:00:00 PM",
    "2:25:00 PM"  = "2:01:00-3:00:00 PM",
    "2:30:00 PM"  = "2:01:00-3:00:00 PM",
    "2:40:00 PM"  = "2:01:00-3:00:00 PM",
    "3:00:00 PM"  = "2:01:00-3:00:00 PM",
    "3:10:00 PM"  = "3:01:00-4:00:00 PM",
    "3:15:00 PM"  = "3:01:00-4:00:00 PM",
    "3:16:00 PM"  = "3:01:00-4:00:00 PM",
    "3:20:00 PM"  = "3:01:00-4:00:00 PM",
    "3:24:00 PM"  = "3:01:00-4:00:00 PM",
    "3:25:00 PM"  = "3:01:00-4:00:00 PM",
    "3:27:00 PM"  = "3:01:00-4:00:00 PM",
    "3:30:00 PM"  = "3:01:00-4:00:00 PM",
    "3:35:00 PM"  = "3:01:00-4:00:00 PM",
    "3:40:00 PM"  = "3:01:00-4:00:00 PM",
    "3:45:00 PM"  = "3:01:00-4:00:00 PM",
    "3:50:00 PM"  = "3:01:00-4:00:00 PM",
    "4:00:00 PM"  = "3:01:00-4:00:00 PM",
    "4:10:00 PM"  = "4:01:00-5:00:00 PM",
    "4:15:00 PM"  = "4:01:00-5:00:00 PM",
    "4:16:00 PM"  = "4:01:00-5:00:00 PM",
    "4:20:00 PM"  = "4:01:00-5:00:00 PM",
    "4:25:00 PM"  = "4:01:00-5:00:00 PM",
    "4:30:00 PM"  = "4:01:00-5:00:00 PM",
    "4:38:00 PM"  = "4:01:00-5:00:00 PM",
    "4:40:00 PM"  = "4:01:00-5:00:00 PM",
    "4:52:00 PM"  = "4:01:00-5:00:00 PM",
    "5:00:00 PM"  = "4:01:00-5:00:00 PM",
    "5:30:00 PM"  = "5:01:00-10:00:00 PM",
    "6:00:00 PM"  = "5:01:00-10:00:00 PM"
  )
  raw_times <- names(band_for)

  # Each raw time is labelled with itself (label text == value).
  times <- labelled(
    mydata$t2_b6_hr_schl_attend_to,
    setNames(raw_times, raw_times)
  )

  # No band string equals any raw time, so the replacements are independent
  # of one another and the iteration order does not affect the result.
  for (raw in raw_times) {
    times[times == raw] <- band_for[[raw]]
  }
  times
})

# Distribution after recoding (the first count, 227, is again the blank value).
table(mydata$t2_b6_hr_schl_attend_to)
##
## 1:01:00-2:00:00 PM 12:01:00-1:00:00 PM
## 227 62 38
## 2:01:00-3:00:00 PM 3:01:00-4:00:00 PM 4:01:00-5:00:00 PM
## 282 1732 58
## 5:01:00-10:00:00 PM 9:01:00 AM-12:00:00 PM
## 45 15
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# The listed variable names are handed to capture_tables(), which is not
# defined in this script.
# NOTE(review): presumably it comes from the sourced helper file and tabulates
# each listed variable -- confirm.
indirect_PII <- c("t2_b3_a_same_schl_16_17", "t2_b3_a_same_schl_17_18")
capture_tables(indirect_PII)
# !!!Insufficient demographic data
# !!! No Open-Ends
#
# !!!No GPS data
#
# Write the processed data next to the script in Stata (.dta) and SPSS (.sav)
# format; both file names are `filename` plus the "_PU" suffix.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Add report title dynamically
title_var <- paste0("DOL-ILAB SDC - ", filename)