# Start from a clean workspace. `all.names = TRUE` also removes hidden
# (dot-prefixed) objects; the flag must be the logical TRUE -- the previous
# value `t` is the base transpose function, so the flag was not honoured.
rm(list = ls(all.names = TRUE))
filename <- "Section_2" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file
# Load the helper functions used throughout the rest of this script.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId,
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!Include any Direct PII variables
# Columns holding direct PII; every column whose name appears here is removed.
dropvars <- "name"
# Keep only the columns whose names have no match in dropvars.
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!!No Direct PII-team
# !!!No small locations
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Recode education attainment of adults to reduce risk of re-identification
# Cut points for the education recode: one-unit steps from 0 to 17, then 20
# and 26.
break_edu <- c(0:17, 20, 26)

# Value labels attached to the recoded education variable. Names are the
# display labels, values are the recoded category codes.
grade_names <- paste(c("1st", "2nd", "3rd", paste0(4:12, "th")), "Grade")
labels_edu <- setNames(
  c(1, 2, 4:8, 10:19),
  c(
    "Pre-Kinder",
    "Kinder",
    grade_names,
    "High School Graduate",
    "Vocational training or associates degree",
    "Some college or higher"
  )
)
# Recode s2q7 (highest educational level completed) using the break_edu cut
# points and attach labels_edu; 999999 is passed as the `missing` argument.
mydata <- ordinal_recode(
  variable = "s2q7",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## s2q7. What is 's highest educational level completed? Ano ang pinaka-mataas na antas
## Pre-Kinder Kinder
## 3 4
## 1st Grade 2nd Grade
## 11 12
## 3rd Grade 4th Grade
## 18 17
## 5th Grade 6th Grade
## 22 103
## 7th Grade 8th Grade
## 30 44
## 9th Grade 10th Grade
## 31 10
## 11th Grade High School Graduate
## 2 185
## 1st Year Vocational training or associates degree 2nd Year Vocational training or associates degree
## 8 8
## Vocational training or associates degree graduate 1st year of college
## 15 20
## 2nd year of college 3rd year of college
## 13 4
## 4th year of college or higher College graduate
## 2 22
## Education beyond college <NA>
## 2 87
## recoded
## [0,1) [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,10) [10,11) [11,12) [12,13) [13,14) [14,15) [15,16)
## 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 1 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0
## 7 0 0 0 0 0 0 0 22 0 0 0 0 0 0 0 0
## 9 0 0 0 0 0 0 0 0 0 103 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0 0 0 0 30 0 0 0 0 0
## 11 0 0 0 0 0 0 0 0 0 0 0 44 0 0 0 0
## 12 0 0 0 0 0 0 0 0 0 0 0 0 31 0 0 0
## 13 0 0 0 0 0 0 0 0 0 0 0 0 0 10 0 0
## 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0
## 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 24 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## recoded
## [16,17) [17,20) [20,26) [26,1e+06)
## 0 0 0 0 0
## 1 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## 11 0 0 0 0
## 12 0 0 0 0
## 13 0 0 0 0
## 14 0 0 0 0
## 16 185 0 0 0
## 17 0 8 0 0
## 18 0 8 0 0
## 19 0 15 0 0
## 20 0 0 20 0
## 21 0 0 13 0
## 22 0 0 4 0
## 23 0 0 2 0
## 24 0 0 22 0
## 25 0 0 2 0
## [1] "Frequency table after encoding"
## s2q7. What is 's highest educational level completed? Ano ang pinaka-mataas na antas
## Pre-Kinder Kinder
## 3 4
## 1st Grade 2nd Grade
## 11 12
## 3rd Grade 4th Grade
## 18 17
## 5th Grade 6th Grade
## 22 103
## 7th Grade 8th Grade
## 30 44
## 9th Grade 10th Grade
## 31 10
## 11th Grade High School Graduate
## 2 185
## Vocational training or associates degree Some college or higher
## 31 63
## <NA>
## 87
## [1] "Inspect value labels and relabel as necessary"
## Pre-Kinder Kinder
## 1 2
## 1st Grade 2nd Grade
## 4 5
## 3rd Grade 4th Grade
## 6 7
## 5th Grade 6th Grade
## 8 10
## 7th Grade 8th Grade
## 11 12
## 9th Grade 10th Grade
## 13 14
## 11th Grade 12th Grade
## 15 16
## High School Graduate Vocational training or associates degree
## 17 18
## Some college or higher
## 19
# Top-code age (s2q3) at 70, so the table after encoding shows a single
# "70 or more" category; 888 and 999999 are passed as the `missing` argument.
mydata <- top_recode(
  "s2q3",
  break_point = 70,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## s2q3. What is 's age in years? Ano ang edad ni sa taon?
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## 11 7 12 5 7 8 5 3 6 3 4 5 6 9 15 11 34 50 38 43 39 32 29
## 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## 26 21 24 19 11 5 12 6 10 5 8 10 6 11 11 7 6 5 3 4 3 6 3
## 47 48 49 50 52 54 55 56 58 59 60 64 65 67 69 70 73 74 76 78 83 85 86
## 4 2 5 4 5 1 2 3 2 3 4 3 2 1 1 1 1 1 3 3 1 1 1
## 87 88 <NA>
## 1 1 13
## [1] "Frequency table after encoding"
## s2q3. What is 's age in years? Ano ang edad ni sa taon?
## 1 2 3 4 5 6 7 8 9 10
## 11 7 12 5 7 8 5 3 6 3
## 11 12 13 14 15 16 17 18 19 20
## 4 5 6 9 15 11 34 50 38 43
## 21 22 23 24 25 26 27 28 29 30
## 39 32 29 26 21 24 19 11 5 12
## 31 32 33 34 35 36 37 38 39 40
## 6 10 5 8 10 6 11 11 7 6
## 41 42 43 44 45 46 47 48 49 50
## 5 3 4 3 6 3 4 2 5 4
## 52 54 55 56 58 59 60 64 65 67
## 5 1 2 3 2 3 4 3 2 1
## 69 70 or more <NA>
## 1 14 13
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Variables flagged as indirect PII; their tables are captured for review.
indirect_PII <- c(
  "s2q4", "s2q5", "s2q7", "s2q8",
  "s2q9", "s2q10", "s2q11"
)
capture_tables(indirect_PII)
# Recode those with very specific values.
# Cut points for the marital-status (s2q5) recode.
break_mstatus <- c(-999, -888, 1, 2, 7, 8)

# Value labels for the recoded marital status. Names are display labels,
# values are the recoded category codes. "Other" is deliberately used for two
# codes (2 and 4). The spelling of "committed" is corrected so the published
# label matches the source label "Not married but committed".
labels_mstatus <- c(
  "No Response" = 1,
  "Other" = 2,
  "Married Living with Spouse" = 3,
  "Other" = 4,
  "Not married but committed" = 5,
  "Single" = 6
)
# Recode s2q5 (marital status) using the break_mstatus cut points and attach
# labels_mstatus; 999999 is passed as the `missing` argument.
mydata <- ordinal_recode(
  variable = "s2q5",
  break_points = break_mstatus,
  missing = 999999,
  value_labels = labels_mstatus
)
## [1] "Frequency table before encoding"
## s2q5. What is 's marital status? Ano ang estado ng tungkol sa kasal ni ?
## Married Living with Spouse Married Not Living with Spouse Divorced
## 133 32 1
## Seperated Widow Not married but committed
## 27 20 125
## Single <NA>
## 330 5
## recoded
## [-999,-888) [-888,1) [1,2) [2,7) [7,8) [8,1e+06)
## 1 0 0 133 0 0 0
## 2 0 0 0 32 0 0
## 4 0 0 0 1 0 0
## 5 0 0 0 27 0 0
## 6 0 0 0 20 0 0
## 7 0 0 0 0 125 0
## 8 0 0 0 0 0 330
## [1] "Frequency table after encoding"
## s2q5. What is 's marital status? Ano ang estado ng tungkol sa kasal ni ?
## Other Married Living with Spouse Not married but comitted Single
## 80 133 125 330
## <NA>
## 5
## [1] "Inspect value labels and relabel as necessary"
## No Response Other Married Living with Spouse Other
## 1 2 3 4
## Not married but comitted Single
## 5 6
# Recode over-specific activities with few members for adults.
# First tabulate s2q8 (principal usual activity) to see the current categories.
haven_table("s2q8")
## s2q8. What was 's principal usual activity when they were last living in this househol
## Sugarcane Farming
## 7
## Banana Farming
## 2
## Coconut Farming
## 6
## Other Farming
## 72
## Poultry Farmers
## 8
## Inland And Coastal Waters Fishermen
## 5
## Deep-Sea Fishermen
## 11
## Mining And Quarrying Including Gold Extraction
## 3
## Manufacturing Pyrotechnics
## 1
## Construction
## 32
## Domestic Work
## 43
## Street Work Including Scavenging And Begging
## 1
## Scavenging In Dumpsites
## 1
## Plumbers
## 1
## Vulcanizing (rubber workers)
## 1
## Heavy Equipment Operator (ie., bulldozer operator)
## 1
## Guard
## 4
## Hairdresser/Barber/Beautician
## 2
## Consumer store operator
## 6
## Cashiers, Tellers And Related Clerks
## 6
## Charcoal Makers And Related Workers
## 1
## Cleaners, Launderers And Related Workers
## 4
## Food Processing and Related Trades Workers
## 3
## Handicraft Workers In Wood, Textile, Leather, Chemicals And Related Workers
## 7
## Hotel Housekeepers And Restaurant Services Workers
## 5
## Machinery Mechanics, Fitters And Related Trades Workers
## 4
## Market Stall Vendors, Street Vendors And Related Workers
## 23
## Messengers, Porters, Doorkeepers And Related Workers
## 3
## Metal Molders, Welders, Sheet-Metal Workers, Structural-Metal Preparers And Related Trades Workers
## 2
## Motor Vehicle Drivers
## 7
## Painters And Related Trades Workers
## 1
## Textile, Garment And Related Trades Workers
## 2
## Wood Treaters, Cabinet Makers And Related Trades Workers
## 2
## Rice Farming
## 4
## Student
## 68
## Principally performs chores and other unpaid household services for own household
## 108
## <NA>
## 216
# Collapse rare principal-activity codes (s2q8) for adults only (s2q3 > 17).
# Rows with a missing age or a missing activity are left untouched.
#   * codes 1-6 are merged into code 7
#   * every other code except 7, 15, 16 and 91 is set to -888
# Vectorised replacement for the former row-by-row loop: the result is the
# same, but it also works when the data has zero rows (1:length(x) does not)
# and no longer creates a global `range` that shadows base::range().
age_years <- as.numeric(mydata$s2q3)
activity_code <- as.numeric(mydata$s2q8)

# TRUE only where both age and activity are present and the person is an
# adult; the is.na() guards ensure the mask never contains NA.
adult_with_activity <- !is.na(age_years) & age_years > 17 &
  !is.na(activity_code)

# Both masks are computed from the ORIGINAL codes, so rows merged into 7 are
# never picked up by the second rule (7 is one of the protected codes).
merge_into_7 <- adult_with_activity & activity_code %in% 1:6
set_to_minus_888 <- adult_with_activity &
  !(activity_code %in% c(1:7, 15, 16, 91))

mydata$s2q8[merge_into_7] <- 7
mydata$s2q8[set_to_minus_888] <- -888

# Tabulate s2q8 again to inspect the result of the recode.
haven_table("s2q8")
## s2q8. What was 's principal usual activity when they were last living in this househol
## Other: Specify
## 129
## Sugarcane Farming
## 1
## Coconut Farming
## 2
## Other Farming
## 84
## Poultry Farmers
## 2
## Construction
## 32
## Domestic Work
## 43
## Consumer store operator
## 2
## Handicraft Workers In Wood, Textile, Leather, Chemicals And Related Workers
## 2
## Market Stall Vendors, Street Vendors And Related Workers
## 2
## Motor Vehicle Drivers
## 1
## Textile, Garment And Related Trades Workers
## 1
## Student
## 48
## Principally performs chores and other unpaid household services for own household
## 108
## <NA>
## 216
# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# All variable names should correspond to the names in the data file
# selected categorical key variables: gender, occupation/education and age
##!!! Replace with candidate categorical demo vars
selectedKeyVars <- c("s2q3", "s2q4", "s2q7")
# Build the sdcMicro object on the chosen key variables, then display it.
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars
)
sdcInitial
## The input dataset consists of 673 rows and 35 variables.
## --> Categorical key variables: s2q3, s2q4, s2q7
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## s2q3 63 (63) 10.645 (10.645) 1 (1)
## s2q4 2 (2) 336.500 (336.500) 329 (329)
## s2q7 17 (17) 36.625 (36.625) 2 (2)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 0 (0.000%)
## - 3-anonymity: 0 (0.000%)
## - 5-anonymity: 0 (0.000%)
##
## ----------------------------------------------------------------------
# !!! Identify open-end variables here:
# Open-ended (free text) variables to review: the "whynoresponse" follow-ups
# for s2q2 through s2q16, plus the "_other" text fields for s2q7, s2q8 and
# s2q11, each placed directly before its question's "whynoresponse" field.
open_ends <- c(
  paste0("s2q", 2:6, "whynoresponse"),
  "s2q7_other", "s2q7whynoresponse",
  "s2q8_other", "s2q8whynoresponse",
  paste0("s2q", 9:10, "whynoresponse"),
  "s2q11_other",
  paste0("s2q", 11:16, "whynoresponse")
)
# Hand the list of open-ended variables to the reporting helper for review.
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# Manual redaction of open-ended responses. Each statement overwrites one cell,
# addressed by column name and row position, with replacement text. Bracketed
# tokens such as [name], [city], [work] stand in for the detail that was
# removed.
# NOTE(review): the indices are positional, so they are only valid while
# mydata keeps the row order it had when the verbatims were reviewed --
# confirm before re-running on re-sorted or filtered data.

# --- s2q8_other ---
mydata$s2q8_other[29] <- "[name] stays at [city] for almost 2yrs from now. [name]is having a vacation twice a month in their house here in [site]."
mydata$s2q8_other[44] <- "[work]"
mydata$s2q8_other[45] <- "farmer"
mydata$s2q8_other[48] <- "driver"
mydata$s2q8_other[108] <- "She is studying in [college]"
mydata$s2q8_other[147] <- "farming"
mydata$s2q8_other[156] <- "Professor"
mydata$s2q8_other[162] <- "cutting"
mydata$s2q8_other[163] <- "Sister of [name]"
mydata$s2q8_other[189] <- "driver"
mydata$s2q8_other[205] <- "[work]"
mydata$s2q8_other[207] <- "Cashier"
mydata$s2q8_other[227] <- "Factory worker "
mydata$s2q8_other[239] <- "Factory worker "
mydata$s2q8_other[249] <- "Installation "
mydata$s2q8_other[313] <- "conductor"
mydata$s2q8_other[324] <- "[work]"
mydata$s2q8_other[339] <- "[work]"
mydata$s2q8_other[366] <- "dealer"
mydata$s2q8_other[461] <- "[object] making"
mydata$s2q8_other[477] <- "driver"
mydata$s2q8_other[496] <- "vendor"
mydata$s2q8_other[545] <- "Farming"
mydata$s2q8_other[548] <- "[work]"
mydata$s2q8_other[582] <- "[work]"
mydata$s2q8_other[650] <- "[work]"
# --- s2q9whynoresponse ---
mydata$s2q9whynoresponse[304] <- "[situation]"
# --- s2q11_other ---
mydata$s2q11_other[86] <- "Factory worker in [city]"
mydata$s2q11_other[97] <- "[Tagalo]"
mydata$s2q11_other[98] <- "[work]"
mydata$s2q11_other[108] <- "She is studying in [college]"
mydata$s2q11_other[118] <- "[Tagalo]"
mydata$s2q11_other[162] <- "[work]"
mydata$s2q11_other[249] <- "Installation"
mydata$s2q11_other[330] <- "[work]"
mydata$s2q11_other[412] <- "[vehicle]helper(logistic)"
mydata$s2q11_other[419] <- "[situation]"
mydata$s2q11_other[422] <- "[vehicle] helper"
mydata$s2q11_other[464] <- "[level] Teacher"
mydata$s2q11_other[537] <- "[work]"
mydata$s2q11_other[545] <- "[work]"
mydata$s2q11_other[555] <- "[Tagalo]"
mydata$s2q11_other[582] <- "Barangay [worker]"
mydata$s2q11_other[627] <- "[work]"
mydata$s2q11_other[646] <- "[vehicle] driver"
mydata$s2q11_other[655] <- "[vehicle] conductor"
# --- s2q11whynoresponse ---
mydata$s2q11whynoresponse[122] <- "At [province]"
mydata$s2q11whynoresponse[256] <- "[name] is not aware of his parent in law's whereabouts."
mydata$s2q11whynoresponse[304] <- "Not working, currently [situation]"
mydata$s2q11whynoresponse[562] <- "[name] is currently [situation]"
# !!!No GPS data
# Write the processed data as "<filename>_PU" in Stata (.dta) and SPSS (.sav)
# formats.
haven::write_dta(mydata, sprintf("%s_PU.dta", filename))
haven::write_sav(mydata, sprintf("%s_PU.sav", filename))
# Report title, built from the section's file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)