# Start from a clean workspace before processing this section.
# `all.names = TRUE` is spelled out in full: the earlier `all=t` handed the
# base function t() (matrix transpose) to ls() instead of a logical, and
# relied on partial argument matching. TRUE is what was evidently intended,
# so objects whose names begin with "." are now cleared as well.
rm(list = ls(all.names = TRUE))

filename <- "Section_3" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

# Load the helper functions file named above.
# NOTE(review): presumably this provides top_recode() and report_open(), and
# makes `mydata` available for the steps below - confirm against that file.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!No Direct PII
# !!!No Direct PII-team
# !!!No small locations
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top code high values to the 99.5 percentile.
# s3q2 (total value of monetary gifts): the break point is the floored 99.5th
# percentile of the non-NA responses, leaving out the 999999 code that is also
# passed to top_recode() as `missing`.
percentile_99.5 <- local({
  answered <- na.exclude(mydata$s3q2)
  floor(quantile(answered[answered != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s3q2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q2. What was the total value of these monetary gifts? Magkano ang kabuuang halaga
## 10 20 30 40 50 70 80 100 120 130 150 180 200 240 250
## 1 6 2 3 25 2 1 62 1 1 18 2 60 1 7
## 280 300 350 400 500 530 550 600 650 660 700 800 900 1000 1200
## 1 39 1 9 105 1 1 6 1 1 9 9 2 75 5
## 1250 1300 1400 1450 1500 1600 1800 2000 2100 2200 2500 2800 3000 3500 4000
## 1 1 1 1 18 2 1 36 1 1 3 1 25 2 3
## 5000 5300 5500 6000 6500 6850 7000 7500 8000 8200 9500 10000 11000 12000 15000
## 11 1 1 3 1 1 1 1 3 1 1 6 2 2 2
## 16000 27000 30000 52000 60000 70000 122244 <NA>
## 1 1 1 1 1 1 1 1698
## [1] "Frequency table after encoding"
## s3q2. What was the total value of these monetary gifts? Magkano ang kabuuang halaga
## 10 20 30 40 50 70 80
## 1 6 2 3 25 2 1
## 100 120 130 150 180 200 240
## 62 1 1 18 2 60 1
## 250 280 300 350 400 500 530
## 7 1 39 1 9 105 1
## 550 600 650 660 700 800 900
## 1 6 1 1 9 9 2
## 1000 1200 1250 1300 1400 1450 1500
## 75 5 1 1 1 1 18
## 1600 1800 2000 2100 2200 2500 2800
## 2 1 36 1 1 3 1
## 3000 3500 4000 5000 5300 5500 6000
## 25 2 3 11 1 1 3
## 6500 6850 7000 7500 8000 8200 9500
## 1 1 1 1 3 1 1
## 10000 11000 12000 15000 16000 27000 30000
## 6 2 2 2 1 1 1
## 52000 52119 or more <NA>
## 1 3 1698
# s3q4 (total value of non-monetary gifts): the break point is the floored
# 99.5th percentile of the non-NA responses, leaving out the 999999 code that
# is also passed to top_recode() as `missing`.
percentile_99.5 <- local({
  answered <- na.exclude(mydata$s3q4)
  floor(quantile(answered[answered != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s3q4",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q4. What was the total value of these non-monetary gifts? Magkano ang kabuuang hal
## 20 30 35 37 40 45 48 50 60 70 75 80 85 100 110 120 130 150
## 6 2 2 1 4 1 1 31 7 4 3 5 2 108 1 3 1 31
## 158 160 165 167 170 175 180 200 220 230 240 250 300 308 330 350 374 400
## 1 1 1 1 1 1 2 59 2 1 4 12 40 1 1 6 1 7
## 450 500 530 550 600 680 700 800 900 950 1000 1100 1200 1300 1500 1700 2000 2400
## 2 58 1 1 6 1 5 4 4 1 30 1 2 2 6 1 9 1
## 2500 3000 3500 3600 10000 <NA>
## 2 6 1 1 1 1795
## [1] "Frequency table after encoding"
## s3q4. What was the total value of these non-monetary gifts? Magkano ang kabuuang hal
## 20 30 35 37 40 45 48 50
## 6 2 2 1 4 1 1 31
## 60 70 75 80 85 100 110 120
## 7 4 3 5 2 108 1 3
## 130 150 158 160 165 167 170 175
## 1 31 1 1 1 1 1 1
## 180 200 220 230 240 250 300 308
## 2 59 2 1 4 12 40 1
## 330 350 374 400 450 500 530 550
## 1 6 1 7 2 58 1 1
## 600 680 700 800 900 950 1000 1100
## 6 1 5 4 4 1 30 1
## 1200 1300 1500 1700 2000 2400 2500 3000
## 2 2 6 1 9 1 2 6
## 3250 or more <NA>
## 3 1795
# s3q6 (total value of monetary gifts): the break point is the floored 99.5th
# percentile of the non-NA responses, leaving out the 999999 code that is also
# passed to top_recode() as `missing`.
percentile_99.5 <- local({
  answered <- na.exclude(mydata$s3q6)
  floor(quantile(answered[answered != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s3q6",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q6. What was the total value of these monetary gifts? Magkano ang kabuuang halaga
## 20 40 50 60 70 90 96 100 130 140 150 160 170 180 200
## 2 2 5 3 1 2 1 32 1 1 10 1 2 1 47
## 220 250 280 300 340 350 400 440 500 508 520 550 570 580 600
## 1 4 1 30 1 7 14 1 145 1 1 1 1 1 16
## 700 750 772 800 900 1000 1050 1100 1200 1300 1350 1400 1500 1600 1650
## 16 2 1 13 5 157 2 4 13 2 1 2 58 5 1
## 1700 1750 1800 2000 2030 2140 2200 2300 2380 2400 2500 2600 2800 2900 3000
## 5 1 3 88 1 1 3 2 1 1 21 1 5 1 55
## 3050 3100 3200 3400 3500 3600 3900 4000 4100 4300 4500 4800 5000 5200 5500
## 1 1 4 2 5 1 1 19 1 2 4 1 46 1 2
## 6000 6500 6700 7000 7200 7500 8000 8400 8700 8900 9000 9500 9600 10000 10200
## 25 2 1 2 1 1 4 1 1 1 3 1 2 23 1
## 10500 10800 11000 11100 12000 12360 12500 12800 14000 14400 14500 15000 15900 16000 17000
## 3 1 1 1 28 1 1 2 1 1 1 12 1 1 1
## 17500 18000 18400 19000 20000 21500 22000 24000 25000 28500 28800 30000 32000 33000 34000
## 1 7 1 2 5 1 1 12 2 1 1 7 2 1 1
## 35000 35800 36000 36500 37500 39000 40000 44000 45000 48000 48200 49500 50000 50800 51000
## 1 1 3 1 1 1 1 1 2 4 1 1 5 1 1
## 52200 56000 57600 59000 60000 60200 68000 72000 72500 73000 73500 77000 78000 84000 88000
## 1 2 1 1 5 1 1 1 1 1 1 1 2 5 1
## 90000 96000 108000 120000 126000 140000 144000 147000 156000 168000 180000 216000 240000 <NA>
## 1 5 1 3 1 1 2 1 1 1 1 1 1 1158
## [1] "Frequency table after encoding"
## s3q6. What was the total value of these monetary gifts? Magkano ang kabuuang halaga
## 20 40 50 60 70 90 96
## 2 2 5 3 1 2 1
## 100 130 140 150 160 170 180
## 32 1 1 10 1 2 1
## 200 220 250 280 300 340 350
## 47 1 4 1 30 1 7
## 400 440 500 508 520 550 570
## 14 1 145 1 1 1 1
## 580 600 700 750 772 800 900
## 1 16 16 2 1 13 5
## 1000 1050 1100 1200 1300 1350 1400
## 157 2 4 13 2 1 2
## 1500 1600 1650 1700 1750 1800 2000
## 58 5 1 5 1 3 88
## 2030 2140 2200 2300 2380 2400 2500
## 1 1 3 2 1 1 21
## 2600 2800 2900 3000 3050 3100 3200
## 1 5 1 55 1 1 4
## 3400 3500 3600 3900 4000 4100 4300
## 2 5 1 1 19 1 2
## 4500 4800 5000 5200 5500 6000 6500
## 4 1 46 1 2 25 2
## 6700 7000 7200 7500 8000 8400 8700
## 1 2 1 1 4 1 1
## 8900 9000 9500 9600 10000 10200 10500
## 1 3 1 2 23 1 3
## 10800 11000 11100 12000 12360 12500 12800
## 1 1 1 28 1 1 2
## 14000 14400 14500 15000 15900 16000 17000
## 1 1 1 12 1 1 1
## 17500 18000 18400 19000 20000 21500 22000
## 1 7 1 2 5 1 1
## 24000 25000 28500 28800 30000 32000 33000
## 12 2 1 1 7 2 1
## 34000 35000 35800 36000 36500 37500 39000
## 1 1 1 3 1 1 1
## 40000 44000 45000 48000 48200 49500 50000
## 1 1 2 4 1 1 5
## 50800 51000 52200 56000 57600 59000 60000
## 1 1 1 2 1 1 5
## 60200 68000 72000 72500 73000 73500 77000
## 1 1 1 1 1 1 1
## 78000 84000 88000 90000 96000 108000 120000
## 2 5 1 1 5 1 3
## 126000 140000 144000 144945 or more <NA>
## 1 1 2 6 1158
# s3q8 (total value of non-monetary gifts): the break point is the floored
# 99.5th percentile of the non-NA responses, leaving out the 999999 code that
# is also passed to top_recode() as `missing`.
percentile_99.5 <- local({
  answered <- na.exclude(mydata$s3q8)
  floor(quantile(answered[answered != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s3q8",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q8. What was the total value of these non-monetary gifts? Magkano ang kabuuang hal
## 1 10 20 22 25 30 50 53 54 60 65 70 75 80 90 99 100 108
## 1 1 1 1 1 3 24 1 1 3 1 2 5 3 1 1 100 1
## 120 130 150 151 160 180 194 200 210 250 280 300 350 360 380 400 450 480
## 10 3 61 1 1 1 1 97 2 12 1 84 9 1 1 19 3 1
## 500 525 550 600 650 700 710 750 800 900 1000 1050 1150 1200 1300 1350 1354 1400
## 120 1 3 12 3 15 1 2 11 1 63 1 2 6 1 1 1 1
## 1500 1700 2000 2400 2500 3000 3300 4000 4500 4800 5000 5200 6000 7000 10000 15000 19000 <NA>
## 27 2 26 2 4 14 1 3 1 1 14 1 1 1 8 1 1 1484
## [1] "Frequency table after encoding"
## s3q8. What was the total value of these non-monetary gifts? Magkano ang kabuuang hal
## 1 10 20 22 25 30 50
## 1 1 1 1 1 3 24
## 53 54 60 65 70 75 80
## 1 1 3 1 2 5 3
## 90 99 100 108 120 130 150
## 1 1 100 1 10 3 61
## 151 160 180 194 200 210 250
## 1 1 1 1 97 2 12
## 280 300 350 360 380 400 450
## 1 84 9 1 1 19 3
## 480 500 525 550 600 650 700
## 1 120 1 3 12 3 15
## 710 750 800 900 1000 1050 1150
## 1 2 11 1 63 1 2
## 1200 1300 1350 1354 1400 1500 1700
## 6 1 1 1 1 27 2
## 2000 2400 2500 3000 3300 4000 4500
## 26 2 4 14 1 3 1
## 4800 5000 5200 6000 7000 10000 or more <NA>
## 1 14 1 1 1 10 1484
# !!!No Indirect PII - Categorical
# !!!Insufficient demographic data
# !!! Identify open-end variables here:
# Open-end variables follow the pattern s3q<N>whynoresponse for N = 1..8,
# so the names are generated rather than listed by hand.
open_ends <- paste0("s3q", seq_len(8), "whynoresponse")
# Report the open-end responses for manual review.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and
# their row number.
# Row 1080 of s3q8whynoresponse is overwritten with a version in which the
# personal name is replaced by the "[name]" placeholder.
mydata[["s3q8whynoresponse"]][1080] <- "[name] does not know the market value of those clothing that was given to her family."
# !!!No GPS data
# Write the processed dataset to "<filename>_PU" in both Stata (.dta) and
# SPSS (.sav) formats.
haven::write_dta(data = mydata, path = sprintf("%s_PU.dta", filename))
haven::write_sav(data = mydata, path = sprintf("%s_PU.sav", filename))
# Build the report title from the section's filename
title_var <- sprintf("DOL-ILAB SDC - %s", filename)