# Start from a clean workspace. `all.names = TRUE` also removes objects whose
# names begin with a dot. The flag must be the logical constant TRUE: a bare
# lowercase `t` refers to base R's transpose function, not to a logical value.
rm(list = ls(all.names = TRUE))
filename <- "Section_6" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file
# Load the project helper functions from the file named above.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# Strip value labels from the household identifier column.
mydata[["household_id"]] <- zap_labels(mydata[["household_id"]])
# !!!No Direct PII - team
# !!!Small locations
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top-code high income at the 99.5th percentile
# Break point for m_s6q3: the 99.5th percentile of the values that are neither
# NA nor the -97 code, rounded down to a whole number.
# NOTE(review): the frequency table printed below also lists a -998 value,
# which is kept in this calculation -- confirm whether it should be dropped too.
percentile_99.5 <- floor(quantile(
  with(mydata, m_s6q3[!is.na(m_s6q3) & m_s6q3 != -97]),
  probs = 0.995
))
# top_recode() is presumably defined in the sourced helper file; the tables
# printed below show the top values collapsed into one "... or more" level.
mydata <- top_recode(
  variable = "m_s6q3",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## m_s6q3. sFq4: How much did you spend in total purchasing land in the last 12 months? M
## -998 600 4500 8000 15000 24000 25000 30000 33000 40000 250000 <NA>
## 1 1 1 2 1 1 1 3 1 1 1 2271
## [1] "Frequency table after encoding"
## m_s6q3. sFq4: How much did you spend in total purchasing land in the last 12 months? M
## -998 600 4500 8000 15000 24000 25000 30000 33000 40000 236350 or more
## 1 1 1 2 1 1 1 3 1 1 1
## <NA>
## 2271
# Break point for m_s6q12: the 99.5th percentile of the values that are neither
# NA nor the -97 code, rounded down to a whole number.
# NOTE(review): the frequency table printed below lists 31 responses coded
# -998, which are kept in this calculation -- confirm whether that is intended.
percentile_99.5 <- floor(quantile(
  with(mydata, m_s6q12[!is.na(m_s6q12) & m_s6q12 != -97]),
  probs = 0.995
))
# Collapse values at the top of the distribution using the helper
# (presumably defined in the sourced helper file).
mydata <- top_recode(
  variable = "m_s6q12",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## m_s6q12. sFq16: How much in total would it have cost you to purchase those inputs with yo
## -998 0 2 5 10 15 20 25 30 35 40 45 50 55 60 70 75 80 90 100 120 150 180 200 220 250 260 290
## 31 13 2 1 31 1 43 5 15 4 19 4 96 2 12 7 4 5 2 105 3 13 2 28 1 7 1 1
## 300 400 450 458 500 600 650 700 750 800 850 900 998 1000 1100 1200 1250 1300 1400 1500 1600 1800 2000 2200 2250 2500 2652 3000
## 19 4 2 1 29 4 1 6 2 3 1 2 1 14 1 11 2 1 1 3 1 2 7 1 1 3 1 6
## 3600 3800 4000 4500 5000 5600 6000 6101 10000 13740 <NA>
## 1 1 1 3 7 1 3 1 2 1 1676
## [1] "Frequency table after encoding"
## m_s6q12. sFq16: How much in total would it have cost you to purchase those inputs with yo
## -998 0 2 5 10 15 20 25 30 35 40 45 50
## 31 13 2 1 31 1 43 5 15 4 19 4 96
## 55 60 70 75 80 90 100 120 150 180 200 220 250
## 2 12 7 4 5 2 105 3 13 2 28 1 7
## 260 290 300 400 450 458 500 600 650 700 750 800 850
## 1 1 19 4 2 1 29 4 1 6 2 3 1
## 900 998 1000 1100 1200 1250 1300 1400 1500 1600 1800 2000 2200
## 2 1 14 1 11 2 1 1 3 1 2 7 1
## 2250 2500 2652 3000 3600 3800 4000 4500 5000 5600 6000 6096 or more <NA>
## 1 3 1 6 1 1 1 3 7 1 3 4 1676
# Coerce the expenses column to plain numeric before computing the percentile.
mydata[["m_farm_expenses"]] <- as.numeric(mydata[["m_farm_expenses"]])
# Break point: the 99.5th percentile of the values that are neither NA nor the
# -97 code, rounded down to a whole number.
percentile_99.5 <- floor(quantile(
  with(mydata, m_farm_expenses[!is.na(m_farm_expenses) & m_farm_expenses != -97]),
  probs = 0.995
))
# Collapse values at the top of the distribution using the helper
# (presumably defined in the sourced helper file).
mydata <- top_recode(
  variable = "m_farm_expenses",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## m_farm_expenses.
## 0 20 40 80 90 93 100 120 180 200 220 250 271 300 400 432 450 500 525 550 565 574 600 800
## 1847 1 2 1 1 1 1 1 1 2 1 1 1 4 3 1 1 4 1 1 1 1 2 3
## 1000 1008 1025 1050 1085 1100 1150 1200 1310 1350 1400 1500 1520 1570 1600 1628 1730 1825 1900 1950 2000 2005 2050 2150
## 4 1 1 1 1 2 1 3 1 2 1 2 1 1 3 1 1 1 1 1 9 1 2 1
## 2400 2412 2450 2500 2600 2736 2840 3000 3040 3050 3200 3208 3300 3390 3420 3460 3500 3570 3600 3610 3700 3750 3760 3800
## 1 1 1 1 1 1 1 7 1 1 1 1 3 1 1 1 2 1 2 1 1 2 1 2
## 3900 3950 4000 4016 4050 4060 4200 4206 4300 4400 4480 4500 4600 4800 4810 4870 5000 5080 5100 5200 5390 5400 5450 5600
## 2 1 4 1 2 2 3 1 3 1 1 1 1 2 1 1 3 1 1 1 1 1 1 2
## 5876 5900 5990 6000 6100 6200 6264 6300 6600 6720 6736 6800 6850 7000 7115 7200 7316 7440 7500 7580 7600 7650 7828 7840
## 1 1 1 4 1 1 1 2 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1
## 7900 8000 8020 8025 8080 8380 8400 8460 8730 8750 8850 8970 9000 9030 9150 9360 9500 9600 10000 10200 10250 10267 10300 10390
## 1 3 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 2 1
## 10700 10900 11000 11150 11500 11600 11840 12000 12100 12158 12200 12400 12500 12600 12608 12800 12900 12990 13013 13165 13280 13500 13600 13800
## 2 1 4 1 1 2 1 6 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 14000 14400 14800 15000 15115 15400 15690 15800 16000 16100 16300 16801 17000 17300 17316 17500 17700 17780 17800 18000 18100 18400 18720 18860
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1
## 19000 19100 19300 19400 19550 20000 20600 20695 20710 21000 21200 21500 21550 21600 21800 21990 22000 22300 22400 23000 23988 24000 24200 24400
## 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1
## 24500 24600 25000 25250 25360 25400 25436 25500 26000 26200 26250 26600 27000 27400 27500 27600 28302 28400 28680 28800 28950 29158 29350 29600
## 1 1 3 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1
## 29800 30000 30500 31900 32000 33000 33800 34100 34500 35000 36000 37300 38800 40000 40200 41316 41600 43200 44350 44400 45200 46400 46718 48400
## 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1
## 49000 51000 51800 56900 57000 61800 63000 64400 65150 65800 68000 68675 77000 81725 87460 87700 88600 91000 102000 132160 138000 158200 163600 282600
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## <NA>
## 58
## [1] "Frequency table after encoding"
## m_farm_expenses. 75917
## 0 20 40 80 90 93 100 120 180 200 220 250
## 1847 1 2 1 1 1 1 1 1 2 1 1
## 271 300 400 432 450 500 525 550 565 574 600 800
## 1 4 3 1 1 4 1 1 1 1 2 3
## 1000 1008 1025 1050 1085 1100 1150 1200 1310 1350 1400 1500
## 4 1 1 1 1 2 1 3 1 2 1 2
## 1520 1570 1600 1628 1730 1825 1900 1950 2000 2005 2050 2150
## 1 1 3 1 1 1 1 1 9 1 2 1
## 2400 2412 2450 2500 2600 2736 2840 3000 3040 3050 3200 3208
## 1 1 1 1 1 1 1 7 1 1 1 1
## 3300 3390 3420 3460 3500 3570 3600 3610 3700 3750 3760 3800
## 3 1 1 1 2 1 2 1 1 2 1 2
## 3900 3950 4000 4016 4050 4060 4200 4206 4300 4400 4480 4500
## 2 1 4 1 2 2 3 1 3 1 1 1
## 4600 4800 4810 4870 5000 5080 5100 5200 5390 5400 5450 5600
## 1 2 1 1 3 1 1 1 1 1 1 2
## 5876 5900 5990 6000 6100 6200 6264 6300 6600 6720 6736 6800
## 1 1 1 4 1 1 1 2 2 1 1 2
## 6850 7000 7115 7200 7316 7440 7500 7580 7600 7650 7828 7840
## 1 2 1 1 1 1 1 1 1 1 1 1
## 7900 8000 8020 8025 8080 8380 8400 8460 8730 8750 8850 8970
## 1 3 1 1 1 1 2 1 1 1 1 1
## 9000 9030 9150 9360 9500 9600 10000 10200 10250 10267 10300 10390
## 2 1 1 1 1 1 2 1 1 1 2 1
## 10700 10900 11000 11150 11500 11600 11840 12000 12100 12158 12200 12400
## 2 1 4 1 1 2 1 6 1 1 1 1
## 12500 12600 12608 12800 12900 12990 13013 13165 13280 13500 13600 13800
## 1 1 1 1 1 1 1 1 1 1 1 1
## 14000 14400 14800 15000 15115 15400 15690 15800 16000 16100 16300 16801
## 2 1 1 1 1 1 1 1 1 1 1 1
## 17000 17300 17316 17500 17700 17780 17800 18000 18100 18400 18720 18860
## 1 1 1 1 1 1 1 2 2 1 1 1
## 19000 19100 19300 19400 19550 20000 20600 20695 20710 21000 21200 21500
## 1 1 1 1 1 2 1 1 1 2 1 1
## 21550 21600 21800 21990 22000 22300 22400 23000 23988 24000 24200 24400
## 1 1 1 1 1 1 1 2 1 1 1 1
## 24500 24600 25000 25250 25360 25400 25436 25500 26000 26200 26250 26600
## 1 1 3 1 1 1 1 1 2 1 1 2
## 27000 27400 27500 27600 28302 28400 28680 28800 28950 29158 29350 29600
## 1 1 1 1 1 1 1 1 1 1 1 1
## 29800 30000 30500 31900 32000 33000 33800 34100 34500 35000 36000 37300
## 1 1 1 1 1 1 1 1 1 1 2 1
## 38800 40000 40200 41316 41600 43200 44350 44400 45200 46400 46718 48400
## 1 1 1 1 1 1 1 1 2 1 1 1
## 49000 51000 51800 56900 57000 61800 63000 64400 65150 65800 68000 68675
## 1 1 1 1 1 1 1 1 1 1 1 1
## 75917 or more <NA>
## 12 58
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Variables flagged as indirect PII in this section; their names are handed to
# the capture_tables() helper.
indirect_PII <- c(
  "m_s6q1", "m_s6q2", "m_s6q4",
  "m_s6q5", "m_s6q6", "m_s6q8",
  "m_s6q10", "m_s6q11", "m_s6q13"
)
capture_tables(indirect_PII)
# Recode those with very specific values.
# !!!No very specific values
# !!!Insufficient demographic data
# !!! Identify open-end variables here:
# Open-ended (free-text) variables in this section; their names are handed to
# the report_open() helper.
open_ends <- c(
  "m_s6q4_other",
  "m_s6q6_other",
  "m_s6q8_warning",
  "m_s6q13_other",
  "m_endnote6"
)
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# !!!Redacted, as it contains sensitive information and some information is in Tagalog.
# Manual redactions of open-ended responses, grouped by variable. Rows are
# addressed by position, so these indices are only valid for the current row
# order of `mydata`.

# m_s6q4_other
mydata$m_s6q4_other[c(1003, 1294)] <- c(
  "[language](cooperativa)",
  "[language] (tornohan)"
)

# m_s6q13_other
mydata$m_s6q13_other[c(352, 434, 1468)] <- c(
  "4 kls. Of Certified Seeds from Local Government of [small location]",
  "LGU [small location] City",
  "Department of Agriculture and UP [small location]"
)

# m_endnote6: responses replaced entirely by the language placeholder
mydata$m_endnote6[c(261, 468, 617, 949)] <- "[language]"

# m_endnote6: responses kept with names / locations / passages masked
mydata$m_endnote6[c(173, 1126, 1468, 2067)] <- c(
  "[name] planted vegetables at their backyard for their consumption, she is not using any fertilizer, no expenses, as well as the seeds were given by friends and neighbors",
  "The farm they used were owned by [name] and his siblings. So they shared in expenses and same with the crops they harvest.",
  "Their crops is organic, by the help of UP [small location], they are planting vegetables. They gets free 4 bags of seeds in the department of Agriculture.",
  "The cost was decreased because he has only 5tupongs of land from the 0.50hectares na lang ang pig uuma sa dating half hec. [language]"
)
# !!!No GPS data
# Export the processed data as Stata (.dta) and SPSS (.sav) files named
# "<filename>_PU.<ext>".
haven::write_dta(mydata, sprintf("%s_PU.dta", filename))
haven::write_sav(mydata, sprintf("%s_PU.sav", filename))
# Add report title dynamically
title_var <- sprintf("DOL-ILAB SDC - %s", filename)