# Start from a clean workspace. `all.names = TRUE` also clears dot-prefixed
# objects; the earlier `all = t` passed base::t (the transpose function)
# instead of TRUE and relied on partial argument matching.
rm(list = ls(all.names = TRUE))
filename <- "midline_bgy_captain_survey" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file
# Load the helper functions file named above.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!No Direct PII
# !!!No Direct PII - team
# !!!Include relevant variables, but check their population size first to confirm they are <100,000
# Location variable(s) to be encoded; the recorded tables below show the
# municipality names being replaced by numeric codes.
locvars <- "m_s0q4"
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## m_s0q4. SELECT MUNICIPALITY PROVIDED.
## Malinao Manito Polangui Abucay
## 12 3 4 9
## Mariveles San Nicolas Amulung Enrile
## 16 2 5 5
## Calabanga Camaligan Canaman Jose Panganiban
## 5 1 3 5
## Labo Libmanan Magarao Naga City
## 4 1 3 2
## Ocampo Pasacao Tinambac General Emilio Aguinaldo
## 1 3 1 3
## Cauayan City Jones Pagsanjan Pila
## 4 18 9 1
## Agno Anda Bani Bautista
## 4 3 2 1
## Bugallon Calasiao San Carlos City Sual
## 4 1 1 1
## Urbiztondo Candelaria Sampaloc Jala-Jala
## 1 2 4 3
## Pililla San Mateo Tanay Pilar
## 3 3 4 5
## Sorsogon City <NA>
## 2 1
## [1] "Frequency table after encoding"
## m_s0q4. SELECT MUNICIPALITY PROVIDED.
## 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809
## 4 4 4 3 2 12 3 9 3 3 5 18 1 1 4 9 5 3 2 3 2 1
## 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 <NA>
## 1 3 5 2 4 2 5 1 16 1 4 1 1 4 3 5 1 3 1 1
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top-code bcs_s1q2. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q2)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q2. How long have you been ${calc_1} in ${calc_barangay}? Gaano na po kayo katag
## 1 2 3 4 5 6 7 8 9 10 12 13 14 17 18 20 24 36 42 45 46 47
## 2 4 7 9 2 7 6 1 5 4 3 2 1 3 2 2 1 3 3 1 1 1
## 48 55 60 72 74 76 79 80 84 96 108 114 115 117 118 120 144 150 204 216 276 <NA>
## 22 1 2 5 1 2 1 3 27 1 2 1 1 2 2 14 1 1 3 1 1 1
## [1] "Frequency table after encoding"
## bcs_s1q2. How long have you been ${calc_1} in ${calc_barangay}? Gaano na po kayo katag
## 1 2 3 4 5 6 7 8 9
## 2 4 7 9 2 7 6 1 5
## 10 12 13 14 17 18 20 24 36
## 4 3 2 1 3 2 2 1 3
## 42 45 46 47 48 55 60 72 74
## 3 1 1 1 22 1 2 5 1
## 76 79 80 84 96 108 114 115 117
## 2 1 3 27 1 2 1 1 2
## 118 120 144 150 204 216 227 or more <NA>
## 2 14 1 1 3 1 1 1
# Top-code bcs_s1q5. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q5)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q5",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q5. How long did you serve as ${calc_2}? Gaano po kayo katagal naglingkod bilang \
## 1 3 4 5 6 10 12 13 14 15 19 24 36 48 60 72 84 96 102 108 120 132
## 1 5 3 1 2 1 1 5 1 1 1 2 14 2 4 12 1 3 1 6 5 7
## 156 180 216 288 300 <NA>
## 5 2 1 1 1 76
## [1] "Frequency table after encoding"
## bcs_s1q5. How long did you serve as ${calc_2}? Gaano po kayo katagal naglingkod bilang \
## 1 3 4 5 6 10 12 13 14
## 1 5 3 1 2 1 1 5 1
## 15 19 24 36 48 60 72 84 96
## 1 1 2 14 2 4 12 1 3
## 102 108 120 132 156 180 216 288 294 or more
## 1 6 5 7 5 2 1 1 1
## <NA>
## 76
# Top-code bcs_s1q7. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q7)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q7",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q7. How long would it take you to travel one-way from here to municipal government o
## 1 2 3 5 10 15 20 25 30 35 40 45 50 60 90 120 150 <NA>
## 4 10 11 26 30 19 17 1 21 1 3 7 1 7 1 4 1 1
## [1] "Frequency table after encoding"
## bcs_s1q7. How long would it take you to travel one-way from here to municipal government o
## 1 2 3 5 10 15 20 25 30
## 4 10 11 26 30 19 17 1 21
## 35 40 45 50 60 90 120 125 or more <NA>
## 1 3 7 1 7 1 4 1 1
# Top-code bcs_s1q8. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q8)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q8",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q8. How many times have you made that trip in the last 12 months for official busine
## 4 5 6 10 12 15 24 30 32 36 40 44 48 50 52 56 60 70 84 96 100 104
## 1 2 1 1 3 1 19 1 1 14 1 1 13 1 1 1 16 1 2 9 3 1
## 108 120 124 128 130 132 140 144 150 156 160 174 180 182 192 200 240 252 300 <NA>
## 1 28 1 1 1 1 4 6 2 1 1 1 2 1 3 7 6 1 2 1
## [1] "Frequency table after encoding"
## bcs_s1q8. How many times have you made that trip in the last 12 months for official busine
## 4 5 6 10 12 15 24 30 32
## 1 2 1 1 3 1 19 1 1
## 36 40 44 48 50 52 56 60 70
## 14 1 1 13 1 1 1 16 1
## 84 96 100 104 108 120 124 128 130
## 2 9 3 1 1 28 1 1 1
## 132 140 144 150 156 160 174 180 182
## 1 4 6 2 1 1 1 2 1
## 192 200 240 252 300 or more <NA>
## 3 7 6 1 2 1
# Top-code bcs_s1q9. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q9)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q9",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q9. How long would it take you to travel (one-way) from here to regional center gove
## 0 1 2 3 4 6 7 8 10 15 20 30 40 45 50 60 90 120 130 180 240 300
## 1 7 16 6 2 2 1 2 1 6 2 8 3 1 1 24 12 23 1 27 10 5
## 360 480 <NA>
## 1 2 1
## [1] "Frequency table after encoding"
## bcs_s1q9. How long would it take you to travel (one-way) from here to regional center gove
## 0 1 2 3 4 6 7 8 10
## 1 7 16 6 2 2 1 2 1
## 15 20 30 40 45 50 60 90 120
## 6 2 8 3 1 1 24 12 23
## 130 180 240 300 360 480 or more <NA>
## 1 27 10 5 1 2 1
# Top-code bcs_s1q10. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
# NOTE(review): the -998 code visible in the recorded frequency table is not
# filtered here, so it takes part in the percentile - confirm this is intended.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q10)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q10",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q10. How many times have you made that trip in the last 12 months for official busine
## -998 1 2 3 4 5 6 8 10 12 15 18 20 24 28 36 40 48 60 72 96 <NA>
## 1 48 30 19 7 15 5 1 12 8 1 1 3 3 1 3 1 1 2 1 1 1
## [1] "Frequency table after encoding"
## bcs_s1q10. How many times have you made that trip in the last 12 months for official busine
## -998 1 2 3 4 5 6 8 10 12
## 1 48 30 19 7 15 5 1 12 8
## 15 18 20 24 28 36 40 48 60 72
## 1 1 3 3 1 3 1 1 2 1
## 76 or more <NA>
## 1 1
# Top-code bcs_s1q11. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
# NOTE(review): the -998 code visible in the recorded frequency table is not
# filtered here, so it takes part in the percentile - confirm this is intended.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q11)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q11",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q11. How many households are there in this barangay? Ilan pong sambahayan ang mayroo
## -998 97 105 118 122 126 148 150 152 161 178 195 198 200 202 204 211 212
## 1 1 1 1 1 1 1 3 1 1 1 1 1 3 1 1 1 1
## 216 225 228 230 242 251 252 260 264 266 280 284 286 288 291 300 303 312
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 315 320 324 326 330 340 355 357 367 370 380 392 398 400 403 404 418 420
## 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 3
## 434 440 449 450 487 489 496 499 500 510 520 525 528 543 550 580 586 594
## 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1
## 600 610 612 658 660 664 678 687 690 697 700 710 717 719 750 754 765 769
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
## 785 786 820 828 840 860 867 906 907 916 984 1002 1005 1025 1038 1074 1076 1100
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1150 1168 1200 1218 1333 1500 1505 1560 1582 1585 1587 1606 1750 1800 1976 1980 2000 2005
## 2 2 3 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
## 2027 2104 2200 2410 2500 2672 2716 2786 3000 3100 3900 3926 4000 5300 7000 7600 11400 15000
## 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1
## 17000 30000 <NA>
## 1 1 1
## [1] "Frequency table after encoding"
## bcs_s1q11. How many households are there in this barangay? Ilan pong sambahayan ang mayroo
## -998 97 105 118 122 126 148
## 1 1 1 1 1 1 1
## 150 152 161 178 195 198 200
## 3 1 1 1 1 1 3
## 202 204 211 212 216 225 228
## 1 1 1 1 1 1 1
## 230 242 251 252 260 264 266
## 1 1 1 1 1 1 1
## 280 284 286 288 291 300 303
## 1 1 1 1 1 1 1
## 312 315 320 324 326 330 340
## 1 1 1 1 1 1 1
## 355 357 367 370 380 392 398
## 1 1 1 1 1 1 1
## 400 403 404 418 420 434 440
## 2 1 1 2 3 1 1
## 449 450 487 489 496 499 500
## 1 1 1 1 1 1 2
## 510 520 525 528 543 550 580
## 1 1 1 1 1 1 1
## 586 594 600 610 612 658 660
## 1 1 2 1 1 1 1
## 664 678 687 690 697 700 710
## 1 1 1 1 1 1 1
## 717 719 750 754 765 769 785
## 1 1 2 1 1 1 1
## 786 820 828 840 860 867 906
## 1 1 1 1 1 1 1
## 907 916 984 1002 1005 1025 1038
## 1 1 1 1 1 1 1
## 1074 1076 1100 1150 1168 1200 1218
## 1 1 1 2 2 3 1
## 1333 1500 1505 1560 1582 1585 1587
## 1 1 1 1 1 1 1
## 1606 1750 1800 1976 1980 2000 2005
## 1 1 1 1 1 2 1
## 2027 2104 2200 2410 2500 2672 2716
## 1 1 1 1 1 1 1
## 2786 3000 3100 3900 3926 4000 5300
## 1 2 1 1 1 2 1
## 7000 7600 11400 15000 17000 19405 or more <NA>
## 1 1 1 1 1 1 1
# Top-code bcs_s1q12. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
# NOTE(review): the -998 code visible in the recorded frequency table is not
# filtered here, so it takes part in the percentile - confirm this is intended.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q12)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q12",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q12. How many households in this Barangay are not engaged in agriculture. Ilang samb
## -998 1 2 5 8 10 15 17 20 21 27 30 32 33 34 39 40 43
## 2 2 1 2 1 8 1 1 5 1 1 3 2 1 2 1 1 2
## 45 48 50 56 57 58 60 61 63 66 71 75 79 80 89 90 99 100
## 2 1 12 1 1 1 1 2 1 1 1 2 1 3 1 2 1 8
## 101 103 115 116 130 137 143 150 153 158 160 175 194 200 210 230 236 240
## 1 2 1 1 1 1 1 4 1 1 2 1 1 4 1 2 1 1
## 243 261 271 298 300 317 350 364 375 400 405 500 544 580 600 640 700 800
## 1 1 1 1 6 1 3 1 1 2 1 8 1 1 3 1 1 2
## 864 900 999 1000 1100 1105 1400 1800 2004 2600 3000 3500 3800 6500 8000 16028 <NA>
## 1 1 1 4 1 1 1 2 1 1 1 2 1 1 2 1 1
## [1] "Frequency table after encoding"
## bcs_s1q12. How many households in this Barangay are not engaged in agriculture. Ilang samb
## -998 1 2 5 8 10 15 17
## 2 2 1 2 1 8 1 1
## 20 21 27 30 32 33 34 39
## 5 1 1 3 2 1 2 1
## 40 43 45 48 50 56 57 58
## 1 2 2 1 12 1 1 1
## 60 61 63 66 71 75 79 80
## 1 2 1 1 1 2 1 3
## 89 90 99 100 101 103 115 116
## 1 2 1 8 1 2 1 1
## 130 137 143 150 153 158 160 175
## 1 1 1 4 1 1 2 1
## 194 200 210 230 236 240 243 261
## 1 4 1 2 1 1 1 1
## 271 298 300 317 350 364 375 400
## 1 1 6 1 3 1 1 2
## 405 500 544 580 600 640 700 800
## 1 8 1 1 3 1 1 2
## 864 900 999 1000 1100 1105 1400 1800
## 1 1 1 4 1 1 1 2
## 2004 2600 3000 3500 3800 6500 8000 9485 or more
## 1 1 1 2 1 1 2 1
## <NA>
## 1
# Top-code bcs_s1q13. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
# NOTE(review): the -998 code visible in the recorded frequency table is not
# filtered here, so it takes part in the percentile - confirm this is intended.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q13)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q13",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q13. How many households in this Barangay are considered poor? Ilang sambahayan po s
## -998 2 20 25 30 32 40 50 58 60 63 70 75 80 90 96 99 100
## 1 1 6 2 3 1 3 3 1 3 1 1 1 2 2 1 1 7
## 106 107 120 130 140 142 150 158 161 170 175 180 182 184 200 208 212 219
## 1 1 1 1 1 1 8 1 1 1 1 1 1 1 6 1 1 1
## 221 229 230 231 232 238 250 251 258 262 270 300 301 310 312 315 320 347
## 1 1 1 1 1 1 4 1 1 1 1 5 1 1 1 1 3 1
## 350 352 355 364 369 372 380 400 402 406 420 450 453 472 488 500 534 538
## 5 1 1 1 1 1 2 7 1 1 1 4 1 1 1 3 1 1
## 540 600 688 700 706 725 726 750 790 800 900 922 951 1000 1100 1336 1418 1500
## 1 3 1 1 1 1 1 2 1 1 2 1 1 4 1 1 1 1
## 1629 1782 3000 3500 6000 10000 <NA>
## 1 1 3 1 1 1 1
## [1] "Frequency table after encoding"
## bcs_s1q13. How many households in this Barangay are considered poor? Ilang sambahayan po s
## -998 2 20 25 30 32 40 50
## 1 1 6 2 3 1 3 3
## 58 60 63 70 75 80 90 96
## 1 3 1 1 1 2 2 1
## 99 100 106 107 120 130 140 142
## 1 7 1 1 1 1 1 1
## 150 158 161 170 175 180 182 184
## 8 1 1 1 1 1 1 1
## 200 208 212 219 221 229 230 231
## 6 1 1 1 1 1 1 1
## 232 238 250 251 258 262 270 300
## 1 1 4 1 1 1 1 5
## 301 310 312 315 320 347 350 352
## 1 1 1 1 3 1 5 1
## 355 364 369 372 380 400 402 406
## 1 1 1 1 2 7 1 1
## 420 450 453 472 488 500 534 538
## 1 4 1 1 1 3 1 1
## 540 600 688 700 706 725 726 750
## 1 3 1 1 1 1 1 2
## 790 800 900 922 951 1000 1100 1336
## 1 1 2 1 1 4 1 1
## 1418 1500 1629 1782 3000 3500 6000 6740 or more
## 1 1 1 1 3 1 1 1
## <NA>
## 1
# Top-code bcs_s1q16. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q16)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q16",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q16. What percentage of households in this Barangay are connected to electric power?
## 0 1 2 3 5 10 13 15 16 20 22 25 30 35 39 40 47 50 68 83 95 99
## 28 14 8 8 38 24 1 1 1 15 2 3 6 1 2 3 1 1 1 1 1 1
## 100 <NA>
## 2 2
## [1] "Frequency table after encoding"
## bcs_s1q16. What percentage of households in this Barangay are connected to electric power?
## 0 1 2 3 5 10 13 15 16
## 28 14 8 8 38 24 1 1 1
## 20 22 25 30 35 39 40 47 50
## 15 2 3 6 1 2 3 1 1
## 68 83 95 99 100 or more <NA>
## 1 1 1 1 2 2
# Top-code bcs_s1q17. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q17)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q17",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q17. What percentage of households in this Barangay own and use cellular phones? Ila
## 10 30 50 60 70 80 85 90 93 95 98 99 100 <NA>
## 1 1 3 3 2 16 4 30 1 19 5 16 63 1
## [1] "Frequency table after encoding"
## bcs_s1q17. What percentage of households in this Barangay own and use cellular phones? Ila
## 10 30 50 60 70 80 85 90 93
## 1 1 3 3 2 16 4 30 1
## 95 98 99 100 or more <NA>
## 19 5 16 63 1
# Top-code bcs_s1q18. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
# NOTE(review): the -998 code visible in the recorded frequency table is not
# filtered here, so it takes part in the percentile - confirm this is intended.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_s1q18)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q18",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q18. What percentage of households in the Barangay can regularly access the internet?
## -998 0 1 2 3 5 10 15 20 23 25 30 38 40 45 50 59 60 70 75 80 90
## 1 8 3 4 5 25 18 4 10 1 1 13 1 4 1 29 1 8 9 2 12 4
## <NA>
## 1
## [1] "Frequency table after encoding"
## bcs_s1q18. What percentage of households in the Barangay can regularly access the internet?
## -998 0 1 2 3 5 10 15 20 23
## 1 8 3 4 5 25 18 4 10 1
## 25 30 38 40 45 50 59 60 70 75
## 1 13 1 4 1 29 1 8 9 2
## 80 90 or more <NA>
## 12 4 1
# Top-code bcs_ty_2. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
# NOTE(review): the -998 code visible in the recorded frequency table is not
# filtered here, so it takes part in the percentile - confirm this is intended.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_2)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_2. How many typhoons have directly impacted this Barangay in the last 12 months? I
## -998 1 2 3 4 <NA>
## 1 55 15 13 3 78
## [1] "Frequency table after encoding"
## bcs_ty_2. How many typhoons have directly impacted this Barangay in the last 12 months? I
## -998 1 2 3 4 or more <NA>
## 1 55 15 13 3 78
# Top-code bcs_ty_5. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_5)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_5",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_5. How much would you estimate was the cost of this damage in total over the last 1
## 20000 25000 50000 80000 1e+05 150000 2e+05 250000 3e+05 5e+05 6e+05 7e+05 720000
## 2 1 5 1 3 2 3 1 2 5 2 2 1
## 750000 8e+05 9e+05 1e+06 1500000 2e+06 2500000 3e+06 3500000 4e+06 5e+06 1e+07 2e+07
## 1 3 1 9 1 3 1 10 1 2 11 4 1
## 5e+07 1e+08 <NA>
## 2 1 84
## [1] "Frequency table after encoding"
## bcs_ty_5. How much would you estimate was the cost of this damage in total over the last 1
## 20000 25000 50000 80000 1e+05 150000
## 2 1 5 1 3 2
## 2e+05 250000 3e+05 5e+05 6e+05 7e+05
## 3 1 2 5 2 2
## 720000 750000 8e+05 9e+05 1e+06 1500000
## 1 1 3 1 9 1
## 2e+06 2500000 3e+06 3500000 4e+06 5e+06
## 3 1 10 1 2 11
## 1e+07 2e+07 5e+07 79999999 or more <NA>
## 4 1 2 1 84
# Top-code bcs_ty_7. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_7)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_7",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_7. How many people have been killed as a result of typhoons in the last 12 months?
## 1 5 6 <NA>
## 2 1 1 161
## [1] "Frequency table after encoding"
## bcs_ty_7. How many people have been killed as a result of typhoons in the last 12 months?
## 1 5 or more <NA>
## 2 2 161
# Top-code bcs_ty_10. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_10)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_10",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_10. In total, for how many days was the barangay without power? Sa kabuuan, ilang a
## 1 2 3 4 5 7 14 15 21 25 30 40 45 60 90 <NA>
## 5 7 7 2 2 12 6 3 3 1 19 1 4 12 3 78
## [1] "Frequency table after encoding"
## bcs_ty_10. In total, for how many days was the barangay without power? Sa kabuuan, ilang a
## 1 2 3 4 5 7 14 15 21 25
## 5 7 7 2 2 12 6 3 3 1
## 30 40 45 60 90 or more <NA>
## 19 1 4 12 3 78
# Top-code bcs_ty_12. The cut-off is the floor of the 99.5th percentile of the
# observed values, leaving NAs and the 999999 missing code out of the estimate.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_12)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_12",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_12. In total, for how many days was transportation disrupted? Sa kabuuan, ilang ara
## 1 2 3 4 5 7 14 21 30 60 <NA>
## 18 8 2 4 2 8 1 1 2 1 118
## [1] "Frequency table after encoding"
## bcs_ty_12. In total, for how many days was transportation disrupted? Sa kabuuan, ilang ara
## 1 2 3 4 5 7 14 21 30 53 or more
## 18 8 2 4 2 8 1 1 2 1
## <NA>
## 118
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Indirect-PII candidate variables, handed to capture_tables() for review.
indirect_PII <- c(
  "bcs_s0q21",
  "bcs_s1q1", "bcs_s1q3", "bcs_s1q4", "bcs_s1q14", "bcs_s1q15",
  "bcs_ty_1", "bcs_ty_3", "bcs_ty_4", "bcs_ty_6", "bcs_ty_8", "bcs_ty_9",
  "bcs_ty_11",
  "bcs_vio_5", "bcs_vio_7", "bcs_vio_9",
  "bcs_qua_7",
  "bcs_rec_1", "bcs_rec_4",
  # bcs_dev_3__k and bcs_dev_5__k for k = 1..6, interleaved in the order
  # 3__1, 5__1, 3__2, 5__2, ...
  paste0("bcs_dev_", c(3, 5), "__", rep(1:6, each = 2))
)
capture_tables(indirect_PII)
# Recode those with very specific values.
# Recode bcs_s1q1 (official role): the first two codes keep their own labels
# and codes 3 to 6 all share the label "Other".
break_activity <- c(1, 2, 3, 4, 5, 6)
labels_activity <- setNames(
  break_activity,
  c("Barangay Captain", "Counselor", rep("Other", 4))
)
mydata <- ordinal_recode(
  variable = "bcs_s1q1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)
## [1] "Frequency table before encoding"
## bcs_s1q1. What is your official role in ${calc_barangay}? Ano po ang inyong opisyal na p
## Barangay Captain Counselor /Kagawad Secretary
## 92 36 24
## Treasurer Barangay Health Worker (BHW) Barangay Tanod
## 6 2 2
## Other - specify <NA>
## 2 1
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,6) [6,1e+06)
## 1 92 0 0 0 0 0
## 2 0 36 0 0 0 0
## 3 0 0 24 0 0 0
## 4 0 0 0 6 0 0
## 5 0 0 0 0 2 0
## 6 0 0 0 0 0 2
## 99 0 0 0 0 0 2
## [1] "Frequency table after encoding"
## bcs_s1q1. What is your official role in ${calc_barangay}? Ano po ang inyong opisyal na p
## Barangay Captain Counselor Other <NA>
## 92 36 36 1
## [1] "Inspect value labels and relabel as necessary"
## Barangay Captain Counselor Other Other Other Other
## 1 2 3 4 5 6
# Recode bcs_s1q4 (previous official role): only code 2 keeps a specific
# label ("Counselor"); code 1 and codes 3 to 6 are all labelled "Other".
break_activity <- c(1, 2, 3, 4, 5, 6)
labels_activity <- setNames(
  break_activity,
  c("Other", "Counselor", rep("Other", 4))
)
mydata <- ordinal_recode(
  variable = "bcs_s1q4",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)
## [1] "Frequency table before encoding"
## bcs_s1q4. What was this official role? Ano po ang opisyal na posisyong ito?
## Barangay Captain Counselor /Kagawad Secretary
## 3 65 6
## Treasurer Barangay Health Worker (BHW) Barangay Tanod
## 2 2 3
## Other - specify <NA>
## 8 76
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,6) [6,1e+06)
## 1 3 0 0 0 0 0
## 2 0 65 0 0 0 0
## 3 0 0 6 0 0 0
## 4 0 0 0 2 0 0
## 5 0 0 0 0 2 0
## 6 0 0 0 0 0 3
## 99 0 0 0 0 0 8
## [1] "Frequency table after encoding"
## bcs_s1q4. What was this official role? Ano po ang opisyal na posisyong ito?
## Other Counselor <NA>
## 24 65 76
## [1] "Inspect value labels and relabel as necessary"
## Other Counselor Other Other Other Other
## 1 2 3 4 5 6
# Recode bcs_s1q15 (main source of electric power).
# Labels 1..6 appear to map positionally onto the intervals opened by the six
# break points (see the recorded "recoded" table) - confirm against the helper.
break_source <- c(-999, -998, 1, 2, 3, 4)
labels_source <- setNames(
  as.numeric(1:6),
  c(
    "Refused to answer",
    "Don't know",
    "Connected to Grid",
    "Local Hydroelectric",
    "Other",
    "Other"
  )
)
mydata <- ordinal_recode(
  variable = "bcs_s1q15",
  break_points = break_source,
  missing = 999999,
  value_labels = labels_source
)
## [1] "Frequency table before encoding"
## bcs_s1q15. What is the main source of electric power? Ano po ang pangunahing pinagkukunan
## Connected to Grid Generators <NA>
## 161 2 2
## recoded
## [-999,-998) [-998,1) [1,2) [2,3) [3,4) [4,1e+06)
## 1 0 0 161 0 0 0
## 3 0 0 0 0 2 0
## [1] "Frequency table after encoding"
## bcs_s1q15. What is the main source of electric power? Ano po ang pangunahing pinagkukunan
## Connected to Grid Other <NA>
## 161 2 2
## [1] "Inspect value labels and relabel as necessary"
## Refused to answer Don't know Connected to Grid Local Hydroelectric Other
## 1 2 3 4 5
## Other
## 6
# !!!Insufficient demographic data
# !!! Identify open-end variables here:
# Open-ended (free-text) variables passed to report_open() for review.
open_ends <- c(
  "bcs_scq2", "bcs_scq4",
  "bcs_s1q1_other", "bcs_s1q4_other",
  paste0("bcs_political", 1:3, "other"),
  "bcs_s1q15other", "bcs_s1q19other",
  "bcs_calc_4",
  paste0("bcs_qua_", c(3:6, 8, 9)),
  # bcs_dev_2__k, bcs_dev_4__k, bcs_dev_6__k for k = 1..6, grouped by k
  paste0("bcs_dev_", c(2, 4, 6), "__", rep(1:6, each = 3))
)
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# Redact the flagged verbatim responses; the indices are row positions in mydata.
mydata$bcs_s1q1_other[c(56, 102)] <- "Other"
mydata$bcs_s1q4_other[c(5, 7, 8, 12, 98, 107, 131, 149)] <- "Other"
# Rows 151 and 152 get separate replacement texts ("by" / "buy"), kept exactly
# as they were recorded.
mydata$bcs_political2other[c(151, 152)] <- c(
  "[small location redacted] is not covered by prov.council",
  "[small location redacted] is not covered buy prov.council"
)
# !!!No GPS data
# Write mydata out in Stata (.dta) and SPSS (.sav) format, both named
# "<filename>_PU".
haven::write_dta(mydata, sprintf("%s_PU.dta", filename))
haven::write_sav(mydata, sprintf("%s_PU.sav", filename))
# Report title, built from the same file stem.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)