# Start from a clean workspace, including hidden (dot-prefixed) objects.
# `all.names` has to be a logical: the bare symbol `t` resolves to base R's
# transpose function, not TRUE, so the argument is spelled out in full here.
rm(list = ls(all.names = TRUE))

filename <- "bhsection2" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file

# Run the helper script named above.
# NOTE(review): presumably this defines the recoding helpers used further down
# and loads `mydata` -- confirm against the helper file.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (population <100,000)
# Large Location: Location (population >100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!! No Direct PII
# !!! No Direct PII-team
# !!! Include relevant variables, but check their population size first to confirm they are <100,000
# Remove the columns listed in `dropvars` from the data set.
dropvars <- "dise"
mydata <- mydata[!(names(mydata) %in% dropvars)]

# Location identifiers whose codes are replaced by encoded values; the
# frequency tables printed below show the same counts under new codes.
# NOTE(review): 999999 is presumably the code used for missing values -- confirm
# against the definition of encode_location().
locvars <- c("q006_block_id", "q007_vlg_id")
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## q006_block_id. 6 Block Code
## 1 2 3 4 5 6 7 8 9 <NA>
## 1265 1042 1343 2751 585 1336 915 2905 3681 217
## [1] "Frequency table after encoding"
## q006_block_id. 6 Block Code
## 279 280 281 282 283 284 285 286 287 <NA>
## 1265 3681 585 1042 2905 1336 2751 1343 915 217
## [1] "Frequency table before encoding"
## q007_vlg_id. 7 Village Code
## 1 2 3 4 5 6 7 9 10 11 12 13 15 16 17 18 19 20
## 133 105 92 106 149 203 175 101 93 131 166 132 92 107 139 106 125 128
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
## 195 158 131 135 236 191 173 139 109 106 181 164 143 120 260 139 118 138
## 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
## 175 116 108 108 118 186 143 147 119 109 112 94 103 129 232 158 170 151
## 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
## 120 132 79 159 180 102 136 109 211 117 155 122 177 98 100 137 113 156
## 75 76 77 78 80 81 82 83 84 85 87 88 89 90 91 92 93 94
## 131 107 150 224 186 119 164 98 115 89 116 169 82 118 127 143 133 83
## 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
## 116 172 160 129 187 98 160 149 93 127 110 224 107 173 135 127 132 189
## 113 114 115 116 117 118 119 <NA>
## 87 157 129 92 173 141 102 217
## [1] "Frequency table after encoding"
## q007_vlg_id. 7 Village Code
## 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
## 79 100 181 186 107 139 127 107 156 129 166 115 160 127 128 139 189 108
## 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 120 131 143 195 138 109 93 120 160 106 87 139 172 132 82 143 93 108
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
## 137 133 113 131 155 107 159 92 136 116 102 149 151 118 92 127 236 132
## 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
## 122 149 112 109 89 164 94 186 133 141 135 116 83 177 170 92 105 173
## 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
## 110 116 180 150 173 119 224 101 211 106 118 147 164 129 118 187 109 135
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
## 158 191 125 169 158 131 232 98 173 175 203 98 157 98 106 103 175 129
## 373 374 375 376 377 378 379 <NA>
## 143 132 260 224 102 119 117 217
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top-code age: per the frequency tables below, values of 80 and above end up
# in a single "80 or more" category.
mydata <- top_recode(
  variable = "q203_age",
  break_point = 80,
  missing = NA
)
## [1] "Frequency table before encoding"
## q203_age. 203 How old is ?
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 80 194 240 291 269 445 409 528 562 610 1187 789 889 655 536 503 422 310
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
## 339 137 210 93 116 78 67 201 93 86 190 70 622 59 292 149 113 864
## 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## 119 110 215 69 627 32 117 52 28 329 34 26 48 18 234 13 33 13
## 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
## 11 166 25 17 47 10 290 15 31 23 6 148 14 8 25 6 146 6
## 72 73 74 75 76 77 78 79 80 82 83 85 86 88 90 91 92 93
## 19 6 3 31 8 2 5 5 56 3 2 22 1 1 18 1 2 1
## 95 97 99 100 102 108 <NA>
## 3 1 1 1 1 1 37
## [1] "Frequency table after encoding"
## q203_age. 203 How old is ?
## 0 1 2 3 4 5 6 7
## 80 194 240 291 269 445 409 528
## 8 9 10 11 12 13 14 15
## 562 610 1187 789 889 655 536 503
## 16 17 18 19 20 21 22 23
## 422 310 339 137 210 93 116 78
## 24 25 26 27 28 29 30 31
## 67 201 93 86 190 70 622 59
## 32 33 34 35 36 37 38 39
## 292 149 113 864 119 110 215 69
## 40 41 42 43 44 45 46 47
## 627 32 117 52 28 329 34 26
## 48 49 50 51 52 53 54 55
## 48 18 234 13 33 13 11 166
## 56 57 58 59 60 61 62 63
## 25 17 47 10 290 15 31 23
## 64 65 66 67 68 69 70 71
## 6 148 14 8 25 6 146 6
## 72 73 74 75 76 77 78 79
## 19 6 3 31 8 2 5 5
## 80 or more <NA>
## 115 37
# Top-code age at first school enrolment: per the frequency tables below,
# values of 12 and above end up in a single "12 or more" category.
mydata <- top_recode(
  variable = "q208_age",
  break_point = 12,
  missing = NA
)
## [1] "Frequency table before encoding"
## q208_age. 208 At what age did first enroll in school?
## age 2 3 4 5 6 7 8 9 10 11 12 13 14 16 17 18 30
## 6 50 682 782 4307 2479 578 215 64 59 10 20 4 6 2 1 1 1
## 37 40 78 <NA>
## 1 2 1 6769
## [1] "Frequency table after encoding"
## q208_age. 208 At what age did first enroll in school?
## age 2 3 4 5 6 7 8
## 6 50 682 782 4307 2479 578 215
## 9 10 11 12 or more <NA>
## 64 59 10 39 6769
# Top-code school enrolment fees: per the frequency tables below, values of
# 25000 and above end up in a single "25000 or more" category.
mydata <- top_recode(
  variable = "q217_fee",
  break_point = 25000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q217_fee. 217 How much did you have to pay in fees to ’s school in order to enroll him/her
## 0 1 2 3 5 6 7 8 10 11 12 15 20
## 4558 1 12 1 11 2 1 1 59 14 9 12 172
## 21 22 25 30 35 40 42 50 51 52 60 65 70
## 6 2 21 46 1 13 2 178 4 1 5 6 5
## 75 80 100 110 115 120 125 130 135 140 150 160 165
## 2 1 151 5 1 6 1 6 1 2 73 3 2
## 170 200 210 220 225 230 250 260 265 270 275 280 290
## 3 87 2 3 1 1 52 2 3 2 8 1 1
## 300 307 315 320 325 327 330 345 350 355 360 365 370
## 86 1 1 1 1 1 2 1 44 3 9 8 4
## 375 380 385 400 420 425 450 460 463 465 470 475 480
## 3 1 1 33 1 1 34 3 1 1 1 2 1
## 490 500 510 520 525 550 560 575 600 615 650 665 700
## 1 143 1 1 2 17 1 2 22 3 7 6 12
## 720 735 750 800 850 870 900 950 960 1000 1010 1020 1050
## 2 2 6 11 4 2 1 2 1 49 1 1 1
## 1100 1150 1160 1200 1250 1300 1400 1450 1500 1530 1551 1560 1580
## 6 2 1 19 2 8 2 1 33 2 1 3 1
## 1600 1620 1700 1720 1780 1800 1900 2000 2100 2200 2300 2400 2500
## 4 1 3 1 1 6 1 50 2 3 1 10 21
## 2700 2800 2900 3000 3050 3100 3300 3360 3500 3550 3600 3700 3800
## 3 2 1 50 1 1 2 1 21 1 17 1 3
## 3900 3960 4000 4200 4440 4500 4560 4680 4770 4800 4850 5000 5200
## 3 1 31 8 1 9 1 1 1 8 1 43 1
## 5400 5500 5540 5600 6000 6100 6480 6500 6550 6600 6640 7000 7200
## 3 5 1 1 38 2 1 3 1 2 1 19 3
## 7400 7500 7700 7800 8000 8400 8500 8900 9000 9100 9500 10000 11000
## 1 2 1 2 27 1 2 1 7 1 1 17 1
## 11130 11700 12000 12050 12500 13000 13500 14000 14400 15000 16000 16800 20000
## 1 1 9 1 3 1 1 2 1 9 1 1 2
## 22000 24000 25000 26000 27000 30000 35000 36000 40000 42000 45000 48000 50000
## 2 11 4 1 1 7 3 1 1 1 2 1 1
## 60000 70000 150000 195000 <NA>
## 1 1 1 1 9316
## [1] "Frequency table after encoding"
## q217_fee. 217 How much did you have to pay in fees to ’s school in order to enroll him/her
## 0 1 2 3 5 6
## 4558 1 12 1 11 2
## 7 8 10 11 12 15
## 1 1 59 14 9 12
## 20 21 22 25 30 35
## 172 6 2 21 46 1
## 40 42 50 51 52 60
## 13 2 178 4 1 5
## 65 70 75 80 100 110
## 6 5 2 1 151 5
## 115 120 125 130 135 140
## 1 6 1 6 1 2
## 150 160 165 170 200 210
## 73 3 2 3 87 2
## 220 225 230 250 260 265
## 3 1 1 52 2 3
## 270 275 280 290 300 307
## 2 8 1 1 86 1
## 315 320 325 327 330 345
## 1 1 1 1 2 1
## 350 355 360 365 370 375
## 44 3 9 8 4 3
## 380 385 400 420 425 450
## 1 1 33 1 1 34
## 460 463 465 470 475 480
## 3 1 1 1 2 1
## 490 500 510 520 525 550
## 1 143 1 1 2 17
## 560 575 600 615 650 665
## 1 2 22 3 7 6
## 700 720 735 750 800 850
## 12 2 2 6 11 4
## 870 900 950 960 1000 1010
## 2 1 2 1 49 1
## 1020 1050 1100 1150 1160 1200
## 1 1 6 2 1 19
## 1250 1300 1400 1450 1500 1530
## 2 8 2 1 33 2
## 1551 1560 1580 1600 1620 1700
## 1 3 1 4 1 3
## 1720 1780 1800 1900 2000 2100
## 1 1 6 1 50 2
## 2200 2300 2400 2500 2700 2800
## 3 1 10 21 3 2
## 2900 3000 3050 3100 3300 3360
## 1 50 1 1 2 1
## 3500 3550 3600 3700 3800 3900
## 21 1 17 1 3 3
## 3960 4000 4200 4440 4500 4560
## 1 31 8 1 9 1
## 4680 4770 4800 4850 5000 5200
## 1 1 8 1 43 1
## 5400 5500 5540 5600 6000 6100
## 3 5 1 1 38 2
## 6480 6500 6550 6600 6640 7000
## 1 3 1 2 1 19
## 7200 7400 7500 7700 7800 8000
## 3 1 2 1 2 27
## 8400 8500 8900 9000 9100 9500
## 1 2 1 7 1 1
## 10000 11000 11130 11700 12000 12050
## 17 1 1 1 9 1
## 12500 13000 13500 14000 14400 15000
## 3 1 1 2 1 9
## 16000 16800 20000 22000 24000 25000 or more
## 1 1 2 2 11 27
## <NA>
## 9316
# Top-code out-of-pocket education spending: per the frequency tables below,
# values of 25000 and above end up in a single "25000 or more" category.
mydata <- top_recode(
  variable = "q218_edu_cost",
  break_point = 25000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q218_edu_cost. 218 In the last 12 months, how much has this household spent out of pocket for '
## 0 5 13 14 50 60 100 110 120 150 180 200 220
## 42 1 1 1 2 2 8 3 1 9 1 34 1
## 250 265 270 300 335 350 360 365 400 420 450 500 510
## 6 1 2 45 1 6 3 2 53 1 6 328 1
## 520 540 550 580 590 600 630 640 650 660 680 700 750
## 2 3 14 1 1 100 1 1 10 3 1 82 5
## 760 780 800 820 850 900 930 950 960 1000 1025 1050 1060
## 1 1 109 4 6 20 1 6 1 1001 1 1 1
## 1100 1150 1160 1200 1250 1300 1340 1400 1450 1500 1600 1650 1700
## 17 3 1 155 8 13 1 3 3 749 7 1 3
## 1800 1900 1950 2000 2100 2150 2200 2300 2400 2500 2600 2650 2700
## 17 1 3 1192 3 1 7 1 5 289 1 1 4
## 2800 3000 3250 3333 3400 3500 3600 4000 4200 4500 4600 4800 5000
## 1 737 1 4 1 34 11 234 1 5 1 3 472
## 5200 5500 5770 5800 6000 6120 6500 6900 7000 7300 7500 7800 8000
## 1 2 1 1 145 1 1 1 67 1 1 1 73
## 8400 8500 8600 8700 9000 9600 9800 10000 11000 11200 12000 12200 13000
## 2 1 1 1 9 2 1 143 4 1 39 1 5
## 14000 15000 16000 18000 19000 19500 20000 21650 22000 23000 24000 25000 30000
## 3 56 7 2 1 1 24 1 2 1 2 11 10
## 36000 40000 48000 50000 60000 61200 65000 70000 72000 1e+05 105000 <NA>
## 4 3 1 7 1 1 2 3 1 4 1 9453
## [1] "Frequency table after encoding"
## q218_edu_cost. 218 In the last 12 months, how much has this household spent out of pocket for '
## 0 5 13 14 50 60
## 42 1 1 1 2 2
## 100 110 120 150 180 200
## 8 3 1 9 1 34
## 220 250 265 270 300 335
## 1 6 1 2 45 1
## 350 360 365 400 420 450
## 6 3 2 53 1 6
## 500 510 520 540 550 580
## 328 1 2 3 14 1
## 590 600 630 640 650 660
## 1 100 1 1 10 3
## 680 700 750 760 780 800
## 1 82 5 1 1 109
## 820 850 900 930 950 960
## 4 6 20 1 6 1
## 1000 1025 1050 1060 1100 1150
## 1001 1 1 1 17 3
## 1160 1200 1250 1300 1340 1400
## 1 155 8 13 1 3
## 1450 1500 1600 1650 1700 1800
## 3 749 7 1 3 17
## 1900 1950 2000 2100 2150 2200
## 1 3 1192 3 1 7
## 2300 2400 2500 2600 2650 2700
## 1 5 289 1 1 4
## 2800 3000 3250 3333 3400 3500
## 1 737 1 4 1 34
## 3600 4000 4200 4500 4600 4800
## 11 234 1 5 1 3
## 5000 5200 5500 5770 5800 6000
## 472 1 2 1 1 145
## 6120 6500 6900 7000 7300 7500
## 1 1 1 67 1 1
## 7800 8000 8400 8500 8600 8700
## 1 73 2 1 1 1
## 9000 9600 9800 10000 11000 11200
## 9 2 1 143 4 1
## 12000 12200 13000 14000 15000 16000
## 39 1 5 3 56 7
## 18000 19000 19500 20000 21650 22000
## 2 1 1 24 1 2
## 23000 24000 25000 or more <NA>
## 1 2 49 9453
# Top-code age at which school attendance stopped: per the frequency tables
# below, values of 25 and above end up in a single "25 or more" category.
mydata <- top_recode(
  variable = "q221_age_stop_schl",
  break_point = 25,
  missing = NA
)
## [1] "Frequency table before encoding"
## q221_age_stop_schl. 221 At what age did stop attending school?
## 0 1 5 6 7 8 9 10 11 12 13 14 15 16 17
## 2 1 7 22 34 87 104 184 223 253 230 266 256 222 154
## 18 19 20 21 22 23 24 25 26 27 28 29 30 35 40
## 120 46 47 21 17 5 7 9 2 3 2 2 4 1 1
## <NA>
## 13708
## [1] "Frequency table after encoding"
## q221_age_stop_schl. 221 At what age did stop attending school?
## 0 1 5 6 7 8 9 10
## 2 1 7 22 34 87 104 184
## 11 12 13 14 15 16 17 18
## 223 253 230 266 256 222 154 120
## 19 20 21 22 23 24 25 or more <NA>
## 46 47 21 17 5 7 24 13708
# Top-code employment income (last 7 days): per the frequency tables below,
# values of 10000 and above end up in a single "10000 or more" category.
mydata <- top_recode(
  variable = "q224_income",
  break_point = 10000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q224_income. 224 How much income in cash or in kind did earn from employment in the last 7 d
## 0 21 28 35 50 60 63 70 71 84 100 105 108
## 207 3 2 1 1 1 5 7 1 5 16 1 1
## 112 120 125 133 135 140 150 160 161 162 165 168 170
## 2 7 2 1 1 19 17 2 1 1 1 1 1
## 175 180 190 200 210 214 225 231 233 240 245 250 270
## 5 4 1 60 21 1 2 5 1 1 3 17 1
## 280 300 314 335 350 360 390 396 400 420 450 466 480
## 9 48 1 1 48 2 2 1 69 18 10 2 2
## 490 495 500 510 525 539 540 550 560 571 581 600 625
## 9 1 58 1 1 1 2 2 12 1 2 114 1
## 630 650 654 660 700 714 720 750 770 790 800 812 817
## 3 3 1 1 313 2 5 38 3 1 47 1 1
## 819 825 840 850 857 875 900 910 931 933 938 945 950
## 1 1 20 12 2 2 42 4 3 2 3 1 3
## 960 996 1000 1005 1010 1025 1050 1100 1125 1148 1150 1162 1166
## 1 2 116 2 1 5 143 5 2 1 1 5 2
## 1190 1200 1225 1243 1250 1260 1295 1310 1320 1323 1330 1350 1360
## 3 78 1 1 20 8 1 2 1 2 2 1 1
## 1395 1400 1428 1450 1470 1500 1512 1520 1550 1585 1596 1600 1610
## 1 776 3 4 1 77 1 1 1 1 1 15 3
## 1631 1633 1650 1675 1680 1700 1750 1800 1806 1862 1866 1890 1900
## 6 6 4 1 2 8 87 28 1 1 2 3 2
## 2000 2010 2050 2100 2150 2200 2250 2300 2331 2333 2350 2400 2450
## 44 1 1 528 1 1 4 2 2 2 1 17 39
## 2500 2566 2600 2625 2665 2700 2800 3000 3150 3200 3250 3500 3600
## 24 1 3 1 1 2 112 30 4 5 3 186 7
## 3800 3850 4000 4200 4300 4400 4500 4550 4800 4900 5000 5250 5362
## 1 5 2 26 1 1 3 1 1 6 6 1 1
## 5538 5600 6000 6300 6400 6500 6566 7000 7142 7200 7500 7700 8000
## 1 6 1 1 1 4 1 12 1 1 3 2 4
## 8400 8631 9165 9333 10000 10500 12000 12600 14000 15000 15900 16500 21500
## 2 1 1 1 1 4 3 2 2 4 1 1 1
## 24500 28000 34000 714200 <NA>
## 2 2 1 1 12090
## [1] "Frequency table after encoding"
## q224_income. 224 How much income in cash or in kind did earn from employment in the last 7 d
## 0 21 28 35 50 60
## 207 3 2 1 1 1
## 63 70 71 84 100 105
## 5 7 1 5 16 1
## 108 112 120 125 133 135
## 1 2 7 2 1 1
## 140 150 160 161 162 165
## 19 17 2 1 1 1
## 168 170 175 180 190 200
## 1 1 5 4 1 60
## 210 214 225 231 233 240
## 21 1 2 5 1 1
## 245 250 270 280 300 314
## 3 17 1 9 48 1
## 335 350 360 390 396 400
## 1 48 2 2 1 69
## 420 450 466 480 490 495
## 18 10 2 2 9 1
## 500 510 525 539 540 550
## 58 1 1 1 2 2
## 560 571 581 600 625 630
## 12 1 2 114 1 3
## 650 654 660 700 714 720
## 3 1 1 313 2 5
## 750 770 790 800 812 817
## 38 3 1 47 1 1
## 819 825 840 850 857 875
## 1 1 20 12 2 2
## 900 910 931 933 938 945
## 42 4 3 2 3 1
## 950 960 996 1000 1005 1010
## 3 1 2 116 2 1
## 1025 1050 1100 1125 1148 1150
## 5 143 5 2 1 1
## 1162 1166 1190 1200 1225 1243
## 5 2 3 78 1 1
## 1250 1260 1295 1310 1320 1323
## 20 8 1 2 1 2
## 1330 1350 1360 1395 1400 1428
## 2 1 1 1 776 3
## 1450 1470 1500 1512 1520 1550
## 4 1 77 1 1 1
## 1585 1596 1600 1610 1631 1633
## 1 1 15 3 6 6
## 1650 1675 1680 1700 1750 1800
## 4 1 2 8 87 28
## 1806 1862 1866 1890 1900 2000
## 1 1 2 3 2 44
## 2010 2050 2100 2150 2200 2250
## 1 1 528 1 1 4
## 2300 2331 2333 2350 2400 2450
## 2 2 2 1 17 39
## 2500 2566 2600 2625 2665 2700
## 24 1 3 1 1 2
## 2800 3000 3150 3200 3250 3500
## 112 30 4 5 3 186
## 3600 3800 3850 4000 4200 4300
## 7 1 5 2 26 1
## 4400 4500 4550 4800 4900 5000
## 1 3 1 1 6 6
## 5250 5362 5538 5600 6000 6300
## 1 1 1 6 1 1
## 6400 6500 6566 7000 7142 7200
## 1 4 1 12 1 1
## 7500 7700 8000 8400 8631 9165
## 3 2 4 2 1 1
## 9333 10000 or more <NA>
## 1 25 12090
# !!! Include relevant variables in the list below
# (Indirect PII - Categorical, and Ordinal if not processed yet).
indirect_PII <- c(
  "q206_rel",
  "q209_miss_schl",
  "q212_grade",
  "q213_edu",
  "q215_grade",
  "q216_schl_type",
  "q222_emp_status"
)

# NOTE(review): capture_tables() presumably records frequency tables for the
# listed variables so they can be reviewed -- confirm in the helper file.
capture_tables(indirect_PII)
# Recode variables that have very specific (low-frequency) values.
# Relationship status: the cross-tabulation printed below shows original codes
# 5 (Divorced) and 6 (Widowed/Widower) both landing in the interval [5,99),
# which is relabelled "Divorced/Widowed/Widower".
break_rel <- c(1:5, 99)
labels_rel <- c(
  "Single/not committed" = 1,
  "Single, committed or engaged" = 2,
  "Currently Married and cohabitating" = 3,
  "Married but not cohabitating" = 4,
  "Divorced/Widowed/Widower" = 5
)
mydata <- ordinal_recode(
  variable = "q206_rel",
  break_points = break_rel,
  missing = 999999,
  value_labels = labels_rel
)
## [1] "Frequency table before encoding"
## q206_rel. 206 Is 's relationship finalized?
## Single/not committed Single, committed or engaged
## 6435 566
## Currently Married and cohabitating Married but not cohabitating
## 5705 1110
## Divorced Widowed/Widower
## 17 621
## <NA>
## 1586
## recoded
## [1,2) [2,3) [3,4) [4,5) [5,99) [99,1e+06)
## 1 6435 0 0 0 0 0
## 2 0 566 0 0 0 0
## 3 0 0 5705 0 0 0
## 4 0 0 0 1110 0 0
## 5 0 0 0 0 17 0
## 6 0 0 0 0 621 0
## [1] "Frequency table after encoding"
## q206_rel. 206 Is 's relationship finalized?
## Single/not committed Single, committed or engaged
## 6435 566
## Currently Married and cohabitating Married but not cohabitating
## 5705 1110
## Divorced/Widowed/Widower <NA>
## 638 1586
## [1] "Inspect value labels and relabel as necessary"
## Single/not committed Single, committed or engaged
## 1 2
## Currently Married and cohabitating Married but not cohabitating
## 3 4
## Divorced/Widowed/Widower
## 5
# Print the current value labels of q215_grade before deciding on break points.
val_labels(mydata[["q215_grade"]])
## Other: Specify
## -96
## Never Attended School or Only Attended Pre-School
## 0
## Grade 1
## 1
## Grade 2
## 2
## Grade 3
## 3
## Grade 4
## 4
## Grade 5
## 5
## Grade 6
## 6
## Grade 7
## 7
## Grade 8
## 8
## Grade 9
## 9
## Grade 10
## 10
## Grade 11
## 11
## Grade 12
## 12
## University / Not Graduate
## 13
## University / Graduate
## 14
## Post Bachelors Tertiary Education
## 15
## Technical traning
## 16
## Professional studies
## 17
## Below primary
## 18
# Current grade: codes 0-13 each keep their own category. The cross-tabulation
# printed below shows original codes 14-17 all landing in the interval [14,18),
# which is relabelled "University / Graduate or higher", while code 18 stays
# separate as "Below primary".
break_edu <- c(0:14, 18, 19)
labels_edu <- c(
  "Never Attended School or Only Attended Pre-School" = 1,
  "Grade 1" = 2,
  "Grade 2" = 3,
  "Grade 3" = 4,
  "Grade 4" = 5,
  "Grade 5" = 6,
  "Grade 6" = 7,
  "Grade 7" = 8,
  "Grade 8" = 9,
  "Grade 9" = 10,
  "Grade 10" = 11,
  "Grade 11" = 12,
  "Grade 12" = 13,
  "University / Not Graduate" = 14,
  "University / Graduate or higher" = 15,
  "Below primary" = 16
)
mydata <- ordinal_recode(
  variable = "q215_grade",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## q215_grade. 215 At what grade is currently studying?
## Never Attended School or Only Attended Pre-School
## 53
## Grade 1
## 436
## Grade 2
## 507
## Grade 3
## 524
## Grade 4
## 501
## Grade 5
## 2631
## Grade 6
## 395
## Grade 7
## 462
## Grade 8
## 408
## Grade 9
## 309
## Grade 10
## 208
## Grade 11
## 99
## Grade 12
## 98
## University / Not Graduate
## 66
## University / Graduate
## 10
## Post Bachelors Tertiary Education
## 7
## Technical traning
## 6
## Professional studies
## 2
## Below primary
## 2
## <NA>
## 9316
## recoded
## [0,1) [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,10) [10,11) [11,12) [12,13)
## 0 53 0 0 0 0 0 0 0 0 0 0 0 0
## 1 0 436 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 507 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 524 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 501 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 2631 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 395 0 0 0 0 0 0
## 7 0 0 0 0 0 0 0 462 0 0 0 0 0
## 8 0 0 0 0 0 0 0 0 408 0 0 0 0
## 9 0 0 0 0 0 0 0 0 0 309 0 0 0
## 10 0 0 0 0 0 0 0 0 0 0 208 0 0
## 11 0 0 0 0 0 0 0 0 0 0 0 99 0
## 12 0 0 0 0 0 0 0 0 0 0 0 0 98
## 13 0 0 0 0 0 0 0 0 0 0 0 0 0
## 14 0 0 0 0 0 0 0 0 0 0 0 0 0
## 15 0 0 0 0 0 0 0 0 0 0 0 0 0
## 16 0 0 0 0 0 0 0 0 0 0 0 0 0
## 17 0 0 0 0 0 0 0 0 0 0 0 0 0
## 18 0 0 0 0 0 0 0 0 0 0 0 0 0
## recoded
## [13,14) [14,18) [18,19) [19,1e+06)
## 0 0 0 0 0
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## 11 0 0 0 0
## 12 0 0 0 0
## 13 66 0 0 0
## 14 0 10 0 0
## 15 0 7 0 0
## 16 0 6 0 0
## 17 0 2 0 0
## 18 0 0 2 0
## [1] "Frequency table after encoding"
## q215_grade. 215 At what grade is currently studying?
## Never Attended School or Only Attended Pre-School
## 53
## Grade 1
## 436
## Grade 2
## 507
## Grade 3
## 524
## Grade 4
## 501
## Grade 5
## 2631
## Grade 6
## 395
## Grade 7
## 462
## Grade 8
## 408
## Grade 9
## 309
## Grade 10
## 208
## Grade 11
## 99
## Grade 12
## 98
## University / Not Graduate
## 66
## University / Graduate or higher
## 25
## Below primary
## 2
## <NA>
## 9316
## [1] "Inspect value labels and relabel as necessary"
## Never Attended School or Only Attended Pre-School
## 1
## Grade 1
## 2
## Grade 2
## 3
## Grade 3
## 4
## Grade 4
## 5
## Grade 5
## 6
## Grade 6
## 7
## Grade 7
## 8
## Grade 8
## 9
## Grade 9
## 10
## Grade 10
## 11
## Grade 11
## 12
## Grade 12
## 13
## University / Not Graduate
## 14
## University / Graduate or higher
## 15
## Below primary
## 16
# Based on the dictionary inspection, choose the variables used to build the
# sdcMicro object.
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# Every name must match a column name in the data file.

# Categorical key variables: gender, age and education.
selectedKeyVars <- c("q204_gender", "q203_age", "q213_edu") ##!!! Replace with candidate categorical demo vars

# Weight variable (add if available).
# selectedWeightVar = c('projwt') ##!!! Replace with weight var

# Household id variable (cluster).
selectedHouseholdID <- "hh_id" ##!!! Replace with household id

# Build the sdcMicro object from the variables assigned above, then print its
# summary (key-variable statistics and k-anonymity violations).
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars,
  hhId = selectedHouseholdID
)
sdcInitial
## The input dataset consists of 16040 rows and 35 variables.
## --> Categorical key variables: q204_gender, q203_age, q213_edu
## --> Cluster/Household-Id variable: hh_id
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## q204_gender 4 (4) 5340.667 (5340.667) 1 (1)
## q203_age 82 (82) 197.568 (197.568) 2 (2)
## q213_edu 11 (11) 1048.700 (1048.700) 33 (33)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 0 (0.000%)
## - 3-anonymity: 0 (0.000%)
## - 5-anonymity: 9 (0.056%)
##
## ----------------------------------------------------------------------
# !!! No open-ends
# !!! No GPS data
# Add "_PU" (Public Use) to the end of the output file name
# Export the processed data as a Stata (.dta) and an SPSS (.sav) file, each
# named "<filename>_PU".
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)
# Build the report title from the data set's file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)