# Start from a clean workspace, including hidden (dot-prefixed) objects.
# The previous call passed `all=t`; `t` is base R's transpose function, not a
# logical, so the intended `all.names = TRUE` was never applied. The argument
# name is also spelled out in full instead of relying on partial matching.
rm(list = ls(all.names = TRUE))

filename <- "bhsection5" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file

# Load the helper functions file named above.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!! No Direct PII
# !!! No Direct PII-team
# !!! Include relevant variables, but check their population size first to confirm they are <100,000
# Remove the columns listed in `dropvars` from `mydata`.
dropvars <- c("dise")
mydata <- mydata[!is.element(names(mydata), dropvars)]

# Encode the location identifiers: the frequency tables printed by
# encode_location() show the original block/village codes replaced by new
# codes with the same counts.
# NOTE(review): `missing = 999999` is presumably the missing-value code used
# by these variables -- confirm against the helper functions file.
locvars <- c("q006_block_id", "q007_vlg_id")
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## q006_block_id. 6 Block Code
## 1 2 3 4 5 6 7 8 9 <NA>
## 194 155 195 407 98 190 143 422 516 33
## [1] "Frequency table after encoding"
## q006_block_id. 6 Block Code
## 279 280 281 282 283 284 285 286 287 <NA>
## 422 155 195 516 407 98 194 143 190 33
## [1] "Frequency table before encoding"
## q007_vlg_id. 7 Village Code
## 1 2 3 4 5 6 7 9 10 11 12 13 15 16 17 18 19 20
## 16 16 16 15 20 31 28 17 15 20 24 24 15 18 21 17 17 18
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
## 30 22 18 17 32 27 26 18 14 15 24 24 22 16 29 18 17 22
## 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
## 27 17 16 18 17 28 20 24 21 19 17 17 16 18 26 24 27 18
## 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
## 17 21 13 24 22 16 18 18 29 16 18 21 25 13 16 19 16 23
## 75 76 77 78 80 81 82 83 84 85 87 88 89 90 91 92 93 94
## 23 17 22 29 30 17 22 17 17 13 16 22 15 19 19 19 21 13
## 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
## 17 22 28 21 25 18 24 21 15 19 14 31 16 27 21 17 21 26
## 113 114 115 116 117 118 119 <NA>
## 14 24 19 16 21 22 16 33
## [1] "Frequency table after encoding"
## q007_vlg_id. 7 Village Code
## 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
## 18 25 25 17 22 18 16 20 17 21 16 16 18 20 24 28 17 15
## 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 17 22 22 18 24 17 31 16 19 16 18 17 21 15 20 16 23 19
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
## 22 15 14 16 17 19 17 21 27 27 14 22 17 17 24 27 29 18
## 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
## 18 16 28 24 22 17 29 13 15 17 30 21 15 16 26 22 24 18
## 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
## 18 19 18 17 16 26 21 13 30 22 28 17 19 21 15 14 13 13
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
## 18 21 16 32 24 21 29 16 24 17 18 19 26 27 17 16 31 16
## 373 374 375 376 377 378 379 <NA>
## 23 21 22 24 19 24 21 33
# Focus on variables whose "Lowest Freq" in the dictionary is 30 or less.
# Top-code q501_cereals: values at or above the break point supplied by
# percentile_checker() are collapsed into a single "... or more" category.
mydata <- top_recode(
  variable = "q501_cereals",
  break_point = percentile_checker("q501_cereals"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q501_cereals. 501 Cereals & Cereal Products including muri, chira, maida, suji, noodles, bread
## 0 15 20 25 26 40 50 60 70 80 100 110 115 126 130
## 3 1 1 1 1 2 2 7 1 1 3 2 1 1 1
## 138 140 160 180 200 205 210 220 230 250 260 270 280 300 315
## 1 1 1 1 13 2 1 2 1 1 2 1 1 10 1
## 330 340 350 360 370 375 380 385 390 400 420 425 440 450 475
## 1 2 5 5 2 1 2 2 1 20 2 2 6 4 2
## 480 495 500 510 525 540 550 560 570 575 595 600 620 625 630
## 1 1 52 1 1 1 12 1 1 1 1 69 1 2 7
## 640 648 650 660 666 675 680 690 700 710 716 720 738 750 760
## 1 1 3 2 1 1 2 2 26 2 1 10 1 7 1
## 775 780 800 810 820 840 850 860 880 900 950 960 975 990 1000
## 1 1 74 1 1 3 10 1 3 32 3 2 1 1 298
## 1008 1020 1025 1036 1040 1050 1056 1060 1070 1080 1084 1100 1140 1150 1160
## 1 8 1 1 5 4 1 3 1 3 1 22 1 4 2
## 1174 1175 1192 1200 1220 1240 1250 1260 1275 1280 1287 1300 1320 1360 1370
## 1 1 1 106 3 1 5 5 1 1 1 5 3 2 1
## 1380 1390 1400 1420 1430 1440 1470 1500 1530 1540 1550 1560 1580 1600 1610
## 1 1 17 1 1 2 1 122 1 2 3 1 1 29 1
## 1620 1640 1650 1690 1700 1750 1760 1778 1800 1840 1845 1850 1860 1880 1896
## 3 1 3 2 21 2 1 1 81 1 1 1 1 1 1
## 1900 1940 1950 1980 2000 2010 2022 2030 2040 2050 2060 2080 2100 2110 2120
## 13 1 1 1 526 2 1 1 3 4 1 3 13 1 2
## 2140 2150 2160 2200 2210 2250 2300 2350 2375 2400 2450 2500 2600 2620 2640
## 1 2 1 50 1 2 9 1 1 6 1 88 3 1 1
## 2700 2750 2782 2800 2850 2900 3000 3008 3050 3100 3150 3160 3188 3200 3250
## 3 2 1 5 2 1 96 1 1 1 1 1 1 5 1
## 3300 3400 3430 3500 3600 3640 3700 3750 3800 3880 3900 4000 4150 4200 4300
## 2 2 1 13 6 1 2 1 1 1 1 70 1 1 1
## 4390 4400 4500 4600 4800 4850 5000 5001 5100 5400 5500 5680 6000 6040 7000
## 1 4 1 1 1 1 33 1 1 2 1 1 14 1 5
## 7340 7600 8000 8800 9900 10000 11000 12000 12200 12500 15000 16500 17000 20000 20250
## 1 1 11 2 1 8 2 1 1 1 5 1 1 2 1
## 20800 22000 25000 34000 80000
## 1 1 6 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q501_cereals. 501 Cereals & Cereal Products including muri, chira, maida, suji, noodles, bread
## 0 15 20 25 26 40
## 3 1 1 1 1 2
## 50 60 70 80 100 110
## 2 7 1 1 3 2
## 115 126 130 138 140 160
## 1 1 1 1 1 1
## 180 200 205 210 220 230
## 1 13 2 1 2 1
## 250 260 270 280 300 315
## 1 2 1 1 10 1
## 330 340 350 360 370 375
## 1 2 5 5 2 1
## 380 385 390 400 420 425
## 2 2 1 20 2 2
## 440 450 475 480 495 500
## 6 4 2 1 1 52
## 510 525 540 550 560 570
## 1 1 1 12 1 1
## 575 595 600 620 625 630
## 1 1 69 1 2 7
## 640 648 650 660 666 675
## 1 1 3 2 1 1
## 680 690 700 710 716 720
## 2 2 26 2 1 10
## 738 750 760 775 780 800
## 1 7 1 1 1 74
## 810 820 840 850 860 880
## 1 1 3 10 1 3
## 900 950 960 975 990 1000
## 32 3 2 1 1 298
## 1008 1020 1025 1036 1040 1050
## 1 8 1 1 5 4
## 1056 1060 1070 1080 1084 1100
## 1 3 1 3 1 22
## 1140 1150 1160 1174 1175 1192
## 1 4 2 1 1 1
## 1200 1220 1240 1250 1260 1275
## 106 3 1 5 5 1
## 1280 1287 1300 1320 1360 1370
## 1 1 5 3 2 1
## 1380 1390 1400 1420 1430 1440
## 1 1 17 1 1 2
## 1470 1500 1530 1540 1550 1560
## 1 122 1 2 3 1
## 1580 1600 1610 1620 1640 1650
## 1 29 1 3 1 3
## 1690 1700 1750 1760 1778 1800
## 2 21 2 1 1 81
## 1840 1845 1850 1860 1880 1896
## 1 1 1 1 1 1
## 1900 1940 1950 1980 2000 2010
## 13 1 1 1 526 2
## 2022 2030 2040 2050 2060 2080
## 1 1 3 4 1 3
## 2100 2110 2120 2140 2150 2160
## 13 1 2 1 2 1
## 2200 2210 2250 2300 2350 2375
## 50 1 2 9 1 1
## 2400 2450 2500 2600 2620 2640
## 6 1 88 3 1 1
## 2700 2750 2782 2800 2850 2900
## 3 2 1 5 2 1
## 3000 3008 3050 3100 3150 3160
## 96 1 1 1 1 1
## 3188 3200 3250 3300 3400 3430
## 1 5 1 2 2 1
## 3500 3600 3640 3700 3750 3800
## 13 6 1 2 1 1
## 3880 3900 4000 4150 4200 4300
## 1 1 70 1 1 1
## 4390 4400 4500 4600 4800 4850
## 1 4 1 1 1 1
## 5000 5001 5100 5400 5500 5680
## 33 1 1 2 1 1
## 6000 6040 7000 7340 7600 8000
## 14 1 5 1 1 11
## 8800 9900 10000 11000 12000 12200
## 2 1 8 2 1 1
## 12500 15000 16500 17000 20000 or more
## 1 5 1 1 13
# Top-code q502_pulses at the break point supplied by percentile_checker();
# values at or above it are reported as one "... or more" category.
mydata <- top_recode(
  variable = "q502_pulses",
  break_point = percentile_checker("q502_pulses"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q502_pulses. 502 Pulses and Pulse Products including soybean, gram products, besan, sattu, et
## 0 3 10 15 20 30 35 40 45 50 60 62 65 70 75
## 19 1 1 1 2 2 2 14 1 17 12 1 1 7 1
## 80 90 95 100 106 110 120 125 130 135 140 142 150 160 170
## 30 7 1 101 1 5 36 2 4 1 24 1 64 41 5
## 175 180 185 190 195 200 208 210 215 220 225 230 235 240 244
## 1 31 1 4 1 289 1 6 2 13 2 8 1 40 1
## 245 250 260 270 280 290 297 300 310 320 325 330 340 350 360
## 2 63 10 6 38 1 1 222 7 27 2 3 10 33 21
## 365 370 375 380 390 400 410 420 425 430 440 450 460 470 480
## 1 2 1 14 4 156 1 3 2 3 4 18 3 1 18
## 490 500 510 520 540 550 560 580 595 600 620 640 650 660 680
## 1 317 3 3 2 11 4 6 1 105 3 3 8 2 3
## 690 700 720 725 730 740 750 800 805 820 840 850 874 880 900
## 1 37 4 1 1 2 7 41 1 1 1 2 1 1 10
## 910 920 950 960 1000 1020 1040 1050 1080 1100 1150 1160 1185 1200 1240
## 1 1 1 1 123 1 1 1 1 3 2 1 1 14 1
## 1250 1280 1300 1400 1500 1505 1580 1600 1800 2000 2100 2200 2400 2500 2700
## 1 1 3 1 28 1 1 4 2 28 1 1 3 2 1
## 3000 3500 3600 4000 4500 5000 5300 5500 6000 7000 7200 9000 10000 12000 20000
## 19 6 1 2 1 9 1 1 5 2 1 3 2 1 1
## 30300
## 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q502_pulses. 502 Pulses and Pulse Products including soybean, gram products, besan, sattu, et
## 0 3 10 15 20 30 35
## 19 1 1 1 2 2 2
## 40 45 50 60 62 65 70
## 14 1 17 12 1 1 7
## 75 80 90 95 100 106 110
## 1 30 7 1 101 1 5
## 120 125 130 135 140 142 150
## 36 2 4 1 24 1 64
## 160 170 175 180 185 190 195
## 41 5 1 31 1 4 1
## 200 208 210 215 220 225 230
## 289 1 6 2 13 2 8
## 235 240 244 245 250 260 270
## 1 40 1 2 63 10 6
## 280 290 297 300 310 320 325
## 38 1 1 222 7 27 2
## 330 340 350 360 365 370 375
## 3 10 33 21 1 2 1
## 380 390 400 410 420 425 430
## 14 4 156 1 3 2 3
## 440 450 460 470 480 490 500
## 4 18 3 1 18 1 317
## 510 520 540 550 560 580 595
## 3 3 2 11 4 6 1
## 600 620 640 650 660 680 690
## 105 3 3 8 2 3 1
## 700 720 725 730 740 750 800
## 37 4 1 1 2 7 41
## 805 820 840 850 874 880 900
## 1 1 1 2 1 1 10
## 910 920 950 960 1000 1020 1040
## 1 1 1 1 123 1 1
## 1050 1080 1100 1150 1160 1185 1200
## 1 1 3 2 1 1 14
## 1240 1250 1280 1300 1400 1500 1505
## 1 1 1 3 1 28 1
## 1580 1600 1800 2000 2100 2200 2400
## 1 4 2 28 1 1 3
## 2500 2700 3000 3500 3600 4000 4500
## 2 1 19 6 1 2 1
## 5000 5300 5500 6000 or more
## 9 1 1 16
# Top-code q503_milk at the break point supplied by percentile_checker();
# values at or above it are reported as one "... or more" category.
mydata <- top_recode(
  variable = "q503_milk",
  break_point = percentile_checker("q503_milk"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q503_milk. 503 Milk
## 0 30 50 60 89 90 100 102 120 150 180 200 210
## 47 1 1 1 1 3 2 1 1 1 2 2 1
## 240 250 300 330 350 360 400 420 450 480 500 508 550
## 2 1 42 5 2 2 13 1 8 2 30 1 1
## 560 570 600 660 690 700 720 750 758 800 900 920 1000
## 1 2 278 5 1 17 1 18 1 13 53 1 66
## 1004 1010 1025 1050 1080 1140 1200 1240 1250 1300 1320 1350 1360
## 1 1 1 3 2 1 618 2 2 7 2 2 1
## 1400 1440 1450 1500 1508 1600 1750 1800 1801 2000 2070 2100 2160
## 5 1 1 311 1 10 1 69 1 74 1 4 1
## 2200 2250 2400 2408 2500 2610 2700 2850 2860 3000 3003 3020 3200
## 3 5 207 1 21 1 4 1 1 124 1 1 3
## 3300 3350 3500 3600 4000 4030 4050 4200 4500 4508 4800 5000 5250
## 2 1 3 47 11 1 1 1 21 1 32 10 1
## 5400 6000 6500 6600 6800 7200 7230 7500 8000 8400 8800 9000 9600
## 1 49 1 1 1 3 1 5 3 2 1 4 1
## 10000 10500 12000 15000 15003 16800 24000 24001 120000
## 3 1 7 5 1 1 1 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q503_milk. 503 Milk
## 0 30 50 60 89 90
## 47 1 1 1 1 3
## 100 102 120 150 180 200
## 2 1 1 1 2 2
## 210 240 250 300 330 350
## 1 2 1 42 5 2
## 360 400 420 450 480 500
## 2 13 1 8 2 30
## 508 550 560 570 600 660
## 1 1 1 2 278 5
## 690 700 720 750 758 800
## 1 17 1 18 1 13
## 900 920 1000 1004 1010 1025
## 53 1 66 1 1 1
## 1050 1080 1140 1200 1240 1250
## 3 2 1 618 2 2
## 1300 1320 1350 1360 1400 1440
## 7 2 2 1 5 1
## 1450 1500 1508 1600 1750 1800
## 1 311 1 10 1 69
## 1801 2000 2070 2100 2160 2200
## 1 74 1 4 1 3
## 2250 2400 2408 2500 2610 2700
## 5 207 1 21 1 4
## 2850 2860 3000 3003 3020 3200
## 1 1 124 1 1 3
## 3300 3350 3500 3600 4000 4030
## 2 1 3 47 11 1
## 4050 4200 4500 4508 4800 5000
## 1 1 21 1 32 10
## 5250 5400 6000 6500 6600 6800
## 1 1 49 1 1 1
## 7200 7230 7500 8000 8400 8800
## 3 1 5 3 2 1
## 9000 9600 10000 10500 12000 or more
## 4 1 3 1 17
# Top-code q504_milk_products at the break point supplied by
# percentile_checker(); values at or above it become one "... or more"
# category.
mydata <- top_recode(
  variable = "q504_milk_products",
  break_point = percentile_checker("q504_milk_products"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q504_milk_products. 504 Milk Products including condensed milk, milk powder, babyfood, ghee, butter,
## 0 30 45 50 60 70 80 90 100 150 160 175 180 189 200
## 743 1 1 3 2 1 1 1 11 8 2 3 5 1 17
## 225 250 270 275 300 315 320 330 340 350 360 370 375 380 400
## 1 11 2 1 38 1 1 3 4 81 33 17 2 5 116
## 420 430 440 450 475 480 490 500 510 525 535 550 580 590 600
## 1 1 1 20 2 1 1 149 1 2 1 5 1 1 134
## 630 650 660 680 690 700 710 720 740 750 760 770 800 825 850
## 2 13 2 3 1 118 1 17 6 10 2 1 89 1 2
## 860 880 900 940 950 990 1000 1005 1020 1050 1100 1110 1140 1150 1200
## 1 1 28 1 4 1 140 1 2 9 6 2 2 2 111
## 1250 1280 1300 1320 1350 1400 1440 1450 1480 1500 1520 1550 1600 1650 1700
## 2 2 8 2 2 30 1 2 3 59 1 1 9 3 2
## 1750 1800 1850 1866 1900 1950 2000 2100 2200 2400 2500 2650 2800 3000 3150
## 3 31 1 1 2 1 54 12 1 8 14 1 7 22 1
## 3200 3500 3800 4000 4200 4800 4900 5000 5070 5400 5700 6000 6600 7000 7500
## 2 12 1 4 4 2 1 7 1 1 1 4 1 1 1
## 8000 9500 10000 19000 21600 42000
## 3 1 1 1 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q504_milk_products. 504 Milk Products including condensed milk, milk powder, babyfood, ghee, butter,
## 0 30 45 50 60 70 80
## 743 1 1 3 2 1 1
## 90 100 150 160 175 180 189
## 1 11 8 2 3 5 1
## 200 225 250 270 275 300 315
## 17 1 11 2 1 38 1
## 320 330 340 350 360 370 375
## 1 3 4 81 33 17 2
## 380 400 420 430 440 450 475
## 5 116 1 1 1 20 2
## 480 490 500 510 525 535 550
## 1 1 149 1 2 1 5
## 580 590 600 630 650 660 680
## 1 1 134 2 13 2 3
## 690 700 710 720 740 750 760
## 1 118 1 17 6 10 2
## 770 800 825 850 860 880 900
## 1 89 1 2 1 1 28
## 940 950 990 1000 1005 1020 1050
## 1 4 1 140 1 2 9
## 1100 1110 1140 1150 1200 1250 1280
## 6 2 2 2 111 2 2
## 1300 1320 1350 1400 1440 1450 1480
## 8 2 2 30 1 2 3
## 1500 1520 1550 1600 1650 1700 1750
## 59 1 1 9 3 2 3
## 1800 1850 1866 1900 1950 2000 2100
## 31 1 1 2 1 54 12
## 2200 2400 2500 2650 2800 3000 3150
## 1 8 14 1 7 22 1
## 3200 3500 3800 4000 4200 4800 4900
## 2 12 1 4 4 2 1
## 5000 5070 5400 5700 6000 or more
## 7 1 1 1 15
# Top-code q505_oil at a fixed break point of 8000; values at or above it
# are reported as a single "8000 or more" category.
mydata <- top_recode(
  variable = "q505_oil",
  break_point = 8000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q505_oil. 505 Edible oil and Vanaspati
## 0 2 7 8 10 35 45 50 60 70 80
## 645 1 2 3 1 1 1 3 2 2 1
## 100 120 150 160 164 170 180 200 220 240 245
## 18 4 9 13 1 1 7 61 2 20 1
## 250 255 270 295 300 320 350 360 365 370 375
## 14 2 6 1 99 8 8 6 1 1 2
## 380 400 410 420 425 430 435 440 450 460 475
## 4 130 1 3 3 1 1 1 55 1 1
## 480 490 500 520 535 540 550 560 575 600 608
## 28 2 391 1 1 5 12 7 2 136 1
## 620 625 630 650 660 665 680 700 720 740 750
## 1 2 10 11 2 1 1 77 4 1 26
## 770 780 800 810 830 840 850 900 940 950 960
## 1 1 48 1 1 3 3 16 1 1 1
## 980 1000 1050 1060 1080 1100 1125 1140 1150 1200 1250
## 1 67 1 1 1 9 1 2 2 58 8
## 1260 1300 1350 1400 1450 1500 1550 1600 1650 1680 1700
## 1 14 4 28 1 108 1 15 1 1 7
## 1800 2000 2400 2500 2800 3000 3200 3450 3500 3600 4500
## 10 23 3 3 1 9 1 1 3 1 1
## 5000 6000 6400 7000 7600 8100 8500 8900 9300 12000 12500
## 1 2 1 1 1 1 1 1 1 1 1
## 18360 20000 30020 325220 350800 7590540
## 1 1 1 1 1 1
## [1] "Frequency table after encoding"
## q505_oil. 505 Edible oil and Vanaspati
## 0 2 7 8 10 35 45
## 645 1 2 3 1 1 1
## 50 60 70 80 100 120 150
## 3 2 2 1 18 4 9
## 160 164 170 180 200 220 240
## 13 1 1 7 61 2 20
## 245 250 255 270 295 300 320
## 1 14 2 6 1 99 8
## 350 360 365 370 375 380 400
## 8 6 1 1 2 4 130
## 410 420 425 430 435 440 450
## 1 3 3 1 1 1 55
## 460 475 480 490 500 520 535
## 1 1 28 2 391 1 1
## 540 550 560 575 600 608 620
## 5 12 7 2 136 1 1
## 625 630 650 660 665 680 700
## 2 10 11 2 1 1 77
## 720 740 750 770 780 800 810
## 4 1 26 1 1 48 1
## 830 840 850 900 940 950 960
## 1 3 3 16 1 1 1
## 980 1000 1050 1060 1080 1100 1125
## 1 67 1 1 1 9 1
## 1140 1150 1200 1250 1260 1300 1350
## 2 2 58 8 1 14 4
## 1400 1450 1500 1550 1600 1650 1680
## 28 1 108 1 15 1 1
## 1700 1800 2000 2400 2500 2800 3000
## 7 10 23 3 3 1 9
## 3200 3450 3500 3600 4500 5000 6000
## 1 1 3 1 1 1 2
## 6400 7000 7600 8000 or more
## 1 1 1 12
# Top-code q506_vegetables at the break point supplied by
# percentile_checker(); values at or above it become one "... or more"
# category.
mydata <- top_recode(
  variable = "q506_vegetables",
  break_point = percentile_checker("q506_vegetables"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q506_vegetables. 506 Vegetables
## 0 40 50 60 90 100 120 150 160 200 240 250 300 350 400
## 8 1 2 1 1 9 1 14 2 39 3 12 78 5 67
## 430 450 500 600 650 675 700 750 800 850 900 1000 1050 1060 1100
## 1 24 198 357 2 1 48 17 63 1 152 194 1 1 3
## 1200 1240 1250 1300 1400 1500 1508 1600 1700 1800 1950 2000 2100 2400 2500
## 220 1 1 5 3 514 1 4 1 36 1 86 12 11 19
## 3000 3008 3500 4000 4200 4500 5000 6000 7500 8000 9000 10000 11500 15000 15400
## 101 1 1 4 1 5 4 6 1 1 1 3 1 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q506_vegetables. 506 Vegetables
## 0 40 50 60 90 100 120
## 8 1 2 1 1 9 1
## 150 160 200 240 250 300 350
## 14 2 39 3 12 78 5
## 400 430 450 500 600 650 675
## 67 1 24 198 357 2 1
## 700 750 800 850 900 1000 1050
## 48 17 63 1 152 194 1
## 1060 1100 1200 1240 1250 1300 1400
## 1 3 220 1 1 5 3
## 1500 1508 1600 1700 1800 1950 2000
## 514 1 4 1 36 1 86
## 2100 2400 2500 3000 3008 3500 4000
## 12 11 19 101 1 1 4
## 4200 4500 5000 6000 or more
## 1 5 4 15
# Top-code q507_fruits at the break point supplied by percentile_checker();
# values at or above it are reported as one "... or more" category.
mydata <- top_recode(
  variable = "q507_fruits",
  break_point = percentile_checker("q507_fruits"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q507_fruits. 507 Fruits& nuts including mango, banana, coconut, dates, kishmish, monacca, oth
## 0 20 25 40 50 60 75 80 82 100 107 120 130 150 160
## 400 3 1 3 26 6 1 8 1 121 1 2 1 41 2
## 180 200 208 210 220 240 250 280 300 320 350 360 400 430 450
## 2 319 3 2 1 2 36 4 240 1 4 1 102 1 6
## 500 508 535 536 580 600 650 668 700 720 750 800 840 900 1000
## 360 1 1 1 1 89 3 1 25 1 5 28 1 11 204
## 1008 1050 1095 1150 1200 1300 1500 1600 1700 1750 1800 1900 2000 2100 2160
## 1 1 1 1 37 4 97 2 1 1 7 1 46 1 1
## 2200 2300 2460 2500 2600 2800 3000 3300 3400 4000 4500 5000 5400 5800 6000
## 1 1 1 6 1 1 39 1 1 6 3 7 1 1 3
## 9000 10000 10255 10300 15000
## 1 1 1 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q507_fruits. 507 Fruits& nuts including mango, banana, coconut, dates, kishmish, monacca, oth
## 0 20 25 40 50 60 75
## 400 3 1 3 26 6 1
## 80 82 100 107 120 130 150
## 8 1 121 1 2 1 41
## 160 180 200 208 210 220 240
## 2 2 319 3 2 1 2
## 250 280 300 320 350 360 400
## 36 4 240 1 4 1 102
## 430 450 500 508 535 536 580
## 1 6 360 1 1 1 1
## 600 650 668 700 720 750 800
## 89 3 1 25 1 5 28
## 840 900 1000 1008 1050 1095 1150
## 1 11 204 1 1 1 1
## 1200 1300 1500 1600 1700 1750 1800
## 37 4 97 2 1 1 7
## 1900 2000 2100 2160 2200 2300 2460
## 1 46 1 1 1 1 1
## 2500 2600 2800 3000 3300 3400 4000
## 6 1 1 39 1 1 6
## 4500 5000 or more
## 3 17
# Top-code q508_egg at the break point supplied by percentile_checker();
# values at or above it are reported as one "... or more" category.
mydata <- top_recode(
  variable = "q508_egg",
  break_point = percentile_checker("q508_egg"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q508_egg. 508 Egg, fish, and meat
## 0 8 20 40 45 50 60 70 100 108 110 120 130 140 150
## 1404 1 2 1 1 4 2 2 18 1 1 4 1 3 9
## 160 180 200 210 240 250 270 280 300 320 325 350 360 370 380
## 2 3 36 1 4 9 1 1 45 2 1 16 6 2 2
## 400 420 450 460 500 510 520 540 550 600 620 640 680 700 720
## 52 1 8 1 105 2 2 1 1 56 1 3 6 34 2
## 740 750 760 765 780 800 810 830 840 900 940 1000 1020 1040 1050
## 1 5 2 1 1 47 1 1 4 12 1 128 1 2 5
## 1080 1100 1110 1120 1140 1150 1200 1240 1300 1350 1360 1400 1440 1450 1500
## 2 2 1 1 1 1 46 1 2 1 1 3 4 2 49
## 1502 1520 1600 1700 1750 1800 1920 2000 2100 2250 2400 2500 2600 2700 2720
## 1 1 9 1 1 7 1 63 3 1 1 11 1 1 1
## 2800 3000 3200 3500 3600 3800 4000 4200 4500 5000 5100 6000 8000 9000 10000
## 2 33 1 4 1 1 4 1 3 5 1 1 2 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q508_egg. 508 Egg, fish, and meat
## 0 8 20 40 45 50 60
## 1404 1 2 1 1 4 2
## 70 100 108 110 120 130 140
## 2 18 1 1 4 1 3
## 150 160 180 200 210 240 250
## 9 2 3 36 1 4 9
## 270 280 300 320 325 350 360
## 1 1 45 2 1 16 6
## 370 380 400 420 450 460 500
## 2 2 52 1 8 1 105
## 510 520 540 550 600 620 640
## 2 2 1 1 56 1 3
## 680 700 720 740 750 760 765
## 6 34 2 1 5 2 1
## 780 800 810 830 840 900 940
## 1 47 1 1 4 12 1
## 1000 1020 1040 1050 1080 1100 1110
## 128 1 2 5 2 2 1
## 1120 1140 1150 1200 1240 1300 1350
## 1 1 1 46 1 2 1
## 1360 1400 1440 1450 1500 1502 1520
## 1 3 4 2 49 1 1
## 1600 1700 1750 1800 1920 2000 2100
## 9 1 1 7 1 63 3
## 2250 2400 2500 2600 2700 2720 2800
## 1 1 11 1 1 1 2
## 3000 3200 3500 3600 3800 4000 4200
## 33 1 4 1 1 4 1
## 4500 or more
## 14
# Top-code q509_sugar at the break point supplied by percentile_checker();
# values at or above it are reported as one "... or more" category.
mydata <- top_recode(
  variable = "q509_sugar",
  break_point = percentile_checker("q509_sugar"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q509_sugar. 509 Sugar including gur, candy, misri, honey, etc.
## 0 6 18 20 35 40 64 68 70 75 80 90 100 105 106
## 6 1 1 1 1 7 2 1 7 2 9 3 10 11 1
## 110 114 115 117 120 122 123 130 135 136 140 145 150 155 156
## 2 2 1 2 40 1 1 1 3 2 10 4 38 2 1
## 160 165 170 175 180 185 190 192 200 202 204 205 210 212 215
## 29 1 3 44 14 4 5 2 226 1 1 1 14 1 4
## 216 220 224 225 230 234 235 240 242 243 245 246 250 255 256
## 2 3 1 5 5 1 6 23 1 1 20 1 41 8 1
## 260 262 265 266 268 270 273 274 275 280 284 285 288 290 295
## 7 1 1 1 1 4 1 1 3 36 1 2 2 2 1
## 300 304 305 312 315 320 325 330 335 340 344 345 350 355 358
## 165 2 1 1 3 19 2 3 3 10 1 3 112 1 1
## 360 364 365 370 375 380 384 385 390 392 400 405 410 420 425
## 19 1 1 5 11 12 1 2 4 1 198 2 3 9 1
## 430 432 435 440 450 455 456 460 465 466 470 480 490 495 500
## 6 4 1 4 41 5 1 3 2 1 2 13 5 1 168
## 504 506 510 512 520 525 528 530 540 550 555 560 565 570 572
## 1 2 2 1 5 15 1 2 1 18 1 5 1 1 1
## 580 585 600 604 620 635 640 645 650 653 654 659 660 667 675
## 2 1 101 1 1 1 3 1 7 1 1 1 3 1 2
## 680 696 700 705 720 730 740 750 755 760 775 776 780 790 800
## 5 1 39 1 4 1 3 11 1 1 5 1 1 3 46
## 813 840 850 855 865 875 900 925 942 950 960 975 990 1000 1010
## 1 1 5 1 1 6 15 1 1 5 2 1 1 103 1
## 1015 1025 1036 1040 1050 1060 1080 1086 1100 1130 1133 1145 1150 1160 1200
## 1 1 1 2 16 1 3 1 7 1 1 2 3 1 43
## 1235 1250 1282 1300 1336 1400 1425 1440 1500 1505 1520 1540 1550 1560 1570
## 1 1 1 4 1 8 1 1 44 1 1 1 2 1 1
## 1575 1600 1620 1700 1750 1800 1820 1830 1850 1900 1910 1950 2000 2005 2025
## 1 3 1 6 4 5 1 1 3 2 1 1 58 1 1
## 2070 2100 2250 2400 2450 2500 2560 2700 2800 2850 3000 3100 3200 3275 3400
## 1 5 1 7 1 5 1 3 2 1 12 1 4 1 1
## 3500 3600 4000 4200 4400 4500 4800 5000 5100 5200 5250 6000 8000 9600 10000
## 10 2 11 1 1 2 2 5 1 1 1 6 2 1 2
## 12800 15000 18000 22400 41000
## 1 1 1 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q509_sugar. 509 Sugar including gur, candy, misri, honey, etc.
## 0 6 18 20 35 40 64
## 6 1 1 1 1 7 2
## 68 70 75 80 90 100 105
## 1 7 2 9 3 10 11
## 106 110 114 115 117 120 122
## 1 2 2 1 2 40 1
## 123 130 135 136 140 145 150
## 1 1 3 2 10 4 38
## 155 156 160 165 170 175 180
## 2 1 29 1 3 44 14
## 185 190 192 200 202 204 205
## 4 5 2 226 1 1 1
## 210 212 215 216 220 224 225
## 14 1 4 2 3 1 5
## 230 234 235 240 242 243 245
## 5 1 6 23 1 1 20
## 246 250 255 256 260 262 265
## 1 41 8 1 7 1 1
## 266 268 270 273 274 275 280
## 1 1 4 1 1 3 36
## 284 285 288 290 295 300 304
## 1 2 2 2 1 165 2
## 305 312 315 320 325 330 335
## 1 1 3 19 2 3 3
## 340 344 345 350 355 358 360
## 10 1 3 112 1 1 19
## 364 365 370 375 380 384 385
## 1 1 5 11 12 1 2
## 390 392 400 405 410 420 425
## 4 1 198 2 3 9 1
## 430 432 435 440 450 455 456
## 6 4 1 4 41 5 1
## 460 465 466 470 480 490 495
## 3 2 1 2 13 5 1
## 500 504 506 510 512 520 525
## 168 1 2 2 1 5 15
## 528 530 540 550 555 560 565
## 1 2 1 18 1 5 1
## 570 572 580 585 600 604 620
## 1 1 2 1 101 1 1
## 635 640 645 650 653 654 659
## 1 3 1 7 1 1 1
## 660 667 675 680 696 700 705
## 3 1 2 5 1 39 1
## 720 730 740 750 755 760 775
## 4 1 3 11 1 1 5
## 776 780 790 800 813 840 850
## 1 1 3 46 1 1 5
## 855 865 875 900 925 942 950
## 1 1 6 15 1 1 5
## 960 975 990 1000 1010 1015 1025
## 2 1 1 103 1 1 1
## 1036 1040 1050 1060 1080 1086 1100
## 1 2 16 1 3 1 7
## 1130 1133 1145 1150 1160 1200 1235
## 1 1 2 3 1 43 1
## 1250 1282 1300 1336 1400 1425 1440
## 1 1 4 1 8 1 1
## 1500 1505 1520 1540 1550 1560 1570
## 44 1 1 1 2 1 1
## 1575 1600 1620 1700 1750 1800 1820
## 1 3 1 6 4 5 1
## 1830 1850 1900 1910 1950 2000 2005
## 1 3 2 1 1 58 1
## 2025 2070 2100 2250 2400 2450 2500
## 1 1 5 1 7 1 5
## 2560 2700 2800 2850 3000 3100 3200
## 1 3 2 1 12 1 4
## 3275 3400 3500 3600 4000 4200 4400
## 1 1 10 2 11 1 1
## 4500 4800 5000 5100 5200 5250 6000 or more
## 2 2 5 1 1 1 16
# Top-code q510_salt at a fixed break point of 7000; values at or above it
# are reported as a single "7000 or more" category.
mydata <- top_recode(
  variable = "q510_salt",
  break_point = 7000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q510_salt. 510 Salt & Spices including dry chillies, curry powder, oilseeds, garlic, ginger
## 0 50 70 90 100 118 120 130 140 150 155 158 160 165 175
## 6 1 1 3 19 1 5 1 1 17 1 1 2 1 1
## 200 220 225 230 240 245 250 260 270 275 280 290 300 310 320
## 113 7 1 1 3 1 27 5 7 1 12 6 157 4 8
## 325 330 340 345 350 360 365 370 380 390 400 406 407 410 420
## 1 2 5 1 13 6 1 3 2 3 84 1 1 2 4
## 430 435 440 445 450 455 460 470 475 480 490 500 505 510 520
## 1 1 5 1 16 1 5 4 2 5 1 415 1 2 6
## 525 530 540 550 560 570 575 580 590 600 605 610 620 625 630
## 1 1 9 13 5 4 1 7 4 109 1 4 1 2 3
## 640 650 660 670 680 685 690 700 705 710 715 720 730 740 745
## 3 13 4 2 7 1 2 84 1 1 1 14 6 2 1
## 750 760 770 780 790 795 800 810 820 830 850 860 870 880 900
## 16 4 1 3 1 1 88 2 5 2 11 1 2 1 23
## 920 930 940 950 960 980 990 1000 1020 1030 1050 1080 1100 1120 1130
## 5 2 2 6 1 3 1 300 1 1 5 1 5 1 2
## 1150 1200 1210 1220 1240 1250 1260 1280 1295 1300 1310 1340 1350 1380 1400
## 3 57 1 1 2 7 1 2 1 6 1 1 2 1 11
## 1420 1430 1450 1460 1480 1500 1520 1530 1540 1550 1555 1580 1600 1655 1680
## 3 1 4 2 2 146 2 1 1 3 1 1 9 1 1
## 1700 1710 1720 1750 1780 1800 1850 1860 1880 1900 2000 2020 2050 2070 2100
## 5 1 1 4 2 8 1 1 1 2 113 1 1 1 2
## 2200 2220 2260 2300 2380 2400 2420 2500 2600 2660 2800 3000 3200 3400 3500
## 1 2 1 5 1 1 1 17 1 1 2 38 1 2 4
## 3570 3920 4000 4220 4500 4820 5000 5280 6000 7003 8000 8500 8620 9000 9866
## 1 1 5 1 2 1 16 1 2 1 2 1 1 1 1
## 10000 15000 20000
## 4 1 1
## [1] "Frequency table after encoding"
## q510_salt. 510 Salt & Spices including dry chillies, curry powder, oilseeds, garlic, ginger
## 0 50 70 90 100 118 120
## 6 1 1 3 19 1 5
## 130 140 150 155 158 160 165
## 1 1 17 1 1 2 1
## 175 200 220 225 230 240 245
## 1 113 7 1 1 3 1
## 250 260 270 275 280 290 300
## 27 5 7 1 12 6 157
## 310 320 325 330 340 345 350
## 4 8 1 2 5 1 13
## 360 365 370 380 390 400 406
## 6 1 3 2 3 84 1
## 407 410 420 430 435 440 445
## 1 2 4 1 1 5 1
## 450 455 460 470 475 480 490
## 16 1 5 4 2 5 1
## 500 505 510 520 525 530 540
## 415 1 2 6 1 1 9
## 550 560 570 575 580 590 600
## 13 5 4 1 7 4 109
## 605 610 620 625 630 640 650
## 1 4 1 2 3 3 13
## 660 670 680 685 690 700 705
## 4 2 7 1 2 84 1
## 710 715 720 730 740 745 750
## 1 1 14 6 2 1 16
## 760 770 780 790 795 800 810
## 4 1 3 1 1 88 2
## 820 830 850 860 870 880 900
## 5 2 11 1 2 1 23
## 920 930 940 950 960 980 990
## 5 2 2 6 1 3 1
## 1000 1020 1030 1050 1080 1100 1120
## 300 1 1 5 1 5 1
## 1130 1150 1200 1210 1220 1240 1250
## 2 3 57 1 1 2 7
## 1260 1280 1295 1300 1310 1340 1350
## 1 2 1 6 1 1 2
## 1380 1400 1420 1430 1450 1460 1480
## 1 11 3 1 4 2 2
## 1500 1520 1530 1540 1550 1555 1580
## 146 2 1 1 3 1 1
## 1600 1655 1680 1700 1710 1720 1750
## 9 1 1 5 1 1 4
## 1780 1800 1850 1860 1880 1900 2000
## 2 8 1 1 1 2 113
## 2020 2050 2070 2100 2200 2220 2260
## 1 1 1 2 1 2 1
## 2300 2380 2400 2420 2500 2600 2660
## 5 1 1 1 17 1 1
## 2800 3000 3200 3400 3500 3570 3920
## 2 38 1 2 4 1 1
## 4000 4220 4500 4820 5000 5280 6000
## 5 1 2 1 16 1 2
## 7000 or more
## 13
# Top-code q511_tea at a fixed break point of 3000; values at or above it
# are reported as a single "3000 or more" category.
mydata <- top_recode(
  variable = "q511_tea",
  break_point = 3000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q511_tea. 511 Other food items including beverages such as tea, coffee, fruit juice and pr
## 0 10 20 30 35 40 45 50 52 55 60
## 34 1 2 1 3 3 3 17 1 8 28
## 65 70 72 75 80 85 88 90 100 110 116
## 17 20 1 5 8 1 1 4 89 19 1
## 120 125 126 130 135 140 150 152 155 160 165
## 109 4 1 33 1 22 119 2 1 31 2
## 170 175 180 190 195 200 205 210 215 220 230
## 4 1 37 4 12 279 1 9 1 29 18
## 235 240 250 255 260 265 270 280 290 295 300
## 1 64 92 2 26 2 9 16 4 2 210
## 305 308 310 320 328 330 335 340 345 350 360
## 1 3 3 9 1 4 2 8 1 30 17
## 365 370 380 390 395 400 410 416 420 425 430
## 2 2 2 4 1 89 1 1 10 1 2
## 440 445 450 460 475 480 495 500 508 510 515
## 8 1 13 5 2 10 1 228 3 3 1
## 520 530 540 550 560 570 580 585 590 599 600
## 3 4 9 8 6 2 4 1 1 1 78
## 630 640 645 650 660 665 670 680 690 700 720
## 5 1 1 8 2 3 1 4 1 31 4
## 730 740 750 760 770 780 800 820 840 850 860
## 4 4 14 5 1 4 22 1 2 5 4
## 880 890 900 919 930 960 970 980 990 1000 1030
## 1 1 7 1 1 1 1 1 1 69 1
## 1040 1050 1060 1080 1100 1120 1130 1140 1160 1180 1195
## 1 1 1 1 2 1 1 1 1 1 1
## 1200 1202 1220 1240 1300 1303 1400 1401 1500 1560 1600
## 16 1 1 2 4 1 2 1 24 1 5
## 1620 1636 1650 1680 1730 1740 1750 1800 1840 1980 2000
## 1 1 2 1 1 1 1 2 1 1 19
## 2100 2200 2300 2400 2500 2650 2700 2800 3000 3250 3500
## 2 3 1 1 4 1 1 1 12 1 2
## 3800 5000 5400 6500 7800 11450 31000 6600400
## 1 3 1 1 1 1 1 1
## [1] "Frequency table after encoding"
## q511_tea. 511 Other food items including beverages such as tea, coffee, fruit juice and pr
## 0 10 20 30 35 40 45
## 34 1 2 1 3 3 3
## 50 52 55 60 65 70 72
## 17 1 8 28 17 20 1
## 75 80 85 88 90 100 110
## 5 8 1 1 4 89 19
## 116 120 125 126 130 135 140
## 1 109 4 1 33 1 22
## 150 152 155 160 165 170 175
## 119 2 1 31 2 4 1
## 180 190 195 200 205 210 215
## 37 4 12 279 1 9 1
## 220 230 235 240 250 255 260
## 29 18 1 64 92 2 26
## 265 270 280 290 295 300 305
## 2 9 16 4 2 210 1
## 308 310 320 328 330 335 340
## 3 3 9 1 4 2 8
## 345 350 360 365 370 380 390
## 1 30 17 2 2 2 4
## 395 400 410 416 420 425 430
## 1 89 1 1 10 1 2
## 440 445 450 460 475 480 495
## 8 1 13 5 2 10 1
## 500 508 510 515 520 530 540
## 228 3 3 1 3 4 9
## 550 560 570 580 585 590 599
## 8 6 2 4 1 1 1
## 600 630 640 645 650 660 665
## 78 5 1 1 8 2 3
## 670 680 690 700 720 730 740
## 1 4 1 31 4 4 4
## 750 760 770 780 800 820 840
## 14 5 1 4 22 1 2
## 850 860 880 890 900 919 930
## 5 4 1 1 7 1 1
## 960 970 980 990 1000 1030 1040
## 1 1 1 1 69 1 1
## 1050 1060 1080 1100 1120 1130 1140
## 1 1 1 2 1 1 1
## 1160 1180 1195 1200 1202 1220 1240
## 1 1 1 16 1 1 2
## 1300 1303 1400 1401 1500 1560 1600
## 4 1 2 1 24 1 5
## 1620 1636 1650 1680 1730 1740 1750
## 1 1 2 1 1 1 1
## 1800 1840 1980 2000 2100 2200 2300
## 2 1 1 19 2 3 1
## 2400 2500 2650 2700 2800 3000 or more
## 1 4 1 1 1 25
# Top-code q512_pan at the break point supplied by percentile_checker();
# values at or above it are reported as one "... or more" category.
mydata <- top_recode(
  variable = "q512_pan",
  break_point = percentile_checker("q512_pan"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q512_pan. 512 Pan, tobacco, intoxicants
## 0 8 20 25 30 40 50 60 75 78 80 88 90 95 100
## 920 4 1 1 3 2 5 1 3 1 1 1 1 1 31
## 102 110 120 130 150 160 180 200 210 220 225 230 240 250 275
## 1 1 2 3 95 1 2 63 3 1 1 1 6 10 1
## 300 330 345 350 360 370 380 400 420 440 450 480 500 510 540
## 239 2 1 5 7 1 1 24 5 3 60 4 103 4 3
## 600 608 615 640 650 660 700 720 750 760 780 800 840 900 910
## 221 1 1 1 3 2 17 2 16 1 1 21 1 63 1
## 960 990 1000 1020 1050 1100 1200 1250 1300 1308 1350 1400 1480 1500 1600
## 2 1 85 1 6 4 30 1 1 1 1 1 1 69 1
## 1620 1650 1713 1750 1800 1850 2000 2050 2100 2150 2180 2250 2400 2500 2520
## 1 2 1 1 8 1 31 1 4 1 1 1 1 7 1
## 2550 2600 3000 3450 3500 3600 3750 3950 4000 4500 4850 5000 5400 6000 7500
## 1 1 49 1 2 4 2 1 9 4 1 6 1 16 1
## 9000 10000 12000 15000 15150
## 1 1 1 2 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q512_pan. 512 Pan, tobacco, intoxicants
## 0 8 20 25 30 40 50
## 920 4 1 1 3 2 5
## 60 75 78 80 88 90 95
## 1 3 1 1 1 1 1
## 100 102 110 120 130 150 160
## 31 1 1 2 3 95 1
## 180 200 210 220 225 230 240
## 2 63 3 1 1 1 6
## 250 275 300 330 345 350 360
## 10 1 239 2 1 5 7
## 370 380 400 420 440 450 480
## 1 1 24 5 3 60 4
## 500 510 540 600 608 615 640
## 103 4 3 221 1 1 1
## 650 660 700 720 750 760 780
## 3 2 17 2 16 1 1
## 800 840 900 910 960 990 1000
## 21 1 63 1 2 1 85
## 1020 1050 1100 1200 1250 1300 1308
## 1 6 4 30 1 1 1
## 1350 1400 1480 1500 1600 1620 1650
## 1 1 1 69 1 1 2
## 1713 1750 1800 1850 2000 2050 2100
## 1 1 8 1 31 1 4
## 2150 2180 2250 2400 2500 2520 2550
## 1 1 1 1 7 1 1
## 2600 3000 3450 3500 3600 3750 3950
## 1 49 1 2 4 2 1
## 4000 4500 4850 5000 5400 6000 or more
## 9 4 1 6 1 23
# Top-code q513_fuel (fuel & light): values of 9000 and above are collapsed
# into a single "9000 or more" category.
mydata <- top_recode(
  variable = "q513_fuel",
  break_point = 9000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q513_fuel. 513 Fuel & Light
## 0 8 22 70 86 90 100 105 115 125 140 147 150 162 170
## 73 1 1 1 1 1 11 1 1 2 1 1 14 1 1
## 175 180 190 200 205 220 225 226 235 240 249 250 266 270 280
## 3 1 1 50 2 1 1 1 1 2 1 37 1 3 1
## 300 310 315 320 322 330 335 340 342 350 355 360 370 375 380
## 100 4 1 2 1 2 1 1 1 32 1 2 2 4 1
## 385 400 410 420 425 435 450 470 475 480 500 505 510 514 515
## 1 70 1 1 1 1 12 1 2 1 163 2 2 1 1
## 520 525 530 550 560 600 625 630 635 640 650 670 678 680 690
## 1 1 1 20 1 125 3 3 1 1 33 1 1 3 2
## 700 720 725 740 750 757 760 770 780 800 820 825 830 839 850
## 94 4 1 2 33 1 1 2 1 89 2 8 1 1 23
## 870 875 880 884 900 920 925 930 936 946 950 960 970 975 980
## 1 2 1 1 49 2 1 1 1 1 14 2 1 2 1
## 986 990 1000 1025 1030 1040 1050 1060 1100 1107 1110 1120 1125 1140 1150
## 1 1 258 1 1 1 7 1 52 1 1 1 1 2 13
## 1170 1200 1220 1225 1250 1260 1300 1325 1330 1337 1350 1360 1366 1390 1400
## 2 68 2 1 19 1 49 1 1 1 9 2 1 1 16
## 1433 1450 1488 1500 1525 1533 1550 1600 1625 1630 1650 1662 1700 1750 1800
## 1 11 1 147 1 1 3 38 2 1 5 1 24 10 20
## 1825 1834 1850 1874 1900 1920 1930 1950 1975 2000 2050 2060 2100 2115 2150
## 1 1 3 1 12 1 1 3 1 84 1 1 19 2 2
## 2170 2175 2200 2230 2235 2250 2300 2350 2370 2400 2450 2460 2500 2580 2600
## 1 1 6 1 1 3 11 1 1 3 1 1 32 1 7
## 2650 2700 2750 2800 2850 2900 3000 3010 3100 3150 3200 3210 3250 3300 3325
## 1 2 1 6 1 5 38 1 7 1 1 1 3 1 1
## 3345 3350 3400 3500 3600 3690 3750 3900 4000 4175 4200 4250 4300 4500 4600
## 1 3 2 9 5 1 3 1 26 1 2 1 3 9 3
## 4750 5000 5180 5200 5330 5400 5425 5500 5600 5700 5800 6000 6300 6500 6600
## 1 10 1 1 1 1 1 1 2 1 1 5 1 1 3
## 6620 6680 6800 6860 7000 7500 8000 8500 9000 9050 9500 10000 13000 13600 16500
## 1 1 1 1 3 1 2 1 1 1 1 2 1 1 1
## 19600 20000 25000
## 1 1 3
## [1] "Frequency table after encoding"
## q513_fuel. 513 Fuel & Light
## 0 8 22 70 86 90 100
## 73 1 1 1 1 1 11
## 105 115 125 140 147 150 162
## 1 1 2 1 1 14 1
## 170 175 180 190 200 205 220
## 1 3 1 1 50 2 1
## 225 226 235 240 249 250 266
## 1 1 1 2 1 37 1
## 270 280 300 310 315 320 322
## 3 1 100 4 1 2 1
## 330 335 340 342 350 355 360
## 2 1 1 1 32 1 2
## 370 375 380 385 400 410 420
## 2 4 1 1 70 1 1
## 425 435 450 470 475 480 500
## 1 1 12 1 2 1 163
## 505 510 514 515 520 525 530
## 2 2 1 1 1 1 1
## 550 560 600 625 630 635 640
## 20 1 125 3 3 1 1
## 650 670 678 680 690 700 720
## 33 1 1 3 2 94 4
## 725 740 750 757 760 770 780
## 1 2 33 1 1 2 1
## 800 820 825 830 839 850 870
## 89 2 8 1 1 23 1
## 875 880 884 900 920 925 930
## 2 1 1 49 2 1 1
## 936 946 950 960 970 975 980
## 1 1 14 2 1 2 1
## 986 990 1000 1025 1030 1040 1050
## 1 1 258 1 1 1 7
## 1060 1100 1107 1110 1120 1125 1140
## 1 52 1 1 1 1 2
## 1150 1170 1200 1220 1225 1250 1260
## 13 2 68 2 1 19 1
## 1300 1325 1330 1337 1350 1360 1366
## 49 1 1 1 9 2 1
## 1390 1400 1433 1450 1488 1500 1525
## 1 16 1 11 1 147 1
## 1533 1550 1600 1625 1630 1650 1662
## 1 3 38 2 1 5 1
## 1700 1750 1800 1825 1834 1850 1874
## 24 10 20 1 1 3 1
## 1900 1920 1930 1950 1975 2000 2050
## 12 1 1 3 1 84 1
## 2060 2100 2115 2150 2170 2175 2200
## 1 19 2 2 1 1 6
## 2230 2235 2250 2300 2350 2370 2400
## 1 1 3 11 1 1 3
## 2450 2460 2500 2580 2600 2650 2700
## 1 1 32 1 7 1 2
## 2750 2800 2850 2900 3000 3010 3100
## 1 6 1 5 38 1 7
## 3150 3200 3210 3250 3300 3325 3345
## 1 1 1 3 1 1 1
## 3350 3400 3500 3600 3690 3750 3900
## 3 2 9 5 1 3 1
## 4000 4175 4200 4250 4300 4500 4600
## 26 1 2 1 3 9 3
## 4750 5000 5180 5200 5330 5400 5425
## 1 10 1 1 1 1 1
## 5500 5600 5700 5800 6000 6300 6500
## 1 2 1 1 5 1 1
## 6600 6620 6680 6800 6860 7000 7500
## 3 1 1 1 1 3 1
## 8000 8500 9000 or more
## 2 1 13
# Top-code q514_cinema (entertainment incl. cinema, picnic, sports, club fees):
# values at or above the break point become one "<break> or more" category.
# NOTE(review): percentile_checker() is kept inline on purpose; it appears to be
# evaluated lazily inside top_recode(), so hoisting it could reorder the output.
mydata <- top_recode(
  variable = "q514_cinema",
  break_point = percentile_checker("q514_cinema"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q514_cinema. 514 Entertainment including cinema, picnic, sports, club fees, video cassettes,
## 0 8 15 40 50 70 75 80 90 99 100 120 125 130 140
## 1706 3 2 2 3 1 1 2 1 2 47 19 2 21 1
## 150 160 170 180 200 210 220 230 231 240 244 250 254 255 260
## 142 3 2 2 142 2 1 1 1 3 1 59 1 1 3
## 267 280 288 290 300 308 315 325 330 345 350 365 400 450 453
## 1 2 1 1 51 1 1 1 1 1 6 2 3 3 1
## 500 520 600 700 800 850 1000 1100 1200 1300 1500 1650 1700 2000 2150
## 25 1 5 4 4 2 16 1 4 1 8 1 1 9 1
## 2500 3000 4000 4500 5000 6000 8000 10000 11000 15000
## 3 3 3 1 2 1 1 1 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q514_cinema. 514 Entertainment including cinema, picnic, sports, club fees, video cassettes,
## 0 8 15 40 50 70 75
## 1706 3 2 2 3 1 1
## 80 90 99 100 120 125 130
## 2 1 2 47 19 2 21
## 140 150 160 170 180 200 210
## 1 142 3 2 2 142 2
## 220 230 231 240 244 250 254
## 1 1 1 3 1 59 1
## 255 260 267 280 288 290 300
## 1 3 1 2 1 1 51
## 308 315 325 330 345 350 365
## 1 1 1 1 1 6 2
## 400 450 453 500 520 600 700
## 3 3 1 25 1 5 4
## 800 850 1000 1100 1200 1300 1500
## 4 2 16 1 4 1 8
## 1650 1700 2000 2150 2500 3000 or more
## 1 1 9 1 3 14
# Top-code q515_torch (personal care and effects: spectacles, torch, umbrella,
# lighter): values of 2000 and above become one "2000 or more" category.
mydata <- top_recode(
  variable = "q515_torch",
  break_point = 2000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q515_torch. 515 Personal care and effects including spectacles, torch, umbrella, lighter, et
## 0 2 6 15 25 30 45 50 52 58 60 80 100 110 120 130 140 150
## 2077 1 1 1 1 1 1 17 1 1 3 6 26 1 4 1 3 17
## 170 180 200 210 225 240 250 270 300 315 320 350 400 430 450 490 500 525
## 1 1 42 1 1 1 16 1 20 1 1 5 11 1 4 1 28 1
## 550 570 575 600 700 750 800 900 950 1000 1030 1100 1200 1300 1400 1500 1800 2000
## 3 1 1 9 4 2 5 1 1 4 1 1 2 1 1 3 1 2
## 2500 2508 3000 3100 5000
## 3 1 4 1 1
## [1] "Frequency table after encoding"
## q515_torch. 515 Personal care and effects including spectacles, torch, umbrella, lighter, et
## 0 2 6 15 25 30 45
## 2077 1 1 1 1 1 1
## 50 52 58 60 80 100 110
## 17 1 1 3 6 26 1
## 120 130 140 150 170 180 200
## 4 1 3 17 1 1 42
## 210 225 240 250 270 300 315
## 1 1 1 16 1 20 1
## 320 350 400 430 450 490 500
## 1 5 11 1 4 1 28
## 525 550 570 575 600 700 750
## 1 3 1 1 9 4 2
## 800 900 950 1000 1030 1100 1200
## 5 1 1 4 1 1 2
## 1300 1400 1500 1800 2000 or more
## 1 1 3 1 12
# Top-code q516_paste (toilet articles: toothpaste, hair oil, shaving blades):
# values of 3000 and above become one "3000 or more" category.
mydata <- top_recode(
  variable = "q516_paste",
  break_point = 3000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q516_paste. 516 Toilet articles including toothpaste, hair oil, shaving blades, etc.
## 0 10 15 20 25 26 30 32 35 40 42
## 35 3 3 2 6 1 7 1 3 7 1
## 43 44 45 50 52 60 63 65 66 70 75
## 1 2 5 64 1 19 1 3 1 15 3
## 80 84 85 86 88 89 90 91 95 96 100
## 23 1 4 1 1 1 11 1 3 1 263
## 104 105 110 113 114 115 120 124 125 130 132
## 1 10 11 2 1 2 31 3 5 8 1
## 135 140 142 145 150 153 155 158 160 161 170
## 4 8 2 3 163 1 4 1 19 1 10
## 175 178 180 185 188 190 195 200 205 206 208
## 6 1 19 1 1 7 1 429 1 1 1
## 210 211 215 220 225 230 235 240 245 250 260
## 8 1 2 11 3 12 1 11 3 72 4
## 270 274 275 280 290 300 304 305 308 310 320
## 9 1 1 11 2 278 1 2 1 4 10
## 330 334 335 350 360 365 370 375 380 385 400
## 6 1 1 24 3 3 2 2 6 1 70
## 420 450 470 490 500 508 520 530 534 550 562
## 3 6 1 1 295 1 3 2 1 2 1
## 580 590 600 630 650 700 720 750 800 850 900
## 1 1 40 1 1 26 1 1 19 1 3
## 907 1000 1200 1210 1300 1500 1580 1800 2000 2002 2200
## 1 69 5 1 1 13 1 1 8 1 1
## 2500 3000 3200 3300 4000 4150 5000 6300 10000 11000 15000
## 3 4 1 1 1 1 2 1 2 1 1
## 300150 1731500
## 1 1
## [1] "Frequency table after encoding"
## q516_paste. 516 Toilet articles including toothpaste, hair oil, shaving blades, etc.
## 0 10 15 20 25 26 30
## 35 3 3 2 6 1 7
## 32 35 40 42 43 44 45
## 1 3 7 1 1 2 5
## 50 52 60 63 65 66 70
## 64 1 19 1 3 1 15
## 75 80 84 85 86 88 89
## 3 23 1 4 1 1 1
## 90 91 95 96 100 104 105
## 11 1 3 1 263 1 10
## 110 113 114 115 120 124 125
## 11 2 1 2 31 3 5
## 130 132 135 140 142 145 150
## 8 1 4 8 2 3 163
## 153 155 158 160 161 170 175
## 1 4 1 19 1 10 6
## 178 180 185 188 190 195 200
## 1 19 1 1 7 1 429
## 205 206 208 210 211 215 220
## 1 1 1 8 1 2 11
## 225 230 235 240 245 250 260
## 3 12 1 11 3 72 4
## 270 274 275 280 290 300 304
## 9 1 1 11 2 278 1
## 305 308 310 320 330 334 335
## 2 1 4 10 6 1 1
## 350 360 365 370 375 380 385
## 24 3 3 2 2 6 1
## 400 420 450 470 490 500 508
## 70 3 6 1 1 295 1
## 520 530 534 550 562 580 590
## 3 2 1 2 1 1 1
## 600 630 650 700 720 750 800
## 40 1 1 26 1 1 19
## 850 900 907 1000 1200 1210 1300
## 1 3 1 69 5 1 1
## 1500 1580 1800 2000 2002 2200 2500
## 13 1 1 8 1 1 3
## 3000 or more
## 17
# Top-code q517_bulb (sundry articles: electric bulb, tubelight, glassware,
# bucket): values of 4000 and above become one "4000 or more" category.
mydata <- top_recode(
  variable = "q517_bulb",
  break_point = 4000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q517_bulb. 517 Sundry articles including electric bulb, tubelight, glassware, bucket, washi
## 0 10 20 30 40 50 60 65 70 74 75 80 90 95 100
## 25 1 1 1 7 11 6 1 4 1 2 4 3 2 59
## 105 106 110 120 122 125 126 128 130 137 140 145 146 149 150
## 1 1 10 13 1 1 1 1 7 1 5 1 1 1 57
## 158 160 164 165 170 175 180 185 187 190 200 205 208 210 220
## 1 12 1 1 10 3 8 1 1 3 178 1 1 4 9
## 225 230 240 245 246 250 260 262 270 275 280 287 290 300 305
## 3 13 6 2 1 93 9 1 6 2 10 1 3 255 1
## 310 315 320 325 330 340 345 346 350 355 356 360 365 368 370
## 9 2 9 1 14 14 1 1 74 2 1 9 2 1 10
## 375 380 385 388 390 392 394 400 405 408 410 415 420 430 436
## 2 12 4 1 6 1 1 188 1 1 7 1 7 5 1
## 440 450 454 460 465 470 475 480 490 499 500 508 510 520 522
## 6 52 1 7 1 8 2 7 3 1 429 1 3 2 1
## 525 526 530 540 550 560 570 580 590 600 608 610 615 620 624
## 1 1 1 3 19 5 2 2 3 107 1 1 1 5 1
## 640 650 670 680 690 700 710 720 730 750 760 770 780 795 800
## 1 9 2 1 1 52 1 4 1 10 2 1 1 1 49
## 820 835 850 900 910 915 920 936 940 950 960 970 980 1000 1010
## 1 1 7 8 1 1 1 1 1 2 1 1 1 131 1
## 1030 1050 1100 1152 1166 1190 1200 1240 1250 1300 1350 1390 1400 1500 1580
## 1 1 2 1 1 1 12 1 2 2 2 1 1 34 1
## 1600 1610 1630 1650 1700 1710 2000 2100 2200 2300 2480 2500 2850 3000 3030
## 1 1 1 1 1 1 18 1 1 1 1 4 1 8 1
## 3100 3350 3500 3600 4000 4500 5000 5600 7000 8400 10000
## 1 1 1 1 1 1 4 1 2 1 4
## [1] "Frequency table after encoding"
## q517_bulb. 517 Sundry articles including electric bulb, tubelight, glassware, bucket, washi
## 0 10 20 30 40 50 60
## 25 1 1 1 7 11 6
## 65 70 74 75 80 90 95
## 1 4 1 2 4 3 2
## 100 105 106 110 120 122 125
## 59 1 1 10 13 1 1
## 126 128 130 137 140 145 146
## 1 1 7 1 5 1 1
## 149 150 158 160 164 165 170
## 1 57 1 12 1 1 10
## 175 180 185 187 190 200 205
## 3 8 1 1 3 178 1
## 208 210 220 225 230 240 245
## 1 4 9 3 13 6 2
## 246 250 260 262 270 275 280
## 1 93 9 1 6 2 10
## 287 290 300 305 310 315 320
## 1 3 255 1 9 2 9
## 325 330 340 345 346 350 355
## 1 14 14 1 1 74 2
## 356 360 365 368 370 375 380
## 1 9 2 1 10 2 12
## 385 388 390 392 394 400 405
## 4 1 6 1 1 188 1
## 408 410 415 420 430 436 440
## 1 7 1 7 5 1 6
## 450 454 460 465 470 475 480
## 52 1 7 1 8 2 7
## 490 499 500 508 510 520 522
## 3 1 429 1 3 2 1
## 525 526 530 540 550 560 570
## 1 1 1 3 19 5 2
## 580 590 600 608 610 615 620
## 2 3 107 1 1 1 5
## 624 640 650 670 680 690 700
## 1 1 9 2 1 1 52
## 710 720 730 750 760 770 780
## 1 4 1 10 2 1 1
## 795 800 820 835 850 900 910
## 1 49 1 1 7 8 1
## 915 920 936 940 950 960 970
## 1 1 1 1 2 1 1
## 980 1000 1010 1030 1050 1100 1152
## 1 131 1 1 1 2 1
## 1166 1190 1200 1240 1250 1300 1350
## 1 1 12 1 2 2 2
## 1390 1400 1500 1580 1600 1610 1630
## 1 1 34 1 1 1 1
## 1650 1700 1710 2000 2100 2200 2300
## 1 1 1 18 1 1 1
## 2480 2500 2850 3000 3030 3100 3350
## 1 4 1 8 1 1 1
## 3500 3600 4000 or more
## 1 1 14
# Top-code q518_servant (consumer services: domestic servants, tailoring,
# grinding charges): values of 50000 and above become "50000 or more".
mydata <- top_recode(
  variable = "q518_servant",
  break_point = 50000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q518_servant. 518 Consumer services such as domestic servants, tailoring, grinding charges, te
## 0 10 20 30 40 50 60 70 72 80 90 100 105
## 198 2 3 12 5 22 10 3 1 12 4 83 1
## 110 120 125 130 140 150 160 170 171 180 183 190 200
## 3 10 2 1 3 30 5 1 1 9 1 2 142
## 205 208 210 220 225 230 238 240 250 260 270 280 285
## 2 1 3 3 1 2 2 4 25 5 1 4 1
## 290 295 300 310 314 320 330 340 350 360 370 375 380
## 2 1 93 4 1 3 1 3 22 8 1 1 1
## 400 410 420 430 433 440 450 460 470 475 480 490 500
## 72 1 4 1 1 3 13 2 1 1 1 1 154
## 510 520 530 540 550 560 580 590 600 608 624 640 650
## 2 5 1 1 17 1 1 1 67 1 2 2 5
## 660 670 680 690 700 710 720 750 760 770 780 790 800
## 3 1 1 1 42 2 3 9 1 2 1 2 28
## 850 860 870 900 950 960 980 1000 1004 1015 1050 1060 1070
## 4 1 2 19 1 1 2 116 1 1 3 1 2
## 1080 1100 1120 1125 1130 1150 1160 1170 1180 1200 1220 1240 1250
## 1 10 1 1 1 6 1 2 1 37 1 1 5
## 1270 1300 1350 1360 1384 1400 1410 1420 1450 1460 1480 1500 1540
## 2 10 4 1 1 7 1 1 3 1 1 78 1
## 1550 1560 1580 1590 1600 1630 1650 1666 1700 1720 1750 1800 1820
## 4 1 1 1 13 1 3 1 7 3 5 6 1
## 1850 1860 1880 1900 1920 1950 2000 2060 2080 2100 2130 2150 2180
## 3 1 1 6 2 1 59 4 1 6 1 8 1
## 2200 2250 2270 2300 2310 2350 2360 2380 2400 2410 2450 2500 2550
## 8 1 1 4 1 1 1 1 3 1 3 22 1
## 2570 2600 2650 2700 2720 2740 2750 2800 2900 3000 3100 3141 3150
## 1 3 4 6 1 2 5 4 2 77 7 1 2
## 3160 3165 3200 3220 3260 3280 3300 3330 3380 3400 3440 3500 3600
## 1 1 5 1 2 1 7 1 2 3 1 17 4
## 3660 3680 3700 3800 4000 4100 4120 4150 4200 4280 4300 4310 4400
## 1 1 1 1 34 3 1 1 7 1 2 1 1
## 4500 4550 4560 4570 4600 4650 4660 5000 5050 5080 5150 5180 5187
## 4 1 1 1 3 2 1 61 1 1 1 1 1
## 5200 5210 5240 5250 5340 5360 5400 5500 5600 5620 5629 5650 5660
## 2 1 1 1 2 1 6 6 6 1 1 1 1
## 5700 5750 5800 5900 6000 6050 6060 6100 6140 6150 6160 6200 6280
## 4 1 4 1 26 1 1 2 1 1 1 2 1
## 6300 6320 6400 6500 6620 6700 6730 6800 6900 6950 6990 7000 7050
## 1 1 2 3 1 1 1 2 1 1 1 12 3
## 7100 7250 7480 7500 7600 7850 7900 8000 8250 8400 8500 8600 9000
## 2 1 1 3 1 1 1 15 1 1 4 2 3
## 9200 9500 9525 9650 10000 10060 10200 10270 10300 10400 10500 10650 10700
## 1 5 1 1 21 1 3 1 2 1 2 1 3
## 10740 10800 11000 11200 11700 12000 12200 12300 12520 12700 13000 13420 13800
## 2 1 4 2 1 11 1 1 1 1 3 1 1
## 14000 14500 15000 15180 15200 15240 15400 15500 16000 16200 16400 18000 18210
## 3 1 17 1 1 1 2 3 4 1 1 1 1
## 19010 20000 20100 20275 20300 20380 20450 20500 20800 21000 21500 22000 22100
## 1 14 1 1 2 1 1 1 1 1 1 1 1
## 22900 23500 25000 25300 26900 27000 29300 30000 30300 30400 31000 31202 31300
## 1 1 4 1 1 1 1 3 1 1 2 1 1
## 31800 35000 40000 45000 50000 50600 51700 56000 60000 80000 1e+05 101900 154000
## 1 2 2 1 2 1 1 1 3 1 1 1 1
## 204000 350000
## 1 1
## [1] "Frequency table after encoding"
## q518_servant. 518 Consumer services such as domestic servants, tailoring, grinding charges, te
## 0 10 20 30 40 50
## 198 2 3 12 5 22
## 60 70 72 80 90 100
## 10 3 1 12 4 83
## 105 110 120 125 130 140
## 1 3 10 2 1 3
## 150 160 170 171 180 183
## 30 5 1 1 9 1
## 190 200 205 208 210 220
## 2 142 2 1 3 3
## 225 230 238 240 250 260
## 1 2 2 4 25 5
## 270 280 285 290 295 300
## 1 4 1 2 1 93
## 310 314 320 330 340 350
## 4 1 3 1 3 22
## 360 370 375 380 400 410
## 8 1 1 1 72 1
## 420 430 433 440 450 460
## 4 1 1 3 13 2
## 470 475 480 490 500 510
## 1 1 1 1 154 2
## 520 530 540 550 560 580
## 5 1 1 17 1 1
## 590 600 608 624 640 650
## 1 67 1 2 2 5
## 660 670 680 690 700 710
## 3 1 1 1 42 2
## 720 750 760 770 780 790
## 3 9 1 2 1 2
## 800 850 860 870 900 950
## 28 4 1 2 19 1
## 960 980 1000 1004 1015 1050
## 1 2 116 1 1 3
## 1060 1070 1080 1100 1120 1125
## 1 2 1 10 1 1
## 1130 1150 1160 1170 1180 1200
## 1 6 1 2 1 37
## 1220 1240 1250 1270 1300 1350
## 1 1 5 2 10 4
## 1360 1384 1400 1410 1420 1450
## 1 1 7 1 1 3
## 1460 1480 1500 1540 1550 1560
## 1 1 78 1 4 1
## 1580 1590 1600 1630 1650 1666
## 1 1 13 1 3 1
## 1700 1720 1750 1800 1820 1850
## 7 3 5 6 1 3
## 1860 1880 1900 1920 1950 2000
## 1 1 6 2 1 59
## 2060 2080 2100 2130 2150 2180
## 4 1 6 1 8 1
## 2200 2250 2270 2300 2310 2350
## 8 1 1 4 1 1
## 2360 2380 2400 2410 2450 2500
## 1 1 3 1 3 22
## 2550 2570 2600 2650 2700 2720
## 1 1 3 4 6 1
## 2740 2750 2800 2900 3000 3100
## 2 5 4 2 77 7
## 3141 3150 3160 3165 3200 3220
## 1 2 1 1 5 1
## 3260 3280 3300 3330 3380 3400
## 2 1 7 1 2 3
## 3440 3500 3600 3660 3680 3700
## 1 17 4 1 1 1
## 3800 4000 4100 4120 4150 4200
## 1 34 3 1 1 7
## 4280 4300 4310 4400 4500 4550
## 1 2 1 1 4 1
## 4560 4570 4600 4650 4660 5000
## 1 1 3 2 1 61
## 5050 5080 5150 5180 5187 5200
## 1 1 1 1 1 2
## 5210 5240 5250 5340 5360 5400
## 1 1 1 2 1 6
## 5500 5600 5620 5629 5650 5660
## 6 6 1 1 1 1
## 5700 5750 5800 5900 6000 6050
## 4 1 4 1 26 1
## 6060 6100 6140 6150 6160 6200
## 1 2 1 1 1 2
## 6280 6300 6320 6400 6500 6620
## 1 1 1 2 3 1
## 6700 6730 6800 6900 6950 6990
## 1 1 2 1 1 1
## 7000 7050 7100 7250 7480 7500
## 12 3 2 1 1 3
## 7600 7850 7900 8000 8250 8400
## 1 1 1 15 1 1
## 8500 8600 9000 9200 9500 9525
## 4 2 3 1 5 1
## 9650 10000 10060 10200 10270 10300
## 1 21 1 3 1 2
## 10400 10500 10650 10700 10740 10800
## 1 2 1 3 2 1
## 11000 11200 11700 12000 12200 12300
## 4 2 1 11 1 1
## 12520 12700 13000 13420 13800 14000
## 1 1 3 1 1 3
## 14500 15000 15180 15200 15240 15400
## 1 17 1 1 1 2
## 15500 16000 16200 16400 18000 18210
## 3 4 1 1 1 1
## 19010 20000 20100 20275 20300 20380
## 1 14 1 1 2 1
## 20450 20500 20800 21000 21500 22000
## 1 1 1 1 1 1
## 22100 22900 23500 25000 25300 26900
## 1 1 1 4 1 1
## 27000 29300 30000 30300 30400 31000
## 1 1 3 1 1 2
## 31202 31300 31800 35000 40000 45000
## 1 1 1 2 2 1
## 50000 or more
## 14
# Top-code q519_disel (conveyance: porter charges, diesel, petrol, school
# bus/van): values of 30000 and above become one "30000 or more" category.
mydata <- top_recode(
  variable = "q519_disel",
  break_point = 30000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q519_disel. 519 Conveyance including porter charges, diesel, petrol, school bus/van, etc.
## 0 8 10 40 50 80 100 108 120 150 152 160 180
## 882 1 1 3 8 1 19 1 1 8 1 1 3
## 200 225 250 300 310 350 360 375 400 450 480 500 510
## 67 2 5 46 2 2 1 1 30 3 1 185 1
## 550 560 600 650 675 700 750 800 840 900 1000 1100 1200
## 1 1 70 2 1 22 4 24 1 10 214 2 41
## 1210 1250 1300 1500 1520 1540 1650 1700 1800 1950 2000 2100 2200
## 1 1 5 211 1 1 2 1 5 1 93 5 2
## 2400 2500 2700 2800 3000 3290 3300 3500 3600 3700 4000 4200 4500
## 6 14 2 3 169 1 2 3 2 1 15 1 13
## 5000 5400 5500 6000 7000 7500 8000 9000 10000 12000 13000 14000 15000
## 38 1 3 12 3 4 4 2 8 3 2 1 16
## 15002 18000 20000 20003 23500 25000 30000 33000 36000 39000 43200 45000 50000
## 1 1 3 1 1 1 4 1 1 1 1 1 2
## 60000 66000 75000 80000 152609 527000
## 1 1 1 2 1 1
## [1] "Frequency table after encoding"
## q519_disel. 519 Conveyance including porter charges, diesel, petrol, school bus/van, etc.
## 0 8 10 40 50 80
## 882 1 1 3 8 1
## 100 108 120 150 152 160
## 19 1 1 8 1 1
## 180 200 225 250 300 310
## 3 67 2 5 46 2
## 350 360 375 400 450 480
## 2 1 1 30 3 1
## 500 510 550 560 600 650
## 185 1 1 1 70 2
## 675 700 750 800 840 900
## 1 22 4 24 1 10
## 1000 1100 1200 1210 1250 1300
## 214 2 41 1 1 5
## 1500 1520 1540 1650 1700 1800
## 211 1 1 2 1 5
## 1950 2000 2100 2200 2400 2500
## 1 93 5 2 6 14
## 2700 2800 3000 3290 3300 3500
## 2 3 169 1 2 3
## 3600 3700 4000 4200 4500 5000
## 2 1 15 1 13 38
## 5400 5500 6000 7000 7500 8000
## 1 3 12 3 4 4
## 9000 10000 12000 13000 14000 15000
## 2 8 3 2 1 16
## 15002 18000 20000 20003 23500 25000
## 1 1 3 1 1 1
## 30000 or more
## 18
# Top-code q520_rent (rent / house rent): values at or above the break point
# are collapsed into a single "<break> or more" category.
# NOTE(review): percentile_checker() is kept inline on purpose; it appears to be
# evaluated lazily inside top_recode(), so hoisting it could reorder the output.
mydata <- top_recode(
  variable = "q520_rent",
  break_point = percentile_checker("q520_rent"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q520_rent. 520 Rent / house rent
## 0 8 10 16 30 150 160 200 250 300 500 550 600 700 800 1000 1200 1260
## 2218 1 1 1 1 1 1 1 2 1 16 1 2 7 3 31 5 1
## 1300 1500 1600 2000 2200 2500 3000 3500 4000 4500 5000 7000
## 5 18 3 14 1 4 6 3 1 2 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q520_rent. 520 Rent / house rent
## 0 8 10 16 30 150 160
## 2218 1 1 1 1 1 1
## 200 250 300 500 550 600 700
## 1 2 1 16 1 2 7
## 800 1000 1200 1260 1300 1500 1600
## 3 31 5 1 5 18 3
## 2000 2200 2500 3000 or more
## 14 1 4 14
# Top-code q521_tax (consumer taxes and cesses incl. water charges): values of
# 4500 and above become one "4500 or more" category.
mydata <- top_recode(
  variable = "q521_tax",
  break_point = 4500,
  missing = NA
)
## [1] "Frequency table before encoding"
## q521_tax. 521 Consumer taxes and cesses including water charges
## 0 8 10 20 25 26 29 30 32 35 38 40 43
## 955 1 2 3 10 8 1 18 2 5 1 4 1
## 45 50 51 52 53 55 56 58 60 62 66 68 70
## 1 53 1 56 2 1 1 1 40 1 1 1 12
## 72 75 80 90 95 100 102 105 106 107 110 112 115
## 1 9 8 1 1 90 2 1 3 1 2 2 1
## 120 122 125 130 142 150 160 166 170 183 198 200 206
## 6 1 3 4 1 42 1 1 4 1 1 154 1
## 210 215 220 226 249 250 280 290 300 330 333 335 350
## 2 1 2 1 1 118 3 1 148 1 1 1 47
## 359 360 365 375 400 408 430 450 460 485 500 550 590
## 1 1 1 1 90 1 1 5 4 1 112 4 1
## 600 625 650 700 750 800 900 950 1000 1050 1100 1200 1226
## 92 1 1 17 12 14 33 1 32 3 4 15 1
## 1240 1300 1400 1450 1500 1552 1600 2000 2060 2100 2500 3000 3500
## 1 2 1 1 12 1 1 10 1 1 3 2 1
## 3600 4000 4500 4800 5000 6250 6600 10000 10250 21000 22000 50100 82000
## 1 2 1 1 1 1 1 2 1 1 1 1 1
## 250000
## 1
## [1] "Frequency table after encoding"
## q521_tax. 521 Consumer taxes and cesses including water charges
## 0 8 10 20 25 26 29
## 955 1 2 3 10 8 1
## 30 32 35 38 40 43 45
## 18 2 5 1 4 1 1
## 50 51 52 53 55 56 58
## 53 1 56 2 1 1 1
## 60 62 66 68 70 72 75
## 40 1 1 1 12 1 9
## 80 90 95 100 102 105 106
## 8 1 1 90 2 1 3
## 107 110 112 115 120 122 125
## 1 2 2 1 6 1 3
## 130 142 150 160 166 170 183
## 4 1 42 1 1 4 1
## 198 200 206 210 215 220 226
## 1 154 1 2 1 2 1
## 249 250 280 290 300 330 333
## 1 118 3 1 148 1 1
## 335 350 359 360 365 375 400
## 1 47 1 1 1 1 90
## 408 430 450 460 485 500 550
## 1 1 5 4 1 112 4
## 590 600 625 650 700 750 800
## 1 92 1 1 17 12 14
## 900 950 1000 1050 1100 1200 1226
## 33 1 32 3 4 15 1
## 1240 1300 1400 1450 1500 1552 1600
## 1 2 1 1 12 1 1
## 2000 2060 2100 2500 3000 3500 3600
## 10 1 1 3 2 1 1
## 4000 4500 or more
## 2 13
# Top-code q522_medicine (medical expenses, non-institutional): values at or
# above the break point are collapsed into one "<break> or more" category.
# NOTE(review): percentile_checker() is kept inline on purpose; it appears to be
# evaluated lazily inside top_recode(), so hoisting it could reorder the output.
mydata <- top_recode(
  variable = "q522_medicine",
  break_point = percentile_checker("q522_medicine"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q522_medicine. 522 Medical Expenses (non-institutional)
## 0 2 4 5 8 10 11 15 20 25 30 35 40
## 1265 3 1 38 2 36 1 9 8 4 4 3 4
## 50 55 60 65 70 80 100 120 125 150 200 205 208
## 20 1 2 1 1 1 48 1 1 12 81 1 1
## 220 230 250 260 280 300 308 320 330 350 360 370 400
## 2 2 11 1 2 56 1 1 1 5 1 2 25
## 440 450 500 550 600 650 660 700 750 800 850 900 1000
## 1 4 141 3 19 6 1 18 1 11 1 3 121
## 1050 1100 1150 1200 1250 1300 1400 1500 1600 1630 1730 1800 2000
## 1 3 1 18 1 4 1 47 3 1 1 3 66
## 2400 2500 2700 3000 3010 3100 3500 4000 4200 4500 4800 5000 5100
## 2 14 1 46 1 1 3 26 1 1 1 51 2
## 6000 6650 7000 7500 8000 10000 12000 13000 15000 17000 20000 25000 30000
## 14 1 3 1 4 12 8 1 6 1 2 3 2
## 40000 50000 60003 150000
## 2 2 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q522_medicine. 522 Medical Expenses (non-institutional)
## 0 2 4 5 8 10
## 1265 3 1 38 2 36
## 11 15 20 25 30 35
## 1 9 8 4 4 3
## 40 50 55 60 65 70
## 4 20 1 2 1 1
## 80 100 120 125 150 200
## 1 48 1 1 12 81
## 205 208 220 230 250 260
## 1 1 2 2 11 1
## 280 300 308 320 330 350
## 2 56 1 1 1 5
## 360 370 400 440 450 500
## 1 2 25 1 4 141
## 550 600 650 660 700 750
## 3 19 6 1 18 1
## 800 850 900 1000 1050 1100
## 11 1 3 121 1 3
## 1150 1200 1250 1300 1400 1500
## 1 18 1 4 1 47
## 1600 1630 1730 1800 2000 2400
## 3 1 1 3 66 2
## 2500 2700 3000 3010 3100 3500
## 14 1 46 1 1 3
## 4000 4200 4500 4800 5000 5100
## 26 1 1 1 51 2
## 6000 6650 7000 7500 8000 10000
## 14 1 3 1 4 12
## 12000 13000 15000 17000 20000 or more
## 8 1 6 1 13
# Top-code q523_med_institute (medical, institutional): values of 350000 and
# above become one "350000 or more" category.
mydata <- top_recode(
  variable = "q523_med_institute",
  break_point = 350000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q523_med_institute. 523 Medical (institutional)
## 0 1 8 10 50 60 90 100 150 200 220
## 654 1 2 1 3 1 1 7 2 24 2
## 250 260 270 300 350 400 450 456 500 580 600
## 1 1 1 14 1 5 1 1 77 1 22
## 650 700 720 750 800 808 880 900 1000 1100 1150
## 1 10 1 1 8 1 1 2 100 1 1
## 1200 1300 1500 1600 1700 1800 2000 2200 2250 2350 2400
## 19 1 70 2 2 1 159 1 1 1 4
## 2500 3000 3500 3600 3700 3800 4000 4600 4800 5000 5400
## 22 121 5 5 2 1 52 2 2 202 1
## 5500 5600 6000 6100 6250 6500 7000 7200 7400 8000 8400
## 1 2 91 1 1 1 47 2 1 43 2
## 8800 8960 9000 9600 10000 10500 11000 11800 12000 13000 14000
## 2 1 6 1 123 1 4 1 51 6 1
## 15000 16000 17000 18000 18400 19200 20000 21000 22000 22003 24000
## 59 1 2 10 1 1 53 3 6 1 9
## 24500 25000 25500 26000 28800 30000 32000 32400 35000 36000 40000
## 1 17 1 1 1 27 3 1 9 3 13
## 42000 45000 50000 60000 65000 70000 75000 80000 1e+05 130000 150000
## 1 6 33 22 1 6 1 8 9 1 5
## 160000 2e+05 230000 250000 3e+05 350000 5e+05 550000 7e+05 750000 1e+06
## 1 1 1 1 5 1 4 1 1 1 2
## 1800022 2500000 3e+06
## 1 1 1
## [1] "Frequency table after encoding"
## q523_med_institute. 523 Medical (institutional)
## 0 1 8 10 50 60
## 654 1 2 1 3 1
## 90 100 150 200 220 250
## 1 7 2 24 2 1
## 260 270 300 350 400 450
## 1 1 14 1 5 1
## 456 500 580 600 650 700
## 1 77 1 22 1 10
## 720 750 800 808 880 900
## 1 1 8 1 1 2
## 1000 1100 1150 1200 1300 1500
## 100 1 1 19 1 70
## 1600 1700 1800 2000 2200 2250
## 2 2 1 159 1 1
## 2350 2400 2500 3000 3500 3600
## 1 4 22 121 5 5
## 3700 3800 4000 4600 4800 5000
## 2 1 52 2 2 202
## 5400 5500 5600 6000 6100 6250
## 1 1 2 91 1 1
## 6500 7000 7200 7400 8000 8400
## 1 47 2 1 43 2
## 8800 8960 9000 9600 10000 10500
## 2 1 6 1 123 1
## 11000 11800 12000 13000 14000 15000
## 4 1 51 6 1 59
## 16000 17000 18000 18400 19200 20000
## 1 2 10 1 1 53
## 21000 22000 22003 24000 24500 25000
## 3 6 1 9 1 17
## 25500 26000 28800 30000 32000 32400
## 1 1 1 27 3 1
## 35000 36000 40000 42000 45000 50000
## 9 3 13 1 6 33
## 60000 65000 70000 75000 80000 1e+05
## 22 1 6 1 8 9
## 130000 150000 160000 2e+05 230000 250000
## 1 5 1 1 1 1
## 3e+05 350000 or more
## 5 13
# Top-code q524_fee (tuition and other fees incl. private tutor, school/college
# fees): values of 60000 and above become one "60000 or more" category.
mydata <- top_recode(
  variable = "q524_fee",
  break_point = 60000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q524_fee. 524 Tuition fees & other fees including private tutor, school/college fees, etc.
## 0 2 3 8 10 12 20 22 25 30 40 45 50 51 60
## 1516 1 1 1 3 1 9 1 1 3 2 1 5 1 3
## 63 90 95 100 140 150 170 175 200 240 250 300 350 360 365
## 1 1 1 21 2 7 1 1 32 1 9 24 7 1 2
## 400 450 470 500 520 550 560 575 600 615 650 700 750 800 810
## 22 8 1 21 1 8 1 1 15 1 3 4 5 8 1
## 836 900 925 950 1000 1050 1100 1110 1160 1200 1250 1300 1350 1500 1550
## 1 7 1 1 34 1 3 1 1 20 2 2 1 17 1
## 1600 1700 1800 2000 2075 2160 2200 2400 2500 2700 2800 2900 2975 3000 3002
## 3 4 7 20 1 1 1 19 7 2 3 2 1 33 1
## 3100 3160 3200 3300 3350 3400 3500 3600 3750 3900 3950 4000 4100 4200 4300
## 2 1 3 2 1 1 5 19 2 1 1 27 1 6 1
## 4400 4440 4500 4700 4800 5000 5060 5200 5400 5500 5540 5770 6000 6050 6100
## 1 1 4 1 6 43 1 1 3 3 1 1 30 1 1
## 6250 6300 6400 6500 6600 6700 6800 7000 7200 7250 7400 7405 7500 7600 7700
## 1 1 1 1 4 1 1 11 6 1 1 1 2 1 2
## 7800 8000 8400 8430 8450 8800 9000 9500 10000 10500 10900 11000 11400 12000 12500
## 2 17 4 1 1 1 7 1 25 1 1 2 1 13 2
## 13000 13200 13500 14000 14200 14400 14500 14900 15000 15800 16000 16050 17000 17100 18000
## 5 1 2 2 1 2 1 1 15 1 3 1 1 1 3
## 19000 19200 20000 22000 22300 24000 25000 27000 27600 28000 29000 29500 30000 30450 30550
## 1 1 11 1 1 4 2 1 1 1 1 1 7 1 1
## 31000 35000 36000 39000 40000 40500 42000 48000 50000 55000 59600 60000 73600 75000 78650
## 1 3 1 1 2 1 1 2 3 2 1 5 1 1 1
## 90000 91000 1e+05 5e+05
## 1 1 6 1
## [1] "Frequency table after encoding"
## q524_fee. 524 Tuition fees & other fees including private tutor, school/college fees, etc.
## 0 2 3 8 10 12
## 1516 1 1 1 3 1
## 20 22 25 30 40 45
## 9 1 1 3 2 1
## 50 51 60 63 90 95
## 5 1 3 1 1 1
## 100 140 150 170 175 200
## 21 2 7 1 1 32
## 240 250 300 350 360 365
## 1 9 24 7 1 2
## 400 450 470 500 520 550
## 22 8 1 21 1 8
## 560 575 600 615 650 700
## 1 1 15 1 3 4
## 750 800 810 836 900 925
## 5 8 1 1 7 1
## 950 1000 1050 1100 1110 1160
## 1 34 1 3 1 1
## 1200 1250 1300 1350 1500 1550
## 20 2 2 1 17 1
## 1600 1700 1800 2000 2075 2160
## 3 4 7 20 1 1
## 2200 2400 2500 2700 2800 2900
## 1 19 7 2 3 2
## 2975 3000 3002 3100 3160 3200
## 1 33 1 2 1 3
## 3300 3350 3400 3500 3600 3750
## 2 1 1 5 19 2
## 3900 3950 4000 4100 4200 4300
## 1 1 27 1 6 1
## 4400 4440 4500 4700 4800 5000
## 1 1 4 1 6 43
## 5060 5200 5400 5500 5540 5770
## 1 1 3 3 1 1
## 6000 6050 6100 6250 6300 6400
## 30 1 1 1 1 1
## 6500 6600 6700 6800 7000 7200
## 1 4 1 1 11 6
## 7250 7400 7405 7500 7600 7700
## 1 1 1 2 1 2
## 7800 8000 8400 8430 8450 8800
## 2 17 4 1 1 1
## 9000 9500 10000 10500 10900 11000
## 7 1 25 1 1 2
## 11400 12000 12500 13000 13200 13500
## 1 13 2 5 1 2
## 14000 14200 14400 14500 14900 15000
## 2 1 2 1 1 15
## 15800 16000 16050 17000 17100 18000
## 1 3 1 1 1 3
## 19000 19200 20000 22000 22300 24000
## 1 1 11 1 1 4
## 25000 27000 27600 28000 29000 29500
## 2 1 1 1 1 1
## 30000 30450 30550 31000 35000 36000
## 7 1 1 1 3 1
## 39000 40000 40500 42000 48000 50000
## 1 2 1 1 2 3
## 55000 59600 60000 or more
## 2 1 17
# Top-code q525_books (school books and other educational articles): values at
# or above the break point are collapsed into one "<break> or more" category.
# NOTE(review): percentile_checker() is kept inline on purpose; it appears to be
# evaluated lazily inside top_recode(), so hoisting it could reorder the output.
mydata <- top_recode(
  variable = "q525_books",
  break_point = percentile_checker("q525_books"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q525_books. 525 School books & other educational articles including newspaper, library charg
## 0 8 10 60 100 120 130 150 166 200 210 220 250 295 300
## 626 1 1 1 6 6 2 5 1 17 1 1 3 1 10
## 350 360 400 500 540 600 650 700 800 900 950 1000 1100 1150 1200
## 2 1 19 61 2 24 1 11 7 4 3 165 1 1 15
## 1250 1280 1300 1320 1400 1440 1450 1480 1500 1600 1700 1750 1800 1900 2000
## 1 1 1 1 2 1 1 1 100 3 4 1 4 2 188
## 2100 2150 2200 2300 2400 2500 2600 2640 2700 2800 2900 3000 3008 3150 3200
## 2 1 5 2 4 49 1 1 3 2 1 181 1 1 3
## 3300 3400 3500 3600 3700 4000 4100 4200 4260 4400 4500 4800 4900 5000 5500
## 1 2 18 2 2 112 1 2 1 1 11 3 1 164 7
## 5800 6000 6500 6800 7000 7100 7200 7500 7600 8000 8500 9000 9200 10000 10120
## 1 80 5 1 38 1 1 3 1 46 2 28 1 68 2
## 10130 10500 11000 11500 11700 12000 12500 13000 14000 14500 15000 15120 15300 16000 16800
## 1 2 8 3 1 27 1 10 4 1 30 1 1 6 1
## 17000 17900 18000 18200 19000 20000 20150 21000 22000 23000 24000 25000 25200 27000 28000
## 1 1 4 1 3 9 1 2 4 2 3 8 1 2 1
## 30000 32000 33000 34000 35000 38000 43200 45000 50000 52000 55000 60000 70000 80000 84600
## 12 3 1 1 4 1 1 1 5 1 1 1 3 1 1
## 1e+05 3e+05
## 2 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q525_books. 525 School books & other educational articles including newspaper, library charg
## 0 8 10 60 100 120
## 626 1 1 1 6 6
## 130 150 166 200 210 220
## 2 5 1 17 1 1
## 250 295 300 350 360 400
## 3 1 10 2 1 19
## 500 540 600 650 700 800
## 61 2 24 1 11 7
## 900 950 1000 1100 1150 1200
## 4 3 165 1 1 15
## 1250 1280 1300 1320 1400 1440
## 1 1 1 1 2 1
## 1450 1480 1500 1600 1700 1750
## 1 1 100 3 4 1
## 1800 1900 2000 2100 2150 2200
## 4 2 188 2 1 5
## 2300 2400 2500 2600 2640 2700
## 2 4 49 1 1 3
## 2800 2900 3000 3008 3150 3200
## 2 1 181 1 1 3
## 3300 3400 3500 3600 3700 4000
## 1 2 18 2 2 112
## 4100 4200 4260 4400 4500 4800
## 1 2 1 1 11 3
## 4900 5000 5500 5800 6000 6500
## 1 164 7 1 80 5
## 6800 7000 7100 7200 7500 7600
## 1 38 1 1 3 1
## 8000 8500 9000 9200 10000 10120
## 46 2 28 1 68 2
## 10130 10500 11000 11500 11700 12000
## 1 2 8 3 1 27
## 12500 13000 14000 14500 15000 15120
## 1 10 4 1 30 1
## 15300 16000 16800 17000 17900 18000
## 1 6 1 1 1 4
## 18200 19000 20000 20150 21000 22000
## 1 3 9 1 2 4
## 23000 24000 25000 25200 27000 28000
## 2 3 8 1 2 1
## 30000 32000 33000 34000 35000 38000
## 12 3 1 1 4 1
## 43200 45000 50000 or more
## 1 1 16
# Top-code clothing and bedding spending (q526_clothes). The cut-off is
# whatever percentile_checker() returns for this variable.
mydata <- top_recode(
  variable = "q526_clothes",
  break_point = percentile_checker("q526_clothes"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q526_clothes. 526 Clothing and bedding
## 0 50 60 200 300 400 500 600 700 800 900 1000 1100
## 59 1 1 2 3 3 11 5 3 3 1 41 1
## 1102 1200 1300 1350 1400 1450 1500 1600 1800 1900 2000 2100 2250
## 1 13 1 1 3 1 29 1 2 1 118 4 1
## 2400 2500 2700 2900 3000 3140 3150 3250 3500 3600 3700 3900 4000
## 2 32 1 1 175 1 1 1 9 1 1 1 138
## 4100 4200 4300 4500 4900 5000 5100 5200 5400 5450 5500 5600 5700
## 1 1 1 4 2 477 3 1 1 1 8 1 1
## 5800 6000 6100 6300 6500 7000 7008 7400 7500 8000 8500 8900 9000
## 2 218 1 1 2 100 1 1 3 105 6 1 18
## 9400 9600 10000 10500 11000 11500 11700 12000 12500 13000 13800 14000 14900
## 1 1 352 2 7 1 1 48 1 4 1 2 1
## 15000 16000 17000 18000 18500 19000 20000 21000 22000 22100 23000 24000 25000
## 106 2 1 2 1 1 59 3 3 1 1 2 22
## 25800 26000 27000 27100 28000 29000 30000 31000 35000 40000 41000 42000 45000
## 1 1 1 1 1 1 21 1 7 7 1 1 2
## 48000 50000 55000 56000 60000 70000 80000 1e+05 107200 110000 114100 3e+05 4e+05
## 1 21 2 1 4 1 5 5 1 1 1 1 2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q526_clothes. 526 Clothing and bedding
## 0 50 60 200 300 400
## 59 1 1 2 3 3
## 500 600 700 800 900 1000
## 11 5 3 3 1 41
## 1100 1102 1200 1300 1350 1400
## 1 1 13 1 1 3
## 1450 1500 1600 1800 1900 2000
## 1 29 1 2 1 118
## 2100 2250 2400 2500 2700 2900
## 4 1 2 32 1 1
## 3000 3140 3150 3250 3500 3600
## 175 1 1 1 9 1
## 3700 3900 4000 4100 4200 4300
## 1 1 138 1 1 1
## 4500 4900 5000 5100 5200 5400
## 4 2 477 3 1 1
## 5450 5500 5600 5700 5800 6000
## 1 8 1 1 2 218
## 6100 6300 6500 7000 7008 7400
## 1 1 2 100 1 1
## 7500 8000 8500 8900 9000 9400
## 3 105 6 1 18 1
## 9600 10000 10500 11000 11500 11700
## 1 352 2 7 1 1
## 12000 12500 13000 13800 14000 14900
## 48 1 4 1 2 1
## 15000 16000 17000 18000 18500 19000
## 106 2 1 2 1 1
## 20000 21000 22000 22100 23000 24000
## 59 3 3 1 1 2
## 25000 25800 26000 27000 27100 28000
## 22 1 1 1 1 1
## 29000 30000 31000 35000 40000 41000
## 1 21 1 7 7 1
## 42000 45000 48000 50000 55000 56000
## 1 2 1 21 2 1
## 60000 70000 80000 or more
## 4 1 16
# Top-code footwear spending (q527_shoes). The cut-off is whatever
# percentile_checker() returns for this variable.
mydata <- top_recode(
  variable = "q527_shoes",
  break_point = percentile_checker("q527_shoes"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q527_shoes. 527 Footwear
## 0 50 70 100 150 200 240 300 320 360 400 430 500 550 600
## 25 1 2 4 1 11 2 16 1 5 10 1 163 3 52
## 700 800 810 860 900 990 1000 1050 1100 1200 1250 1300 1400 1500 1600
## 29 28 1 1 11 1 500 3 1 59 1 4 4 194 5
## 1640 1650 1700 1705 1750 1800 2000 2100 2160 2200 2300 2400 2450 2500 2600
## 1 1 1 1 2 5 510 4 1 1 1 11 1 53 3
## 2750 2800 3000 3200 3500 3550 3600 4000 4300 4500 5000 5050 5100 5500 6000
## 1 3 262 1 8 1 3 78 1 1 147 1 2 1 42
## 7000 7200 8000 8400 9600 10000 12000 15000 20000 25000 30000
## 9 1 6 1 1 24 9 6 3 4 2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q527_shoes. 527 Footwear
## 0 50 70 100 150 200
## 25 1 2 4 1 11
## 240 300 320 360 400 430
## 2 16 1 5 10 1
## 500 550 600 700 800 810
## 163 3 52 29 28 1
## 860 900 990 1000 1050 1100
## 1 11 1 500 3 1
## 1200 1250 1300 1400 1500 1600
## 59 1 4 4 194 5
## 1640 1650 1700 1705 1750 1800
## 1 1 1 1 2 5
## 2000 2100 2160 2200 2300 2400
## 510 4 1 1 1 11
## 2450 2500 2600 2750 2800 3000
## 1 53 3 1 3 262
## 3200 3500 3550 3600 4000 4300
## 1 8 1 3 78 1
## 4500 5000 5050 5100 5500 6000
## 1 147 1 2 1 42
## 7000 7200 8000 8400 9600 10000
## 9 1 6 1 1 24
## 12000 15000 or more
## 9 15
# Top-code furniture and fixtures spending (q528_furniture): values of 40000
# and above are reported as a single "40000 or more" category.
mydata <- top_recode(
  variable = "q528_furniture",
  break_point = 40000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q528_furniture. 528 Furniture and Fixtures including bedstead, almirah, suitcase, carpet, painti
## 0 5 100 110 150 200 225 250 280 300 350 400 500 600 615
## 1772 1 1 1 1 5 1 1 1 4 1 6 32 14 1
## 700 750 800 900 1000 1100 1200 1250 1300 1400 1500 1508 1600 1700 1800
## 9 1 18 3 42 6 12 2 4 10 15 1 5 5 4
## 2000 2100 2200 2300 2400 2500 2600 2800 3000 3100 3200 3400 3500 3550 3600
## 49 6 2 1 6 14 2 5 39 1 3 2 8 1 3
## 4000 4100 4200 4300 4400 4500 4800 5000 5200 5500 5800 5900 6000 6500 6700
## 32 1 2 1 1 12 1 38 2 5 1 1 15 5 1
## 7000 7500 8000 8400 8500 8800 9000 10000 11000 11500 12000 13000 13500 14000 15000
## 7 3 12 1 2 1 8 13 2 1 4 1 1 2 10
## 16000 16400 17500 18000 19000 20000 21000 22000 23100 25000 26000 27000 28000 29000 30000
## 1 1 1 1 1 8 1 1 1 1 1 1 1 1 5
## 32000 32500 35000 40000 48000 50000 60000 67000 70000 2e+05
## 1 1 1 5 1 4 4 1 1 1
## [1] "Frequency table after encoding"
## q528_furniture. 528 Furniture and Fixtures including bedstead, almirah, suitcase, carpet, painti
## 0 5 100 110 150 200
## 1772 1 1 1 1 5
## 225 250 280 300 350 400
## 1 1 1 4 1 6
## 500 600 615 700 750 800
## 32 14 1 9 1 18
## 900 1000 1100 1200 1250 1300
## 3 42 6 12 2 4
## 1400 1500 1508 1600 1700 1800
## 10 15 1 5 5 4
## 2000 2100 2200 2300 2400 2500
## 49 6 2 1 6 14
## 2600 2800 3000 3100 3200 3400
## 2 5 39 1 3 2
## 3500 3550 3600 4000 4100 4200
## 8 1 3 32 1 2
## 4300 4400 4500 4800 5000 5200
## 1 1 12 1 38 2
## 5500 5800 5900 6000 6500 6700
## 5 1 1 15 5 1
## 7000 7500 8000 8400 8500 8800
## 7 3 12 1 2 1
## 9000 10000 11000 11500 12000 13000
## 8 13 2 1 4 1
## 13500 14000 15000 16000 16400 17500
## 1 2 10 1 1 1
## 18000 19000 20000 21000 22000 23100
## 1 1 8 1 1 1
## 25000 26000 27000 28000 29000 30000
## 1 1 1 1 1 5
## 32000 32500 35000 40000 or more
## 1 1 1 17
# Top-code crockery and utensils spending (q529_crockery). The cut-off is
# whatever percentile_checker() returns for this variable.
mydata <- top_recode(
  variable = "q529_crockery",
  break_point = percentile_checker("q529_crockery"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q529_crockery. 529 Crockery & utensils including stainless steel utensils, casseroles, themos,
## 0 8 40 50 60 80 100 120 140 150 160 200 240 250 275
## 1637 3 2 4 2 3 19 2 1 13 1 57 2 14 1
## 300 310 340 350 370 400 420 430 450 490 500 550 560 600 650
## 34 2 1 5 1 22 1 1 8 1 128 2 1 23 1
## 700 720 750 800 900 1000 1100 1140 1200 1300 1400 1500 1550 1600 1700
## 15 1 2 17 7 90 3 1 7 2 1 48 1 1 2
## 1750 1800 1840 2000 2200 2400 2500 2720 3000 3500 4000 4500 4810 5000 5200
## 1 2 1 51 1 2 10 1 26 1 8 2 1 19 1
## 5500 5800 6000 7000 7500 8000 9000 10000 11000 13050 15000 18000 20000 22000 27000
## 2 2 4 2 1 1 1 8 1 1 4 1 2 1 1
## 40000 50000 2e+05
## 2 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q529_crockery. 529 Crockery & utensils including stainless steel utensils, casseroles, themos,
## 0 8 40 50 60 80
## 1637 3 2 4 2 3
## 100 120 140 150 160 200
## 19 2 1 13 1 57
## 240 250 275 300 310 340
## 2 14 1 34 2 1
## 350 370 400 420 430 450
## 5 1 22 1 1 8
## 490 500 550 560 600 650
## 1 128 2 1 23 1
## 700 720 750 800 900 1000
## 15 1 2 17 7 90
## 1100 1140 1200 1300 1400 1500
## 3 1 7 2 1 48
## 1550 1600 1700 1750 1800 1840
## 1 1 2 1 2 1
## 2000 2200 2400 2500 2720 3000
## 51 1 2 10 1 26
## 3500 4000 4500 4810 5000 5200
## 1 8 2 1 19 1
## 5500 5800 6000 7000 7500 8000
## 2 2 4 2 1 1
## 9000 10000 11000 13050 15000 or more
## 1 8 1 1 13
# Top-code cooking and household appliance spending (q530_electricity):
# values of 30000 and above are reported as "30000 or more".
mydata <- top_recode(
  variable = "q530_electricity",
  break_point = 30000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q530_electricity. 530 Cooking and household appliances including electric fan, air conditioners, s
## 0 8 70 80 100 150 200 220 240 250 270 300 340 350 360
## 1657 2 1 2 3 2 17 1 1 5 1 8 1 2 2
## 400 450 500 550 600 650 700 750 800 850 900 950 960 1000 1050
## 1 4 46 4 9 1 17 2 9 2 9 1 1 41 2
## 1100 1150 1200 1250 1300 1350 1400 1450 1500 1550 1600 1650 1700 1800 1900
## 11 1 31 4 10 2 9 1 44 2 4 2 4 5 2
## 1910 2000 2002 2040 2100 2200 2400 2500 2550 2600 2700 2750 2800 2900 3000
## 1 26 1 1 3 10 4 18 1 1 1 2 1 2 36
## 3200 3300 3400 3500 3600 3700 3800 3850 3900 4000 4100 4200 4300 4500 4600
## 2 1 1 21 2 3 3 1 1 27 2 2 1 12 4
## 4700 5000 5050 5500 6000 6200 6500 6800 7000 7160 7200 7300 7400 7500 7600
## 1 41 1 7 4 3 6 2 13 1 1 1 1 3 1
## 8000 8500 8800 9200 9500 10000 11500 12000 12500 12600 13000 13200 13300 13700 14000
## 13 2 1 1 2 12 3 7 3 2 3 1 2 1 1
## 14050 14200 15000 16000 16500 17100 17300 18300 19000 19600 20000 21000 21400 22000 25000
## 1 1 3 2 1 1 1 1 2 1 3 1 1 2 1
## 25750 28000 30000 35000 40000 50000 52000 75000
## 1 1 3 2 3 2 1 1
## [1] "Frequency table after encoding"
## q530_electricity. 530 Cooking and household appliances including electric fan, air conditioners, s
## 0 8 70 80 100 150
## 1657 2 1 2 3 2
## 200 220 240 250 270 300
## 17 1 1 5 1 8
## 340 350 360 400 450 500
## 1 2 2 1 4 46
## 550 600 650 700 750 800
## 4 9 1 17 2 9
## 850 900 950 960 1000 1050
## 2 9 1 1 41 2
## 1100 1150 1200 1250 1300 1350
## 11 1 31 4 10 2
## 1400 1450 1500 1550 1600 1650
## 9 1 44 2 4 2
## 1700 1800 1900 1910 2000 2002
## 4 5 2 1 26 1
## 2040 2100 2200 2400 2500 2550
## 1 3 10 4 18 1
## 2600 2700 2750 2800 2900 3000
## 1 1 2 1 2 36
## 3200 3300 3400 3500 3600 3700
## 2 1 1 21 2 3
## 3800 3850 3900 4000 4100 4200
## 3 1 1 27 2 2
## 4300 4500 4600 4700 5000 5050
## 1 12 4 1 41 1
## 5500 6000 6200 6500 6800 7000
## 7 4 3 6 2 13
## 7160 7200 7300 7400 7500 7600
## 1 1 1 1 3 1
## 8000 8500 8800 9200 9500 10000
## 13 2 1 1 2 12
## 11500 12000 12500 12600 13000 13200
## 3 7 3 2 3 1
## 13300 13700 14000 14050 14200 15000
## 2 1 1 1 1 3
## 16000 16500 17100 17300 18300 19000
## 2 1 1 1 1 2
## 19600 20000 21000 21400 22000 25000
## 1 3 1 1 2 1
## 25750 28000 30000 or more
## 1 1 12
# Top-code recreation goods spending (q531_tv): values of 17000 and above
# are reported as "17000 or more".
mydata <- top_recode(
  variable = "q531_tv",
  break_point = 17000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q531_tv. 531 Goods for Recreation including TV, radio, tape recorder, musical instruments
## 0 8 10 20 100 200 250 300 400 500 650 700 800 850 1000
## 2124 2 1 1 3 1 1 3 2 5 2 3 2 1 5
## 1200 1500 1600 1650 1700 2000 2400 2500 2800 3000 3200 3500 3600 3750 4000
## 2 11 3 1 2 16 2 7 1 11 1 3 1 1 11
## 4500 5000 5250 5500 6000 6300 6500 7000 8000 9000 10000 10500 11000 12000 12500
## 2 23 1 6 13 1 1 10 5 7 11 1 1 3 2
## 13000 13500 13950 14000 14800 15000 16000 17000 18000 20000 25000 26000 30000 52000 80000
## 6 2 1 6 1 5 3 2 4 1 2 1 1 1 1
## [1] "Frequency table after encoding"
## q531_tv. 531 Goods for Recreation including TV, radio, tape recorder, musical instruments
## 0 8 10 20 100 200
## 2124 2 1 1 3 1
## 250 300 400 500 650 700
## 1 3 2 5 2 3
## 800 850 1000 1200 1500 1600
## 2 1 5 2 11 3
## 1650 1700 2000 2400 2500 2800
## 1 2 16 2 7 1
## 3000 3200 3500 3600 3750 4000
## 11 1 3 1 1 11
## 4500 5000 5250 5500 6000 6300
## 2 23 1 6 13 1
## 6500 7000 8000 9000 10000 10500
## 1 10 5 7 11 1
## 11000 12000 12500 13000 13500 13950
## 1 3 2 6 2 1
## 14000 14800 15000 16000 17000 or more
## 6 1 5 3 13
# Top-code jewelry and ornaments spending (q532_jewelry): values of 450000
# and above are reported as "450000 or more".
mydata <- top_recode(
  variable = "q532_jewelry",
  break_point = 450000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q532_jewelry. 532 Jewelry & ornaments
## 0 8 50 60 80 100 150 200 250 300 350
## 1641 1 2 2 1 10 5 22 3 7 1
## 400 450 500 600 650 700 750 800 880 900 1000
## 7 2 63 13 1 3 1 6 1 3 62
## 1100 1200 1340 1400 1500 1600 1630 1700 2000 2400 2500
## 1 10 1 2 12 1 1 1 24 3 10
## 2900 3000 3150 3400 3500 3600 4000 4190 4200 4500 5000
## 1 24 1 1 2 2 14 1 1 4 25
## 5200 5500 6000 6400 7000 7200 7300 7500 8000 8500 8700
## 1 3 10 2 12 1 1 1 9 2 1
## 8870 9000 9300 9500 10000 10500 10600 11000 12000 12500 12600
## 1 5 1 2 28 1 1 5 10 1 1
## 13000 13300 13500 14000 14500 15000 16000 16800 17000 17500 18000
## 9 1 1 3 1 19 5 1 2 1 4
## 18500 19000 20000 20600 21000 22000 23000 24000 25000 26000 27000
## 1 2 15 1 3 5 1 3 8 1 2
## 28000 28500 30000 30800 32000 34000 35000 37000 38000 39500 40000
## 3 1 15 1 2 1 8 1 2 1 9
## 41500 42000 43500 45000 45400 48000 49000 50000 51000 55000 60000
## 1 1 1 3 1 1 1 23 1 1 5
## 60250 61000 63000 65000 67000 70000 80000 82000 85000 90300 97300
## 1 1 1 1 1 12 9 2 1 1 1
## 99000 1e+05 120000 150000 190000 2e+05 201000 230000 250000 3e+05 305000
## 1 16 1 4 1 8 1 1 1 2 1
## 4e+05 460000 5e+05 7e+05 1e+06 1500000 2e+06 2100000 3500000 5e+06
## 2 1 2 1 3 2 1 1 1 1
## [1] "Frequency table after encoding"
## q532_jewelry. 532 Jewelry & ornaments
## 0 8 50 60 80 100
## 1641 1 2 2 1 10
## 150 200 250 300 350 400
## 5 22 3 7 1 7
## 450 500 600 650 700 750
## 2 63 13 1 3 1
## 800 880 900 1000 1100 1200
## 6 1 3 62 1 10
## 1340 1400 1500 1600 1630 1700
## 1 2 12 1 1 1
## 2000 2400 2500 2900 3000 3150
## 24 3 10 1 24 1
## 3400 3500 3600 4000 4190 4200
## 1 2 2 14 1 1
## 4500 5000 5200 5500 6000 6400
## 4 25 1 3 10 2
## 7000 7200 7300 7500 8000 8500
## 12 1 1 1 9 2
## 8700 8870 9000 9300 9500 10000
## 1 1 5 1 2 28
## 10500 10600 11000 12000 12500 12600
## 1 1 5 10 1 1
## 13000 13300 13500 14000 14500 15000
## 9 1 1 3 1 19
## 16000 16800 17000 17500 18000 18500
## 5 1 2 1 4 1
## 19000 20000 20600 21000 22000 23000
## 2 15 1 3 5 1
## 24000 25000 26000 27000 28000 28500
## 3 8 1 2 3 1
## 30000 30800 32000 34000 35000 37000
## 15 1 2 1 8 1
## 38000 39500 40000 41500 42000 43500
## 2 1 9 1 1 1
## 45000 45400 48000 49000 50000 51000
## 3 1 1 1 23 1
## 55000 60000 60250 61000 63000 65000
## 1 5 1 1 1 1
## 67000 70000 80000 82000 85000 90300
## 1 12 9 2 1 1
## 97300 99000 1e+05 120000 150000 190000
## 1 1 16 1 4 1
## 2e+05 201000 230000 250000 3e+05 305000
## 8 1 1 1 2 1
## 4e+05 450000 or more
## 2 13
# Top-code personal transport equipment spending (q533_cycle): values of
# 500000 and above are reported as a single top category.
mydata <- top_recode(
  variable = "q533_cycle",
  break_point = 500000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q533_cycle. 533 Personal transport equipment including bicycle, scooter, car, tyres, tubes,
## 0 8 10 30 50 100 120 150 200 210 250
## 1390 7 1 1 3 9 1 4 36 1 7
## 280 300 350 360 400 450 500 550 590 600 700
## 1 35 4 1 10 1 91 2 1 21 5
## 750 800 808 900 1000 1100 1200 1300 1350 1400 1500
## 1 9 1 7 80 4 27 1 1 5 58
## 1600 1650 1700 1750 1800 1850 1900 2000 2200 2300 2350
## 5 1 5 1 13 1 1 123 4 1 1
## 2400 2500 2600 2700 2800 3000 3200 3320 3500 3600 3700
## 3 15 1 1 1 60 1 1 6 2 1
## 4000 4500 4800 5000 5250 5500 6000 6200 6500 6600 7000
## 28 3 1 35 1 1 17 1 1 1 5
## 7200 7500 8000 9000 10000 12000 13000 13270 13500 13800 14000
## 2 2 6 2 15 4 1 1 1 1 1
## 15000 16000 16500 18000 19500 20000 20040 22000 23000 25000 26000
## 8 1 1 3 1 13 1 1 1 4 1
## 28000 30000 32000 32500 35000 36000 36600 40000 40250 40300 42000
## 1 5 3 1 2 3 1 4 1 1 2
## 45000 46000 48000 50000 52000 52500 53000 55000 56000 57000 57200
## 1 1 1 11 4 1 1 2 2 1 1
## 58100 60000 61000 62000 63000 64000 65000 67000 68000 70000 74000
## 1 9 3 2 1 2 3 1 1 6 2
## 90000 1e+05 120000 140000 150000 180000 250000 285000 3e+05 350000 450000
## 1 5 1 1 2 1 3 1 2 1 1
## 5e+05 520008 7e+05 820000 9e+05 1e+06 1015000 2500000 2605000 3e+06 3500000
## 2 1 1 2 1 1 1 1 1 1 1
## 6900000
## 1
## [1] "Frequency table after encoding"
## q533_cycle. 533 Personal transport equipment including bicycle, scooter, car, tyres, tubes,
## 0 8 10 30 50 100
## 1390 7 1 1 3 9
## 120 150 200 210 250 280
## 1 4 36 1 7 1
## 300 350 360 400 450 500
## 35 4 1 10 1 91
## 550 590 600 700 750 800
## 2 1 21 5 1 9
## 808 900 1000 1100 1200 1300
## 1 7 80 4 27 1
## 1350 1400 1500 1600 1650 1700
## 1 5 58 5 1 5
## 1750 1800 1850 1900 2000 2200
## 1 13 1 1 123 4
## 2300 2350 2400 2500 2600 2700
## 1 1 3 15 1 1
## 2800 3000 3200 3320 3500 3600
## 1 60 1 1 6 2
## 3700 4000 4500 4800 5000 5250
## 1 28 3 1 35 1
## 5500 6000 6200 6500 6600 7000
## 1 17 1 1 1 5
## 7200 7500 8000 9000 10000 12000
## 2 2 6 2 15 4
## 13000 13270 13500 13800 14000 15000
## 1 1 1 1 1 8
## 16000 16500 18000 19500 20000 20040
## 1 1 3 1 13 1
## 22000 23000 25000 26000 28000 30000
## 1 1 4 1 1 5
## 32000 32500 35000 36000 36600 40000
## 3 1 2 3 1 4
## 40250 40300 42000 45000 46000 48000
## 1 1 2 1 1 1
## 50000 52000 52500 53000 55000 56000
## 11 4 1 1 2 2
## 57000 57200 58100 60000 61000 62000
## 1 1 1 9 3 2
## 63000 64000 65000 67000 68000 70000
## 1 2 3 1 1 6
## 74000 90000 1e+05 120000 140000 150000
## 2 1 5 1 1 2
## 180000 250000 285000 3e+05 350000 450000
## 1 3 1 2 1 1
## 5e+05 or more
## 14
# Top-code therapeutic appliance spending (q534_therapy): values of 3000 and
# above are reported as "3000 or more".
# The cut-off is passed by name, like every other top_recode() call in this
# script, so the call does not depend on the helper's positional argument order.
mydata <- top_recode(
  variable = "q534_therapy",
  break_point = 3000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q534_therapy. 534 Therapeutic appliances including glass eye, hearing aids, orthopaedic equipm
## 0 8 20 30 40 50 60 75 80 100 140 150 170 200 250
## 2185 5 1 1 1 1 1 1 1 5 1 2 1 10 6
## 270 300 320 350 400 420 450 480 500 600 700 800 830 950 1000
## 1 14 1 6 8 1 1 1 14 11 11 8 1 1 12
## 1100 1200 1220 1350 1400 1500 1600 2000 2200 2500 3000 3500 4000 5000 10000
## 1 6 1 1 1 5 1 8 1 2 1 1 2 3 3
## 12000 24000 32000
## 1 1 1
## [1] "Frequency table after encoding"
## q534_therapy. 534 Therapeutic appliances including glass eye, hearing aids, orthopaedic equipm
## 0 8 20 30 40 50 60
## 2185 5 1 1 1 1 1
## 75 80 100 140 150 170 200
## 1 1 5 1 2 1 10
## 250 270 300 320 350 400 420
## 6 1 14 1 6 8 1
## 450 480 500 600 700 800 830
## 1 1 14 11 11 8 1
## 950 1000 1100 1200 1220 1350 1400
## 1 12 1 6 1 1 1
## 1500 1600 2000 2200 2500 3000 or more
## 5 1 8 1 2 13
# Top-code other personal goods spending (q535_clock): values of 20000 and
# above are reported as "20000 or more".
# The cut-off is passed by name, like every other top_recode() call in this
# script, so the call does not depend on the helper's positional argument order.
mydata <- top_recode(
  variable = "q535_clock",
  break_point = 20000,
  missing = NA
)
## [1] "Frequency table before encoding"
## q535_clock. 535 Other personal goods including clocks, watches, PC, telephone, mobile, etc.
## 0 1 8 50 80 100 150 180 200 240 250 270 300 350 380
## 1469 1 1 3 1 7 4 1 14 1 3 1 13 1 1
## 400 450 500 600 700 750 800 840 850 900 950 1000 1050 1100 1150
## 2 1 22 14 7 1 7 1 1 5 2 67 2 30 3
## 1200 1250 1300 1350 1360 1400 1440 1450 1500 1550 1600 1650 1700 1800 1900
## 88 5 17 1 1 11 1 1 91 1 13 1 7 16 2
## 2000 2060 2100 2200 2300 2350 2400 2500 2600 2800 2900 3000 3200 3500 3600
## 86 1 2 3 3 1 9 22 2 2 1 51 2 11 8
## 3800 4000 4100 4200 4300 4500 4900 5000 5100 5400 5500 5700 6000 6200 6500
## 1 25 2 1 1 6 1 36 1 1 3 1 25 1 2
## 6700 6800 7000 7200 7500 7600 7700 8000 8200 8500 9000 9500 10000 10350 10500
## 1 1 18 1 3 1 1 10 1 2 7 1 13 1 2
## 11000 12000 12500 13000 14000 15000 15300 15700 16000 16250 18000 20000 24000 25000 26000
## 1 3 2 1 1 5 1 1 1 1 4 4 1 3 1
## 31000 35000 36000 40000 70000 90000 1e+05
## 1 1 1 1 1 1 1
## [1] "Frequency table after encoding"
## q535_clock. 535 Other personal goods including clocks, watches, PC, telephone, mobile, etc.
## 0 1 8 50 80 100
## 1469 1 1 3 1 7
## 150 180 200 240 250 270
## 4 1 14 1 3 1
## 300 350 380 400 450 500
## 13 1 1 2 1 22
## 600 700 750 800 840 850
## 14 7 1 7 1 1
## 900 950 1000 1050 1100 1150
## 5 2 67 2 30 3
## 1200 1250 1300 1350 1360 1400
## 88 5 17 1 1 11
## 1440 1450 1500 1550 1600 1650
## 1 1 91 1 13 1
## 1700 1800 1900 2000 2060 2100
## 7 16 2 86 1 2
## 2200 2300 2350 2400 2500 2600
## 3 3 1 9 22 2
## 2800 2900 3000 3200 3500 3600
## 2 1 51 2 11 8
## 3800 4000 4100 4200 4300 4500
## 1 25 2 1 1 6
## 4900 5000 5100 5400 5500 5700
## 1 36 1 1 3 1
## 6000 6200 6500 6700 6800 7000
## 25 1 2 1 1 18
## 7200 7500 7600 7700 8000 8200
## 1 3 1 1 10 1
## 8500 9000 9500 10000 10350 10500
## 2 7 1 13 1 2
## 11000 12000 12500 13000 14000 15000
## 1 3 2 1 1 5
## 15300 15700 16000 16250 18000 20000 or more
## 1 1 1 1 4 16
# Top-code residential repair and maintenance spending (q536_repair). The
# cut-off is whatever percentile_checker() returns for this variable.
mydata <- top_recode(
  variable = "q536_repair",
  break_point = percentile_checker("q536_repair"),
  missing = NA
)
## [1] "Frequency table before encoding"
## q536_repair. 536 Repair and maintenance of residential buildings, bathroom equipment, etc.
## 0 8 50 200 500 600 800 1000 1200 1250 1500
## 1423 2 1 1 5 2 1 9 2 1 5
## 2000 2200 2500 3000 4000 4400 5000 5250 5500 6000 6400
## 12 1 2 18 8 1 31 1 1 2 1
## 6500 6600 7000 8000 8200 8500 9000 10000 11000 12000 13000
## 1 1 6 14 1 1 1 42 1 13 3
## 14700 15000 16000 17000 18000 19000 20000 20661 22000 25000 27500
## 1 26 9 2 4 1 41 1 1 16 1
## 28000 30000 30500 32000 33000 35000 38000 40000 42000 43000 45000
## 1 39 1 1 2 17 2 40 1 1 6
## 48000 50000 52000 55000 60000 65000 68000 70000 71000 73000 75000
## 3 83 1 6 51 9 1 38 1 1 6
## 80000 85000 86000 90000 95000 1e+05 101000 105000 110000 111000 115000
## 34 2 1 13 3 61 1 3 2 1 1
## 120000 125000 130000 135000 150000 160000 175000 180000 2e+05 202000 225000
## 3 3 2 1 30 5 1 1 27 1 1
## 250000 265000 275000 3e+05 304000 350000 360000 375000 4e+05 420000 450000
## 5 1 1 25 1 4 1 1 17 1 1
## 497000 5e+05 6e+05 650000 7e+05 732600 750000 8e+05 9e+05 950000 1e+06
## 1 14 5 2 1 1 2 7 2 1 9
## 1065000 1100000 1200000 1350000 1400000 1500000 2e+06 2010000 2500000 3e+06 4e+06
## 1 2 1 1 1 6 2 1 5 1 1
## 1.5e+07 2.2e+07 3.5e+07
## 1 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## q536_repair. 536 Repair and maintenance of residential buildings, bathroom equipment, etc.
## 0 8 50 200 500 600
## 1423 2 1 1 5 2
## 800 1000 1200 1250 1500 2000
## 1 9 2 1 5 12
## 2200 2500 3000 4000 4400 5000
## 1 2 18 8 1 31
## 5250 5500 6000 6400 6500 6600
## 1 1 2 1 1 1
## 7000 8000 8200 8500 9000 10000
## 6 14 1 1 1 42
## 11000 12000 13000 14700 15000 16000
## 1 13 3 1 26 9
## 17000 18000 19000 20000 20661 22000
## 2 4 1 41 1 1
## 25000 27500 28000 30000 30500 32000
## 16 1 1 39 1 1
## 33000 35000 38000 40000 42000 43000
## 2 17 2 40 1 1
## 45000 48000 50000 52000 55000 60000
## 6 3 83 1 6 51
## 65000 68000 70000 71000 73000 75000
## 9 1 38 1 1 6
## 80000 85000 86000 90000 95000 1e+05
## 34 2 1 13 3 61
## 101000 105000 110000 111000 115000 120000
## 1 3 2 1 1 3
## 125000 130000 135000 150000 160000 175000
## 3 2 1 30 5 1
## 180000 2e+05 202000 225000 250000 265000
## 1 27 1 1 5 1
## 275000 3e+05 304000 350000 360000 375000
## 1 25 1 4 1 1
## 4e+05 420000 450000 497000 5e+05 6e+05
## 17 1 1 1 14 5
## 650000 7e+05 732600 750000 8e+05 9e+05
## 2 1 1 2 7 2
## 950000 1e+06 1065000 1100000 1200000 1350000
## 1 9 1 2 1 1
## 1400000 1500000 2e+06 or more
## 1 6 13
# !!! No Indirect PII categorical
# !!! No direct demographic variables available in dataset
# !!! No open-ends
# !!! No GPS data
# Adds "_PU" (Public Use) to the end of the name
# Export the recoded data in Stata (.dta) and SPSS (.sav) formats; both files
# are named <filename>_PU.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))
# Build the report title from the data set name held in `filename`.
title_var <- paste0(
  "DOL-ILAB SDC - ",
  filename
)