# Start from a clean workspace. `all.names = TRUE` also clears dot-prefixed
# objects; the earlier `all=t` passed the transpose function `t` instead of
# TRUE and relied on partial matching of the `all.names` argument.
rm(list = ls(all.names = TRUE))
filename <- "ehsection5" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
# Load the helper functions file named above.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId,
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!! No Direct PII
# !!! No Direct PII-team
# !!! Include relevant variables, but check their population size first to confirm they are <100,000
# Remove the variables listed in dropvars from the dataset.
dropvars <- c("dise")
mydata <- mydata[!(names(mydata) %in% dropvars)]

# Re-code the location identifiers; the recorded output below shows the
# original block and village IDs replaced by different numeric codes.
locvars <- c("a006_a_block_id", "a007_a_vill_id")
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## a006_a_block_id. 006 Block ID
## 1 2 3 4 5 6 7 8 9
## 203 167 192 404 97 190 155 422 528
## [1] "Frequency table after encoding"
## a006_a_block_id. 006 Block ID
## 279 280 281 282 283 284 285 286 287
## 422 167 192 528 404 97 203 155 190
## [1] "Frequency table before encoding"
## a007_a_vill_id. 007 Village ID
## 1 2 3 4 5 6 7 8 9 10 11 12 13 15 16 17 18 19 20 21 22 23 24 25 26 27 28
## 16 16 16 15 20 30 28 14 15 15 17 24 24 15 18 21 16 17 18 30 22 18 17 32 27 26 18
## 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
## 15 15 24 26 22 16 29 19 17 21 27 16 16 18 16 28 20 23 21 19 17 17 16 18 26 24 27
## 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 80 81 82 83
## 18 16 21 13 24 20 16 18 18 29 16 18 21 23 13 16 19 16 23 23 17 22 29 30 16 22 17
## 84 85 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
## 17 13 16 22 15 19 19 19 20 13 17 23 29 21 25 18 24 21 15 19 13 31 14 27 21 17 21
## 112 113 114 115 116 117 118 119 120 121 122
## 27 14 24 20 16 21 22 20 13 10 10
## [1] "Frequency table after encoding"
## a007_a_vill_id. 007 Village ID
## 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635
## 18 30 14 22 15 16 16 16 17 17 19 25 23 30 17 10 31 15 22 16 21 22 19 15 17 24 16
## 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
## 24 19 32 17 21 17 16 21 23 18 29 19 22 13 15 17 17 23 27 28 16 24 13 16 19 29 21
## 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
## 16 24 20 27 22 20 23 18 13 16 15 16 13 15 28 18 17 24 29 29 21 16 20 27 19 20 16
## 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
## 15 18 20 15 14 21 20 27 18 24 22 21 24 26 14 23 26 19 17 10 18 16 16 26 21 16 18
## 717 718 719 720 721 722 723 724 725 726 727
## 21 13 13 18 21 17 18 27 30 16 18
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top-code a501_cereal: the recorded run below collapses values of 12000
# and above into a single "12000 or more" category.
mydata <- top_recode(
  variable = "a501_cereal",
  break_point = 12000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a501_cereal. 501 Cereals & Cereal Products including muri, chira, maida, suji, noodles, bread
## 0 20 25 30 40 50 60 65 70 80 90 100 110 120 125
## 11 1 2 1 4 13 7 1 5 5 1 10 3 2 1
## 130 150 165 166 180 195 200 210 225 230 240 250 260 270 280
## 1 2 1 1 1 1 8 1 2 2 3 3 3 4 2
## 290 300 350 360 380 390 400 410 430 440 450 452 460 475 480
## 1 9 6 1 1 1 19 1 2 3 6 1 9 1 4
## 500 512 520 540 550 560 570 580 600 605 610 620 625 630 640
## 56 1 3 1 6 5 1 1 44 2 1 4 1 2 2
## 645 650 660 670 675 680 690 700 720 724 725 730 750 760 770
## 2 10 7 1 1 5 3 31 4 1 2 2 11 2 2
## 780 785 790 795 800 808 814 820 829 830 840 850 855 860 865
## 5 1 1 1 56 1 1 4 1 2 3 8 1 4 1
## 870 872 875 880 890 900 905 920 925 938 945 950 960 970 975
## 3 1 1 8 3 27 1 3 2 1 1 6 4 1 1
## 980 985 1000 1020 1030 1040 1050 1060 1080 1085 1090 1100 1110 1120 1125
## 1 1 236 1 2 5 8 4 3 2 3 36 1 2 2
## 1130 1137 1140 1150 1160 1170 1175 1180 1185 1190 1200 1210 1220 1225 1230
## 1 1 2 4 3 2 1 3 1 2 91 4 2 1 1
## 1240 1250 1255 1260 1270 1280 1290 1300 1320 1340 1350 1360 1375 1390 1400
## 1 19 1 9 2 1 1 31 5 1 7 3 1 1 34
## 1425 1430 1450 1460 1470 1480 1500 1510 1520 1530 1540 1550 1560 1566 1570
## 1 2 5 1 2 1 146 1 2 2 1 2 6 1 1
## 1580 1590 1600 1625 1630 1640 1650 1660 1680 1700 1735 1740 1750 1800 1820
## 5 3 20 1 1 2 5 1 1 17 1 1 3 48 1
## 1840 1850 1860 1870 1884 1900 1920 1950 2000 2010 2020 2030 2040 2050 2060
## 1 1 1 1 1 7 1 1 405 1 1 2 3 3 4
## 2070 2075 2080 2090 2100 2120 2140 2150 2160 2200 2230 2248 2250 2260 2280
## 1 1 5 1 20 1 1 6 3 46 1 1 5 1 1
## 2300 2330 2350 2360 2400 2450 2500 2550 2560 2580 2600 2640 2700 2750 2800
## 22 1 1 1 13 1 99 3 2 1 5 1 5 1 2
## 2900 3000 3003 3060 3080 3100 3200 3300 3340 3360 3400 3450 3500 3550 3600
## 3 120 1 1 1 1 3 1 1 1 2 1 13 1 2
## 3780 3800 4000 4050 4080 4120 4350 4500 4540 4600 4800 5000 5200 5400 5500
## 1 1 60 1 1 2 1 3 1 2 1 34 1 1 1
## 5570 6000 6200 6450 7000 7200 8000 9125 10000 11000 11500 12500 13000 16160 17500
## 1 9 1 1 2 1 9 1 6 2 1 2 1 1 1
## 27000 30000 32000 40000 71500 132400 <NA>
## 1 1 1 2 1 1 3
## [1] "Frequency table after encoding"
## a501_cereal. 501 Cereals & Cereal Products including muri, chira, maida, suji, noodles, bread
## 0 20 25 30 40 50 60
## 11 1 2 1 4 13 7
## 65 70 80 90 100 110 120
## 1 5 5 1 10 3 2
## 125 130 150 165 166 180 195
## 1 1 2 1 1 1 1
## 200 210 225 230 240 250 260
## 8 1 2 2 3 3 3
## 270 280 290 300 350 360 380
## 4 2 1 9 6 1 1
## 390 400 410 430 440 450 452
## 1 19 1 2 3 6 1
## 460 475 480 500 512 520 540
## 9 1 4 56 1 3 1
## 550 560 570 580 600 605 610
## 6 5 1 1 44 2 1
## 620 625 630 640 645 650 660
## 4 1 2 2 2 10 7
## 670 675 680 690 700 720 724
## 1 1 5 3 31 4 1
## 725 730 750 760 770 780 785
## 2 2 11 2 2 5 1
## 790 795 800 808 814 820 829
## 1 1 56 1 1 4 1
## 830 840 850 855 860 865 870
## 2 3 8 1 4 1 3
## 872 875 880 890 900 905 920
## 1 1 8 3 27 1 3
## 925 938 945 950 960 970 975
## 2 1 1 6 4 1 1
## 980 985 1000 1020 1030 1040 1050
## 1 1 236 1 2 5 8
## 1060 1080 1085 1090 1100 1110 1120
## 4 3 2 3 36 1 2
## 1125 1130 1137 1140 1150 1160 1170
## 2 1 1 2 4 3 2
## 1175 1180 1185 1190 1200 1210 1220
## 1 3 1 2 91 4 2
## 1225 1230 1240 1250 1255 1260 1270
## 1 1 1 19 1 9 2
## 1280 1290 1300 1320 1340 1350 1360
## 1 1 31 5 1 7 3
## 1375 1390 1400 1425 1430 1450 1460
## 1 1 34 1 2 5 1
## 1470 1480 1500 1510 1520 1530 1540
## 2 1 146 1 2 2 1
## 1550 1560 1566 1570 1580 1590 1600
## 2 6 1 1 5 3 20
## 1625 1630 1640 1650 1660 1680 1700
## 1 1 2 5 1 1 17
## 1735 1740 1750 1800 1820 1840 1850
## 1 1 3 48 1 1 1
## 1860 1870 1884 1900 1920 1950 2000
## 1 1 1 7 1 1 405
## 2010 2020 2030 2040 2050 2060 2070
## 1 1 2 3 3 4 1
## 2075 2080 2090 2100 2120 2140 2150
## 1 5 1 20 1 1 6
## 2160 2200 2230 2248 2250 2260 2280
## 3 46 1 1 5 1 1
## 2300 2330 2350 2360 2400 2450 2500
## 22 1 1 1 13 1 99
## 2550 2560 2580 2600 2640 2700 2750
## 3 2 1 5 1 5 1
## 2800 2900 3000 3003 3060 3080 3100
## 2 3 120 1 1 1 1
## 3200 3300 3340 3360 3400 3450 3500
## 3 1 1 1 2 1 13
## 3550 3600 3780 3800 4000 4050 4080
## 1 2 1 1 60 1 1
## 4120 4350 4500 4540 4600 4800 5000
## 2 1 3 1 2 1 34
## 5200 5400 5500 5570 6000 6200 6450
## 1 1 1 1 9 1 1
## 7000 7200 8000 9125 10000 11000 11500
## 2 1 9 1 6 2 1
## 12000 or more <NA>
## 12 3
# Top-code a502_pulse at the break point returned by percentile_checker();
# the recorded run below ends in a "4000 or more" category.
mydata <- top_recode(
  variable = "a502_pulse",
  break_point = percentile_checker("a502_pulse"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a502_pulse. 502 Pulses and Pulse Products including soybean, gram products, besan, sattu
## 0 0.239999994635582 0.980000019073486 20 25 30
## 15 1 1 1 1 2
## 40 45 50 60 70 75
## 3 1 9 9 3 1
## 80 100 105 110 115 120
## 18 53 2 4 1 19
## 125 130 135 138 140 145
## 3 3 2 1 11 1
## 150 155 160 165 170 180
## 40 1 33 2 5 19
## 184 185 190 192 195 200
## 1 1 5 1 1 213
## 205 210 220 225 230 235
## 1 13 13 4 4 2
## 240 250 255 260 265 268
## 35 64 1 11 1 1
## 270 280 285 290 295 300
## 10 26 1 7 1 203
## 310 315 317 320 325 330
## 5 1 1 26 1 2
## 340 345 349 350 360 365
## 14 1 1 28 21 1
## 368 370 375 380 385 390
## 3 8 1 12 4 3
## 400 410 416 420 424 425
## 151 3 1 8 1 2
## 430 440 450 460 465 470
## 2 6 19 12 1 4
## 475 480 485 486 490 500
## 1 26 1 1 3 400
## 515 516 520 530 540 550
## 2 1 9 6 8 12
## 560 565 570 580 590 600
## 9 1 2 2 1 103
## 616 620 630 640 645 650
## 1 1 5 9 1 5
## 660 680 700 720 750 760
## 2 1 69 3 9 2
## 780 800 850 860 900 1000
## 1 64 1 2 8 191
## 1050 1100 1110 1120 1200 1250
## 1 5 1 1 22 1
## 1450 1500 1800 2000 2400 2500
## 1 42 2 49 1 5
## 3000 3500 4000 5000 6000 10000
## 11 1 9 7 1 1
## 15000 <NA>
## 1 5
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a502_pulse. 502 Pulses and Pulse Products including soybean, gram products, besan, sattu
## 0 0.239999994635582 0.980000019073486 20 25 30
## 15 1 1 1 1 2
## 40 45 50 60 70 75
## 3 1 9 9 3 1
## 80 100 105 110 115 120
## 18 53 2 4 1 19
## 125 130 135 138 140 145
## 3 3 2 1 11 1
## 150 155 160 165 170 180
## 40 1 33 2 5 19
## 184 185 190 192 195 200
## 1 1 5 1 1 213
## 205 210 220 225 230 235
## 1 13 13 4 4 2
## 240 250 255 260 265 268
## 35 64 1 11 1 1
## 270 280 285 290 295 300
## 10 26 1 7 1 203
## 310 315 317 320 325 330
## 5 1 1 26 1 2
## 340 345 349 350 360 365
## 14 1 1 28 21 1
## 368 370 375 380 385 390
## 3 8 1 12 4 3
## 400 410 416 420 424 425
## 151 3 1 8 1 2
## 430 440 450 460 465 470
## 2 6 19 12 1 4
## 475 480 485 486 490 500
## 1 26 1 1 3 400
## 515 516 520 530 540 550
## 2 1 9 6 8 12
## 560 565 570 580 590 600
## 9 1 2 2 1 103
## 616 620 630 640 645 650
## 1 1 5 9 1 5
## 660 680 700 720 750 760
## 2 1 69 3 9 2
## 780 800 850 860 900 1000
## 1 64 1 2 8 191
## 1050 1100 1110 1120 1200 1250
## 1 5 1 1 22 1
## 1450 1500 1800 2000 2400 2500
## 1 42 2 49 1 5
## 3000 3500 4000 or more <NA>
## 11 1 19 5
# Top-code a503_milk at the break point returned by percentile_checker();
# the recorded run below ends in a "7500 or more" category.
mydata <- top_recode(
  variable = "a503_milk",
  break_point = percentile_checker("a503_milk"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a503_milk. 503 Milk
## 0 0.150000005960464 60 100 120 150
## 171 1 1 1 2 3
## 200 225 240 250 280 300
## 2 1 1 1 2 52
## 320 330 350 375 400 450
## 1 2 3 1 17 10
## 500 520 540 560 600 620
## 41 2 1 2 270 1
## 630 640 650 660 700 720
## 3 1 1 7 22 3
## 750 800 850 860 900 1000
## 31 26 1 1 63 77
## 1008 1050 1080 1100 1200 1230
## 1 2 1 1 509 1
## 1240 1250 1260 1280 1300 1320
## 4 8 6 1 5 4
## 1350 1400 1450 1500 1560 1575
## 8 4 1 333 1 1
## 1600 1650 1700 1800 2000 2100
## 4 2 1 65 89 5
## 2250 2300 2400 2500 2520 2600
## 5 1 117 15 1 1
## 2700 2800 3000 3150 3500 3600
## 4 1 190 1 4 23
## 3750 4000 4500 4800 5000 6000
## 1 16 26 11 9 30
## 6200 7000 7200 7500 8000 9000
## 1 3 1 4 1 2
## 10000 10500 10800 12000
## 3 2 1 2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a503_milk. 503 Milk
## 0 0.150000005960464 60 100 120 150
## 171 1 1 1 2 3
## 200 225 240 250 280 300
## 2 1 1 1 2 52
## 320 330 350 375 400 450
## 1 2 3 1 17 10
## 500 520 540 560 600 620
## 41 2 1 2 270 1
## 630 640 650 660 700 720
## 3 1 1 7 22 3
## 750 800 850 860 900 1000
## 31 26 1 1 63 77
## 1008 1050 1080 1100 1200 1230
## 1 2 1 1 509 1
## 1240 1250 1260 1280 1300 1320
## 4 8 6 1 5 4
## 1350 1400 1450 1500 1560 1575
## 8 4 1 333 1 1
## 1600 1650 1700 1800 2000 2100
## 4 2 1 65 89 5
## 2250 2300 2400 2500 2520 2600
## 5 1 117 15 1 1
## 2700 2800 3000 3150 3500 3600
## 4 1 190 1 4 23
## 3750 4000 4500 4800 5000 6000
## 1 16 26 11 9 30
## 6200 7000 7200 7500 or more
## 1 3 1 15
# Top-code a504_milk_prod at the break point returned by percentile_checker();
# the recorded run below ends in a "4500 or more" category.
mydata <- top_recode(
  variable = "a504_milk_prod",
  break_point = percentile_checker("a504_milk_prod"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a504_milk_prod. 504 Milk Products including condensed milk, milk powder, babyfood, ghee, butter
## 0 1 30 40 43 50 60 70 85 90 100 120 150 160 190 200 220 250
## 727 1 2 2 1 4 2 1 1 1 33 1 9 1 1 42 2 13
## 260 280 300 320 330 350 355 360 370 375 380 400 420 430 440 450 460 480
## 1 1 42 1 1 41 1 2 3 2 2 122 6 1 3 62 1 3
## 500 520 550 560 600 620 650 660 680 700 720 750 800 840 850 860 880 900
## 179 1 8 2 140 7 10 3 2 110 2 13 143 4 7 1 3 38
## 910 920 940 950 1000 1050 1100 1110 1150 1200 1300 1330 1350 1400 1410 1500 1600 1650
## 1 1 1 3 144 7 5 2 2 94 11 1 6 45 1 43 22 1
## 1680 1700 1750 1800 1840 1850 1900 2000 2100 2250 2300 2333 2400 2500 2700 2800 2860 3000
## 1 3 2 15 1 1 2 47 5 3 1 1 10 9 1 2 1 19
## 3200 3500 3600 4000 4400 4500 4800 5000 6000 7000 8000 10000 14004 <NA>
## 1 6 1 8 1 4 1 2 1 2 1 1 1 2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a504_milk_prod. 504 Milk Products including condensed milk, milk powder, babyfood, ghee, butter
## 0 1 30 40 43 50 60 70
## 727 1 2 2 1 4 2 1
## 85 90 100 120 150 160 190 200
## 1 1 33 1 9 1 1 42
## 220 250 260 280 300 320 330 350
## 2 13 1 1 42 1 1 41
## 355 360 370 375 380 400 420 430
## 1 2 3 2 2 122 6 1
## 440 450 460 480 500 520 550 560
## 3 62 1 3 179 1 8 2
## 600 620 650 660 680 700 720 750
## 140 7 10 3 2 110 2 13
## 800 840 850 860 880 900 910 920
## 143 4 7 1 3 38 1 1
## 940 950 1000 1050 1100 1110 1150 1200
## 1 3 144 7 5 2 2 94
## 1300 1330 1350 1400 1410 1500 1600 1650
## 11 1 6 45 1 43 22 1
## 1680 1700 1750 1800 1840 1850 1900 2000
## 1 3 2 15 1 1 2 47
## 2100 2250 2300 2333 2400 2500 2700 2800
## 5 3 1 1 10 9 1 2
## 2860 3000 3200 3500 3600 4000 4400 4500 or more
## 1 19 1 6 1 8 1 13
## <NA>
## 2
# Top-code a505_edible_oil with a fixed break point of 8000.
# NOTE(review): in the recorded run below the largest value is 7500 and the
# before/after frequency tables are identical, so this break point recoded
# nothing -- confirm the intended threshold.
mydata <- top_recode(
  variable = "a505_edible_oil",
  break_point = 8000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a505_edible_oil. 505 Edible oil and Vanaspati
## 0 0.699999988079071 8 90 100 120
## 7 1 1 1 14 1
## 150 160 180 200 210 220
## 4 4 10 62 2 3
## 225 240 245 250 260 266
## 1 11 1 17 2 1
## 270 280 285 295 300 320
## 15 2 1 1 98 9
## 325 330 350 360 370 375
## 1 1 14 14 1 2
## 380 390 400 410 420 425
## 2 1 147 1 4 5
## 430 440 445 450 460 470
## 2 1 1 117 4 4
## 475 480 490 495 500 510
## 2 17 3 1 504 2
## 520 525 530 533 540 550
## 2 2 3 3 14 19
## 560 566 580 590 595 600
## 8 2 6 1 1 143
## 620 625 630 640 650 660
## 2 1 11 4 33 3
## 665 670 700 720 730 740
## 2 1 102 13 1 1
## 745 750 800 825 840 850
## 1 70 84 1 2 2
## 860 880 890 900 910 930
## 1 1 1 30 1 1
## 960 970 980 1000 1050 1100
## 2 1 1 131 1 15
## 1150 1200 1220 1250 1260 1300
## 1 70 2 8 1 57
## 1320 1340 1350 1380 1390 1400
## 1 1 15 1 1 29
## 1450 1470 1500 1520 1530 1538
## 7 1 185 1 1 1
## 1550 1600 1700 1750 1800 1850
## 1 35 8 1 18 1
## 1890 1900 2000 2100 2200 2400
## 1 2 30 1 1 3
## 2500 2600 2700 2800 3000 3200
## 3 1 4 2 6 1
## 3600 4000 4500 5000 6000 7000
## 2 1 2 2 1 1
## 7500 <NA>
## 2 1
## [1] "Frequency table after encoding"
## a505_edible_oil. 505 Edible oil and Vanaspati
## 0 0.699999988079071 8 90 100 120
## 7 1 1 1 14 1
## 150 160 180 200 210 220
## 4 4 10 62 2 3
## 225 240 245 250 260 266
## 1 11 1 17 2 1
## 270 280 285 295 300 320
## 15 2 1 1 98 9
## 325 330 350 360 370 375
## 1 1 14 14 1 2
## 380 390 400 410 420 425
## 2 1 147 1 4 5
## 430 440 445 450 460 470
## 2 1 1 117 4 4
## 475 480 490 495 500 510
## 2 17 3 1 504 2
## 520 525 530 533 540 550
## 2 2 3 3 14 19
## 560 566 580 590 595 600
## 8 2 6 1 1 143
## 620 625 630 640 650 660
## 2 1 11 4 33 3
## 665 670 700 720 730 740
## 2 1 102 13 1 1
## 745 750 800 825 840 850
## 1 70 84 1 2 2
## 860 880 890 900 910 930
## 1 1 1 30 1 1
## 960 970 980 1000 1050 1100
## 2 1 1 131 1 15
## 1150 1200 1220 1250 1260 1300
## 1 70 2 8 1 57
## 1320 1340 1350 1380 1390 1400
## 1 1 15 1 1 29
## 1450 1470 1500 1520 1530 1538
## 7 1 185 1 1 1
## 1550 1600 1700 1750 1800 1850
## 1 35 8 1 18 1
## 1890 1900 2000 2100 2200 2400
## 1 2 30 1 1 3
## 2500 2600 2700 2800 3000 3200
## 3 1 4 2 6 1
## 3600 4000 4500 5000 6000 7000
## 2 1 2 2 1 1
## 7500 <NA>
## 2 1
# Top-code a506_vegetable at the break point returned by percentile_checker();
# the recorded run below ends in a "5000 or more" category.
mydata <- top_recode(
  variable = "a506_vegetable",
  break_point = percentile_checker("a506_vegetable"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a506_vegetable. 506 Vegetables
## 0 20 30 50 100 120 150 180 200 240 250 260 300 350 400 420 450 460 500 550 560 600
## 14 1 1 1 11 1 10 1 22 5 7 1 70 4 62 1 10 1 206 5 1 258
## 630 645 650 700 720 745 750 800 850 900 950 1000 1100 1150 1160 1200 1220 1250 1300 1350 1400 1500
## 1 1 4 59 1 1 23 71 4 116 2 269 3 1 1 203 1 1 5 1 5 499
## 1600 1650 1700 1750 1800 1850 1860 2000 2100 2400 2500 3000 3200 3300 3500 4000 4500 5000 5500 6000 7000 8000
## 13 1 4 1 48 1 1 115 6 9 21 131 1 1 5 14 4 4 1 6 2 1
## 9000 <NA>
## 1 2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a506_vegetable. 506 Vegetables
## 0 20 30 50 100 120 150 180
## 14 1 1 1 11 1 10 1
## 200 240 250 260 300 350 400 420
## 22 5 7 1 70 4 62 1
## 450 460 500 550 560 600 630 645
## 10 1 206 5 1 258 1 1
## 650 700 720 745 750 800 850 900
## 4 59 1 1 23 71 4 116
## 950 1000 1100 1150 1160 1200 1220 1250
## 2 269 3 1 1 203 1 1
## 1300 1350 1400 1500 1600 1650 1700 1750
## 5 1 5 499 13 1 4 1
## 1800 1850 1860 2000 2100 2400 2500 3000
## 48 1 1 115 6 9 21 131
## 3200 3300 3500 4000 4500 5000 or more <NA>
## 1 1 5 14 4 15 2
# Top-code a507_fruit at the break point returned by percentile_checker();
# the recorded run below ends in a "3000 or more" category.
mydata <- top_recode(
  variable = "a507_fruit",
  break_point = percentile_checker("a507_fruit"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a507_fruit. 507 Fruits& nuts including mango, banana, coconut, dates, kishmish, monacca
## 0 30 40 50 60 70 80 100 105 110 120 130 140 150 160 180 190 200
## 350 1 4 21 3 2 10 70 1 3 12 3 2 42 2 1 1 287
## 210 220 225 240 250 260 290 300 320 325 330 345 350 355 360 365 380 385
## 1 2 1 3 48 6 1 232 6 2 1 3 10 1 11 1 1 1
## 400 420 425 435 450 455 460 480 485 500 550 560 600 620 640 700 750 800
## 125 4 3 1 8 1 1 1 1 460 5 1 74 1 1 49 3 49
## 900 950 1000 1050 1200 1300 1400 1500 1600 1800 2000 2100 2300 2500 3000 4000 5000 6000
## 13 1 185 1 26 2 2 78 2 1 52 3 1 13 27 4 1 4
## 10000 <NA>
## 1 7
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a507_fruit. 507 Fruits& nuts including mango, banana, coconut, dates, kishmish, monacca
## 0 30 40 50 60 70 80 100
## 350 1 4 21 3 2 10 70
## 105 110 120 130 140 150 160 180
## 1 3 12 3 2 42 2 1
## 190 200 210 220 225 240 250 260
## 1 287 1 2 1 3 48 6
## 290 300 320 325 330 345 350 355
## 1 232 6 2 1 3 10 1
## 360 365 380 385 400 420 425 435
## 11 1 1 1 125 4 3 1
## 450 455 460 480 485 500 550 560
## 8 1 1 1 1 460 5 1
## 600 620 640 700 750 800 900 950
## 74 1 1 49 3 49 13 1
## 1000 1050 1200 1300 1400 1500 1600 1800
## 185 1 26 2 2 78 2 1
## 2000 2100 2300 2500 3000 or more <NA>
## 52 3 1 13 37 7
# Top-code a508_egg: the recorded run below collapses values of 3500 and
# above into a single "3500 or more" category.
mydata <- top_recode(
  variable = "a508_egg",
  break_point = 3500,
  missing = NA
)
## [1] "Frequency table before encoding"
## a508_egg. 508 Egg, fish, and meat
## 0 1 2 10 20 50 60 80 100 110 115 120 130 140 150 160 180 200 225 240 250 280
## 1520 1 1 1 1 4 4 2 14 1 1 2 2 5 6 1 2 33 1 1 6 1
## 290 300 350 400 430 450 480 500 508 520 540 550 560 585 600 620 640 660 700 720 730 735
## 1 53 6 58 3 11 2 103 1 2 2 3 1 1 31 1 1 2 18 1 1 1
## 750 800 830 850 900 1000 1050 1100 1200 1280 1300 1400 1500 1600 1650 1700 1730 1800 1860 1920 2000 2100
## 1 48 1 2 10 134 1 1 42 1 2 3 50 12 1 4 1 4 1 1 78 4
## 2500 2600 3000 3200 3400 4000 4500 5000 6250 7000 <NA>
## 11 2 12 1 1 4 2 3 1 2 4
## [1] "Frequency table after encoding"
## a508_egg. 508 Egg, fish, and meat
## 0 1 2 10 20 50 60 80
## 1520 1 1 1 1 4 4 2
## 100 110 115 120 130 140 150 160
## 14 1 1 2 2 5 6 1
## 180 200 225 240 250 280 290 300
## 2 33 1 1 6 1 1 53
## 350 400 430 450 480 500 508 520
## 6 58 3 11 2 103 1 2
## 540 550 560 585 600 620 640 660
## 2 3 1 1 31 1 1 2
## 700 720 730 735 750 800 830 850
## 18 1 1 1 1 48 1 2
## 900 1000 1050 1100 1200 1280 1300 1400
## 10 134 1 1 42 1 2 3
## 1500 1600 1650 1700 1730 1800 1860 1920
## 50 12 1 4 1 4 1 1
## 2000 2100 2500 2600 3000 3200 3400 3500 or more
## 78 4 11 2 12 1 1 12
## <NA>
## 4
# Top-code a509_honey at the break point returned by percentile_checker();
# the recorded run below ends in a "4000 or more" category.
mydata <- top_recode(
  variable = "a509_honey",
  break_point = percentile_checker("a509_honey"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a509_honey. 509 Sugar including gur, candy, misri, honey, etc
## 0 40 60 70 80 84 90 92 100 102 105 115 120 125 126 135 140 150
## 4 1 3 1 23 1 1 1 10 1 1 1 54 1 2 2 3 11
## 160 165 168 175 180 190 194 195 200 205 208 210 220 225 228 234 240 245
## 39 1 1 3 3 3 1 1 385 1 1 10 5 10 1 1 68 3
## 246 250 252 253 260 265 266 268 270 280 285 288 290 295 300 310 316 318
## 1 57 2 1 9 1 1 1 2 60 1 1 2 1 91 1 1 1
## 320 325 329 335 336 340 346 350 352 360 365 370 380 400 408 410 418 420
## 45 2 1 1 2 4 1 28 1 13 1 2 3 383 1 2 1 15
## 430 435 440 450 456 460 464 470 480 500 502 516 520 525 530 540 550 560
## 2 1 7 32 1 4 1 4 27 197 1 1 7 1 2 1 10 6
## 575 580 590 600 620 630 640 650 660 680 695 700 720 750 800 840 850 860
## 1 2 1 138 3 3 5 11 2 4 1 44 2 2 106 7 1 1
## 870 880 900 1000 1020 1030 1050 1080 1100 1140 1200 1250 1260 1280 1300 1320 1360 1365
## 1 4 25 113 1 1 1 1 6 1 37 2 2 1 2 1 1 1
## 1400 1500 1600 1700 1785 1800 1900 2000 2100 2400 2500 2800 3000 3200 4000 5000 6000 8000
## 3 32 4 1 1 6 1 31 2 4 3 1 6 4 7 2 2 2
## 9000 10000 <NA>
## 4 1 3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a509_honey. 509 Sugar including gur, candy, misri, honey, etc
## 0 40 60 70 80 84 90 92
## 4 1 3 1 23 1 1 1
## 100 102 105 115 120 125 126 135
## 10 1 1 1 54 1 2 2
## 140 150 160 165 168 175 180 190
## 3 11 39 1 1 3 3 3
## 194 195 200 205 208 210 220 225
## 1 1 385 1 1 10 5 10
## 228 234 240 245 246 250 252 253
## 1 1 68 3 1 57 2 1
## 260 265 266 268 270 280 285 288
## 9 1 1 1 2 60 1 1
## 290 295 300 310 316 318 320 325
## 2 1 91 1 1 1 45 2
## 329 335 336 340 346 350 352 360
## 1 1 2 4 1 28 1 13
## 365 370 380 400 408 410 418 420
## 1 2 3 383 1 2 1 15
## 430 435 440 450 456 460 464 470
## 2 1 7 32 1 4 1 4
## 480 500 502 516 520 525 530 540
## 27 197 1 1 7 1 2 1
## 550 560 575 580 590 600 620 630
## 10 6 1 2 1 138 3 3
## 640 650 660 680 695 700 720 750
## 5 11 2 4 1 44 2 2
## 800 840 850 860 870 880 900 1000
## 106 7 1 1 1 4 25 113
## 1020 1030 1050 1080 1100 1140 1200 1250
## 1 1 1 1 6 1 37 2
## 1260 1280 1300 1320 1360 1365 1400 1500
## 2 1 2 1 1 1 3 32
## 1600 1700 1785 1800 1900 2000 2100 2400
## 4 1 1 6 1 31 2 4
## 2500 2800 3000 3200 4000 or more <NA>
## 3 1 6 4 18 3
# Top-code a510_salt with a fixed break point of 7000.
# NOTE(review): in the recorded run below the "7000 or more" category holds
# a single observation -- confirm this break point is intended.
mydata <- top_recode(
  variable = "a510_salt",
  break_point = 7000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a510_salt. 510 Salt & Spices including dry chillies, curry powder, oilseeds, garlic, ginger
## 0 60 75 80 90 100 110 120 125 130 132 140 150 160 170 175 180 188
## 7 3 1 1 1 15 1 3 1 2 1 1 17 6 3 1 1 1
## 190 200 210 220 240 245 250 256 260 270 280 290 294 300 310 320 330 340
## 1 104 5 3 2 1 28 1 4 6 5 1 1 175 3 6 4 4
## 343 346 350 358 360 365 370 375 390 400 410 420 422 430 442 450 460 465
## 1 1 16 1 7 1 3 1 3 144 3 5 2 1 1 14 8 1
## 468 475 480 485 490 500 516 518 520 523 524 525 527 528 530 535 536 540
## 1 4 1 2 4 526 1 1 8 2 2 2 1 3 4 1 2 8
## 542 544 545 546 548 550 554 560 570 580 590 600 620 636 650 660 670 680
## 1 1 2 2 1 8 1 8 4 1 1 185 2 1 8 1 1 1
## 700 730 745 750 780 800 820 850 860 870 900 920 950 960 980 1000 1060 1100
## 94 1 1 4 1 85 1 6 2 2 21 1 1 4 1 396 1 3
## 1200 1300 1380 1500 1600 1800 1860 2000 2500 2700 3000 3500 4000 5000 6000 10000 <NA>
## 39 4 1 96 2 2 1 82 19 1 33 1 5 5 1 1 5
## [1] "Frequency table after encoding"
## a510_salt. 510 Salt & Spices including dry chillies, curry powder, oilseeds, garlic, ginger
## 0 60 75 80 90 100 110 120
## 7 3 1 1 1 15 1 3
## 125 130 132 140 150 160 170 175
## 1 2 1 1 17 6 3 1
## 180 188 190 200 210 220 240 245
## 1 1 1 104 5 3 2 1
## 250 256 260 270 280 290 294 300
## 28 1 4 6 5 1 1 175
## 310 320 330 340 343 346 350 358
## 3 6 4 4 1 1 16 1
## 360 365 370 375 390 400 410 420
## 7 1 3 1 3 144 3 5
## 422 430 442 450 460 465 468 475
## 2 1 1 14 8 1 1 4
## 480 485 490 500 516 518 520 523
## 1 2 4 526 1 1 8 2
## 524 525 527 528 530 535 536 540
## 2 2 1 3 4 1 2 8
## 542 544 545 546 548 550 554 560
## 1 1 2 2 1 8 1 8
## 570 580 590 600 620 636 650 660
## 4 1 1 185 2 1 8 1
## 670 680 700 730 745 750 780 800
## 1 1 94 1 1 4 1 85
## 820 850 860 870 900 920 950 960
## 1 6 2 2 21 1 1 4
## 980 1000 1060 1100 1200 1300 1380 1500
## 1 396 1 3 39 4 1 96
## 1600 1800 1860 2000 2500 2700 3000 3500
## 2 2 1 82 19 1 33 1
## 4000 5000 6000 7000 or more <NA>
## 5 5 1 1 5
# Top-code a511_other_food: the recorded run below collapses values of 3000
# and above into a single "3000 or more" category.
mydata <- top_recode(
  variable = "a511_other_food",
  break_point = 3000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a511_other_food. 511 Other food items including beverages such as tea, coffee, fruit juice
## 0 0.300000011920929 15 22 30 35
## 21 1 1 1 2 2
## 40 50 60 65 66 70
## 7 12 17 18 1 8
## 72 80 90 95 100 110
## 1 9 7 1 69 5
## 120 125 130 132 135 140
## 87 2 28 1 1 16
## 145 148 150 156 160 165
## 2 1 84 1 26 3
## 170 180 190 195 200 205
## 8 19 2 1 218 1
## 210 220 225 230 240 245
## 9 40 1 11 64 1
## 250 255 260 265 267 270
## 102 1 30 1 1 6
## 275 280 290 300 310 315
## 3 19 2 248 5 1
## 320 324 325 330 340 350
## 17 1 4 3 9 31
## 352 356 360 363 364 365
## 1 2 22 1 1 5
## 368 370 375 380 384 385
## 1 1 3 16 2 4
## 390 400 410 418 420 425
## 5 117 1 1 13 5
## 426 430 440 450 452 460
## 1 8 7 24 2 6
## 480 490 495 500 508 520
## 13 2 3 242 1 10
## 530 535 540 550 560 580
## 1 1 10 5 12 2
## 600 610 620 630 640 650
## 105 1 3 6 1 3
## 660 665 670 680 690 700
## 5 1 1 3 1 40
## 710 720 740 750 760 775
## 2 4 4 11 3 1
## 780 800 810 820 835 840
## 1 53 1 2 1 2
## 850 860 900 920 930 940
## 4 1 18 2 1 1
## 960 1000 1050 1060 1100 1120
## 2 99 2 1 4 2
## 1130 1150 1160 1200 1220 1240
## 1 1 1 13 1 1
## 1270 1300 1400 1500 1600 1650
## 1 2 4 40 2 1
## 1680 1700 1775 1900 1930 2000
## 1 3 1 1 1 18
## 2200 2240 2500 2700 3000 3170
## 1 1 4 1 2 1
## 3200 3500 3600 4000 4400 5000
## 1 1 1 1 1 3
## 8200 12500 <NA>
## 1 1 8
## [1] "Frequency table after encoding"
## a511_other_food. 511 Other food items including beverages such as tea, coffee, fruit juice
## 0 0.300000011920929 15 22 30 35
## 21 1 1 1 2 2
## 40 50 60 65 66 70
## 7 12 17 18 1 8
## 72 80 90 95 100 110
## 1 9 7 1 69 5
## 120 125 130 132 135 140
## 87 2 28 1 1 16
## 145 148 150 156 160 165
## 2 1 84 1 26 3
## 170 180 190 195 200 205
## 8 19 2 1 218 1
## 210 220 225 230 240 245
## 9 40 1 11 64 1
## 250 255 260 265 267 270
## 102 1 30 1 1 6
## 275 280 290 300 310 315
## 3 19 2 248 5 1
## 320 324 325 330 340 350
## 17 1 4 3 9 31
## 352 356 360 363 364 365
## 1 2 22 1 1 5
## 368 370 375 380 384 385
## 1 1 3 16 2 4
## 390 400 410 418 420 425
## 5 117 1 1 13 5
## 426 430 440 450 452 460
## 1 8 7 24 2 6
## 480 490 495 500 508 520
## 13 2 3 242 1 10
## 530 535 540 550 560 580
## 1 1 10 5 12 2
## 600 610 620 630 640 650
## 105 1 3 6 1 3
## 660 665 670 680 690 700
## 5 1 1 3 1 40
## 710 720 740 750 760 775
## 2 4 4 11 3 1
## 780 800 810 820 835 840
## 1 53 1 2 1 2
## 850 860 900 920 930 940
## 4 1 18 2 1 1
## 960 1000 1050 1060 1100 1120
## 2 99 2 1 4 2
## 1130 1150 1160 1200 1220 1240
## 1 1 1 13 1 1
## 1270 1300 1400 1500 1600 1650
## 1 2 4 40 2 1
## 1680 1700 1775 1900 1930 2000
## 1 3 1 1 1 18
## 2200 2240 2500 2700 3000 or more <NA>
## 1 1 4 1 13 8
# Top-code a512_pan at the break point returned by percentile_checker();
# the recorded run below ends in a "6000 or more" category.
mydata <- top_recode(
  variable = "a512_pan",
  break_point = percentile_checker("a512_pan"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a512_pan. 512 Pan, tobacco, intoxicants
## 0 1 3 10 20 25 40 50 60 70 75 80 100 125 150 180 200 220
## 1190 1 1 1 1 1 1 8 1 1 2 1 13 2 46 2 48 1
## 240 250 260 300 320 330 350 360 390 400 420 450 460 480 500 510 540 550
## 3 12 2 179 2 1 1 1 1 34 1 33 2 4 86 4 1 1
## 600 650 660 700 750 800 840 850 900 1000 1100 1180 1200 1290 1400 1480 1500 1560
## 246 2 2 12 5 11 2 1 51 76 1 1 45 1 1 3 60 2
## 1600 1680 1700 1800 2000 2100 2400 2500 2700 3000 3150 3300 3320 3450 3500 3900 4000 4050
## 2 1 2 8 31 2 5 5 1 39 1 1 1 1 2 1 7 1
## 4300 4500 5000 5400 6000 6600 7000 9000 10000 <NA>
## 1 2 6 1 9 1 1 1 2 12
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a512_pan. 512 Pan, tobacco, intoxicants
## 0 1 3 10 20 25 40 50
## 1190 1 1 1 1 1 1 8
## 60 70 75 80 100 125 150 180
## 1 1 2 1 13 2 46 2
## 200 220 240 250 260 300 320 330
## 48 1 3 12 2 179 2 1
## 350 360 390 400 420 450 460 480
## 1 1 1 34 1 33 2 4
## 500 510 540 550 600 650 660 700
## 86 4 1 1 246 2 2 12
## 750 800 840 850 900 1000 1100 1180
## 5 11 2 1 51 76 1 1
## 1200 1290 1400 1480 1500 1560 1600 1680
## 45 1 1 3 60 2 2 1
## 1700 1800 2000 2100 2400 2500 2700 3000
## 2 8 31 2 5 5 1 39
## 3150 3300 3320 3450 3500 3900 4000 4050
## 1 1 1 1 2 1 7 1
## 4300 4500 5000 5400 6000 or more <NA>
## 1 2 6 1 14 12
# Top-code a513_fuel: the recorded run below collapses values of 9000 and
# above into a single "9000 or more" category.
mydata <- top_recode(
  variable = "a513_fuel",
  break_point = 9000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a513_fuel. 513 Fuel & Light
## 0 13 23 30 100 115 120 150 151 152 200 215 216 230 233 245 246 250
## 27 1 1 1 4 1 1 8 1 1 16 1 1 1 1 1 2 12
## 252 253 260 266 275 278 300 310 320 325 330 350 355 360 375 380 400 405
## 1 1 1 1 2 1 24 1 1 1 1 22 1 2 3 2 41 1
## 410 415 420 425 433 440 445 450 460 475 480 500 502 505 520 530 536 550
## 3 1 3 2 1 2 1 11 1 1 1 59 1 1 1 2 1 15
## 556 560 566 576 580 600 602 608 617 625 627 640 650 665 666 675 680 690
## 1 2 1 1 1 50 1 1 1 3 1 2 12 1 1 3 1 1
## 700 710 714 716 720 725 730 740 745 750 760 766 770 774 778 780 785 800
## 106 1 1 1 3 3 2 1 1 38 4 2 3 1 1 1 2 108
## 816 820 830 835 850 865 866 870 880 885 887 890 892 900 905 910 920 930
## 1 4 1 1 22 1 1 6 1 2 1 1 1 84 1 2 1 1
## 933 940 950 956 960 966 980 982 990 1000 1006 1030 1040 1050 1075 1100 1110 1116
## 1 2 18 1 1 1 2 1 1 168 1 1 1 10 1 48 1 2
## 1120 1125 1150 1166 1170 1175 1187 1190 1200 1210 1216 1225 1230 1235 1238 1240 1250 1260
## 1 1 15 1 1 3 1 1 104 2 1 1 1 1 1 1 19 3
## 1265 1266 1270 1280 1290 1300 1308 1324 1330 1350 1360 1375 1385 1400 1410 1433 1450 1460
## 1 1 1 1 2 52 1 1 3 16 1 1 2 58 1 1 17 3
## 1480 1490 1500 1525 1530 1540 1550 1560 1570 1575 1580 1590 1600 1630 1640 1650 1660 1662
## 1 1 158 2 2 2 13 1 1 2 2 1 41 1 1 6 2 1
## 1680 1685 1700 1720 1725 1735 1738 1740 1744 1750 1770 1775 1780 1800 1810 1815 1820 1830
## 1 1 47 1 1 1 1 1 1 19 1 1 1 63 3 1 2 3
## 1840 1850 1860 1865 1870 1880 1900 1920 1930 1950 1960 1990 2000 2015 2025 2050 2070 2075
## 1 7 2 1 3 1 20 1 1 2 1 1 138 1 1 5 1 1
## 2100 2125 2135 2150 2166 2200 2220 2223 2250 2260 2300 2310 2320 2330 2336 2350 2365 2400
## 15 1 1 5 1 24 1 1 9 2 16 3 1 1 1 11 1 18
## 2450 2457 2480 2500 2540 2550 2600 2700 2717 2720 2750 2800 2820 2846 2850 2856 2875 2900
## 1 1 1 47 1 1 5 10 1 1 6 21 1 1 1 1 1 3
## 2935 2950 3000 3100 3125 3166 3170 3200 3230 3250 3310 3350 3400 3500 3529 3600 3700 3800
## 1 1 61 2 1 1 1 5 1 1 1 1 1 14 1 3 6 8
## 4000 4200 4266 4300 4350 4370 4500 4700 4800 5000 5200 5224 5350 5400 5500 5700 5800 6000
## 11 2 2 3 1 1 5 2 2 14 1 1 1 2 2 1 1 5
## 6070 6300 6500 6775 7000 7170 7500 7900 8600 10000 10400 12000 12750 12900 17000 18000 21000 24000
## 1 1 3 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1
## 30770 49000 62000 67000 <NA>
## 1 1 1 1 2
## [1] "Frequency table after encoding"
## a513_fuel. 513 Fuel & Light
## 0 13 23 30 100 115 120 150
## 27 1 1 1 4 1 1 8
## 151 152 200 215 216 230 233 245
## 1 1 16 1 1 1 1 1
## 246 250 252 253 260 266 275 278
## 2 12 1 1 1 1 2 1
## 300 310 320 325 330 350 355 360
## 24 1 1 1 1 22 1 2
## 375 380 400 405 410 415 420 425
## 3 2 41 1 3 1 3 2
## 433 440 445 450 460 475 480 500
## 1 2 1 11 1 1 1 59
## 502 505 520 530 536 550 556 560
## 1 1 1 2 1 15 1 2
## 566 576 580 600 602 608 617 625
## 1 1 1 50 1 1 1 3
## 627 640 650 665 666 675 680 690
## 1 2 12 1 1 3 1 1
## 700 710 714 716 720 725 730 740
## 106 1 1 1 3 3 2 1
## 745 750 760 766 770 774 778 780
## 1 38 4 2 3 1 1 1
## 785 800 816 820 830 835 850 865
## 2 108 1 4 1 1 22 1
## 866 870 880 885 887 890 892 900
## 1 6 1 2 1 1 1 84
## 905 910 920 930 933 940 950 956
## 1 2 1 1 1 2 18 1
## 960 966 980 982 990 1000 1006 1030
## 1 1 2 1 1 168 1 1
## 1040 1050 1075 1100 1110 1116 1120 1125
## 1 10 1 48 1 2 1 1
## 1150 1166 1170 1175 1187 1190 1200 1210
## 15 1 1 3 1 1 104 2
## 1216 1225 1230 1235 1238 1240 1250 1260
## 1 1 1 1 1 1 19 3
## 1265 1266 1270 1280 1290 1300 1308 1324
## 1 1 1 1 2 52 1 1
## 1330 1350 1360 1375 1385 1400 1410 1433
## 3 16 1 1 2 58 1 1
## 1450 1460 1480 1490 1500 1525 1530 1540
## 17 3 1 1 158 2 2 2
## 1550 1560 1570 1575 1580 1590 1600 1630
## 13 1 1 2 2 1 41 1
## 1640 1650 1660 1662 1680 1685 1700 1720
## 1 6 2 1 1 1 47 1
## 1725 1735 1738 1740 1744 1750 1770 1775
## 1 1 1 1 1 19 1 1
## 1780 1800 1810 1815 1820 1830 1840 1850
## 1 63 3 1 2 3 1 7
## 1860 1865 1870 1880 1900 1920 1930 1950
## 2 1 3 1 20 1 1 2
## 1960 1990 2000 2015 2025 2050 2070 2075
## 1 1 138 1 1 5 1 1
## 2100 2125 2135 2150 2166 2200 2220 2223
## 15 1 1 5 1 24 1 1
## 2250 2260 2300 2310 2320 2330 2336 2350
## 9 2 16 3 1 1 1 11
## 2365 2400 2450 2457 2480 2500 2540 2550
## 1 18 1 1 1 47 1 1
## 2600 2700 2717 2720 2750 2800 2820 2846
## 5 10 1 1 6 21 1 1
## 2850 2856 2875 2900 2935 2950 3000 3100
## 1 1 1 3 1 1 61 2
## 3125 3166 3170 3200 3230 3250 3310 3350
## 1 1 1 5 1 1 1 1
## 3400 3500 3529 3600 3700 3800 4000 4200
## 1 14 1 3 6 8 11 2
## 4266 4300 4350 4370 4500 4700 4800 5000
## 2 3 1 1 5 2 2 14
## 5200 5224 5350 5400 5500 5700 5800 6000
## 1 1 1 2 2 1 1 5
## 6070 6300 6500 6775 7000 7170 7500 7900
## 1 1 3 1 3 1 1 1
## 8600 9000 or more <NA>
## 1 13 2
# Top-code a514_cinema: values of 2000 and above are pooled into the single
# "2000 or more" category (see the before/after frequency tables that follow).
mydata <- top_recode(
  variable = "a514_cinema",
  break_point = 2000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a514_cinema. 514 Entertainment including cinema, picnic, sports, club fees, video cassettes,
## 0 1 10 18 40 48 50 60 100 120 130 138 140 150 160 170 180 183 200 210 220 240
## 1948 1 1 1 1 1 1 1 26 2 1 1 1 33 1 2 7 1 116 1 7 1
## 250 260 270 280 299 300 310 320 350 360 380 385 390 400 450 500 600 616 650 700 800 870
## 20 5 1 1 1 46 1 5 6 1 1 1 1 11 3 32 6 1 1 1 1 1
## 1000 1200 1500 2000 2500 3000 4000 4500 5000 6000 7000 <NA>
## 17 2 8 14 1 3 2 1 3 1 1 3
## [1] "Frequency table after encoding"
## a514_cinema. 514 Entertainment including cinema, picnic, sports, club fees, video cassettes,
## 0 1 10 18 40 48 50 60
## 1948 1 1 1 1 1 1 1
## 100 120 130 138 140 150 160 170
## 26 2 1 1 1 33 1 2
## 180 183 200 210 220 240 250 260
## 7 1 116 1 7 1 20 5
## 270 280 299 300 310 320 350 360
## 1 1 1 46 1 5 6 1
## 380 385 390 400 450 500 600 616
## 1 1 1 11 3 32 6 1
## 650 700 800 870 1000 1200 1500 2000 or more
## 1 1 1 1 17 2 8 26
## <NA>
## 3
# Top-code a515_torch: values of 2000 and above are pooled into the single
# "2000 or more" category (see the before/after frequency tables that follow).
mydata <- top_recode(
  variable = "a515_torch",
  break_point = 2000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a515_torch. 515 Personal care including spectacles, torch, umbrella, lighter, etc
## 0 1 8 10 15 20 30 50 60 70 80 100 115 120 130 140 150 160 180 190 200 210
## 1918 1 1 1 1 2 2 10 2 1 4 69 1 13 9 1 54 3 5 1 72 1
## 220 230 240 250 300 320 330 350 360 375 400 450 460 470 480 500 540 550 600 650 700 800
## 2 1 4 12 36 1 1 7 2 1 16 6 2 1 1 37 1 3 13 3 4 1
## 900 950 1000 1150 1200 1400 1500 2000 2700 2800 3000 <NA>
## 1 1 12 1 1 2 5 3 1 1 1 2
## [1] "Frequency table after encoding"
## a515_torch. 515 Personal care including spectacles, torch, umbrella, lighter, etc
## 0 1 8 10 15 20 30 50
## 1918 1 1 1 1 2 2 10
## 60 70 80 100 115 120 130 140
## 2 1 4 69 1 13 9 1
## 150 160 180 190 200 210 220 230
## 54 3 5 1 72 1 2 1
## 240 250 300 320 330 350 360 375
## 4 12 36 1 1 7 2 1
## 400 450 460 470 480 500 540 550
## 16 6 2 1 1 37 1 3
## 600 650 700 800 900 950 1000 1150
## 13 3 4 1 1 1 12 1
## 1200 1400 1500 2000 or more <NA>
## 1 2 5 6 2
# Top-code a516_toilet: values of 3000 and above are pooled into the single
# "3000 or more" category (see the before/after frequency tables that follow).
mydata <- top_recode(
  variable = "a516_toilet",
  break_point = 3000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a516_toilet. 516 Toiletries such as toothpaste, hair oil, shaving blades, etc
## 0 0.5 2 10 20 25 30 35 40 50 60 68 70 75 78 80 84 85 90 95 100 105
## 55 1 1 1 5 1 4 3 7 32 14 1 9 4 1 17 3 2 9 1 217 1
## 110 115 120 125 128 130 135 140 145 150 152 155 160 165 170 175 180 181 190 200 201 210
## 10 2 30 6 1 7 1 11 2 127 1 5 27 1 10 4 18 1 8 451 1 13
## 215 218 220 225 227 230 235 240 245 250 255 260 264 270 275 280 285 290 300 305 320 322
## 1 1 21 4 1 11 2 6 1 109 1 14 1 1 2 13 1 3 286 1 9 2
## 324 325 330 340 344 345 350 360 364 365 368 370 375 380 385 400 416 420 424 425 430 450
## 2 1 2 4 1 1 40 10 3 1 1 6 1 2 1 124 2 2 2 1 2 9
## 456 460 462 470 490 500 510 550 560 570 590 600 610 650 660 680 700 750 800 820 840 850
## 1 7 1 2 1 332 1 2 2 1 2 32 1 2 1 1 19 1 16 1 1 1
## 900 1000 1200 1500 1502 2000 2500 3000 4500 <NA>
## 1 60 3 10 1 8 3 2 1 6
## [1] "Frequency table after encoding"
## a516_toilet. 516 Toiletries such as toothpaste, hair oil, shaving blades, etc
## 0 0.5 2 10 20 25 30 35
## 55 1 1 1 5 1 4 3
## 40 50 60 68 70 75 78 80
## 7 32 14 1 9 4 1 17
## 84 85 90 95 100 105 110 115
## 3 2 9 1 217 1 10 2
## 120 125 128 130 135 140 145 150
## 30 6 1 7 1 11 2 127
## 152 155 160 165 170 175 180 181
## 1 5 27 1 10 4 18 1
## 190 200 201 210 215 218 220 225
## 8 451 1 13 1 1 21 4
## 227 230 235 240 245 250 255 260
## 1 11 2 6 1 109 1 14
## 264 270 275 280 285 290 300 305
## 1 1 2 13 1 3 286 1
## 320 322 324 325 330 340 344 345
## 9 2 2 1 2 4 1 1
## 350 360 364 365 368 370 375 380
## 40 10 3 1 1 6 1 2
## 385 400 416 420 424 425 430 450
## 1 124 2 2 2 1 2 9
## 456 460 462 470 490 500 510 550
## 1 7 1 2 1 332 1 2
## 560 570 590 600 610 650 660 680
## 2 1 2 32 1 2 1 1
## 700 750 800 820 840 850 900 1000
## 19 1 16 1 1 1 1 60
## 1200 1500 1502 2000 2500 3000 or more <NA>
## 3 10 1 8 3 3 6
# Top-code a517_bulb: values of 4000 and above are pooled into the single
# "4000 or more" category (see the before/after frequency tables that follow).
mydata <- top_recode(
  variable = "a517_bulb",
  break_point = 4000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a517_bulb. 517 Sundry articles including electric bulb, tubelight, glassware, bucket, washi
## 0 0.5 20 30 32 40 45 50 60 70 75 80 100 110 120 125 130 135
## 38 1 2 1 2 4 1 7 2 4 1 3 47 5 10 2 3 1
## 140 150 155 160 167 170 175 180 181 190 195 200 205 210 216 220 225 230
## 4 61 1 5 1 3 3 6 1 4 1 195 1 13 1 11 2 4
## 235 240 245 250 260 265 266 270 275 280 290 300 305 310 315 317 320 325
## 1 13 1 121 17 2 1 5 4 7 2 301 2 9 2 1 22 1
## 330 335 340 342 350 355 360 365 370 375 380 385 390 395 400 410 415 420
## 3 3 6 1 95 2 18 1 4 1 9 1 2 1 187 5 2 13
## 428 430 435 440 450 454 455 458 460 465 468 470 480 484 485 490 492 495
## 1 6 1 5 67 1 2 2 12 3 1 1 5 2 4 2 1 1
## 500 510 518 520 523 524 525 527 530 540 542 547 548 550 555 560 570 575
## 411 3 2 8 1 1 2 1 3 6 2 2 1 22 1 13 1 1
## 580 600 610 620 630 640 650 660 680 700 720 730 750 780 800 850 890 900
## 4 92 1 1 2 5 10 5 2 39 1 2 7 3 48 3 1 11
## 940 980 1000 1050 1100 1120 1150 1200 1250 1260 1280 1300 1500 1600 1700 1705 1800 1850
## 1 1 97 1 2 1 1 11 1 2 1 2 14 1 2 1 1 1
## 1880 2000 2002 2200 2300 2500 3000 3500 3800 4000 4500 5000 6000 8000 10000 30100 <NA>
## 1 20 1 1 1 8 11 3 1 3 1 6 1 1 1 1 4
## [1] "Frequency table after encoding"
## a517_bulb. 517 Sundry articles including electric bulb, tubelight, glassware, bucket, washi
## 0 0.5 20 30 32 40 45 50
## 38 1 2 1 2 4 1 7
## 60 70 75 80 100 110 120 125
## 2 4 1 3 47 5 10 2
## 130 135 140 150 155 160 167 170
## 3 1 4 61 1 5 1 3
## 175 180 181 190 195 200 205 210
## 3 6 1 4 1 195 1 13
## 216 220 225 230 235 240 245 250
## 1 11 2 4 1 13 1 121
## 260 265 266 270 275 280 290 300
## 17 2 1 5 4 7 2 301
## 305 310 315 317 320 325 330 335
## 2 9 2 1 22 1 3 3
## 340 342 350 355 360 365 370 375
## 6 1 95 2 18 1 4 1
## 380 385 390 395 400 410 415 420
## 9 1 2 1 187 5 2 13
## 428 430 435 440 450 454 455 458
## 1 6 1 5 67 1 2 2
## 460 465 468 470 480 484 485 490
## 12 3 1 1 5 2 4 2
## 492 495 500 510 518 520 523 524
## 1 1 411 3 2 8 1 1
## 525 527 530 540 542 547 548 550
## 2 1 3 6 2 2 1 22
## 555 560 570 575 580 600 610 620
## 1 13 1 1 4 92 1 1
## 630 640 650 660 680 700 720 730
## 2 5 10 5 2 39 1 2
## 750 780 800 850 890 900 940 980
## 7 3 48 3 1 11 1 1
## 1000 1050 1100 1120 1150 1200 1250 1260
## 97 1 2 1 1 11 1 2
## 1280 1300 1500 1600 1700 1705 1800 1850
## 1 2 14 1 2 1 1 1
## 1880 2000 2002 2200 2300 2500 3000 3500
## 1 20 1 1 1 8 11 3
## 3800 4000 or more <NA>
## 1 14 4
# Top-code a518_consu_service: values of 50000 and above are pooled into the
# single "50000 or more" category (see the frequency tables that follow).
mydata <- top_recode(
  variable = "a518_consu_service",
  break_point = 50000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a518_consu_service. 518 Consumer services such as domestic servants, tailoring, grinding charges,
## 0 1 20 24.2000007629395 25 30
## 224 1 1 1 3 6
## 40 45 50 60 65 70
## 6 1 30 25 2 9
## 74 75 80 84 90 99
## 1 6 13 1 12 1
## 100 120 125 130 133 140
## 95 13 4 6 1 4
## 145 150 153 160 165 170
## 1 76 1 13 1 4
## 175 180 190 199 200 210
## 3 12 2 1 149 6
## 213 216 220 225 230 239
## 1 1 7 2 4 1
## 240 250 260 270 275 280
## 3 45 6 5 1 9
## 285 290 298 300 310 320
## 1 4 1 108 3 5
## 330 340 350 360 370 375
## 1 2 26 2 1 1
## 380 390 399 400 420 428
## 4 2 1 59 5 1
## 430 440 450 455 460 480
## 2 1 20 1 4 2
## 490 499 500 525 530 540
## 1 1 145 1 4 6
## 550 560 580 590 599 600
## 13 2 3 2 1 59
## 620 625 630 640 645 650
## 3 2 1 1 2 10
## 659 660 670 700 710 720
## 1 1 1 52 1 1
## 740 750 754 760 764 780
## 1 5 1 1 1 1
## 800 810 820 840 850 860
## 44 1 2 2 4 1
## 900 940 950 980 1000 1050
## 21 2 1 1 128 1
## 1060 1100 1101 1120 1140 1150
## 3 13 1 1 1 3
## 1165 1190 1200 1220 1240 1249
## 1 1 37 1 1 1
## 1250 1260 1270 1300 1320 1360
## 2 2 1 13 1 1
## 1400 1440 1450 1460 1500 1550
## 5 1 1 2 70 1
## 1560 1580 1590 1600 1620 1650
## 1 1 1 14 1 1
## 1680 1700 1730 1750 1800 1833
## 2 7 1 1 1 1
## 1848 1860 1870 1900 1950 2000
## 1 1 1 2 2 82
## 2040 2060 2098 2100 2120 2150
## 1 1 1 5 1 4
## 2160 2180 2199 2200 2228 2250
## 1 1 1 9 1 1
## 2280 2300 2400 2440 2500 2550
## 1 8 4 2 24 1
## 2580 2600 2640 2650 2700 2800
## 2 1 1 1 5 1
## 2900 3000 3010 3100 3150 3200
## 2 50 1 4 1 5
## 3300 3400 3450 3480 3500 3600
## 4 2 1 1 9 4
## 3650 3700 3790 3850 3900 4000
## 1 1 1 1 1 35
## 4080 4100 4150 4200 4260 4280
## 1 5 1 1 1 1
## 4299 4300 4350 4400 4440 4500
## 1 2 1 3 1 5
## 4600 4700 4800 4950 5000 5050
## 1 2 1 1 58 1
## 5100 5200 5300 5400 5500 5700
## 4 3 2 1 2 1
## 6000 6100 6135 6200 6400 6500
## 28 2 1 4 2 1
## 6580 6600 6800 7000 7300 7400
## 1 1 2 11 1 1
## 7430 7500 7600 7780 8000 8100
## 1 1 1 1 10 1
## 8350 8650 9000 9280 9600 10000
## 1 1 3 1 1 25
## 10050 10060 10200 10720 10750 11000
## 1 1 2 1 1 3
## 11030 11700 12000 12300 13000 13500
## 1 1 3 1 1 1
## 15000 15700 15900 16000 16500 17200
## 4 1 1 3 1 1
## 18000 20000 20200 23000 26000 30000
## 1 7 1 1 1 1
## 30310 30600 35000 40000 43000 50000
## 1 1 1 3 1 3
## 80000 1e+05 120100 <NA>
## 1 1 1 7
## [1] "Frequency table after encoding"
## a518_consu_service. 518 Consumer services such as domestic servants, tailoring, grinding charges,
## 0 1 20 24.2000007629395 25 30
## 224 1 1 1 3 6
## 40 45 50 60 65 70
## 6 1 30 25 2 9
## 74 75 80 84 90 99
## 1 6 13 1 12 1
## 100 120 125 130 133 140
## 95 13 4 6 1 4
## 145 150 153 160 165 170
## 1 76 1 13 1 4
## 175 180 190 199 200 210
## 3 12 2 1 149 6
## 213 216 220 225 230 239
## 1 1 7 2 4 1
## 240 250 260 270 275 280
## 3 45 6 5 1 9
## 285 290 298 300 310 320
## 1 4 1 108 3 5
## 330 340 350 360 370 375
## 1 2 26 2 1 1
## 380 390 399 400 420 428
## 4 2 1 59 5 1
## 430 440 450 455 460 480
## 2 1 20 1 4 2
## 490 499 500 525 530 540
## 1 1 145 1 4 6
## 550 560 580 590 599 600
## 13 2 3 2 1 59
## 620 625 630 640 645 650
## 3 2 1 1 2 10
## 659 660 670 700 710 720
## 1 1 1 52 1 1
## 740 750 754 760 764 780
## 1 5 1 1 1 1
## 800 810 820 840 850 860
## 44 1 2 2 4 1
## 900 940 950 980 1000 1050
## 21 2 1 1 128 1
## 1060 1100 1101 1120 1140 1150
## 3 13 1 1 1 3
## 1165 1190 1200 1220 1240 1249
## 1 1 37 1 1 1
## 1250 1260 1270 1300 1320 1360
## 2 2 1 13 1 1
## 1400 1440 1450 1460 1500 1550
## 5 1 1 2 70 1
## 1560 1580 1590 1600 1620 1650
## 1 1 1 14 1 1
## 1680 1700 1730 1750 1800 1833
## 2 7 1 1 1 1
## 1848 1860 1870 1900 1950 2000
## 1 1 1 2 2 82
## 2040 2060 2098 2100 2120 2150
## 1 1 1 5 1 4
## 2160 2180 2199 2200 2228 2250
## 1 1 1 9 1 1
## 2280 2300 2400 2440 2500 2550
## 1 8 4 2 24 1
## 2580 2600 2640 2650 2700 2800
## 2 1 1 1 5 1
## 2900 3000 3010 3100 3150 3200
## 2 50 1 4 1 5
## 3300 3400 3450 3480 3500 3600
## 4 2 1 1 9 4
## 3650 3700 3790 3850 3900 4000
## 1 1 1 1 1 35
## 4080 4100 4150 4200 4260 4280
## 1 5 1 1 1 1
## 4299 4300 4350 4400 4440 4500
## 1 2 1 3 1 5
## 4600 4700 4800 4950 5000 5050
## 1 2 1 1 58 1
## 5100 5200 5300 5400 5500 5700
## 4 3 2 1 2 1
## 6000 6100 6135 6200 6400 6500
## 28 2 1 4 2 1
## 6580 6600 6800 7000 7300 7400
## 1 1 2 11 1 1
## 7430 7500 7600 7780 8000 8100
## 1 1 1 1 10 1
## 8350 8650 9000 9280 9600 10000
## 1 1 3 1 1 25
## 10050 10060 10200 10720 10750 11000
## 1 1 2 1 1 3
## 11030 11700 12000 12300 13000 13500
## 1 1 3 1 1 1
## 15000 15700 15900 16000 16500 17200
## 4 1 1 3 1 1
## 18000 20000 20200 23000 26000 30000
## 1 7 1 1 1 1
## 30310 30600 35000 40000 43000 50000 or more
## 1 1 1 3 1 6
## <NA>
## 7
# Top-code a519_petrol: values above the 30000 break point are pooled into the
# single "30000 or more" category (see the frequency tables that follow).
mydata <- top_recode(
  variable = "a519_petrol",
  break_point = 30000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a519_petrol. 519 Conveyance including porter charges, diesel, petrol, school bus/van, etc
## 0 10 20 50 60 70 100 120 150 180 200 250 300 307 350 400 450 500
## 690 1 1 4 1 1 11 1 6 1 60 3 48 1 1 28 3 191
## 540 550 560 580 600 620 650 700 720 750 800 840 850 860 900 1000 1005 1100
## 1 1 1 1 61 1 2 28 1 4 34 1 1 1 12 263 1 2
## 1200 1250 1300 1320 1350 1400 1500 1600 1750 1800 2000 2100 2200 2250 2300 2400 2500 2800
## 43 1 3 1 1 3 244 8 1 4 143 6 1 1 1 5 27 1
## 3000 3100 3200 3300 3500 4000 4200 4500 5000 5050 6000 6400 7000 8000 8400 9000 9684 10000
## 213 2 1 1 9 15 1 4 29 1 23 1 4 3 1 4 1 4
## 10500 12000 15000 20000 40000 45000 50000 <NA>
## 1 2 1 4 2 1 4 64
## [1] "Frequency table after encoding"
## a519_petrol. 519 Conveyance including porter charges, diesel, petrol, school bus/van, etc
## 0 10 20 50 60 70 100
## 690 1 1 4 1 1 11
## 120 150 180 200 250 300 307
## 1 6 1 60 3 48 1
## 350 400 450 500 540 550 560
## 1 28 3 191 1 1 1
## 580 600 620 650 700 720 750
## 1 61 1 2 28 1 4
## 800 840 850 860 900 1000 1005
## 34 1 1 1 12 263 1
## 1100 1200 1250 1300 1320 1350 1400
## 2 43 1 3 1 1 3
## 1500 1600 1750 1800 2000 2100 2200
## 244 8 1 4 143 6 1
## 2250 2300 2400 2500 2800 3000 3100
## 1 1 5 27 1 213 2
## 3200 3300 3500 4000 4200 4500 5000
## 1 1 9 15 1 4 29
## 5050 6000 6400 7000 8000 8400 9000
## 1 23 1 4 3 1 4
## 9684 10000 10500 12000 15000 20000 30000 or more
## 1 4 1 2 1 4 7
## <NA>
## 64
# Top-code a520_rent. The break point is not hard-coded here: it is whatever
# percentile_checker() returns for this variable. In the run recorded below the
# resulting top category is "3000 or more".
mydata <- top_recode(
  variable = "a520_rent",
  break_point = percentile_checker("a520_rent"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a520_rent. 520 Rent / house rent
## 0 1 100 200 400 500 550 700 800 1000 1200 1500 1800 2000 2500 2800 3000 3500 4000 5000 6000 <NA>
## 2228 1 1 4 2 7 1 1 2 22 2 26 1 28 8 1 11 3 3 4 1 1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a520_rent. 520 Rent / house rent
## 0 1 100 200 400 500 550 700
## 2228 1 1 4 2 7 1 1
## 800 1000 1200 1500 1800 2000 2500 2800
## 2 22 2 26 1 28 8 1
## 3000 or more <NA>
## 22 1
# Top-code a521_taxes: values above the 4500 break point are pooled into the
# single "4500 or more" category (see the frequency tables that follow).
mydata <- top_recode(
  variable = "a521_taxes",
  break_point = 4500,
  missing = NA
)
## [1] "Frequency table before encoding"
## a521_taxes. 521 Consumer taxes and cesses including water charges
## 0 26 27 30 35 50 52 53 54 55 57 60 62 65 70 75 80 82 90 100 101 103
## 731 1 1 7 2 32 13 1 1 21 1 19 1 10 4 6 3 1 11 103 1 4
## 104 105 110 112 115 120 121 125 126 130 135 140 145 150 160 165 170 175 180 200 205 206
## 2 1 29 1 1 10 1 16 1 1 2 1 1 30 1 2 4 3 2 137 1 1
## 207 208 216 217 218 220 230 240 250 252 260 270 300 307 320 340 350 360 375 380 400 414
## 3 1 1 1 1 5 1 1 112 1 3 2 268 1 2 1 54 1 2 1 106 1
## 415 420 450 460 500 550 600 630 650 700 750 800 900 1000 1050 1060 1100 1120 1200 1250 1300 1350
## 1 1 10 1 126 1 161 1 3 30 12 26 48 55 1 2 1 1 27 3 1 2
## 1500 1600 2000 2100 2500 3000 3130 3400 3600 4000 5000 5250 6000 7000 9000 <NA>
## 15 3 12 2 2 6 1 1 1 1 3 1 1 1 1 2
## [1] "Frequency table after encoding"
## a521_taxes. 521 Consumer taxes and cesses including water charges
## 0 26 27 30 35 50 52 53
## 731 1 1 7 2 32 13 1
## 54 55 57 60 62 65 70 75
## 1 21 1 19 1 10 4 6
## 80 82 90 100 101 103 104 105
## 3 1 11 103 1 4 2 1
## 110 112 115 120 121 125 126 130
## 29 1 1 10 1 16 1 1
## 135 140 145 150 160 165 170 175
## 2 1 1 30 1 2 4 3
## 180 200 205 206 207 208 216 217
## 2 137 1 1 3 1 1 1
## 218 220 230 240 250 252 260 270
## 1 5 1 1 112 1 3 2
## 300 307 320 340 350 360 375 380
## 268 1 2 1 54 1 2 1
## 400 414 415 420 450 460 500 550
## 106 1 1 1 10 1 126 1
## 600 630 650 700 750 800 900 1000
## 161 1 3 30 12 26 48 55
## 1050 1060 1100 1120 1200 1250 1300 1350
## 1 2 1 1 27 3 1 2
## 1500 1600 2000 2100 2500 3000 3130 3400
## 15 3 12 2 2 6 1 1
## 3600 4000 4500 or more <NA>
## 1 1 7 2
# Top-code a522_medical_expenses: values above the 42000 break point are pooled
# into the single "42000 or more" category (see the frequency tables below).
mydata <- top_recode(
  variable = "a522_medical_expenses",
  break_point = 42000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a522_medical_expenses. 522 Medical Expenses (non-institutional)
## 0 2 5 10 15 20 25 26 30 35 40 50 60 70 71
## 1048 1 12 53 4 22 1 1 10 1 4 17 7 2 1
## 75 80 98 100 120 130 150 160 180 200 220 240 250 290 300
## 1 3 1 54 1 1 12 1 1 108 2 1 9 1 53
## 350 400 420 450 475 480 500 508 520 550 570 575 600 620 650
## 9 38 1 8 1 2 158 1 1 5 1 1 44 2 6
## 700 720 730 750 760 800 850 900 950 1000 1100 1200 1300 1400 1500
## 18 1 1 5 2 28 5 2 1 182 2 21 5 1 54
## 1620 1700 1800 2000 2500 2700 2800 3000 3200 3500 3800 4000 4500 4900 5000
## 1 1 1 95 22 1 1 42 2 3 1 22 3 1 50
## 5200 6000 6500 7000 7500 7501 8000 9000 10000 11500 12000 14000 15000 18000 20000
## 1 5 1 6 1 1 2 2 10 1 5 1 9 1 4
## 25000 30000 40000 50000 60000 70000 1e+05 110000 2e+05 250000 4e+05 <NA>
## 3 2 2 5 1 1 1 1 1 1 1 4
## [1] "Frequency table after encoding"
## a522_medical_expenses. 522 Medical Expenses (non-institutional)
## 0 2 5 10 15 20 25
## 1048 1 12 53 4 22 1
## 26 30 35 40 50 60 70
## 1 10 1 4 17 7 2
## 71 75 80 98 100 120 130
## 1 1 3 1 54 1 1
## 150 160 180 200 220 240 250
## 12 1 1 108 2 1 9
## 290 300 350 400 420 450 475
## 1 53 9 38 1 8 1
## 480 500 508 520 550 570 575
## 2 158 1 1 5 1 1
## 600 620 650 700 720 730 750
## 44 2 6 18 1 1 5
## 760 800 850 900 950 1000 1100
## 2 28 5 2 1 182 2
## 1200 1300 1400 1500 1620 1700 1800
## 21 5 1 54 1 1 1
## 2000 2500 2700 2800 3000 3200 3500
## 95 22 1 1 42 2 3
## 3800 4000 4500 4900 5000 5200 6000
## 1 22 3 1 50 1 5
## 6500 7000 7500 7501 8000 9000 10000
## 1 6 1 1 2 2 10
## 11500 12000 14000 15000 18000 20000 25000
## 1 5 1 9 1 4 3
## 30000 40000 42000 or more <NA>
## 2 2 12 4
# Top-code a523_medical: values of 350000 and above are pooled into the single
# "350000 or more" category (see the before/after frequency tables below).
mydata <- top_recode(
  variable = "a523_medical",
  break_point = 350000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a523_medical. 523 Medical (institutional)
## 0 1 10 20 22 50 98 100 150 200 250 280 300
## 302 1 1 1 1 1 1 6 1 23 3 1 13
## 400 450 500 600 650 700 800 850 900 1000 1100 1130 1200
## 4 1 79 22 1 11 8 1 4 133 3 1 29
## 1400 1500 1600 1750 1900 2000 2200 2300 2400 2500 2600 3000 3500
## 1 66 2 1 1 152 1 1 4 26 2 163 7
## 3600 4000 4400 4500 4800 5000 5500 6000 6200 6500 6700 7000 7200
## 2 98 1 2 1 302 1 96 1 3 1 52 3
## 7500 8000 8400 8500 9000 10000 10050 10200 11000 12000 13000 14000 15000
## 4 67 1 1 17 142 1 1 4 85 4 3 80
## 16000 17000 17200 18000 20000 21000 22000 23000 24000 25000 25500 26000 28000
## 2 1 1 7 68 1 1 1 7 24 1 5 1
## 30000 30900 35000 36000 40000 45000 48000 50000 52000 55000 59000 60000 70000
## 27 1 7 2 21 1 1 22 1 1 1 12 5
## 72000 75000 80000 95000 1e+05 109500 110000 120000 150000 155000 160000 165000 2e+05
## 1 1 7 1 14 1 1 1 11 1 1 1 4
## 250000 3e+05 350000 4e+05 5e+05 7e+05 8e+05 1500000 <NA>
## 1 4 1 2 1 1 1 1 27
## [1] "Frequency table after encoding"
## a523_medical. 523 Medical (institutional)
## 0 1 10 20 22 50 98
## 302 1 1 1 1 1 1
## 100 150 200 250 280 300 400
## 6 1 23 3 1 13 4
## 450 500 600 650 700 800 850
## 1 79 22 1 11 8 1
## 900 1000 1100 1130 1200 1400 1500
## 4 133 3 1 29 1 66
## 1600 1750 1900 2000 2200 2300 2400
## 2 1 1 152 1 1 4
## 2500 2600 3000 3500 3600 4000 4400
## 26 2 163 7 2 98 1
## 4500 4800 5000 5500 6000 6200 6500
## 2 1 302 1 96 1 3
## 6700 7000 7200 7500 8000 8400 8500
## 1 52 3 4 67 1 1
## 9000 10000 10050 10200 11000 12000 13000
## 17 142 1 1 4 85 4
## 14000 15000 16000 17000 17200 18000 20000
## 3 80 2 1 1 7 68
## 21000 22000 23000 24000 25000 25500 26000
## 1 1 1 7 24 1 5
## 28000 30000 30900 35000 36000 40000 45000
## 1 27 1 7 2 21 1
## 48000 50000 52000 55000 59000 60000 70000
## 1 22 1 1 1 12 5
## 72000 75000 80000 95000 1e+05 109500 110000
## 1 1 7 1 14 1 1
## 120000 150000 155000 160000 165000 2e+05 250000
## 1 11 1 1 1 4 1
## 3e+05 350000 or more <NA>
## 4 7 27
# Top-code a524_tution: values of 60000 and above are pooled into the single
# "60000 or more" category (see the before/after frequency tables below).
mydata <- top_recode(
  variable = "a524_tution",
  break_point = 60000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a524_tution. 524 Tuition fees & other fees including private tutor, school/college fees, etc
## 0 8 10 30 50 60 75 80 100 120 140 150 170 200 220
## 900 1 1 1 5 2 1 1 24 1 1 6 1 55 1
## 250 255 300 315 340 350 360 365 375 380 390 400 415 420 425
## 8 1 28 1 1 9 5 2 2 2 1 31 1 1 1
## 450 460 470 475 480 485 500 520 540 550 565 570 600 620 625
## 7 2 2 2 1 1 75 2 3 5 1 1 47 1 2
## 630 650 660 665 700 720 730 750 760 770 780 800 830 850 860
## 1 8 2 2 16 1 1 12 1 1 1 15 1 3 1
## 900 920 930 940 950 960 990 1000 1025 1040 1050 1100 1150 1160 1200
## 10 3 1 1 1 1 1 72 1 1 2 10 1 1 35
## 1210 1250 1275 1290 1300 1330 1340 1370 1380 1400 1430 1450 1460 1500 1570
## 1 2 1 1 5 1 2 1 1 8 1 1 1 29 1
## 1600 1640 1685 1700 1800 1880 1900 1920 1950 2000 2100 2200 2215 2240 2400
## 5 1 1 7 6 1 1 1 1 65 3 3 1 1 11
## 2410 2500 2600 2700 2725 2800 2810 2850 2860 3000 3200 3300 3320 3365 3390
## 1 16 2 1 1 2 1 1 1 51 3 2 1 1 1
## 3400 3500 3550 3590 3600 3675 3700 3800 4000 4100 4150 4200 4270 4275 4400
## 1 13 1 1 9 1 1 3 37 2 1 2 1 1 2
## 4480 4500 4700 4730 4800 5000 5400 5450 5500 5690 5750 6000 6150 6400 6500
## 1 9 1 1 5 71 1 1 4 1 1 58 1 1 1
## 6600 6700 6800 6900 6950 7000 7100 7200 7300 7500 7800 8000 8100 8400 8500
## 1 1 1 1 1 20 1 9 1 4 2 32 1 5 1
## 8880 9000 9200 9600 9800 9855 10000 10200 10500 10600 10800 11000 11100 11700 12000
## 1 21 1 2 2 1 66 2 3 1 3 4 1 1 32
## 12300 12800 13000 13700 14000 15000 15400 15700 16000 16500 16800 17000 18000 18300 18500
## 1 1 8 1 9 30 1 1 4 1 1 5 7 1 1
## 19000 20000 20360 20400 21000 21600 22000 23000 23007 24000 25000 25500 26000 27000 30000
## 2 33 1 1 2 1 3 1 1 3 8 1 3 2 10
## 30200 32000 35000 36000 37000 38000 40000 45000 48000 50000 52000 60000 62000 63000 70000
## 1 3 2 1 1 1 4 2 1 4 1 3 1 1 2
## 72000 75000 1e+05 110000 120000 160000 2e+05 230000 340000 410000 <NA>
## 1 1 2 1 1 1 1 1 1 1 17
## [1] "Frequency table after encoding"
## a524_tution. 524 Tuition fees & other fees including private tutor, school/college fees, etc
## 0 8 10 30 50 60 75
## 900 1 1 1 5 2 1
## 80 100 120 140 150 170 200
## 1 24 1 1 6 1 55
## 220 250 255 300 315 340 350
## 1 8 1 28 1 1 9
## 360 365 375 380 390 400 415
## 5 2 2 2 1 31 1
## 420 425 450 460 470 475 480
## 1 1 7 2 2 2 1
## 485 500 520 540 550 565 570
## 1 75 2 3 5 1 1
## 600 620 625 630 650 660 665
## 47 1 2 1 8 2 2
## 700 720 730 750 760 770 780
## 16 1 1 12 1 1 1
## 800 830 850 860 900 920 930
## 15 1 3 1 10 3 1
## 940 950 960 990 1000 1025 1040
## 1 1 1 1 72 1 1
## 1050 1100 1150 1160 1200 1210 1250
## 2 10 1 1 35 1 2
## 1275 1290 1300 1330 1340 1370 1380
## 1 1 5 1 2 1 1
## 1400 1430 1450 1460 1500 1570 1600
## 8 1 1 1 29 1 5
## 1640 1685 1700 1800 1880 1900 1920
## 1 1 7 6 1 1 1
## 1950 2000 2100 2200 2215 2240 2400
## 1 65 3 3 1 1 11
## 2410 2500 2600 2700 2725 2800 2810
## 1 16 2 1 1 2 1
## 2850 2860 3000 3200 3300 3320 3365
## 1 1 51 3 2 1 1
## 3390 3400 3500 3550 3590 3600 3675
## 1 1 13 1 1 9 1
## 3700 3800 4000 4100 4150 4200 4270
## 1 3 37 2 1 2 1
## 4275 4400 4480 4500 4700 4730 4800
## 1 2 1 9 1 1 5
## 5000 5400 5450 5500 5690 5750 6000
## 71 1 1 4 1 1 58
## 6150 6400 6500 6600 6700 6800 6900
## 1 1 1 1 1 1 1
## 6950 7000 7100 7200 7300 7500 7800
## 1 20 1 9 1 4 2
## 8000 8100 8400 8500 8880 9000 9200
## 32 1 5 1 1 21 1
## 9600 9800 9855 10000 10200 10500 10600
## 2 2 1 66 2 3 1
## 10800 11000 11100 11700 12000 12300 12800
## 3 4 1 1 32 1 1
## 13000 13700 14000 15000 15400 15700 16000
## 8 1 9 30 1 1 4
## 16500 16800 17000 18000 18300 18500 19000
## 1 1 5 7 1 1 2
## 20000 20360 20400 21000 21600 22000 23000
## 33 1 1 2 1 3 1
## 23007 24000 25000 25500 26000 27000 30000
## 1 3 8 1 3 2 10
## 30200 32000 35000 36000 37000 38000 40000
## 1 3 2 1 1 1 4
## 45000 48000 50000 52000 60000 or more <NA>
## 2 1 4 1 18 17
# Top-code a525_schl_book. The break point is not hard-coded here: it is
# whatever percentile_checker() returns for this variable. In the run recorded
# below the resulting top category is "50000 or more".
mydata <- top_recode(
  variable = "a525_schl_book",
  break_point = percentile_checker("a525_schl_book"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a525_schl_book. 525 School books & other educational articles including newspaper, library charg
## 0 10 22 50 100 200 250 300 340 350 365 400 450 490 500
## 200 4 1 2 2 5 1 8 1 1 1 8 1 1 36
## 510 550 600 650 700 800 900 960 1000 1050 1100 1200 1300 1360 1400
## 1 2 12 2 5 8 4 1 83 1 2 21 5 1 3
## 1409 1500 1600 1700 1800 1900 2000 2160 2170 2200 2300 2340 2400 2500 2600
## 1 87 5 8 7 2 183 1 1 3 3 1 9 49 3
## 2700 2800 2900 3000 3100 3200 3276 3300 3350 3400 3500 3505 3600 3700 3800
## 2 1 2 206 1 2 1 2 1 1 26 1 6 1 1
## 4000 4200 4360 4455 4500 4700 4900 4998 5000 5200 5300 5500 5650 5750 5800
## 159 1 1 1 33 1 1 1 248 1 3 11 1 1 1
## 6000 6200 6230 6300 6500 6600 7000 7500 7600 7700 8000 8500 8600 9000 9200
## 148 2 1 1 9 1 71 17 1 1 108 1 1 44 1
## 9500 9600 9800 10000 10500 11000 11200 11700 11800 12000 12500 13000 13500 14000 14500
## 5 1 1 118 2 14 2 1 1 53 2 14 1 11 2
## 15000 16000 17000 17500 18000 20000 20009 20500 20900 21000 22000 23000 23700 24000 25000
## 54 8 5 1 8 44 1 1 1 5 1 1 1 3 11
## 27000 28000 30000 32000 35000 38000 40000 42000 45000 47000 50000 53000 60000 80000 1e+05
## 1 1 14 3 3 2 4 1 2 1 4 1 5 1 1
## 113000 <NA>
## 1 29
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a525_schl_book. 525 School books & other educational articles including newspaper, library charg
## 0 10 22 50 100 200 250
## 200 4 1 2 2 5 1
## 300 340 350 365 400 450 490
## 8 1 1 1 8 1 1
## 500 510 550 600 650 700 800
## 36 1 2 12 2 5 8
## 900 960 1000 1050 1100 1200 1300
## 4 1 83 1 2 21 5
## 1360 1400 1409 1500 1600 1700 1800
## 1 3 1 87 5 8 7
## 1900 2000 2160 2170 2200 2300 2340
## 2 183 1 1 3 3 1
## 2400 2500 2600 2700 2800 2900 3000
## 9 49 3 2 1 2 206
## 3100 3200 3276 3300 3350 3400 3500
## 1 2 1 2 1 1 26
## 3505 3600 3700 3800 4000 4200 4360
## 1 6 1 1 159 1 1
## 4455 4500 4700 4900 4998 5000 5200
## 1 33 1 1 1 248 1
## 5300 5500 5650 5750 5800 6000 6200
## 3 11 1 1 1 148 2
## 6230 6300 6500 6600 7000 7500 7600
## 1 1 9 1 71 17 1
## 7700 8000 8500 8600 9000 9200 9500
## 1 108 1 1 44 1 5
## 9600 9800 10000 10500 11000 11200 11700
## 1 1 118 2 14 2 1
## 11800 12000 12500 13000 13500 14000 14500
## 1 53 2 14 1 11 2
## 15000 16000 17000 17500 18000 20000 20009
## 54 8 5 1 8 44 1
## 20500 20900 21000 22000 23000 23700 24000
## 1 1 5 1 1 1 3
## 25000 27000 28000 30000 32000 35000 38000
## 11 1 1 14 3 3 2
## 40000 42000 45000 47000 50000 or more <NA>
## 4 1 2 1 13 29
# Top-code a526_cloth: values of 60000 and above are pooled into the single
# "60000 or more" category (see the before/after frequency tables below).
mydata <- top_recode(
  variable = "a526_cloth",
  break_point = 60000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a526_cloth. 526 Clothing and bedding
## 0 1 6 10 40 200 300 380 400 480 500 600 700 800 900
## 60 1 1 2 1 1 2 1 4 1 9 4 2 3 2
## 1000 1100 1200 1250 1300 1400 1500 1660 2000 2400 2500 2800 3000 3200 3500
## 25 2 8 1 1 1 20 1 129 1 36 1 171 1 12
## 3800 4000 4200 4400 4500 4800 5000 5500 6000 6400 6500 7000 7500 8000 8500
## 1 185 1 1 7 3 498 6 217 1 4 118 9 160 6
## 9000 9800 10000 10500 11000 11500 12000 13000 14000 15000 16000 17000 18000 19000 20000
## 35 1 314 1 3 1 52 3 3 85 3 2 4 1 64
## 21000 22000 24000 25000 30000 40000 41000 45000 50000 50003 60000 70000 1e+05 110000 2e+05
## 1 1 1 12 14 4 1 2 9 1 2 2 5 1 2
## 240000 3e+05 <NA>
## 1 1 5
## [1] "Frequency table after encoding"
## a526_cloth. 526 Clothing and bedding
## 0 1 6 10 40 200 300
## 60 1 1 2 1 1 2
## 380 400 480 500 600 700 800
## 1 4 1 9 4 2 3
## 900 1000 1100 1200 1250 1300 1400
## 2 25 2 8 1 1 1
## 1500 1660 2000 2400 2500 2800 3000
## 20 1 129 1 36 1 171
## 3200 3500 3800 4000 4200 4400 4500
## 1 12 1 185 1 1 7
## 4800 5000 5500 6000 6400 6500 7000
## 3 498 6 217 1 4 118
## 7500 8000 8500 9000 9800 10000 10500
## 9 160 6 35 1 314 1
## 11000 11500 12000 13000 14000 15000 16000
## 3 1 52 3 3 85 3
## 17000 18000 19000 20000 21000 22000 24000
## 2 4 1 64 1 1 1
## 25000 30000 40000 41000 45000 50000 50003
## 12 14 4 1 2 9 1
## 60000 or more <NA>
## 14 5
# Top-code a527_footwear: values above the 16000 break point are pooled into
# the single "16000 or more" category (see the frequency tables below).
mydata <- top_recode(
  variable = "a527_footwear",
  break_point = 16000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a527_footwear. 527 Footwear
## 0 10 50 100 120 150 200 210 240 250 280 300 370 400 450 500 560 600
## 23 1 1 3 1 1 16 1 1 1 1 20 1 12 2 124 1 47
## 700 750 800 810 900 1000 1100 1150 1200 1250 1260 1300 1400 1500 1550 1600 1700 1800
## 22 2 30 1 4 422 1 2 51 1 1 7 3 197 1 11 1 19
## 1845 2000 2100 2200 2400 2500 2600 2700 2800 3000 3200 3500 3600 4000 4500 5000 5600 6000
## 2 540 1 6 5 116 2 1 2 294 1 16 3 84 1 168 1 25
## 7000 7500 8000 9000 10000 12000 15000 20000 25000 30000 36000 40000 50000 <NA>
## 8 1 6 1 12 4 4 5 1 1 1 1 3 9
## [1] "Frequency table after encoding"
## a527_footwear. 527 Footwear
## 0 10 50 100 120 150 200
## 23 1 1 3 1 1 16
## 210 240 250 280 300 370 400
## 1 1 1 1 20 1 12
## 450 500 560 600 700 750 800
## 2 124 1 47 22 2 30
## 810 900 1000 1100 1150 1200 1250
## 1 4 422 1 2 51 1
## 1260 1300 1400 1500 1550 1600 1700
## 1 7 3 197 1 11 1
## 1800 1845 2000 2100 2200 2400 2500
## 19 2 540 1 6 5 116
## 2600 2700 2800 3000 3200 3500 3600
## 2 1 2 294 1 16 3
## 4000 4500 5000 5600 6000 7000 7500
## 84 1 168 1 25 8 1
## 8000 9000 10000 12000 15000 16000 or more <NA>
## 6 1 12 4 4 12 9
# Top-code a528_furniture: values above the 40000 break point are pooled into
# the single "40000 or more" category (see the frequency tables below).
mydata <- top_recode(
  variable = "a528_furniture",
  break_point = 40000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a528_furniture. 528 Furniture and Fixtures including bedstead, almirah, suitcase, carpet, painti
## 0 0.100000001490116 1 100 200 300
## 2059 1 1 1 2 2
## 350 400 500 600 700 750
## 1 2 7 4 5 1
## 800 900 1000 1100 1200 1300
## 7 2 17 2 9 4
## 1400 1500 1550 1600 1700 1800
## 3 13 1 3 1 2
## 2000 2200 2400 2500 2600 2800
## 32 3 2 8 1 1
## 3000 3200 3500 4000 4500 5000
## 17 1 4 17 2 24
## 5500 5600 6000 6500 7000 7500
## 2 1 11 1 9 2
## 8000 8500 9000 10000 11000 12000
## 14 2 2 11 1 2
## 12100 13000 14000 15000 16000 18000
## 1 2 1 7 2 1
## 20000 22000 25000 27000 28000 44000
## 7 1 2 1 3 1
## 45000 50000 52000 60000 1e+05 <NA>
## 2 2 1 2 1 1
## [1] "Frequency table after encoding"
## a528_furniture. 528 Furniture and Fixtures including bedstead, almirah, suitcase, carpet, painti
## 0 0.100000001490116 1 100 200 300
## 2059 1 1 1 2 2
## 350 400 500 600 700 750
## 1 2 7 4 5 1
## 800 900 1000 1100 1200 1300
## 7 2 17 2 9 4
## 1400 1500 1550 1600 1700 1800
## 3 13 1 3 1 2
## 2000 2200 2400 2500 2600 2800
## 32 3 2 8 1 1
## 3000 3200 3500 4000 4500 5000
## 17 1 4 17 2 24
## 5500 5600 6000 6500 7000 7500
## 2 1 11 1 9 2
## 8000 8500 9000 10000 11000 12000
## 14 2 2 11 1 2
## 12100 13000 14000 15000 16000 18000
## 1 2 1 7 2 1
## 20000 22000 25000 27000 28000 40000 or more
## 7 1 2 1 3 9
## <NA>
## 1
# Top-code a529_crockery. The break point is not hard-coded here: it is
# whatever percentile_checker() returns for this variable. In the run recorded
# below the resulting top category is "15000 or more".
mydata <- top_recode(
  variable = "a529_crockery",
  break_point = percentile_checker("a529_crockery"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a529_crockery. 529 Crockery & utensils including stainless steel utensils, casseroles, themos,
## 0 1 8 20 40 60 90 100 110 120 150 175 180 200 220 250 270 280
## 1946 1 2 1 1 2 1 5 1 1 6 1 1 22 1 11 1 1
## 300 350 400 500 510 550 600 650 700 750 800 850 900 950 960 1000 1099 1100
## 19 4 23 52 1 5 23 2 5 1 11 2 1 1 1 52 1 3
## 1200 1300 1400 1500 1600 1800 2000 2200 2400 2500 3000 3500 4000 4500 5000 6000 7000 8000
## 8 2 1 9 1 1 37 1 1 9 14 2 9 2 12 2 2 1
## 10000 11000 12000 14000 15000 16000 20000 40000 50000 60000 80000 <NA>
## 8 1 1 2 8 1 2 1 4 1 1 2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a529_crockery. 529 Crockery & utensils including stainless steel utensils, casseroles, themos,
## 0 1 8 20 40 60 90
## 1946 1 2 1 1 2 1
## 100 110 120 150 175 180 200
## 5 1 1 6 1 1 22
## 220 250 270 280 300 350 400
## 1 11 1 1 19 4 23
## 500 510 550 600 650 700 750
## 52 1 5 23 2 5 1
## 800 850 900 950 960 1000 1099
## 11 2 1 1 1 52 1
## 1100 1200 1300 1400 1500 1600 1800
## 3 8 2 1 9 1 1
## 2000 2200 2400 2500 3000 3500 4000
## 37 1 1 9 14 2 9
## 4500 5000 6000 7000 8000 10000 11000
## 2 12 2 2 1 8 1
## 12000 14000 15000 or more <NA>
## 1 2 18 2
# Top-code a530_cooking: values of 30000 or more are collapsed into a single
# "30000 or more" category. NA is passed as `missing`.
mydata <- top_recode(
  variable = "a530_cooking",
  break_point = 30000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a530_cooking. 530 Cooking and household appliances including electric fan, air conditioners,
## 0 0.100000001490116 1 8 13.5 15.5
## 1735 1 1 1 1 1
## 100 150 200 300 350 400
## 1 1 5 5 4 3
## 450 500 550 600 700 750
## 2 22 1 5 11 3
## 800 850 900 950 1000 1100
## 11 2 3 1 27 10
## 1150 1200 1300 1400 1450 1500
## 3 21 12 4 1 26
## 1600 1700 1750 1800 1900 2000
## 8 1 1 4 1 23
## 2100 2200 2300 2400 2500 2600
## 1 2 1 3 10 1
## 2700 2900 3000 3200 3300 3450
## 2 2 37 2 2 1
## 3500 3700 3800 4000 4300 4350
## 14 1 1 43 1 1
## 4400 4500 4600 4800 5000 5200
## 1 15 2 4 44 1
## 5300 5500 6000 6500 6700 7000
## 1 6 28 3 1 6
## 7200 7500 8000 8500 8900 9000
## 1 4 5 4 1 2
## 9500 9900 10000 11000 11600 12000
## 2 1 21 2 1 16
## 12500 13000 13500 14000 14500 14700
## 3 5 3 12 1 1
## 15000 15800 16000 16500 17000 17500
## 16 1 10 1 4 2
## 18000 19000 20000 21000 22000 22500
## 4 2 5 2 3 1
## 23000 23300 24000 25000 30000 32500
## 3 1 2 2 2 1
## 35000 40000 50000 53000 60000 61500
## 2 2 3 1 1 1
## <NA>
## 2
## [1] "Frequency table after encoding"
## a530_cooking. 530 Cooking and household appliances including electric fan, air conditioners,
## 0 0.100000001490116 1 8 13.5 15.5
## 1735 1 1 1 1 1
## 100 150 200 300 350 400
## 1 1 5 5 4 3
## 450 500 550 600 700 750
## 2 22 1 5 11 3
## 800 850 900 950 1000 1100
## 11 2 3 1 27 10
## 1150 1200 1300 1400 1450 1500
## 3 21 12 4 1 26
## 1600 1700 1750 1800 1900 2000
## 8 1 1 4 1 23
## 2100 2200 2300 2400 2500 2600
## 1 2 1 3 10 1
## 2700 2900 3000 3200 3300 3450
## 2 2 37 2 2 1
## 3500 3700 3800 4000 4300 4350
## 14 1 1 43 1 1
## 4400 4500 4600 4800 5000 5200
## 1 15 2 4 44 1
## 5300 5500 6000 6500 6700 7000
## 1 6 28 3 1 6
## 7200 7500 8000 8500 8900 9000
## 1 4 5 4 1 2
## 9500 9900 10000 11000 11600 12000
## 2 1 21 2 1 16
## 12500 13000 13500 14000 14500 14700
## 3 5 3 12 1 1
## 15000 15800 16000 16500 17000 17500
## 16 1 10 1 4 2
## 18000 19000 20000 21000 22000 22500
## 4 2 5 2 3 1
## 23000 23300 24000 25000 30000 or more <NA>
## 3 1 2 2 13 2
# Top-code a531_tv_radio: values of 17000 or more are collapsed into a single
# "17000 or more" category. NA is passed as `missing`.
mydata <- top_recode(
  variable = "a531_tv_radio",
  break_point = 17000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a531_tv_radio. 531 Goods for Recreation including TV, radio, tape recorder, musical instruments
## 0 1 7.5 8 20 30 100 200 300 400 500 800 850 1000 1200 1500 1730 2000
## 2157 1 1 2 1 1 2 3 1 1 3 3 1 7 3 7 1 10
## 2200 2500 3000 3500 4000 4500 5000 5500 6000 7000 7500 8000 8300 8500 9000 10000 10500 11000
## 2 6 11 3 6 1 21 1 6 8 3 14 1 2 7 13 1 3
## 12000 12100 12500 13000 13200 13500 14000 14500 15000 16000 17000 18000 20000 24000 25000 30000 35000 52000
## 6 1 2 7 1 1 2 1 5 2 1 1 5 1 2 1 1 1
## <NA>
## 3
## [1] "Frequency table after encoding"
## a531_tv_radio. 531 Goods for Recreation including TV, radio, tape recorder, musical instruments
## 0 1 7.5 8 20 30 100
## 2157 1 1 2 1 1 2
## 200 300 400 500 800 850 1000
## 3 1 1 3 3 1 7
## 1200 1500 1730 2000 2200 2500 3000
## 3 7 1 10 2 6 11
## 3500 4000 4500 5000 5500 6000 7000
## 3 6 1 21 1 6 8
## 7500 8000 8300 8500 9000 10000 10500
## 3 14 1 2 7 13 1
## 11000 12000 12100 12500 13000 13200 13500
## 3 6 1 2 7 1 1
## 14000 14500 15000 16000 17000 or more <NA>
## 2 1 5 2 13 3
# Top-code a532_jewelry: values of 450000 or more are collapsed into a single
# "450000 or more" category. NA is passed as `missing`.
mydata <- top_recode(
  variable = "a532_jewelry",
  break_point = 450000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a532_jewelry. 532 Jewelry & ornaments
## 0 1 7 8 100 200 300 392 400 500 550 600 700
## 2045 1 1 2 1 4 8 1 1 8 1 5 2
## 800 850 1000 1300 1400 1500 1800 2000 2300 2500 3000 3500 4000
## 3 1 7 1 1 5 1 5 2 1 10 2 4
## 4100 4200 4500 5000 5500 6000 6500 7000 7200 8000 9000 10000 10300
## 1 1 1 17 1 6 1 4 1 8 3 9 2
## 11000 11500 12000 12500 14000 15000 16000 17000 18000 19500 20000 22000 25000
## 2 2 3 1 1 5 7 1 2 1 17 1 10
## 27000 30000 32000 35000 37000 38000 40000 41000 42000 45000 49000 50000 60000
## 1 16 1 4 1 1 14 1 1 2 1 14 4
## 62500 70000 73000 75000 80000 81000 90000 1e+05 101700 110000 120000 150000 160000
## 1 3 1 1 2 1 1 16 1 1 1 7 1
## 180000 2e+05 250000 3e+05 4e+05 5e+05 6e+05 1e+06 1500000 2500000 <NA>
## 1 12 3 4 1 1 1 1 1 1 3
## [1] "Frequency table after encoding"
## a532_jewelry. 532 Jewelry & ornaments
## 0 1 7 8 100 200 300
## 2045 1 1 2 1 4 8
## 392 400 500 550 600 700 800
## 1 1 8 1 5 2 3
## 850 1000 1300 1400 1500 1800 2000
## 1 7 1 1 5 1 5
## 2300 2500 3000 3500 4000 4100 4200
## 2 1 10 2 4 1 1
## 4500 5000 5500 6000 6500 7000 7200
## 1 17 1 6 1 4 1
## 8000 9000 10000 10300 11000 11500 12000
## 8 3 9 2 2 2 3
## 12500 14000 15000 16000 17000 18000 19500
## 1 1 5 7 1 2 1
## 20000 22000 25000 27000 30000 32000 35000
## 17 1 10 1 16 1 4
## 37000 38000 40000 41000 42000 45000 49000
## 1 1 14 1 1 2 1
## 50000 60000 62500 70000 73000 75000 80000
## 14 4 1 3 1 1 2
## 81000 90000 1e+05 101700 110000 120000 150000
## 1 1 16 1 1 1 7
## 160000 180000 2e+05 250000 3e+05 4e+05 450000 or more
## 1 1 12 3 4 1 5
## <NA>
## 3
# Top-code a533_transport: values of 500000 or more are collapsed into a
# single top category (printed as "5e+05 or more"). NA is passed as `missing`.
mydata <- top_recode(
  variable = "a533_transport",
  break_point = 500000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a533_transport. 533 Personal transport equipment including bicycle, scooter, car, tyres, tubes,
## 0 1 50 100 150 200 240 250 280 300 350 380 400
## 1713 1 1 1 1 19 1 10 1 16 3 1 12
## 450 480 500 600 700 712 750 800 850 900 1000 1100 1200
## 3 1 49 15 8 1 1 14 1 1 39 4 16
## 1300 1400 1500 1600 1700 1750 1800 2000 2100 2200 2300 2400 2500
## 7 3 24 4 1 1 13 71 1 1 1 2 13
## 2700 3000 3200 3500 4000 4400 4500 5000 5500 6000 7000 9000 10000
## 1 45 3 9 27 1 4 30 1 6 5 1 12
## 10900 12000 13000 14000 15000 17000 18000 20000 21000 25000 30000 35000 36000
## 1 3 1 1 5 1 2 4 1 3 2 2 1
## 37225 40000 42000 49000 50000 51000 52000 53000 54000 55000 56000 57000 60000
## 1 6 1 1 8 1 1 2 2 4 2 1 6
## 61000 62000 63000 65000 70000 72000 75000 76600 80000 82000 85000 90000 96000
## 2 1 1 5 12 1 4 1 3 1 2 4 1
## 1e+05 103000 140000 150000 170000 2e+05 3e+05 4e+05 7e+05 750000 1e+06 1500000 <NA>
## 2 1 1 1 1 1 1 1 1 1 1 1 24
## [1] "Frequency table after encoding"
## a533_transport. 533 Personal transport equipment including bicycle, scooter, car, tyres, tubes,
## 0 1 50 100 150 200 240
## 1713 1 1 1 1 19 1
## 250 280 300 350 380 400 450
## 10 1 16 3 1 12 3
## 480 500 600 700 712 750 800
## 1 49 15 8 1 1 14
## 850 900 1000 1100 1200 1300 1400
## 1 1 39 4 16 7 3
## 1500 1600 1700 1750 1800 2000 2100
## 24 4 1 1 13 71 1
## 2200 2300 2400 2500 2700 3000 3200
## 1 1 2 13 1 45 3
## 3500 4000 4400 4500 5000 5500 6000
## 9 27 1 4 30 1 6
## 7000 9000 10000 10900 12000 13000 14000
## 5 1 12 1 3 1 1
## 15000 17000 18000 20000 21000 25000 30000
## 5 1 2 4 1 3 2
## 35000 36000 37225 40000 42000 49000 50000
## 2 1 1 6 1 1 8
## 51000 52000 53000 54000 55000 56000 57000
## 1 1 2 2 4 2 1
## 60000 61000 62000 63000 65000 70000 72000
## 6 2 1 1 5 12 1
## 75000 76600 80000 82000 85000 90000 96000
## 4 1 3 1 2 4 1
## 1e+05 103000 140000 150000 170000 2e+05 3e+05
## 2 1 1 1 1 1 1
## 4e+05 5e+05 or more <NA>
## 1 4 24
# Top-code a534_hearing_aids: values of 3000 or more are collapsed into a
# single "3000 or more" category. The cut-off is passed by name (break_point),
# matching the other top_recode() calls, so the call does not depend on the
# order of top_recode()'s formal arguments. NA is passed as `missing`.
mydata <- top_recode(
  variable = "a534_hearing_aids",
  break_point = 3000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a534_hearing_aids. 534 Therapeutic appliances including glass eye, hearing aids, orthopaedic equipm
## 0 1 8 12 100 120 150 180 200 250 260 300 350 380 400
## 2155 2 1 1 2 1 5 2 5 3 1 7 2 1 3
## 450 500 550 600 650 700 750 800 850 900 1000 1100 1200 1300 1350
## 5 19 4 9 2 13 5 5 1 3 16 2 6 1 1
## 1500 1800 2000 2100 2160 2500 3000 3400 3500 4000 5000 6500 7000 8000 9000
## 10 1 9 1 1 5 4 1 1 4 7 1 2 1 1
## 10000 12000 14000 15000 20000 25000 30000 70000 350000 4e+05 <NA>
## 7 2 1 3 5 1 2 1 1 1 2
## [1] "Frequency table after encoding"
## a534_hearing_aids. 534 Therapeutic appliances including glass eye, hearing aids, orthopaedic equipm
## 0 1 8 12 100 120 150 180
## 2155 2 1 1 2 1 5 2
## 200 250 260 300 350 380 400 450
## 5 3 1 7 2 1 3 5
## 500 550 600 650 700 750 800 850
## 19 4 9 2 13 5 5 1
## 900 1000 1100 1200 1300 1350 1500 1800
## 3 16 2 6 1 1 10 1
## 2000 2100 2160 2500 3000 or more <NA>
## 9 1 1 5 46 2
# Top-code a535_oth_pers_good: values of 20000 or more are collapsed into a
# single "20000 or more" category. The cut-off is passed by name (break_point),
# matching the other top_recode() calls, so the call does not depend on the
# order of top_recode()'s formal arguments. NA is passed as `missing`.
mydata <- top_recode(
  variable = "a535_oth_pers_good",
  break_point = 20000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a535_oth_pers_good. 535 Other personal goods including clocks, watches, PC, telephone, mobile, etc.
## 0 1 100 110 200 250 300 350 360 400 450 500 501 600 650 700 800 900
## 1183 1 7 2 6 1 8 1 1 2 1 16 1 13 1 7 11 6
## 950 1000 1080 1100 1150 1200 1250 1300 1350 1360 1400 1500 1550 1600 1700 1800 1850 1900
## 1 67 1 116 2 92 1 13 1 1 9 122 2 27 2 31 1 1
## 2000 2100 2160 2200 2300 2400 2420 2500 2600 2700 3000 3100 3200 3500 3600 3700 4000 4100
## 76 2 3 9 5 24 1 16 1 3 54 2 3 9 5 1 24 1
## 4150 4300 4500 4800 5000 5500 5600 5850 6000 6100 6300 6500 6600 6700 7000 7100 7200 7500
## 1 1 5 1 49 3 1 1 31 1 1 5 1 1 33 1 1 8
## 7800 8000 8200 8250 8500 8800 9000 9200 9300 9500 10000 10050 10700 11000 12000 12200 12500 12600
## 1 32 1 1 5 1 12 3 1 4 46 1 1 14 16 1 1 1
## 12700 13000 13500 14000 14200 15000 16000 17000 18000 18500 19000 20000 21000 22000 24000 25000 28000 32000
## 1 4 1 3 1 24 5 7 4 1 1 6 2 2 1 1 1 1
## 35000 37000 70000 <NA>
## 1 2 1 4
## [1] "Frequency table after encoding"
## a535_oth_pers_good. 535 Other personal goods including clocks, watches, PC, telephone, mobile, etc.
## 0 1 100 110 200 250 300
## 1183 1 7 2 6 1 8
## 350 360 400 450 500 501 600
## 1 1 2 1 16 1 13
## 650 700 800 900 950 1000 1080
## 1 7 11 6 1 67 1
## 1100 1150 1200 1250 1300 1350 1360
## 116 2 92 1 13 1 1
## 1400 1500 1550 1600 1700 1800 1850
## 9 122 2 27 2 31 1
## 1900 2000 2100 2160 2200 2300 2400
## 1 76 2 3 9 5 24
## 2420 2500 2600 2700 3000 3100 3200
## 1 16 1 3 54 2 3
## 3500 3600 3700 4000 4100 4150 4300
## 9 5 1 24 1 1 1
## 4500 4800 5000 5500 5600 5850 6000
## 5 1 49 3 1 1 31
## 6100 6300 6500 6600 6700 7000 7100
## 1 1 5 1 1 33 1
## 7200 7500 7800 8000 8200 8250 8500
## 1 8 1 32 1 1 5
## 8800 9000 9200 9300 9500 10000 10050
## 1 12 3 1 4 46 1
## 10700 11000 12000 12200 12500 12600 12700
## 1 14 16 1 1 1 1
## 13000 13500 14000 14200 15000 16000 17000
## 4 1 3 1 24 5 7
## 18000 18500 19000 20000 or more <NA>
## 4 1 1 18 4
# Top-code a536_repairs. The cut-off is not hard-coded here: it is whatever
# percentile_checker() returns for this variable. NA is passed as `missing`.
mydata <- top_recode(
  variable = "a536_repairs",
  break_point = percentile_checker("a536_repairs"),
  missing = NA
)
## [1] "Frequency table before encoding"
## a536_repairs. 536 Repair and maintenance of residential buildings, bathroom equipment, etc.
## 0 1 1.39999997615814 3 5 30
## 1892 2 1 1 1 1
## 125 300 400 500 600 1000
## 1 3 1 4 1 5
## 1200 1300 1500 1600 2000 2400
## 1 2 4 1 9 1
## 2500 3000 3500 4000 5000 6000
## 2 10 2 4 21 3
## 7000 8000 8500 9000 10000 11000
## 3 6 1 2 25 2
## 12000 13000 14000 15000 16000 17000
## 8 1 2 17 1 2
## 18000 20000 22000 23000 25000 30000
## 2 28 2 1 13 13
## 31000 33000 35000 40000 47000 50000
## 1 1 5 21 1 42
## 51000 55000 60000 70000 75000 80000
## 1 2 16 11 2 16
## 82000 85000 88000 90000 96000 1e+05
## 1 1 1 1 1 32
## 110000 130000 150000 175000 180000 2e+05
## 2 1 17 1 1 16
## 202000 250000 3e+05 350000 4e+05 475000
## 1 6 10 4 5 1
## 5e+05 500500 6e+05 7e+05 8e+05 9e+05
## 6 1 6 2 2 2
## 950000 1e+06 1050000 1300000 1500000 2e+06
## 1 2 1 1 1 2
## 2500000 <NA>
## 3 5
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## [1] "Frequency table after encoding"
## a536_repairs. 536 Repair and maintenance of residential buildings, bathroom equipment, etc.
## 0 1 1.39999997615814 3 5 30
## 1892 2 1 1 1 1
## 125 300 400 500 600 1000
## 1 3 1 4 1 5
## 1200 1300 1500 1600 2000 2400
## 1 2 4 1 9 1
## 2500 3000 3500 4000 5000 6000
## 2 10 2 4 21 3
## 7000 8000 8500 9000 10000 11000
## 3 6 1 2 25 2
## 12000 13000 14000 15000 16000 17000
## 8 1 2 17 1 2
## 18000 20000 22000 23000 25000 30000
## 2 28 2 1 13 13
## 31000 33000 35000 40000 47000 50000
## 1 1 5 21 1 42
## 51000 55000 60000 70000 75000 80000
## 1 2 16 11 2 16
## 82000 85000 88000 90000 96000 1e+05
## 1 1 1 1 1 32
## 110000 130000 150000 175000 180000 2e+05
## 2 1 17 1 1 16
## 202000 250000 3e+05 350000 4e+05 475000
## 1 6 10 4 5 1
## 5e+05 500500 6e+05 7e+05 8e+05 9e+05 or more
## 6 1 6 2 2 13
## <NA>
## 5
# Top-code a525_schl_book: values of 20000 or more are collapsed into a single
# "20000 or more" category. NA is passed as `missing`.
# NOTE(review): the "before encoding" table for this call already contains a
# "50000 or more" category, so this variable appears to have been top-coded
# earlier in the script; confirm that recoding it a second time is intended.
mydata <- top_recode(
  variable = "a525_schl_book",
  break_point = 20000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a525_schl_book. 525 School books & other educational articles including newspaper, library charg
## 0 10 22 50 100 200 250
## 200 4 1 2 2 5 1
## 300 340 350 365 400 450 490
## 8 1 1 1 8 1 1
## 500 510 550 600 650 700 800
## 36 1 2 12 2 5 8
## 900 960 1000 1050 1100 1200 1300
## 4 1 83 1 2 21 5
## 1360 1400 1409 1500 1600 1700 1800
## 1 3 1 87 5 8 7
## 1900 2000 2160 2170 2200 2300 2340
## 2 183 1 1 3 3 1
## 2400 2500 2600 2700 2800 2900 3000
## 9 49 3 2 1 2 206
## 3100 3200 3276 3300 3350 3400 3500
## 1 2 1 2 1 1 26
## 3505 3600 3700 3800 4000 4200 4360
## 1 6 1 1 159 1 1
## 4455 4500 4700 4900 4998 5000 5200
## 1 33 1 1 1 248 1
## 5300 5500 5650 5750 5800 6000 6200
## 3 11 1 1 1 148 2
## 6230 6300 6500 6600 7000 7500 7600
## 1 1 9 1 71 17 1
## 7700 8000 8500 8600 9000 9200 9500
## 1 108 1 1 44 1 5
## 9600 9800 10000 10500 11000 11200 11700
## 1 1 118 2 14 2 1
## 11800 12000 12500 13000 13500 14000 14500
## 1 53 2 14 1 11 2
## 15000 16000 17000 17500 18000 20000 20009
## 54 8 5 1 8 44 1
## 20500 20900 21000 22000 23000 23700 24000
## 1 1 5 1 1 1 3
## 25000 27000 28000 30000 32000 35000 38000
## 11 1 1 14 3 3 2
## 40000 42000 45000 47000 50000 or more <NA>
## 4 1 2 1 13 29
## [1] "Frequency table after encoding"
## a525_schl_book. 525 School books & other educational articles including newspaper, library charg
## 0 10 22 50 100 200 250
## 200 4 1 2 2 5 1
## 300 340 350 365 400 450 490
## 8 1 1 1 8 1 1
## 500 510 550 600 650 700 800
## 36 1 2 12 2 5 8
## 900 960 1000 1050 1100 1200 1300
## 4 1 83 1 2 21 5
## 1360 1400 1409 1500 1600 1700 1800
## 1 3 1 87 5 8 7
## 1900 2000 2160 2170 2200 2300 2340
## 2 183 1 1 3 3 1
## 2400 2500 2600 2700 2800 2900 3000
## 9 49 3 2 1 2 206
## 3100 3200 3276 3300 3350 3400 3500
## 1 2 1 2 1 1 26
## 3505 3600 3700 3800 4000 4200 4360
## 1 6 1 1 159 1 1
## 4455 4500 4700 4900 4998 5000 5200
## 1 33 1 1 1 248 1
## 5300 5500 5650 5750 5800 6000 6200
## 3 11 1 1 1 148 2
## 6230 6300 6500 6600 7000 7500 7600
## 1 1 9 1 71 17 1
## 7700 8000 8500 8600 9000 9200 9500
## 1 108 1 1 44 1 5
## 9600 9800 10000 10500 11000 11200 11700
## 1 1 118 2 14 2 1
## 11800 12000 12500 13000 13500 14000 14500
## 1 53 2 14 1 11 2
## 15000 16000 17000 17500 18000 20000 or more <NA>
## 54 8 5 1 8 114 29
# !!! No Indirect PII categorical
# !!! No direct demographic variables available in dataset
# !!! No open-ends
# !!! No GPS data
Save the processed dataset, adding "_PU" (Public Use) to the end of the file name
# Write mydata to disk twice, once as a Stata file (.dta) and once as an SPSS
# file (.sav). Both file names are built from `filename` plus the "_PU" suffix.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)
# Build the report title from `filename` so it follows the dataset being
# processed instead of being typed in by hand.
title_var <- paste0(
  "DOL-ILAB SDC - ",
  filename
)