# Start from a clean workspace before processing the dataset.
# `all.names = TRUE` is spelled out in full: writing `all=t` hands ls() the
# base transpose function `t`, which ls() does not treat as TRUE, so
# dot-prefixed (hidden) objects would be left behind.
rm(list = ls(all.names = TRUE))

# Name of the dataset being processed.
filename <- "India_Public Use" # !!!Update filename

# Helper-function script; presumably defines encode_location(),
# percentile_checker() and top_recode() used further down -- confirm in file.
functions_vers <- "functions_1.7.R" # !!!Update helper functions file

# Load the helpers; the package start-up messages logged below are emitted
# while this script is sourced.
source(functions_vers)
## --------
## This is sdcMicro v5.6.0.
## For references, please have a look at citation('sdcMicro')
## Note: since version 5.0.0, the graphical user-interface is a shiny-app that can be started with sdcApp().
## Please submit suggestions and bugs at: https://github.com/sdcTools/sdcMicro/issues
## --------
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: sp
## Checking rgeos availability: TRUE
##
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:sdcMicro':
##
## freq
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/C_Pablo_Diego-Rosell/Documents/R/R-3.6.3/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/C_Pablo_Diego-Rosell/Documents/R/R-3.6.3/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/C_Pablo_Diego-Rosell/Documents/R/R-3.6.3/library/rgdal/proj
## Loading required package: spatstat.data
## Loading required package: spatstat.geom
## spatstat.geom 2.0-1
##
## Attaching package: 'spatstat.geom'
## The following objects are masked from 'package:raster':
##
## area, rotate, shift
## Loading required package: spatstat.core
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
##
## getData
## The following object is masked from 'package:dplyr':
##
## collapse
## Loading required package: rpart
## spatstat.core 2.0-0
## Loading required package: spatstat.linnet
## spatstat.linnet 2.1-1
##
## spatstat 2.0-1 (nickname: 'Caution: contains small parts')
## For an introduction to spatstat, type 'beginner'
## rgeos version: 0.5-5, (SVN revision 640)
## GEOS runtime version: 3.8.0-CAPI-1.13.1
## Linking to sp version: 1.4-4
## Polygon checking: TRUE
##
## Spatial Point Pattern Analysis Code in S-Plus
##
## Version 2 - Spatial and Space-Time analysis
##
## Attaching package: 'splancs'
## The following object is masked from 'package:raster':
##
## zoom
## The following object is masked from 'package:dplyr':
##
## tribble
## Loading required package: spam
## Loading required package: dotCall64
## Loading required package: grid
## Spam version 2.6-0 (2020-12-14) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
## See https://github.com/NCAR/Fields for
## an extensive vignette, other supplements and source code
##
## Attaching package: 'geosphere'
## The following object is masked from 'package:spatstat.geom':
##
## perimeter
##
## Attaching package: 'tibble'
## The following object is masked from 'package:splancs':
##
## tribble
# Visually inspect the variables in "dictionary.csv" and flag each one for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
#!!!Save flagged dictionary in .csv format, add "DatasetReview" to name and continue processing data with subset of flagged variables
# !!!No Direct PII
# !!!No Direct PII-team
# !!!Small Location: include relevant variables, but first check their population size to confirm they are <100,000
# Location variables (Panchayat names) to be replaced with numeric codes.
locvars <- c(
  "b_Panchayat",
  "e_Panchayat"
)

# Encode each location variable; the frequency tables logged below show the
# place names before encoding and the numeric codes after.
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## b_Panchayat. Name Of Panchayat
## Badidih Gadar Jamdar Malda Nimadih Patna Pihra East
## 18789 4750 3110 2627 608 3346 860 2809
## Pihra West Sankh Serua
## 1010 1117 1642
## [1] "Frequency table after encoding"
## b_Panchayat. Name Of Panchayat
## 661 662 663 664 665 666 667 668 669 670 671
## 608 3346 4750 3110 2809 1117 1642 1010 2627 18789 860
## [1] "Frequency table before encoding"
## e_Panchayat. Name Of Panchayat
## Badidih Gadar Jamdar Malda Nimadih Patna Pihra East
## 21879 3659 2577 2301 368 3056 820 2547
## Pihra West Sankh Serua
## 903 1100 1458
## [1] "Frequency table after encoding"
## e_Panchayat. Name Of Panchayat
## 904 905 906 907 908 909 910 911 912 913 914
## 1458 3056 2547 903 21879 2577 368 3659 1100 2301 820
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top code income variables
# Inspect the upper tail of total household monthly earnings
# (presumably returns the 99.5th percentile -- confirm in helper file).
percentile_99.5 <- percentile_checker("earnmonth_tot_HH")

# Top-code with a manually chosen break point (40000) rather than the computed
# value; the logged table shows the top values collapsed into "40000 or more".
mydata <- top_recode(
  variable = "earnmonth_tot_HH",
  break_point = 40000,
  missing = NA
)
## [1] "Frequency table before encoding"
## earnmonth_tot_HH. Total Earnings Per Month Across All Jobs And Hh Members (Inr)
## 0 2 12 40 50
## 19490 7 7 7 25
## 60 75 88 100 120
## 18 6 176 54 10
## 130 150 160 176 180
## 8 54 5 98 6
## 200 219 250 264 300
## 153 4 67 26 48
## 350 352 360 364 400
## 18 7 5 5 47
## 440 460 495 500 550
## 5 6 4 150 9
## 588 600 616 650 657
## 13 59 21 8 5
## 700 710 750 800 820
## 33 11 69 63 8
## 825 826 850 876 880
## 5 6 7 6 5
## 900 920 960 1000 1015
## 51 8 9 296 9
## 1050 1080 1088 1098 1100
## 41 15 3 3 16
## 1120 1125 1150 1160 1176
## 7 3 4 2 7
## 1200 1203 1250 1300 1314
## 40 4 52 8 11
## 1338 1350 1360 1380 1400
## 3 6 8 4 22
## 1410 1420 1440 1450 1470
## 5 6 5 5 4
## 1500 1520 1538 1564 1600
## 277 6 6 9 39
## 1610 1620 1638 1670 1676
## 6 5 4 8 4
## 1700 1750 1752 1780 1800
## 6 33 23 14 47
## 1825 1839.59997558594 1890 1920 1950
## 8 9 9 6 12
## 1952 2000 2080 2100 2150
## 5 363 5 27 9
## 2160 2190 2200 2240 2250
## 4 17 67 5 120
## 2280 2290 2298 2300 2310
## 6 5 9 15 3
## 2340 2350 2380 2400 2410
## 5 6 7 65 4
## 2450 2452.80004882812 2500 2520 2550
## 5 7 183 6 11
## 2600 2625 2628 2650 2652
## 29 9 27 5 5
## 2688 2700 2720 2738 2750
## 2 24 6 3 10
## 2790 2800 2820 2840 2850.39990234375
## 5 39 5 6 5
## 2876 2900 2950 2952 3000
## 6 42 9 7 678
## 3050 3066 3100 3110 3150
## 5 10 21 8 13
## 3200 3250 3300 3320 3350
## 36 23 34 7 10
## 3366 3400 3450 3460 3500
## 7 27 12 5 80
## 3504 3515 3550 3566 3595
## 29 5 6 3 7
## 3600 3628 3650 3676 3700
## 45 7 14 13 44
## 3750 3770 3790 3800 3814
## 204 7 3 33 5
## 3850 3876 3900 3909 3914
## 5 6 23 9 6
## 3940 3942 3950 3960 4000
## 6 17 31 4 513
## 4014 4050 4088 4100 4150
## 7 11 3 19 13
## 4160 4170 4200 4220 4250
## 3 6 21 7 41
## 4260 4276 4300 4320 4331.7998046875
## 7 6 23 8 6
## 4336.2001953125 4340 4350 4374.7998046875 4380
## 8 3 6 3 18
## 4400 4440 4450 4480 4500
## 46 5 28 14 259
## 4520 4550 4552 4590 4600
## 4 27 4 5 50
## 4628 4640 4650 4700 4720
## 7 7 11 4 5
## 4728 4750 4800 4818 4820
## 10 40 58 11 7
## 4840 4845 4850 4875 4876
## 4 3 7 6 13
## 4980 5000 5004 5037 5040
## 9 917 3 3 11
## 5050 5080 5088 5100 5120
## 11 7 5 41 5
## 5126 5150 5175 5176 5180
## 6 17 7 10 6
## 5190 5200 5240 5250 5256
## 16 31 4 62 14
## 5295 5300 5314 5350 5352
## 8 17 6 9 7
## 5356 5360 5378 5400 5420
## 5 7 6 25 6
## 5430 5438 5450 5500 5545
## 12 6 5 104 5
## 5580 5588 5600 5650 5660
## 6 6 22 17 5
## 5694 5700 5710 5720 5740
## 7 22 3 7 4
## 5750 5768 5790 5800 5814
## 39 6 9 21 5
## 5820 5850 5876 5900 5950
## 11 18 8 23 10
## 5974 5980 6000 6006 6010
## 12 6 1001 6 9
## 6028 6050 6066 6088 6100
## 5 6 4 5 19
## 6120 6128 6132 6140 6150
## 8 5 5 15 8
## 6190 6200 6210 6230 6250
## 6 46 5 9 61
## 6300 6314 6320 6338 6354
## 19 7 10 7 9
## 6390 6400 6456 6476 6495
## 7 33 4 7 6
## 6500 6504 6520 6550 6570
## 164 10 10 4 10
## 6600 6610 6620 6630 6647
## 53 6 20 6 7
## 6652 6700 6750 6780 6800
## 7 10 68 8 30
## 6820 6825 6828 6840 6875
## 5 7 5 7 4
## 6880 6890 6900 6914 6932
## 5 8 15 4 6
## 6950 7000 7008 7050 7100
## 5 567 21 12 21
## 7128 7155 7180 7200 7218
## 7 8 5 44 10
## 7250 7257 7270 7274 7300
## 51 6 5 4 20
## 7350 7366 7388 7392 7400
## 11 6 11 6 35
## 7450 7494 7500 7520 7600
## 15 6 247 5 34
## 7628 7650 7660 7680 7700
## 6 12 6 5 10
## 7720 7740 7750 7752 7760
## 4 8 36 8 5
## 7800 7814 7828 7840 7850
## 23 4 5 9 12
## 7875 7884 7900 7906.39990234375 7940
## 6 10 8 4 6
## 7950 7960 7990 8000 8100
## 12 4 7 694 27
## 8150 8160 8200 8230 8240
## 9 5 28 10 6
## 8250 8254 8265 8285 8300
## 75 6 3 4 16
## 8310 8340 8350 8356 8360
## 5 6 23 7 11
## 8370 8400 8450 8452 8460
## 5 53 15 6 6
## 8478 8480 8500 8504 8508
## 6 9 93 5 6
## 8520 8600 8610 8628 8650
## 11 42 5 4 5
## 8675 8700 8727 8744 8750
## 5 5 7 6 62
## 8752 8760 8800 8830 8905
## 7 46 51 9 5
## 8918 8950 8955 8990 9000
## 6 3 5 5 389
## 9007.2001953125 9030 9032 9050 9066
## 5 6 7 14 6
## 9080 9090 9095 9100 9103
## 6 3 5 27 3
## 9120 9180 9190 9198 9200
## 8 9 3 5 29
## 9220 9240 9250 9300 9311.400390625
## 5 6 47 19 4
## 9314 9350 9380 9400 9438
## 14 18 5 17 13
## 9450 9480 9500 9504 9510
## 18 6 67 8 4
## 9600 9604 9650 9678 9700
## 22 6 12 10 31
## 9720 9722 9738 9750 9752
## 4 8 5 12 6
## 9800 9816 9842 9866 9875
## 32 3 9 6 6
## 9884 9888 9900 9910 9936
## 7 5 43 6 6
## 9940 9950 9990 10000 10002
## 6 8 7 512 7
## 10066 10100 10157 10190 10200
## 7 29 8 12 16
## 10210 10226 10230 10250 10294
## 13 2 3 25 7
## 10300 10314 10333 10350 10378
## 6 5 6 10 5
## 10380 10400 10456 10500 10504
## 14 75 3 91 5
## 10530 10550 10600 10625 10670
## 5 7 4 6 5
## 10690 10700 10708 10731 10750
## 9 24 6 4 34
## 10752 10800 10820 10850 10866
## 6 11 5 6 7
## 10900 10928 11000 11040 11050
## 11 12 196 6 12
## 11064 11066 11096 11100 11132
## 8 7 6 27 9
## 11150 11200 11250 11260 11270
## 8 19 7 6 16
## 11276 11295 11300 11314 11375
## 10 4 6 9 8
## 11394 11400 11450 11500 11504
## 6 13 7 113 5
## 11600 11650 11700 11718 11730
## 30 14 15 6 6
## 11738 11750 11800 11820 11826
## 7 29 24 9 8
## 11850 11860 11900 11950 12000
## 15 7 12 8 386
## 12004 12050 12075 12100 12128
## 6 7 10 15 9
## 12152 12198 12200 12234 12250
## 4 7 13 6 30
## 12260 12264 12289 12346.2001953125 12352
## 7 8 5 10 6
## 12388 12400 12432 12466 12500
## 5 20 6 6 65
## 12504 12600 12612 12650 12680
## 4 39 6 7 12
## 12700 12750 12780 12800 12850
## 11 4 4 14 10
## 12910 12939.7998046875 12942 13000 13033.2001953125
## 8 8 8 154 8
## 13045 13100 13180 13200 13230
## 4 29 10 10 6
## 13250 13263 13300 13350 13388
## 26 4 22 6 6
## 13400 13406 13450 13500 13550
## 16 3 10 65 5
## 13600 13604 13628 13675 13700
## 5 6 7 5 39
## 13720 13750 13760 13800 13814
## 6 21 4 5 9
## 13840 13884 13900 13927 13950
## 8 18 7 7 5
## 14000 14019 14050 14100 14140
## 213 5 15 16 9
## 14142 14180 14190 14200 14250
## 6 9 7 20 6
## 14278 14340 14350 14400 14500
## 5 10 22 7 89
## 14510 14560 14600 14700 14750
## 8 20 15 5 14
## 14780 14789 14800 14900 14950
## 4 5 7 18 6
## 15000 15060 15132 15200 15220
## 284 9 11 21 7
## 15240 15250 15260 15350 15500
## 14 5 4 7 49
## 15750 15766 15850 15880 15960
## 29 5 10 7 7
## 16000 16180 16200 16220 16250
## 146 6 7 7 15
## 16260 16328 16400 16500 16560
## 7 5 7 50 9
## 16570 16690 16700 16750 16760
## 14 12 7 13 8
## 16800 16816.80078125 16826 16890 16900
## 11 9 5 8 6
## 17000 17038.19921875 17050 17100 17190
## 74 6 5 13 13
## 17200 17250 17300 17400 17414
## 22 10 6 18 9
## 17440 17500 17520 17530 17600
## 7 39 6 9 27
## 17610 17692 17736 17750 17794
## 11 10 6 7 10
## 17882 17900 18000 18050 18097
## 12 17 138 13 13
## 18150 18180 18250 18380 18400
## 8 5 8 12 20
## 18406 18434 18500 18680 18800
## 7 6 40 11 9
## 18892 19000 19100 19152 19200
## 3 55 7 15 5
## 19250 19314 19500 19568 19600
## 6 7 19 8 12
## 19650 19756 19794 20000 20012
## 5 6 7 155 16
## 20150 20250 20256 20270 20300
## 4 13 10 8 6
## 20400 20500 20550 20630 20700
## 17 43 7 6 8
## 20750 20900 20980 21000 21032
## 16 13 7 106 6
## 21098 21190 21650 22000 22028
## 5 7 17 54 6
## 22064 22146 22200 22250 22300
## 5 9 28 7 5
## 22500 22520 22564 22700 22760
## 42 6 8 15 15
## 22800 23000 23016 23200 23410
## 9 53 12 9 7
## 23500 23750 24000 24200 24500
## 19 27 205 11 7
## 24550 24680 24700 24746 24900
## 6 10 19 6 6
## 25000 25135 25266 25314 25500
## 42 13 12 5 5
## 25600 25657 25916 26000 26076
## 4 13 10 16 7
## 26500 26600 26900 27000 27170
## 10 7 12 43 13
## 27250 27400 27500 27600 27750
## 7 10 13 8 12
## 27800 28000 28006 28200 28300
## 10 29 5 8 4
## 28500 28580 28628 28900 29000
## 9 18 6 4 32
## 29100 29200 29450 29690 29800
## 6 8 10 16 6
## 29900 29950 30000 30100 30482
## 13 11 31 10 6
## 30550 30642 30870 31000 31530
## 6 10 7 23 5
## 31650 32000 32250 32400 32500
## 10 14 10 5 5
## 32640 32800 33000 33500 33548
## 11 9 13 15 8
## 34500 34800 34864 35366 36000
## 13 12 15 4 22
## 36250 36870 37000 37500 37700
## 12 11 32 6 8
## 38000 39290 40000 40500 40700
## 20 11 18 7 5
## 42000 42610 47000 48000 49800
## 15 6 15 12 13
## 49950 50000 50250 50490 53000
## 13 12 13 10 13
## 53500
## 15
## [1] "Frequency table after encoding"
## earnmonth_tot_HH. Total Earnings Per Month Across All Jobs And Hh Members (Inr)
## 0 2 12 40 50
## 19490 7 7 7 25
## 60 75 88 100 120
## 18 6 176 54 10
## 130 150 160 176 180
## 8 54 5 98 6
## 200 219 250 264 300
## 153 4 67 26 48
## 350 352 360 364 400
## 18 7 5 5 47
## 440 460 495 500 550
## 5 6 4 150 9
## 588 600 616 650 657
## 13 59 21 8 5
## 700 710 750 800 820
## 33 11 69 63 8
## 825 826 850 876 880
## 5 6 7 6 5
## 900 920 960 1000 1015
## 51 8 9 296 9
## 1050 1080 1088 1098 1100
## 41 15 3 3 16
## 1120 1125 1150 1160 1176
## 7 3 4 2 7
## 1200 1203 1250 1300 1314
## 40 4 52 8 11
## 1338 1350 1360 1380 1400
## 3 6 8 4 22
## 1410 1420 1440 1450 1470
## 5 6 5 5 4
## 1500 1520 1538 1564 1600
## 277 6 6 9 39
## 1610 1620 1638 1670 1676
## 6 5 4 8 4
## 1700 1750 1752 1780 1800
## 6 33 23 14 47
## 1825 1839.59997558594 1890 1920 1950
## 8 9 9 6 12
## 1952 2000 2080 2100 2150
## 5 363 5 27 9
## 2160 2190 2200 2240 2250
## 4 17 67 5 120
## 2280 2290 2298 2300 2310
## 6 5 9 15 3
## 2340 2350 2380 2400 2410
## 5 6 7 65 4
## 2450 2452.80004882812 2500 2520 2550
## 5 7 183 6 11
## 2600 2625 2628 2650 2652
## 29 9 27 5 5
## 2688 2700 2720 2738 2750
## 2 24 6 3 10
## 2790 2800 2820 2840 2850.39990234375
## 5 39 5 6 5
## 2876 2900 2950 2952 3000
## 6 42 9 7 678
## 3050 3066 3100 3110 3150
## 5 10 21 8 13
## 3200 3250 3300 3320 3350
## 36 23 34 7 10
## 3366 3400 3450 3460 3500
## 7 27 12 5 80
## 3504 3515 3550 3566 3595
## 29 5 6 3 7
## 3600 3628 3650 3676 3700
## 45 7 14 13 44
## 3750 3770 3790 3800 3814
## 204 7 3 33 5
## 3850 3876 3900 3909 3914
## 5 6 23 9 6
## 3940 3942 3950 3960 4000
## 6 17 31 4 513
## 4014 4050 4088 4100 4150
## 7 11 3 19 13
## 4160 4170 4200 4220 4250
## 3 6 21 7 41
## 4260 4276 4300 4320 4331.7998046875
## 7 6 23 8 6
## 4336.2001953125 4340 4350 4374.7998046875 4380
## 8 3 6 3 18
## 4400 4440 4450 4480 4500
## 46 5 28 14 259
## 4520 4550 4552 4590 4600
## 4 27 4 5 50
## 4628 4640 4650 4700 4720
## 7 7 11 4 5
## 4728 4750 4800 4818 4820
## 10 40 58 11 7
## 4840 4845 4850 4875 4876
## 4 3 7 6 13
## 4980 5000 5004 5037 5040
## 9 917 3 3 11
## 5050 5080 5088 5100 5120
## 11 7 5 41 5
## 5126 5150 5175 5176 5180
## 6 17 7 10 6
## 5190 5200 5240 5250 5256
## 16 31 4 62 14
## 5295 5300 5314 5350 5352
## 8 17 6 9 7
## 5356 5360 5378 5400 5420
## 5 7 6 25 6
## 5430 5438 5450 5500 5545
## 12 6 5 104 5
## 5580 5588 5600 5650 5660
## 6 6 22 17 5
## 5694 5700 5710 5720 5740
## 7 22 3 7 4
## 5750 5768 5790 5800 5814
## 39 6 9 21 5
## 5820 5850 5876 5900 5950
## 11 18 8 23 10
## 5974 5980 6000 6006 6010
## 12 6 1001 6 9
## 6028 6050 6066 6088 6100
## 5 6 4 5 19
## 6120 6128 6132 6140 6150
## 8 5 5 15 8
## 6190 6200 6210 6230 6250
## 6 46 5 9 61
## 6300 6314 6320 6338 6354
## 19 7 10 7 9
## 6390 6400 6456 6476 6495
## 7 33 4 7 6
## 6500 6504 6520 6550 6570
## 164 10 10 4 10
## 6600 6610 6620 6630 6647
## 53 6 20 6 7
## 6652 6700 6750 6780 6800
## 7 10 68 8 30
## 6820 6825 6828 6840 6875
## 5 7 5 7 4
## 6880 6890 6900 6914 6932
## 5 8 15 4 6
## 6950 7000 7008 7050 7100
## 5 567 21 12 21
## 7128 7155 7180 7200 7218
## 7 8 5 44 10
## 7250 7257 7270 7274 7300
## 51 6 5 4 20
## 7350 7366 7388 7392 7400
## 11 6 11 6 35
## 7450 7494 7500 7520 7600
## 15 6 247 5 34
## 7628 7650 7660 7680 7700
## 6 12 6 5 10
## 7720 7740 7750 7752 7760
## 4 8 36 8 5
## 7800 7814 7828 7840 7850
## 23 4 5 9 12
## 7875 7884 7900 7906.39990234375 7940
## 6 10 8 4 6
## 7950 7960 7990 8000 8100
## 12 4 7 694 27
## 8150 8160 8200 8230 8240
## 9 5 28 10 6
## 8250 8254 8265 8285 8300
## 75 6 3 4 16
## 8310 8340 8350 8356 8360
## 5 6 23 7 11
## 8370 8400 8450 8452 8460
## 5 53 15 6 6
## 8478 8480 8500 8504 8508
## 6 9 93 5 6
## 8520 8600 8610 8628 8650
## 11 42 5 4 5
## 8675 8700 8727 8744 8750
## 5 5 7 6 62
## 8752 8760 8800 8830 8905
## 7 46 51 9 5
## 8918 8950 8955 8990 9000
## 6 3 5 5 389
## 9007.2001953125 9030 9032 9050 9066
## 5 6 7 14 6
## 9080 9090 9095 9100 9103
## 6 3 5 27 3
## 9120 9180 9190 9198 9200
## 8 9 3 5 29
## 9220 9240 9250 9300 9311.400390625
## 5 6 47 19 4
## 9314 9350 9380 9400 9438
## 14 18 5 17 13
## 9450 9480 9500 9504 9510
## 18 6 67 8 4
## 9600 9604 9650 9678 9700
## 22 6 12 10 31
## 9720 9722 9738 9750 9752
## 4 8 5 12 6
## 9800 9816 9842 9866 9875
## 32 3 9 6 6
## 9884 9888 9900 9910 9936
## 7 5 43 6 6
## 9940 9950 9990 10000 10002
## 6 8 7 512 7
## 10066 10100 10157 10190 10200
## 7 29 8 12 16
## 10210 10226 10230 10250 10294
## 13 2 3 25 7
## 10300 10314 10333 10350 10378
## 6 5 6 10 5
## 10380 10400 10456 10500 10504
## 14 75 3 91 5
## 10530 10550 10600 10625 10670
## 5 7 4 6 5
## 10690 10700 10708 10731 10750
## 9 24 6 4 34
## 10752 10800 10820 10850 10866
## 6 11 5 6 7
## 10900 10928 11000 11040 11050
## 11 12 196 6 12
## 11064 11066 11096 11100 11132
## 8 7 6 27 9
## 11150 11200 11250 11260 11270
## 8 19 7 6 16
## 11276 11295 11300 11314 11375
## 10 4 6 9 8
## 11394 11400 11450 11500 11504
## 6 13 7 113 5
## 11600 11650 11700 11718 11730
## 30 14 15 6 6
## 11738 11750 11800 11820 11826
## 7 29 24 9 8
## 11850 11860 11900 11950 12000
## 15 7 12 8 386
## 12004 12050 12075 12100 12128
## 6 7 10 15 9
## 12152 12198 12200 12234 12250
## 4 7 13 6 30
## 12260 12264 12289 12346.2001953125 12352
## 7 8 5 10 6
## 12388 12400 12432 12466 12500
## 5 20 6 6 65
## 12504 12600 12612 12650 12680
## 4 39 6 7 12
## 12700 12750 12780 12800 12850
## 11 4 4 14 10
## 12910 12939.7998046875 12942 13000 13033.2001953125
## 8 8 8 154 8
## 13045 13100 13180 13200 13230
## 4 29 10 10 6
## 13250 13263 13300 13350 13388
## 26 4 22 6 6
## 13400 13406 13450 13500 13550
## 16 3 10 65 5
## 13600 13604 13628 13675 13700
## 5 6 7 5 39
## 13720 13750 13760 13800 13814
## 6 21 4 5 9
## 13840 13884 13900 13927 13950
## 8 18 7 7 5
## 14000 14019 14050 14100 14140
## 213 5 15 16 9
## 14142 14180 14190 14200 14250
## 6 9 7 20 6
## 14278 14340 14350 14400 14500
## 5 10 22 7 89
## 14510 14560 14600 14700 14750
## 8 20 15 5 14
## 14780 14789 14800 14900 14950
## 4 5 7 18 6
## 15000 15060 15132 15200 15220
## 284 9 11 21 7
## 15240 15250 15260 15350 15500
## 14 5 4 7 49
## 15750 15766 15850 15880 15960
## 29 5 10 7 7
## 16000 16180 16200 16220 16250
## 146 6 7 7 15
## 16260 16328 16400 16500 16560
## 7 5 7 50 9
## 16570 16690 16700 16750 16760
## 14 12 7 13 8
## 16800 16816.80078125 16826 16890 16900
## 11 9 5 8 6
## 17000 17038.19921875 17050 17100 17190
## 74 6 5 13 13
## 17200 17250 17300 17400 17414
## 22 10 6 18 9
## 17440 17500 17520 17530 17600
## 7 39 6 9 27
## 17610 17692 17736 17750 17794
## 11 10 6 7 10
## 17882 17900 18000 18050 18097
## 12 17 138 13 13
## 18150 18180 18250 18380 18400
## 8 5 8 12 20
## 18406 18434 18500 18680 18800
## 7 6 40 11 9
## 18892 19000 19100 19152 19200
## 3 55 7 15 5
## 19250 19314 19500 19568 19600
## 6 7 19 8 12
## 19650 19756 19794 20000 20012
## 5 6 7 155 16
## 20150 20250 20256 20270 20300
## 4 13 10 8 6
## 20400 20500 20550 20630 20700
## 17 43 7 6 8
## 20750 20900 20980 21000 21032
## 16 13 7 106 6
## 21098 21190 21650 22000 22028
## 5 7 17 54 6
## 22064 22146 22200 22250 22300
## 5 9 28 7 5
## 22500 22520 22564 22700 22760
## 42 6 8 15 15
## 22800 23000 23016 23200 23410
## 9 53 12 9 7
## 23500 23750 24000 24200 24500
## 19 27 205 11 7
## 24550 24680 24700 24746 24900
## 6 10 19 6 6
## 25000 25135 25266 25314 25500
## 42 13 12 5 5
## 25600 25657 25916 26000 26076
## 4 13 10 16 7
## 26500 26600 26900 27000 27170
## 10 7 12 43 13
## 27250 27400 27500 27600 27750
## 7 10 13 8 12
## 27800 28000 28006 28200 28300
## 10 29 5 8 4
## 28500 28580 28628 28900 29000
## 9 18 6 4 32
## 29100 29200 29450 29690 29800
## 6 8 10 16 6
## 29900 29950 30000 30100 30482
## 13 11 31 10 6
## 30550 30642 30870 31000 31530
## 6 10 7 23 5
## 31650 32000 32250 32400 32500
## 10 14 10 5 5
## 32640 32800 33000 33500 33548
## 11 9 13 15 8
## 34500 34800 34864 35366 36000
## 13 12 15 4 22
## 36250 36870 37000 37500 37700
## 12 11 32 6 8
## 38000 39290 40000 or more
## 20 11 167
# Inspect the upper tail of b_earnamonth
# (presumably returns the 99.5th percentile -- confirm in helper file).
percentile_99.5 <- percentile_checker("b_earnamonth")
## Warning: Removed 39244 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
# Top-code at the value just computed; the logged table shows the top values
# collapsed into "6000 or more".
mydata <- top_recode(
  variable = "b_earnamonth",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earnamonth. Money Earned: Farming Someone'S Land In A Typical Month, Last 3 Months
## 0 12 50 60 100 140 150 160 200 210 240 250 280 300 350
## 6 1 1 2 5 2 1 1 8 1 4 8 4 23 6
## 360 400 404 420 438 450 480 490 500 560 600 650 657 660 700
## 4 18 1 1 1 5 4 2 40 3 60 1 1 1 24
## 720 735 750 800 840 900 960 1000 1040 1050 1080 1100 1120 1150 1190
## 10 1 45 51 8 17 10 114 1 16 1 2 1 1 1
## 1200 1250 1300 1314 1380 1400 1440 1500 1560 1600 1650 1680 1700 1800 1950
## 92 6 1 2 1 13 6 213 1 33 1 2 2 39 1
## 2000 2100 2160 2200 2250 2300 2400 2500 2628 2700 2800 2850 2880 3000 3200
## 127 4 1 1 95 1 12 27 1 8 1 1 1 106 1
## 3300 3400 3504 3600 3750 3800 3885 4000 4050 4350 4400 4500 4800 4860 5000
## 4 1 1 4 28 1 1 28 1 1 3 13 1 1 7
## 5600 6000 9000 12264 22500 <NA>
## 1 7 1 1 1 39244
## [1] "Frequency table after encoding"
## b_earnamonth. Money Earned: Farming Someone'S Land In A Typical Month, Last 3 Months
## 0 12 50 60 100 140 150
## 6 1 1 2 5 2 1
## 160 200 210 240 250 280 300
## 1 8 1 4 8 4 23
## 350 360 400 404 420 438 450
## 6 4 18 1 1 1 5
## 480 490 500 560 600 650 657
## 4 2 40 3 60 1 1
## 660 700 720 735 750 800 840
## 1 24 10 1 45 51 8
## 900 960 1000 1040 1050 1080 1100
## 17 10 114 1 16 1 2
## 1120 1150 1190 1200 1250 1300 1314
## 1 1 1 92 6 1 2
## 1380 1400 1440 1500 1560 1600 1650
## 1 13 6 213 1 33 1
## 1680 1700 1800 1950 2000 2100 2160
## 2 2 39 1 127 4 1
## 2200 2250 2300 2400 2500 2628 2700
## 1 95 1 12 27 1 8
## 2800 2850 2880 3000 3200 3300 3400
## 1 1 1 106 1 4 1
## 3504 3600 3750 3800 3885 4000 4050
## 1 4 28 1 1 28 1
## 4350 4400 4500 4800 4860 5000 5600
## 1 3 13 1 1 7 1
## 6000 or more <NA>
## 10 39244
# Inspect the upper tail of b_earnbmonth
# (presumably returns the 99.5th percentile -- confirm in helper file).
percentile_99.5 <- percentile_checker("b_earnbmonth")
## Warning: Removed 40622 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
# Top-code with a manually chosen break point (5000) rather than the computed
# value; the logged table shows the result as "5000 or more".
mydata <- top_recode(
  variable = "b_earnbmonth",
  break_point = 5000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earnbmonth. Money Earned: Herding Or Helping With Animals In A Typical Month, Last 3 Months
## 0 4 48 100 150 200 300 400 500 600 640 720 800 900 1000
## 5 1 1 3 1 2 2 4 9 3 1 1 1 1 1
## 1095 1314 1500 2500 3000 4000 6000 <NA>
## 1 3 2 1 1 1 1 40622
## [1] "Frequency table after encoding"
## b_earnbmonth. Money Earned: Herding Or Helping With Animals In A Typical Month, Last 3 Months
## 0 4 48 100 150 200 300
## 5 1 1 3 1 2 2
## 400 500 600 640 720 800 900
## 4 9 3 1 1 1 1
## 1000 1095 1314 1500 2500 3000 4000
## 1 1 3 2 1 1 1
## 5000 or more <NA>
## 1 40622
# Inspect the upper tail of b_earncmonth
# (presumably returns the 99.5th percentile -- confirm in helper file).
percentile_99.5 <- percentile_checker("b_earncmonth")
## Warning: Removed 38640 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
# Top-code with a manually chosen break point (9000) rather than the computed
# value; the logged table shows the result as "9000 or more".
mydata <- top_recode(
  variable = "b_earncmonth",
  break_point = 9000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earncmonth. Money Earned: Mica Mining In A Typical Month, Last 3 Months
## 12 20 30 40 45
## 1 4 1 11 1
## 48 50 60 75 80
## 1 3 10 4 15
## 90 100 120 125 128
## 1 14 11 4 1
## 131.399993896484 140 150 160 175
## 1 2 20 14 3
## 175.199996948242 180 200 210 219
## 2 4 51 1 6
## 240 245 250 262.799987792969 280
## 9 2 41 4 4
## 300 320 350 360 400
## 36 5 10 2 61
## 420 438 450 480 495
## 3 15 9 9 1
## 500 525 540 550 560
## 89 1 2 1 4
## 600 640 650 657 660
## 69 2 1 7 1
## 700 720 750 800 840
## 10 7 52 50 2
## 876 900 960 1000 1050
## 46 26 1 143 5
## 1095 1100 1120 1125 1200
## 1 1 2 1 57
## 1250 1300 1314 1350 1400
## 10 1 65 2 9
## 1440 1500 1533 1600 1700
## 1 134 1 17 2
## 1708.19995117188 1752 1800 1875 1900
## 1 53 12 3 1
## 1920 1971 2000 2080 2100
## 1 1 117 1 1
## 2190 2200 2250 2400 2409
## 85 3 37 8 1
## 2500 2600 2628 2700 2800
## 55 7 50 3 2
## 3000 3066 3200 3285 3300
## 90 23 1 3 3
## 3500 3504 3600 3750 3900
## 1 26 7 36 4
## 3942 4000 4250 4350 4380
## 2 42 4 1 26
## 4400 4500 4800 5000 5100
## 4 10 4 20 14
## 5200 5256 5400 5500 5600
## 2 8 2 1 2
## 6000 6250 6500 6570 7500
## 7 2 1 4 1
## 7800 8100 8760 9000 10000
## 1 1 3 1 2
## 11500 12000 13140 15000 20000
## 1 1 1 2 1
## 65700 <NA>
## 1 38640
## [1] "Frequency table after encoding"
## b_earncmonth. Money Earned: Mica Mining In A Typical Month, Last 3 Months
## 12 20 30 40 45
## 1 4 1 11 1
## 48 50 60 75 80
## 1 3 10 4 15
## 90 100 120 125 128
## 1 14 11 4 1
## 131.399993896484 140 150 160 175
## 1 2 20 14 3
## 175.199996948242 180 200 210 219
## 2 4 51 1 6
## 240 245 250 262.799987792969 280
## 9 2 41 4 4
## 300 320 350 360 400
## 36 5 10 2 61
## 420 438 450 480 495
## 3 15 9 9 1
## 500 525 540 550 560
## 89 1 2 1 4
## 600 640 650 657 660
## 69 2 1 7 1
## 700 720 750 800 840
## 10 7 52 50 2
## 876 900 960 1000 1050
## 46 26 1 143 5
## 1095 1100 1120 1125 1200
## 1 1 2 1 57
## 1250 1300 1314 1350 1400
## 10 1 65 2 9
## 1440 1500 1533 1600 1700
## 1 134 1 17 2
## 1708.19995117188 1752 1800 1875 1900
## 1 53 12 3 1
## 1920 1971 2000 2080 2100
## 1 1 117 1 1
## 2190 2200 2250 2400 2409
## 85 3 37 8 1
## 2500 2600 2628 2700 2800
## 55 7 50 3 2
## 3000 3066 3200 3285 3300
## 90 23 1 3 3
## 3500 3504 3600 3750 3900
## 1 26 7 36 4
## 3942 4000 4250 4350 4380
## 2 42 4 1 26
## 4400 4500 4800 5000 5100
## 4 10 4 20 14
## 5200 5256 5400 5500 5600
## 2 8 2 1 2
## 6000 6250 6500 6570 7500
## 7 2 1 4 1
## 7800 8100 8760 9000 or more <NA>
## 1 1 3 10 38640
# Inspect the upper tail of b_earndmonth
# (presumably returns the 99.5th percentile -- confirm in helper file).
percentile_99.5 <- percentile_checker("b_earndmonth")
## Warning: Removed 39790 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
# Top-code at the value just computed; the logged table shows the top values
# (including a few extremely large raw entries) collapsed into "12000 or more".
mydata <- top_recode(
  variable = "b_earndmonth",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earndmonth. Money Earned: Construction, Building Homes In A Typical Month, Last 3 Months
## 180 200 240 250 300 350 360 375 380 400
## 1 3 1 4 5 1 1 1 1 10
## 450 480 500 600 700 720 750 800 840 900
## 2 2 14 23 5 1 26 30 1 12
## 960 1000 1050 1080 1120 1125 1200 1250 1275 1350
## 2 67 7 1 1 1 35 26 1 1
## 1400 1500 1600 1750 1752 1800 1870 2000 2100 2190
## 7 50 33 4 1 6 1 90 2 2
## 2250 2400 2450 2500 2600 2625 2628 2750 2800 3000
## 27 18 1 34 1 1 1 1 3 77
## 3066 3120 3200 3300 3400 3450 3500 3600 3750 3900
## 1 1 3 1 1 2 3 8 44 1
## 4000 4250 4380 4400 4500 4550 4800 5000 5100 5200
## 26 1 2 2 11 1 1 32 3 2
## 5250 5256 5400 5500 5600 6000 6132 6250 6500 7000
## 2 1 2 3 1 18 2 11 3 4
## 7008 7446 7500 8000 8750 9000 9198 10000 10400 10500
## 1 1 11 9 1 1 2 2 2 1
## 10512 11250 12000 11111000 17777600 31110800 <NA>
## 1 1 2 1 2 1 39790
## [1] "Frequency table after encoding"
## b_earndmonth. Money Earned: Construction, Building Homes In A Typical Month, Last 3 Months
## 180 200 240 250 300 350
## 1 3 1 4 5 1
## 360 375 380 400 450 480
## 1 1 1 10 2 2
## 500 600 700 720 750 800
## 14 23 5 1 26 30
## 840 900 960 1000 1050 1080
## 1 12 2 67 7 1
## 1120 1125 1200 1250 1275 1350
## 1 1 35 26 1 1
## 1400 1500 1600 1750 1752 1800
## 7 50 33 4 1 6
## 1870 2000 2100 2190 2250 2400
## 1 90 2 2 27 18
## 2450 2500 2600 2625 2628 2750
## 1 34 1 1 1 1
## 2800 3000 3066 3120 3200 3300
## 3 77 1 1 3 1
## 3400 3450 3500 3600 3750 3900
## 1 2 3 8 44 1
## 4000 4250 4380 4400 4500 4550
## 26 1 2 2 11 1
## 4800 5000 5100 5200 5250 5256
## 1 32 3 2 2 1
## 5400 5500 5600 6000 6132 6250
## 2 3 1 18 2 11
## 6500 7000 7008 7446 7500 8000
## 3 4 1 1 11 9
## 8750 9000 9198 10000 10400 10500
## 1 1 2 2 2 1
## 10512 11250 12000 or more <NA>
## 1 1 6 39790
# Inspect the upper tail of b_earnfmonth
# (presumably returns the 99.5th percentile -- confirm in helper file).
percentile_99.5 <- percentile_checker("b_earnfmonth")
## Warning: Removed 40399 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
# Top-code with a manually chosen break point (7000) rather than the computed
# value; the logged table shows the result as "7000 or more".
mydata <- top_recode(
  variable = "b_earnfmonth",
  break_point = 7000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earnfmonth. Money Earned: Making Bricks In A Typical Month, Last 3 Months
## 100 180 200 225 240 250 300 400 420 438 450 490 500 600 630
## 1 1 5 1 3 1 10 8 2 1 4 1 9 13 1
## 657 750 800 875 876 900 1000 1050 1095 1200 1250 1314 1400 1500 1600
## 1 9 5 1 1 5 15 3 2 8 2 1 2 22 3
## 1752 2000 2100 2190 2250 2400 2500 2550 2628 2800 2850 3000 3066 3300 3500
## 3 17 2 10 8 2 5 1 2 1 1 17 2 1 1
## 3750 3900 4000 4380 4400 4500 4800 5000 5200 5256 5400 5600 5750 6000 6250
## 7 2 8 3 1 1 2 5 1 1 1 1 1 4 6
## 6500 6570 6600 7020 7500 8000 22500 <NA>
## 1 3 1 1 3 1 1 40399
## [1] "Frequency table after encoding"
## b_earnfmonth. Money Earned: Making Bricks In A Typical Month, Last 3 Months
## 100 180 200 225 240 250 300
## 1 1 5 1 3 1 10
## 400 420 438 450 490 500 600
## 8 2 1 4 1 9 13
## 630 657 750 800 875 876 900
## 1 1 9 5 1 1 5
## 1000 1050 1095 1200 1250 1314 1400
## 15 3 2 8 2 1 2
## 1500 1600 1752 2000 2100 2190 2250
## 22 3 3 17 2 10 8
## 2400 2500 2550 2628 2800 2850 3000
## 2 5 1 2 1 1 17
## 3066 3300 3500 3750 3900 4000 4380
## 2 1 1 7 2 8 3
## 4400 4500 4800 5000 5200 5256 5400
## 1 1 2 5 1 1 1
## 5600 5750 6000 6250 6500 6570 6600
## 1 1 4 6 1 3 1
## 7000 or more <NA>
## 6 40399
# Inspect the upper tail of b_earngmonth
# (presumably returns the 99.5th percentile -- confirm in helper file).
percentile_99.5 <- percentile_checker("b_earngmonth")
## Warning: Removed 40447 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
# Top-code at the value just computed; the logged table shows the top values
# collapsed into "4500 or more".
mydata <- top_recode(
  variable = "b_earngmonth",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earngmonth. Money Earned: Collecting Wood In A Typical Month, Last 3 Months
## 0 40 50 60 80
## 3 1 1 1 2
## 90 120 150 160 175.199996948242
## 1 3 2 1 1
## 180 200 219 240 250
## 1 7 7 1 1
## 262.799987792969 300 350.399993896484 360 400
## 1 12 1 1 9
## 438 480 500 600 657
## 9 2 23 5 1
## 700 750 788.400024414062 800 876
## 6 2 1 7 13
## 900 950 1000 1051.19995117188 1095
## 2 1 10 1 10
## 1200 1250 1314 1350 1500
## 4 1 6 1 16
## 1650 1750 1752 1800 1839.59997558594
## 1 2 6 1 1
## 2000 2100 2190 2200 2250
## 12 1 3 1 1
## 2400 3000 3066 3300 4000
## 1 5 1 1 3
## 4500 5000 <NA>
## 2 1 40447
## [1] "Frequency table after encoding"
## b_earngmonth. Money Earned: Collecting Wood In A Typical Month, Last 3 Months
## 0 40 50 60 80
## 3 1 1 1 2
## 90 120 150 160 175.199996948242
## 1 3 2 1 1
## 180 200 219 240 250
## 1 7 7 1 1
## 262.799987792969 300 350.399993896484 360 400
## 1 12 1 1 9
## 438 480 500 600 657
## 9 2 23 5 1
## 700 750 788.400024414062 800 876
## 6 2 1 7 13
## 900 950 1000 1051.19995117188 1095
## 2 1 10 1 10
## 1200 1250 1314 1350 1500
## 4 1 6 1 16
## 1650 1750 1752 1800 1839.59997558594
## 1 2 6 1 1
## 2000 2100 2190 2200 2250
## 12 1 3 1 1
## 2400 3000 3066 3300 4000
## 1 5 1 1 3
## 4500 or more <NA>
## 3 40447
# b_earnhmonth: the helper's return value is stored but not used here; the
# top-code threshold is fixed by hand at 10000.
# NOTE(review): rationale for the manual threshold is not recorded -- confirm.
percentile_99.5 <- percentile_checker("b_earnhmonth")
## Warning: Removed 40561 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_earnhmonth",
  break_point = 10000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earnhmonth. Money Earned: Domestic Work For Someone Else In A Typical Month, Last 3 Months
## 100 150 300 400 500 540 560 600 750 800 810 1000 1050 1200 1250
## 1 1 3 1 4 1 1 5 7 2 1 3 1 2 1
## 1500 1800 2000 2190 2250 2400 2500 2628 2700 3000 3500 3600 3750 4000 4200
## 8 2 8 1 1 4 4 1 1 9 1 1 7 4 1
## 4380 4500 5000 5200 6000 6250 7500 8000 12000 80000 <NA>
## 1 2 8 1 2 1 1 2 1 1 40561
## [1] "Frequency table after encoding"
## b_earnhmonth. Money Earned: Domestic Work For Someone Else In A Typical Month, Last 3 Months
## 100 150 300 400 500 540
## 1 1 3 1 4 1
## 560 600 750 800 810 1000
## 1 5 7 2 1 3
## 1050 1200 1250 1500 1800 2000
## 1 2 1 8 2 8
## 2190 2250 2400 2500 2628 2700
## 1 1 4 4 1 1
## 3000 3500 3600 3750 4000 4200
## 9 1 1 7 4 1
## 4380 4500 5000 5200 6000 6250
## 1 2 8 1 2 1
## 7500 8000 10000 or more <NA>
## 1 2 2 40561
# b_earnimonth: helper result is stored but the threshold is set manually;
# values of 8000 and above are collapsed into one top category.
percentile_99.5 <- percentile_checker("b_earnimonth")
## Warning: Removed 40587 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_earnimonth",
  break_point = 8000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earnimonth. Money Earned: Craft Work In A Typical Month, Last 3 Months
## 50 100 120 140 200 250 300 360 400 438 450 500 800 876 1000
## 1 1 1 1 2 1 3 1 1 2 2 6 2 1 10
## 1200 1250 1300 1314 1500 1752 1800 2000 2190 2500 2600 3000 3600 3750 4000
## 2 3 1 3 1 4 1 5 2 1 1 4 1 1 1
## 5000 6000 6400 7000 8000 8760 9000 <NA>
## 3 6 1 1 2 1 1 40587
## [1] "Frequency table after encoding"
## b_earnimonth. Money Earned: Craft Work In A Typical Month, Last 3 Months
## 50 100 120 140 200 250 300
## 1 1 1 1 2 1 3
## 360 400 438 450 500 800 876
## 1 1 2 2 6 2 1
## 1000 1200 1250 1300 1314 1500 1752
## 10 2 3 1 3 1 4
## 1800 2000 2190 2500 2600 3000 3600
## 1 5 2 1 1 4 1
## 3750 4000 5000 6000 6400 7000 8000 or more
## 1 1 3 6 1 1 4
## <NA>
## 40587
# b_earnjmonth: helper result is stored but the threshold is set manually;
# values of 180000 and above are collapsed into one top category.
# NOTE(review): rationale for the manual threshold is not recorded -- confirm.
percentile_99.5 <- percentile_checker("b_earnjmonth")
## Warning: Removed 38070 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_earnjmonth",
  break_point = 180000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_earnjmonth. Money Earned: Other Paid Jobs In A Typical Month, Last 3 Months
## 0 3 75 80 100 200 240 250 300 400
## 4 1 1 1 1 7 1 2 3 2
## 438 450 500 600 625 640 700 720 750 800
## 1 1 18 3 1 1 4 1 3 4
## 840 876 900 1000 1095 1100 1200 1250 1314 1500
## 1 2 2 45 1 4 11 2 2 50
## 1533 1550 1560 1600 1680 1700 1750 1752 1800 1850
## 1 1 1 1 1 1 1 2 6 1
## 1890 1944 2000 2190 2200 2250 2400 2500 2600 2800
## 1 1 64 2 2 6 8 32 5 2
## 2860 2916 3000 3066 3120 3125 3150 3200 3300 3360
## 1 3 178 2 1 1 1 1 1 1
## 3500 3504 3600 3700 3750 3900 3942 4000 4350 4380
## 20 1 13 4 20 8 1 197 1 4
## 4400 4500 4600 4680 4800 4940 5000 5200 5250 5256
## 1 40 1 1 9 1 443 7 2 3
## 5360 5400 5500 5600 5800 6000 6132 6250 6500 6600
## 1 1 10 4 1 394 1 11 13 2
## 6700 6750 6800 7000 7200 7400 7500 7700 7800 8000
## 2 1 7 180 2 12 50 1 7 203
## 8100 8200 8325 8400 8500 8750 8760 8800 9000 9100
## 2 1 1 3 1 1 2 1 83 1
## 9198 10000 10400 10500 10800 11000 11200 12000 12500 13000
## 1 122 5 1 2 4 2 59 3 7
## 13140 14000 15000 15600 16000 17000 18000 20000 21000 22000
## 1 1 33 1 1 1 6 6 1 1
## 24000 25000 26000 27000 28000 29000 30000 35000 40000 45000
## 3 4 3 1 2 1 5 1 1 1
## 52000 60000 70000 75000 90000 91000 104000 120000 125000 130000
## 1 4 1 2 1 1 1 1 3 3
## 131400 180000 182000 2e+05 210000 225000 250000 270000 390000 13333200
## 1 1 1 1 1 1 1 1 1 4
## 17777600 26666400 <NA>
## 1 1 38070
## [1] "Frequency table after encoding"
## b_earnjmonth. Money Earned: Other Paid Jobs In A Typical Month, Last 3 Months
## 0 3 75 80 100 200
## 4 1 1 1 1 7
## 240 250 300 400 438 450
## 1 2 3 2 1 1
## 500 600 625 640 700 720
## 18 3 1 1 4 1
## 750 800 840 876 900 1000
## 3 4 1 2 2 45
## 1095 1100 1200 1250 1314 1500
## 1 4 11 2 2 50
## 1533 1550 1560 1600 1680 1700
## 1 1 1 1 1 1
## 1750 1752 1800 1850 1890 1944
## 1 2 6 1 1 1
## 2000 2190 2200 2250 2400 2500
## 64 2 2 6 8 32
## 2600 2800 2860 2916 3000 3066
## 5 2 1 3 178 2
## 3120 3125 3150 3200 3300 3360
## 1 1 1 1 1 1
## 3500 3504 3600 3700 3750 3900
## 20 1 13 4 20 8
## 3942 4000 4350 4380 4400 4500
## 1 197 1 4 1 40
## 4600 4680 4800 4940 5000 5200
## 1 1 9 1 443 7
## 5250 5256 5360 5400 5500 5600
## 2 3 1 1 10 4
## 5800 6000 6132 6250 6500 6600
## 1 394 1 11 13 2
## 6700 6750 6800 7000 7200 7400
## 2 1 7 180 2 12
## 7500 7700 7800 8000 8100 8200
## 50 1 7 203 2 1
## 8325 8400 8500 8750 8760 8800
## 1 3 1 1 2 1
## 9000 9100 9198 10000 10400 10500
## 83 1 1 122 5 1
## 10800 11000 11200 12000 12500 13000
## 2 4 2 59 3 7
## 13140 14000 15000 15600 16000 17000
## 1 1 33 1 1 1
## 18000 20000 21000 22000 24000 25000
## 6 6 1 1 3 4
## 26000 27000 28000 29000 30000 35000
## 3 1 2 1 5 1
## 40000 45000 52000 60000 70000 75000
## 1 1 1 4 1 2
## 90000 91000 104000 120000 125000 130000
## 1 1 1 1 3 3
## 131400 180000 or more <NA>
## 1 14 38070
# b_rentexpnew: helper result is stored but the threshold is set manually;
# values of 4000 and above are collapsed into one top category.
percentile_99.5 <- percentile_checker("b_rentexpnew")
## Warning: Removed 19885 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_rentexpnew",
  break_point = 4000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_rentexpnew. Hh Monthly Expense: Rent Or Mortgage
## 0 1 2 3 8 10 20 30 40 50 60 80 100 188 200
## 18746 9 4 7 70 7 3 12 5 13 4 4 21 17 53
## 250 288 300 350 400 500 600 700 750 800 900 1000 1200 1500 1800
## 25 5 78 15 34 374 46 40 5 28 15 476 22 105 6
## 2000 2500 3000 3800 4000 5000 6000 9000 <NA>
## 269 43 115 11 40 11 30 15 19885
## [1] "Frequency table after encoding"
## b_rentexpnew. Hh Monthly Expense: Rent Or Mortgage
## 0 1 2 3 8 10 20
## 18746 9 4 7 70 7 3
## 30 40 50 60 80 100 188
## 12 5 13 4 4 21 17
## 200 250 288 300 350 400 500
## 53 25 5 78 15 34 374
## 600 700 750 800 900 1000 1200
## 46 40 5 28 15 476 22
## 1500 1800 2000 2500 3000 3800 4000 or more
## 105 6 269 43 115 11 96
## <NA>
## 19885
# b_foodexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_foodexpnew")
## Warning: Removed 18911 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_foodexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_foodexpnew. Hh Monthly Expense: Food That You Buy
## 0 29 85 100 150 200 300 400 500 508 600 700 800 900 1000
## 7 7 3 20 23 85 68 30 244 6 99 46 134 25 1852
## 1008 1100 1200 1250 1300 1400 1500 1580 1600 1700 1800 1900 2000 2200 2300
## 5 36 479 21 32 13 1542 4 59 8 47 5 3973 71 14
## 2400 2500 2700 2800 3000 3008 3200 3300 3500 4000 4500 4800 5000 5500 6000
## 15 1376 25 14 3549 4 4 3 574 2404 109 6 2718 6 890
## 6100 6500 7000 7500 8000 9000 10000 12000 15000 20000 <NA>
## 6 23 191 4 207 32 486 46 47 60 18911
## [1] "Frequency table after encoding"
## b_foodexpnew. Hh Monthly Expense: Food That You Buy
## 0 29 85 100 150 200
## 7 7 3 20 23 85
## 300 400 500 508 600 700
## 68 30 244 6 99 46
## 800 900 1000 1008 1100 1200
## 134 25 1852 5 36 479
## 1250 1300 1400 1500 1580 1600
## 21 32 13 1542 4 59
## 1700 1800 1900 2000 2200 2300
## 8 47 5 3973 71 14
## 2400 2500 2700 2800 3000 3008
## 15 1376 25 14 3549 4
## 3200 3300 3500 4000 4500 4800
## 4 3 574 2404 109 6
## 5000 5500 6000 6100 6500 7000
## 2718 6 890 6 23 191
## 7500 8000 9000 10000 12000 or more <NA>
## 4 207 32 486 153 18911
# b_clothesexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_clothesexpnew")
## Warning: Removed 18984 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_clothesexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_clothesexpnew. Hh Monthly Expense: Clothes
## 0 8 10 12 13 15 20 40 50 70 80 84 90 100 110
## 292 5 25 4 6 8 15 5 62 6 4 6 9 1102 6
## 120 125 130 140 150 160 166 170 175 180 185 200 220 225 240
## 41 21 10 28 718 8 17 4 7 65 4 3801 6 8 13
## 250 275 280 300 320 325 330 333 350 360 380 400 416 450 480
## 610 7 24 1984 10 13 6 15 101 5 11 1682 70 207 9
## 500 550 580 600 650 700 800 833 840 850 900 950 1000 1020 1200
## 4205 46 6 874 22 401 787 7 5 38 60 95 1975 5 280
## 1300 1400 1500 1800 2000 2500 2600 3000 3500 4000 5000 6000 7000 8000 8500
## 23 13 279 10 559 50 6 209 18 84 322 46 15 85 6
## 9000 10000 12000 15000 20000 <NA>
## 24 9 35 15 10 18984
## [1] "Frequency table after encoding"
## b_clothesexpnew. Hh Monthly Expense: Clothes
## 0 8 10 12 13 15 20
## 292 5 25 4 6 8 15
## 40 50 70 80 84 90 100
## 5 62 6 4 6 9 1102
## 110 120 125 130 140 150 160
## 6 41 21 10 28 718 8
## 166 170 175 180 185 200 220
## 17 4 7 65 4 3801 6
## 225 240 250 275 280 300 320
## 8 13 610 7 24 1984 10
## 325 330 333 350 360 380 400
## 13 6 15 101 5 11 1682
## 416 450 480 500 550 580 600
## 70 207 9 4205 46 6 874
## 650 700 800 833 840 850 900
## 22 401 787 7 5 38 60
## 950 1000 1020 1200 1300 1400 1500
## 95 1975 5 280 23 13 279
## 1800 2000 2500 2600 3000 3500 4000
## 10 559 50 6 209 18 84
## 5000 6000 7000 8000 or more <NA>
## 322 46 15 184 18984
# b_travelexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_travelexpnew")
## Warning: Removed 18956 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_travelexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_travelexpnew. Hh Monthly Expense: Travel/Transporation
## 0 1 10 15 20 25 30 40 50 60 70 80 90 100 108
## 509 10 33 11 179 10 57 104 1256 101 14 302 6 4394 15
## 120 125 130 140 150 155 160 175 180 190 200 208 220 240 250
## 164 14 6 10 1184 6 104 14 19 8 6062 23 6 20 315
## 280 300 308 350 380 400 450 500 580 600 700 800 1000 1008 1200
## 42 1549 4 35 7 668 6 2895 5 215 50 17 859 7 38
## 1500 2000 2500 3000 5000 <NA>
## 118 166 9 52 14 18956
## [1] "Frequency table after encoding"
## b_travelexpnew. Hh Monthly Expense: Travel/Transporation
## 0 1 10 15 20 25 30
## 509 10 33 11 179 10 57
## 40 50 60 70 80 90 100
## 104 1256 101 14 302 6 4394
## 108 120 125 130 140 150 155
## 15 164 14 6 10 1184 6
## 160 175 180 190 200 208 220
## 104 14 19 8 6062 23 6
## 240 250 280 300 308 350 380
## 20 315 42 1549 4 35 7
## 400 450 500 580 600 700 800
## 668 6 2895 5 215 50 17
## 1000 1008 1200 1500 2000 or more <NA>
## 859 7 38 118 241 18956
# b_healthexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_healthexpnew")
## Warning: Removed 18891 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_healthexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_healthexpnew. Hh Monthly Expense: Health Such As For Doctors, Medicines And Drugs
## 0 3 10 20 50 60 70 80 90 100 108 110 125 135 150
## 391 9 8 3 157 12 7 14 4 1495 6 6 5 3 589
## 175 180 200 208 250 280 300 308 350 380 400 408 450 500 508
## 3 8 3529 15 223 19 1441 6 9 5 904 4 14 5067 5
## 540 550 580 600 700 750 800 850 900 950 1000 1100 1200 1300 1400
## 7 10 7 475 176 4 277 7 85 28 3248 14 234 9 6
## 1500 1600 1800 1900 2000 2500 2600 3000 3200 3500 4000 5000 6000 7000 8000
## 473 11 12 6 1259 199 12 551 9 21 182 275 55 43 49
## 9000 10000 12000 15000 27250 <NA>
## 8 40 5 11 8 18891
## [1] "Frequency table after encoding"
## b_healthexpnew. Hh Monthly Expense: Health Such As For Doctors, Medicines And Drugs
## 0 3 10 20 50 60 70
## 391 9 8 3 157 12 7
## 80 90 100 108 110 125 135
## 14 4 1495 6 6 5 3
## 150 175 180 200 208 250 280
## 589 3 8 3529 15 223 19
## 300 308 350 380 400 408 450
## 1441 6 9 5 904 4 14
## 500 508 540 550 580 600 700
## 5067 5 7 10 7 475 176
## 750 800 850 900 950 1000 1100
## 4 277 7 85 28 3248 14
## 1200 1300 1400 1500 1600 1800 1900
## 234 9 6 473 11 12 6
## 2000 2500 2600 3000 3200 3500 4000
## 1259 199 12 551 9 21 182
## 5000 6000 7000 8000 or more <NA>
## 275 55 43 121 18891
# b_schoolexpnew: helper result is stored but the threshold is set manually;
# values of 12000 and above are collapsed into one top category.
# NOTE(review): rationale for the manual threshold is not recorded -- confirm.
percentile_99.5 <- percentile_checker("b_schoolexpnew")
## Warning: Removed 18905 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_schoolexpnew",
  break_point = 12000,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_schoolexpnew. Hh Monthly Expense: Schooling Or Education
## 0 1 2 8 10 20 25 30 40 50 60
## 2203 5 10 4 8 158 8 20 7 893 25
## 65 70 100 108 120 130 140 150 200 208 210
## 6 6 2139 4 16 6 10 385 2730 6 6
## 250 270 280 300 308 350 400 450 500 508 550
## 227 5 5 1631 7 19 740 28 3483 4 12
## 560 580 600 670 700 750 800 900 1000 1100 1200
## 6 6 669 6 280 5 349 60 2277 6 149
## 1300 1500 1600 1700 1800 2000 2300 2400 2500 2600 3000
## 11 528 23 6 5 858 6 8 145 9 443
## 3100 3500 4000 5000 5001 5600 6000 7000 7500 8000 9000
## 4 11 217 358 12 8 87 89 7 51 24
## 10000 10300 12000 13000 14000 15000 18000 20000 23000 35000 1e+05
## 96 4 15 12 6 20 7 31 5 5 5
## 100888 500200 1600300 <NA>
## 5 6 7 18905
## [1] "Frequency table after encoding"
## b_schoolexpnew. Hh Monthly Expense: Schooling Or Education
## 0 1 2 8 10 20
## 2203 5 10 4 8 158
## 25 30 40 50 60 65
## 8 20 7 893 25 6
## 70 100 108 120 130 140
## 6 2139 4 16 6 10
## 150 200 208 210 250 270
## 385 2730 6 6 227 5
## 280 300 308 350 400 450
## 5 1631 7 19 740 28
## 500 508 550 560 580 600
## 3483 4 12 6 6 669
## 670 700 750 800 900 1000
## 6 280 5 349 60 2277
## 1100 1200 1300 1500 1600 1700
## 6 149 11 528 23 6
## 1800 2000 2300 2400 2500 2600
## 5 858 6 8 145 9
## 3000 3100 3500 4000 5000 5001
## 443 4 11 217 358 12
## 5600 6000 7000 7500 8000 9000
## 8 87 89 7 51 24
## 10000 10300 12000 or more <NA>
## 96 4 124 18905
# b_utilityexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_utilityexpnew")
## Warning: Removed 19402 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_utilityexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_utilityexpnew. Hh Monthly Expense: Home Utilities Such As Electricity, Water, Cable, Etc.
## 0 8 12 20 25 28 30 40 45 50 60 62 70 80 85
## 14176 75 5 12 30 9 46 76 6 45 102 16 49 20 3
## 100 102 104 105 110 120 122 124 125 126 128 130 135 140 142
## 132 25 9 8 20 5027 180 30 132 14 12 104 5 88 5
## 148 150 160 170 180 200 220 230 240 250 300 370 400 500 520
## 5 253 23 9 25 171 25 13 12 26 34 19 46 59 6
## 600 700 1000 1500 2000 3000 5000 12010 12200 <NA>
## 11 11 4 6 26 7 4 5 5 19402
## [1] "Frequency table after encoding"
## b_utilityexpnew. Hh Monthly Expense: Home Utilities Such As Electricity, Water, Cable, Etc.
## 0 8 12 20 25 28 30
## 14176 75 5 12 30 9 46
## 40 45 50 60 62 70 80
## 76 6 45 102 16 49 20
## 85 100 102 104 105 110 120
## 3 132 25 9 8 20 5027
## 122 124 125 126 128 130 135
## 180 30 132 14 12 104 5
## 140 142 148 150 160 170 180
## 88 5 5 253 23 9 25
## 200 220 230 240 250 300 370
## 171 25 13 12 26 34 19
## 400 500 or more <NA>
## 46 144 19402
# b_phoneexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_phoneexpnew")
## Warning: Removed 19182 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_phoneexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_phoneexpnew. Hh Monthly Expense: Phone, Including Cell Phones
## 0 5 10 11 15 20 22 25 30 40 50 58 60 70 80
## 4877 9 142 6 10 687 11 30 335 128 2873 4 374 37 95
## 90 100 108 110 120 125 140 150 158 160 180 200 240 250 280
## 18 4716 22 9 45 11 7 705 4 3 20 3482 4 327 6
## 300 308 400 500 600 700 800 1000 1200 1300 1500 2000 5000 <NA>
## 829 11 298 930 119 12 26 193 9 16 14 19 13 19182
## [1] "Frequency table after encoding"
## b_phoneexpnew. Hh Monthly Expense: Phone, Including Cell Phones
## 0 5 10 11 15 20 22
## 4877 9 142 6 10 687 11
## 25 30 40 50 58 60 70
## 30 335 128 2873 4 374 37
## 80 90 100 108 110 120 125
## 95 18 4716 22 9 45 11
## 140 150 158 160 180 200 240
## 7 705 4 3 20 3482 4
## 250 280 300 308 400 500 600
## 327 6 829 11 298 930 119
## 700 800 1000 or more <NA>
## 12 26 264 19182
# b_alcoholexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_alcoholexpnew")
## Warning: Removed 19716 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_alcoholexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_alcoholexpnew. Hh Monthly Expense: Alcohol, Tobacco Or Gambling, Including Chakna
## 0 1 2 5 8 10 20 25 30 40 42 50 58 60 65
## 5922 23 15 10 5 56 204 48 100 212 7 1294 4 152 6
## 70 80 100 108 120 150 160 180 200 240 250 280 300 340 350
## 7 165 2588 9 43 399 18 35 2649 13 187 20 1094 5 22
## 380 400 450 500 508 550 580 600 700 750 800 900 940 1000 1100
## 6 383 16 2697 7 15 6 315 45 33 160 159 9 924 4
## 1130 1200 1280 1300 1400 1500 1580 1600 1700 1800 2000 2400 2500 3000 10000
## 5 168 4 11 12 285 5 13 8 4 235 5 23 74 5
## 15000 <NA>
## 4 19716
## [1] "Frequency table after encoding"
## b_alcoholexpnew. Hh Monthly Expense: Alcohol, Tobacco Or Gambling, Including Chakna
## 0 1 2 5 8 10 20
## 5922 23 15 10 5 56 204
## 25 30 40 42 50 58 60
## 48 100 212 7 1294 4 152
## 65 70 80 100 108 120 150
## 6 7 165 2588 9 43 399
## 160 180 200 240 250 280 300
## 18 35 2649 13 187 20 1094
## 340 350 380 400 450 500 508
## 5 22 6 383 16 2697 7
## 550 580 600 700 750 800 900
## 15 6 315 45 33 160 159
## 940 1000 1100 1130 1200 1280 1300
## 9 924 4 5 168 4 11
## 1400 1500 1580 1600 1700 1800 2000
## 12 285 5 13 8 4 235
## 2400 2500 or more <NA>
## 5 106 19716
# b_recrexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_recrexpnew")
## Warning: Removed 19482 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_recrexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_recrexpnew. Hh Monthly Expense: Recreation Or Entertainment
## 0 5 8 10 20 25 30 40 45 50 60 70 80 85 90
## 8579 4 15 14 96 9 103 180 5 1764 132 69 83 3 57
## 100 120 130 140 145 150 160 180 190 200 208 240 250 280 300
## 3233 37 16 11 9 274 8 44 10 3102 3 29 184 15 506
## 380 400 500 580 600 700 800 1000 1200 1300 1500 2000 3000 5000 <NA>
## 9 222 1386 6 71 5 33 702 10 6 62 47 15 18 19482
## [1] "Frequency table after encoding"
## b_recrexpnew. Hh Monthly Expense: Recreation Or Entertainment
## 0 5 8 10 20 25 30
## 8579 4 15 14 96 9 103
## 40 45 50 60 70 80 85
## 180 5 1764 132 69 83 3
## 90 100 120 130 140 145 150
## 57 3233 37 16 11 9 274
## 160 180 190 200 208 240 250
## 8 44 10 3102 3 29 184
## 280 300 380 400 500 580 600
## 15 506 9 222 1386 6 71
## 700 800 1000 1200 1300 1500 or more <NA>
## 5 33 702 10 6 142 19482
# b_debtexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker("b_debtexpnew")
## Warning: Removed 19901 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_debtexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_debtexpnew. Hh Monthly Expense: Dept Servicing
## 0 10 30 50 80 90 100 150 200 250 275 300 350 400 420
## 15113 6 5 48 19 7 205 79 531 97 4 267 12 156 7
## 460 500 550 600 700 750 800 900 1000 1200 1250 1400 1500 1600 1750
## 5 1480 6 82 38 12 49 6 943 78 7 11 192 19 6
## 1800 1850 2000 2100 2200 2300 2500 3000 3300 4000 4200 5000 6000 7000 8000
## 6 6 486 5 7 17 52 105 14 116 5 239 46 29 32
## 10000 12000 13000 20000 80000 <NA>
## 86 9 5 7 5 19901
## [1] "Frequency table after encoding"
## b_debtexpnew. Hh Monthly Expense: Dept Servicing
## 0 10 30 50 80 90
## 15113 6 5 48 19 7
## 100 150 200 250 275 300
## 205 79 531 97 4 267
## 350 400 420 460 500 550
## 12 156 7 5 1480 6
## 600 700 750 800 900 1000
## 82 38 12 49 6 943
## 1200 1250 1400 1500 1600 1750
## 78 7 11 192 19 6
## 1800 1850 2000 2100 2200 2300
## 6 6 486 5 7 17
## 2500 3000 3300 4000 4200 5000
## 52 105 14 116 5 239
## 6000 7000 8000 10000 or more <NA>
## 46 29 32 112 19901
# b_otherexpnew: top-code at the threshold returned by percentile_checker()
# (presumably the 99.5th percentile -- confirm in the helper file).
# NOTE(review): the frequency table lists 66666 (4 records), and 66666 is
# passed as a missing code for b_landinc -- confirm it is a genuine amount
# here; if not, it should be supplied via `missing` in both calls.
percentile_99.5 <- percentile_checker("b_otherexpnew")
## Warning: Removed 19160 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_otherexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_otherexpnew. Hh Monthly Expense: Other
## 0 2 4 5 8 20 25 50 65 80 100 150 200 250 280
## 15721 11 5 6 5 62 4 277 7 16 653 72 1003 22 5
## 300 400 500 600 700 800 850 1000 1500 2000 2500 3000 4000 5000 6000
## 430 375 1892 62 18 39 7 567 14 134 6 35 12 27 6
## 9000 20000 66666 <NA>
## 5 6 4 19160
## [1] "Frequency table after encoding"
## b_otherexpnew. Hh Monthly Expense: Other
## 0 2 4 5 8 20 25
## 15721 11 5 6 5 62 4
## 50 65 80 100 150 200 250
## 277 7 16 653 72 1003 22
## 280 300 400 500 600 700 800
## 5 430 375 1892 62 18 39
## 850 1000 1500 2000 or more <NA>
## 7 567 14 235 19160
# b_landinc: 66666 and 88888 are passed as missing codes to both helpers; in
# the recoded table they remain as their own categories rather than joining
# the top one. Threshold comes from percentile_checker() (presumably the
# 99.5th percentile -- confirm in the helper file).
percentile_99.5 <- percentile_checker(
  "b_landinc",
  missing = c(66666, 88888)
)
## Warning: Removed 26332 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "b_landinc",
  break_point = percentile_99.5,
  missing = c(66666, 88888)
)
## [1] "Frequency table before encoding"
## b_landinc. Earning From Land In Last Year
## 6 60 100 200 300 400 500 1000 1500 2000 2500 3000 4000
## 13 7 13 4 12 4 35 131 54 176 6 138 107
## 5000 6000 7000 8000 9000 10000 12000 15000 15600 18000 20000 24000 25000
## 462 164 48 38 11 419 138 326 9 5 466 9 275
## 28000 30000 35000 36000 40000 50000 65000 66666 88888 1e+05 120000 250000 <NA>
## 5 220 9 20 6 55 11 5752 5162 6 14 6 26332
## [1] "Frequency table after encoding"
## b_landinc. Earning From Land In Last Year
## 6 60 100 200 300 400
## 13 7 13 4 12 4
## 500 1000 1500 2000 2500 3000
## 35 131 54 176 6 138
## 4000 5000 6000 7000 8000 9000
## 107 462 164 48 38 11
## 10000 12000 15000 15600 18000 20000
## 419 138 326 9 5 466
## 24000 25000 28000 30000 35000 36000
## 9 275 5 220 9 20
## 40000 50000 65000 66666 88888 1e+05
## 6 55 11 5752 5162 6
## 120000 or more <NA>
## 20 26332
# e_earnamonth: helper result is stored but the threshold is set manually;
# values of 2000 and above are collapsed into one top category.
# NOTE(review): the value 88 occurs 37 times in the frequency table; if it is
# a non-response code it should be supplied via `missing` -- confirm.
percentile_99.5 <- percentile_checker("e_earnamonth")
## Warning: Removed 40334 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earnamonth",
  break_point = 2000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earnamonth. Money Earned: Farming Someone'S Land In A Typical Month, Last 3 Months
## 0 12 50 88 100 150 200 300 400 500 600 800 1000 1200 1500
## 261 1 1 37 1 5 3 2 1 4 1 1 4 1 3
## 2000 2300 2500 3000 4000 <NA>
## 4 1 1 1 1 40334
## [1] "Frequency table after encoding"
## e_earnamonth. Money Earned: Farming Someone'S Land In A Typical Month, Last 3 Months
## 0 12 50 88 100 150 200
## 261 1 1 37 1 5 3
## 300 400 500 600 800 1000 1200
## 2 1 4 1 1 4 1
## 1500 2000 or more <NA>
## 3 8 40334
# e_earnbmonth: helper result is stored but the threshold is set manually;
# values of 1000 and above are collapsed into one top category.
# NOTE(review): the value 88 occurs 57 times in the frequency table; if it is
# a non-response code it should be supplied via `missing` -- confirm.
percentile_99.5 <- percentile_checker("e_earnbmonth")
## Warning: Removed 40222 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earnbmonth",
  break_point = 1000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earnbmonth. Money Earned: Herding Or Helping With Animals In A Typical Month, Last 3 Months
## 0 2 88 100 150 250 400 1000 2250 3000 <NA>
## 381 1 57 1 1 1 1 1 1 1 40222
## [1] "Frequency table after encoding"
## e_earnbmonth. Money Earned: Herding Or Helping With Animals In A Typical Month, Last 3 Months
## 0 2 88 100 150 250 400
## 381 1 57 1 1 1 1
## 1000 or more <NA>
## 3 40222
# e_earncmonth: helper result is stored but the threshold is set manually;
# values of 6000 and above are collapsed into one top category.
percentile_99.5 <- percentile_checker("e_earncmonth")
## Warning: Removed 40508 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earncmonth",
  break_point = 6000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earncmonth. Money Earned: Mica Mining In A Typical Month, Last 3 Months
## 0 20 30 40 50 60 88 100 150 200 250 300 350 400 500
## 22 1 1 1 4 6 3 6 3 11 3 5 3 3 17
## 600 650 700 750 800 1000 1200 1500 1800 2000 3000 4000 4500 6000 6500
## 2 1 2 2 3 16 3 15 2 8 9 3 1 3 1
## <NA>
## 40508
## [1] "Frequency table after encoding"
## e_earncmonth. Money Earned: Mica Mining In A Typical Month, Last 3 Months
## 0 20 30 40 50 60 88
## 22 1 1 1 4 6 3
## 100 150 200 250 300 350 400
## 6 3 11 3 5 3 3
## 500 600 650 700 750 800 1000
## 17 2 1 2 2 3 16
## 1200 1500 1800 2000 3000 4000 4500
## 3 15 2 8 9 3 1
## 6000 or more <NA>
## 4 40508
# e_earndmonth: helper result is stored but the threshold is set manually at
# 10000.
# NOTE(review): the recoded table shows a single record in the top category;
# confirm this threshold meets the disclosure-control goal.
percentile_99.5 <- percentile_checker("e_earndmonth")
## Warning: Removed 40628 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earndmonth",
  break_point = 10000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earndmonth. Money Earned: Construction, Building Homes In A Typical Month, Last 3 Months
## 0 50 88 100 150 200 250 300 460 500 800 1000 1500 2000 2200
## 2 1 1 1 1 3 2 2 1 1 1 2 3 1 1
## 2500 3000 3300 5000 6000 7000 7500 8000 14000 <NA>
## 2 5 1 2 3 1 1 1 1 40628
## [1] "Frequency table after encoding"
## e_earndmonth. Money Earned: Construction, Building Homes In A Typical Month, Last 3 Months
## 0 50 88 100 150 200
## 2 1 1 1 1 3
## 250 300 460 500 800 1000
## 2 2 1 1 1 2
## 1500 2000 2200 2500 3000 3300
## 3 1 1 2 5 1
## 5000 6000 7000 7500 8000 10000 or more
## 2 3 1 1 1 1
## <NA>
## 40628
# e_earnemonth: helper result is stored but the threshold is set manually at
# 2000.
# NOTE(review): the recoded table shows a single record in the top category
# (the maximum observed value is 2000); confirm this threshold meets the
# disclosure-control goal.
percentile_99.5 <- percentile_checker("e_earnemonth")
## Warning: Removed 40659 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earnemonth",
  break_point = 2000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earnemonth. Money Earned: Building Roads And Bridges In A Typical Month, Last 3 Months
## 88 260 300 500 600 1000 1600 2000 <NA>
## 2 1 1 1 1 1 1 1 40659
## [1] "Frequency table after encoding"
## e_earnemonth. Money Earned: Building Roads And Bridges In A Typical Month, Last 3 Months
## 88 260 300 500 600 1000 1600
## 2 1 1 1 1 1 1
## 2000 or more <NA>
## 1 40659
# e_earnfmonth: helper result is stored but the threshold is set manually at
# 4000.
# NOTE(review): the recoded table shows a single record in the top category;
# confirm this threshold meets the disclosure-control goal.
percentile_99.5 <- percentile_checker("e_earnfmonth")
## Warning: Removed 40636 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earnfmonth",
  break_point = 4000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earnfmonth. Money Earned: Making Bricks In A Typical Month, Last 3 Months
## 0 100 150 200 300 350 450 500 600 700 900 1000 1200 1500 1600
## 1 1 1 4 1 1 2 4 2 1 1 4 1 1 2
## 2000 2500 3000 5000 <NA>
## 2 1 1 1 40636
## [1] "Frequency table after encoding"
## e_earnfmonth. Money Earned: Making Bricks In A Typical Month, Last 3 Months
## 0 100 150 200 300 350 450
## 1 1 1 4 1 1 2
## 500 600 700 900 1000 1200 1500
## 4 2 1 1 4 1 1
## 1600 2000 2500 3000 4000 or more <NA>
## 2 2 1 1 1 40636
# e_earngmonth: helper result is stored but the threshold is set manually;
# values of 500 and above are collapsed into one top category.
percentile_99.5 <- percentile_checker("e_earngmonth")
## Warning: Removed 40545 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earngmonth",
  break_point = 500,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earngmonth. Money Earned: Collecting Wood In A Typical Month, Last 3 Months
## 0 50 88 200 500 1000 <NA>
## 106 1 11 3 1 1 40545
## [1] "Frequency table after encoding"
## e_earngmonth. Money Earned: Collecting Wood In A Typical Month, Last 3 Months
## 0 50 88 200 500 or more <NA>
## 106 1 11 3 2 40545
# e_earnhmonth: helper result is stored but the threshold is set manually at
# 5000.
# NOTE(review): the recoded table shows a single record in the top category
# (the maximum observed value is 5000); confirm this threshold meets the
# disclosure-control goal.
percentile_99.5 <- percentile_checker("e_earnhmonth")
## Warning: Removed 40659 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earnhmonth",
  break_point = 5000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earnhmonth. Money Earned: Domestic Work For Someone Else In A Typical Month, Last 3 Months
## 0 100 120 3000 4000 5000 <NA>
## 2 1 1 2 2 1 40659
## [1] "Frequency table after encoding"
## e_earnhmonth. Money Earned: Domestic Work For Someone Else In A Typical Month, Last 3 Months
## 0 100 120 3000 4000 5000 or more <NA>
## 2 1 1 2 2 1 40659
# e_earnimonth: helper result is stored but the threshold is set manually;
# values of 5000 and above are collapsed into one top category.
percentile_99.5 <- percentile_checker("e_earnimonth")
## Warning: Removed 40653 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earnimonth",
  break_point = 5000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earnimonth. Money Earned: Craft Work In A Typical Month, Last 3 Months
## 0 88 100 200 300 500 1500 3000 5000 7000 <NA>
## 5 1 1 1 1 1 1 2 1 1 40653
## [1] "Frequency table after encoding"
## e_earnimonth. Money Earned: Craft Work In A Typical Month, Last 3 Months
## 0 88 100 200 300 500 1500
## 5 1 1 1 1 1 1
## 3000 5000 or more <NA>
## 2 2 40653
# e_earnjmonth: helper result is stored but the threshold is set manually;
# values of 10000 and above are collapsed into one top category.
percentile_99.5 <- percentile_checker("e_earnjmonth")
## Warning: Removed 40378 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_earnjmonth",
  break_point = 10000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_earnjmonth. Money Earned: Other Paid Jobs In A Typical Month, Last 3 Months
## 0 75 88 100 150 160 200 250 500 560 1000 1500 2000 3000 3500
## 200 1 11 3 1 1 2 1 1 2 3 2 6 9 2
## 4000 5000 6000 7000 8000 10000 12000 <NA>
## 8 19 6 4 6 1 1 40378
## [1] "Frequency table after encoding"
## e_earnjmonth. Money Earned: Other Paid Jobs In A Typical Month, Last 3 Months
## 0 75 88 100 150 160
## 200 1 11 3 1 1
## 200 250 500 560 1000 1500
## 2 1 1 2 3 2
## 2000 3000 3500 4000 5000 6000
## 6 9 2 8 19 6
## 7000 8000 10000 or more <NA>
## 4 6 2 40378
# e_rentexpnew: helper result is stored but the threshold is set manually;
# values of 1000 and above are collapsed into one top category.
percentile_99.5 <- percentile_checker("e_rentexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_rentexpnew",
  break_point = 1000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_rentexpnew. Household Rent/Mortgage Exp In Last Month
## 0 1 2 3 8 10 20 30 80 88 100 250 900 1000 2000
## 18649 8 11 5 34 5 5 14 5 12 7 4 9 14 7
## <NA>
## 21879
## [1] "Frequency table after encoding"
## e_rentexpnew. Household Rent/Mortgage Exp In Last Month
## 0 1 2 3 8 10 20
## 18649 8 11 5 34 5 5
## 30 80 88 100 250 900 1000 or more
## 14 5 12 7 4 9 21
## <NA>
## 21879
# e_foodexpnew: top-code at the value returned by percentile_checker().
# In the recorded run this produced the "15000 or more" category shown in the
# after-encoding table below.
percentile_99.5 <- percentile_checker("e_foodexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_foodexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_foodexpnew. Household Food Exp In The Last Month
## 0 1 15 21 28 50 88 100 120 150 200
## 8 3 7 6 5 11 75 58 5 6 108
## 250 280 300 350 358 400 500 550 600 625 650
## 31 4 103 6 3 45 267 5 46 5 6
## 700 800 850 900 950 1000 1100 1200 1300 1400 1500
## 48 130 6 30 18 1240 20 287 18 3 1424
## 1600 1800 1900 2000 2100 2200 2400 2450 2500 2600 2700
## 37 66 8 3484 10 59 8 11 1226 38 17
## 2800 3000 3200 3300 3400 3500 3600 3800 4000 4200 4500
## 71 3349 19 6 14 731 10 6 2084 37 164
## 5000 5500 6000 6500 7000 7500 8000 9000 10000 12000 14000
## 1866 24 567 8 187 20 203 19 210 50 6
## 15000 20000 30000 30005 40000 45000 50000 80000 9913000 <NA>
## 51 15 7 6 14 10 14 15 5 21879
## [1] "Frequency table after encoding"
## e_foodexpnew. Household Food Exp In The Last Month
## 0 1 15 21 28 50
## 8 3 7 6 5 11
## 88 100 120 150 200 250
## 75 58 5 6 108 31
## 280 300 350 358 400 500
## 4 103 6 3 45 267
## 550 600 625 650 700 800
## 5 46 5 6 48 130
## 850 900 950 1000 1100 1200
## 6 30 18 1240 20 287
## 1300 1400 1500 1600 1800 1900
## 18 3 1424 37 66 8
## 2000 2100 2200 2400 2450 2500
## 3484 10 59 8 11 1226
## 2600 2700 2800 3000 3200 3300
## 38 17 71 3349 19 6
## 3400 3500 3600 3800 4000 4200
## 14 731 10 6 2084 37
## 4500 5000 5500 6000 6500 7000
## 164 1866 24 567 8 187
## 7500 8000 9000 10000 12000 14000
## 20 203 19 210 50 6
## 15000 or more <NA>
## 137 21879
# e_clothesexpnew: top-code at the value returned by percentile_checker().
# In the recorded run this produced the "5000 or more" category shown in the
# after-encoding table below.
percentile_99.5 <- percentile_checker("e_clothesexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_clothesexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_clothesexpnew. Household Clothes Expense In The Last Month
## 0 10 20 50 60 70 75 88 90 100 120 125 128 130 140
## 226 8 3 168 15 16 21 68 5 921 222 9 4 6 11
## 150 158 160 166 167 170 175 180 190 200 201 205 215 216 230
## 495 6 26 31 22 5 9 67 5 2652 9 6 12 4 12
## 240 245 250 255 256 258 268 269 272 280 292 300 310 316 320
## 13 14 829 15 7 32 6 3 13 20 5 1740 20 17 5
## 325 330 333 334 346 350 356 360 363 368 380 400 410 415 416
## 7 8 34 13 6 113 31 11 5 12 4 1371 21 11 68
## 417 418 420 422 425 428 438 450 458 480 483 500 508 516 518
## 13 6 15 11 6 6 7 63 6 29 11 3610 6 6 6
## 520 550 580 583 584 600 650 666 667 680 700 750 756 800 833
## 4 13 15 18 3 865 7 23 8 6 488 4 14 649 64
## 834 844 850 876 900 950 1000 1100 1200 1250 1300 1350 1400 1500 1750
## 5 5 41 4 148 4 1649 11 247 12 22 6 6 416 8
## 1800 2000 2010 2100 2400 2500 3000 3500 4000 5000 5500 10000 15000 <NA>
## 30 257 5 5 6 83 77 45 71 80 5 24 17 21879
## [1] "Frequency table after encoding"
## e_clothesexpnew. Household Clothes Expense In The Last Month
## 0 10 20 50 60 70 75
## 226 8 3 168 15 16 21
## 88 90 100 120 125 128 130
## 68 5 921 222 9 4 6
## 140 150 158 160 166 167 170
## 11 495 6 26 31 22 5
## 175 180 190 200 201 205 215
## 9 67 5 2652 9 6 12
## 216 230 240 245 250 255 256
## 4 12 13 14 829 15 7
## 258 268 269 272 280 292 300
## 32 6 3 13 20 5 1740
## 310 316 320 325 330 333 334
## 20 17 5 7 8 34 13
## 346 350 356 360 363 368 380
## 6 113 31 11 5 12 4
## 400 410 415 416 417 418 420
## 1371 21 11 68 13 6 15
## 422 425 428 438 450 458 480
## 11 6 6 7 63 6 29
## 483 500 508 516 518 520 550
## 11 3610 6 6 6 4 13
## 580 583 584 600 650 666 667
## 15 18 3 865 7 23 8
## 680 700 750 756 800 833 834
## 6 488 4 14 649 64 5
## 844 850 876 900 950 1000 1100
## 5 41 4 148 4 1649 11
## 1200 1250 1300 1350 1400 1500 1750
## 247 12 22 6 6 416 8
## 1800 2000 2010 2100 2400 2500 3000
## 30 257 5 5 6 83 77
## 3500 4000 5000 or more <NA>
## 45 71 126 21879
# e_travelexpnew: top-code at the value returned by percentile_checker().
# In the recorded run this produced the "3000 or more" category shown in the
# after-encoding table below.
percentile_99.5 <- percentile_checker("e_travelexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_travelexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_travelexpnew. Household Travel Expense In Last Month
## 0 10 15 20 25 28 30 40 46 50 53 57 60
## 1305 25 7 84 47 4 11 47 4 1243 9 5 73
## 70 75 80 83 88 90 100 108 120 130 138 140 150
## 26 69 146 16 59 6 3615 4 159 10 5 28 940
## 160 166 180 200 208 225 230 236 250 255 256 260 280
## 46 8 14 4525 8 5 8 4 738 6 5 18 16
## 300 310 320 350 360 366 369 375 388 400 420 450 500
## 1731 12 22 122 5 22 5 7 5 507 7 36 1778
## 550 560 600 700 750 800 1000 1200 1500 1800 2000 2200 2500
## 7 8 212 53 8 45 536 35 38 7 96 7 23
## 3000 4000 5000 6000 200200 <NA>
## 24 7 48 20 8 21879
## [1] "Frequency table after encoding"
## e_travelexpnew. Household Travel Expense In Last Month
## 0 10 15 20 25 28 30
## 1305 25 7 84 47 4 11
## 40 46 50 53 57 60 70
## 47 4 1243 9 5 73 26
## 75 80 83 88 90 100 108
## 69 146 16 59 6 3615 4
## 120 130 138 140 150 160 166
## 159 10 5 28 940 46 8
## 180 200 208 225 230 236 250
## 14 4525 8 5 8 4 738
## 255 256 260 280 300 310 320
## 6 5 18 16 1731 12 22
## 350 360 366 369 375 388 400
## 122 5 22 5 7 5 507
## 420 450 500 550 560 600 700
## 7 36 1778 7 8 212 53
## 750 800 1000 1200 1500 1800 2000
## 8 45 536 35 38 7 96
## 2200 2500 3000 or more <NA>
## 7 23 107 21879
# e_healthexpnew: run percentile_checker() on the variable, then top-code at a
# manually chosen 7500 (the value stored in percentile_99.5 is not used here).
# The recorded tables below show the resulting "7500 or more" category.
percentile_99.5 <- percentile_checker("e_healthexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_healthexpnew",
  break_point = 7500,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_healthexpnew. Household Health Exp In The Last Month
## 0 5 11 20 30 40 41 50 58 60 70 75 80
## 882 3 7 19 3 6 5 609 15 55 33 6 26
## 83 84 85 88 100 104 108 120 125 130 140 150 154
## 5 4 7 49 1095 3 6 36 9 5 17 382 4
## 158 166 167 170 180 183 200 208 230 240 250 260 290
## 6 7 10 6 44 10 1951 5 7 11 328 6 6
## 300 310 316 320 333 334 350 360 400 410 415 416 417
## 1454 7 6 3 24 6 93 16 976 4 5 39 25
## 430 450 458 460 485 500 570 600 664 667 700 750 800
## 5 29 9 11 7 3656 8 562 4 7 327 9 553
## 833 900 916 1000 1100 1200 1250 1300 1400 1500 1667 1700 1800
## 11 75 11 2415 30 165 12 24 5 563 3 10 11
## 1900 2000 2083 2400 2500 2700 2900 3000 3333 3500 3600 4000 4500
## 11 834 8 16 153 6 8 327 4 44 5 89 15
## 5000 5833 6000 7000 7500 8000 10000 12000 15000 25000 35000 3e+05 500300
## 223 5 55 18 6 14 27 9 22 6 5 7 4
## <NA>
## 21879
## [1] "Frequency table after encoding"
## e_healthexpnew. Household Health Exp In The Last Month
## 0 5 11 20 30 40 41
## 882 3 7 19 3 6 5
## 50 58 60 70 75 80 83
## 609 15 55 33 6 26 5
## 84 85 88 100 104 108 120
## 4 7 49 1095 3 6 36
## 125 130 140 150 154 158 166
## 9 5 17 382 4 6 7
## 167 170 180 183 200 208 230
## 10 6 44 10 1951 5 7
## 240 250 260 290 300 310 316
## 11 328 6 6 1454 7 6
## 320 333 334 350 360 400 410
## 3 24 6 93 16 976 4
## 415 416 417 430 450 458 460
## 5 39 25 5 29 9 11
## 485 500 570 600 664 667 700
## 7 3656 8 562 4 7 327
## 750 800 833 900 916 1000 1100
## 9 553 11 75 11 2415 30
## 1200 1250 1300 1400 1500 1667 1700
## 165 12 24 5 563 3 10
## 1800 1900 2000 2083 2400 2500 2700
## 11 11 834 8 16 153 6
## 2900 3000 3333 3500 3600 4000 4500
## 8 327 4 44 5 89 15
## 5000 5833 6000 7000 7500 or more <NA>
## 223 5 55 18 100 21879
# e_schoolexpnew: top-code at the value returned by percentile_checker().
# In the recorded run this produced the "10000 or more" category shown in the
# after-encoding table below.
percentile_99.5 <- percentile_checker("e_schoolexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_schoolexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_schoolexpnew. Household School Exp In Last Month
## 0 10 11 20 25 30 40 50 60 70 80 88 100 108 110
## 1050 5 5 13 20 15 6 487 14 6 15 24 1126 8 9
## 120 130 140 150 180 190 200 208 220 240 250 260 275 300 320
## 34 4 7 439 12 6 2266 8 15 23 463 14 7 1616 5
## 330 344 350 369 400 430 450 470 480 500 501 510 550 560 580
## 4 4 133 11 879 11 91 9 8 3006 7 6 25 25 9
## 600 640 650 700 750 780 800 850 900 950 1000 1100 1150 1200 1250
## 796 5 23 252 19 7 426 26 80 3 1985 46 5 295 6
## 1300 1350 1400 1500 1600 1700 1800 1850 2000 2100 2200 2300 2400 2500 3000
## 24 8 3 538 16 10 44 9 743 13 13 11 9 148 419
## 3008 3200 3500 3600 4000 4166 4200 4500 4750 5000 5500 6000 6500 7000 7300
## 6 8 43 7 157 6 6 4 10 269 7 89 12 53 4
## 8000 9000 10000 11000 12000 14000 15000 25000 <NA>
## 57 5 32 13 16 12 19 22 21879
## [1] "Frequency table after encoding"
## e_schoolexpnew. Household School Exp In Last Month
## 0 10 11 20 25 30
## 1050 5 5 13 20 15
## 40 50 60 70 80 88
## 6 487 14 6 15 24
## 100 108 110 120 130 140
## 1126 8 9 34 4 7
## 150 180 190 200 208 220
## 439 12 6 2266 8 15
## 240 250 260 275 300 320
## 23 463 14 7 1616 5
## 330 344 350 369 400 430
## 4 4 133 11 879 11
## 450 470 480 500 501 510
## 91 9 8 3006 7 6
## 550 560 580 600 640 650
## 25 25 9 796 5 23
## 700 750 780 800 850 900
## 252 19 7 426 26 80
## 950 1000 1100 1150 1200 1250
## 3 1985 46 5 295 6
## 1300 1350 1400 1500 1600 1700
## 24 8 3 538 16 10
## 1800 1850 2000 2100 2200 2300
## 44 9 743 13 13 11
## 2400 2500 3000 3008 3200 3500
## 9 148 419 6 8 43
## 3600 4000 4166 4200 4500 4750
## 7 157 6 6 4 10
## 5000 5500 6000 6500 7000 7300
## 269 7 89 12 53 4
## 8000 9000 10000 or more <NA>
## 57 5 114 21879
# e_utilityexpnew: run percentile_checker() on the variable, then top-code at a
# manually chosen 400 (the value stored in percentile_99.5 is not used here).
# The recorded tables below show the resulting "400 or more" category.
percentile_99.5 <- percentile_checker("e_utilityexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_utilityexpnew",
  break_point = 400,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_utilityexpnew. Household Utility Exp In Last Month
## 0 2 8 30 40 50 52 60 63 68 70 76 80 88 100
## 13165 15 7 5 12 64 4 10 5 5 17 7 9 41 397
## 102 103 104 105 106 108 110 120 122 124 125 129 130 132 134
## 26 4 36 17 7 5 10 813 21 4 15 6 178 49 9
## 135 136 140 142 150 160 165 170 175 180 190 200 201 202 204
## 7 10 73 6 319 20 9 17 5 11 6 1030 6 36 657
## 205 206 207 208 209 210 212 215 217 220 224 225 230 232 235
## 36 28 9 24 34 75 17 5 24 494 17 9 98 45 14
## 240 250 254 260 263 264 280 290 300 320 360 375 380 400 500
## 207 172 15 11 6 5 7 5 120 6 21 9 7 26 23
## 625 700 1000 1300 2000 4000 <NA>
## 6 15 5 6 10 3 21879
## [1] "Frequency table after encoding"
## e_utilityexpnew. Household Utility Exp In Last Month
## 0 2 8 30 40 50 52
## 13165 15 7 5 12 64 4
## 60 63 68 70 76 80 88
## 10 5 5 17 7 9 41
## 100 102 103 104 105 106 108
## 397 26 4 36 17 7 5
## 110 120 122 124 125 129 130
## 10 813 21 4 15 6 178
## 132 134 135 136 140 142 150
## 49 9 7 10 73 6 319
## 160 165 170 175 180 190 200
## 20 9 17 5 11 6 1030
## 201 202 204 205 206 207 208
## 6 36 657 36 28 9 24
## 209 210 212 215 217 220 224
## 34 75 17 5 24 494 17
## 225 230 232 235 240 250 254
## 9 98 45 14 207 172 15
## 260 263 264 280 290 300 320
## 11 6 5 7 5 120 6
## 360 375 380 400 or more <NA>
## 21 9 7 94 21879
# e_phoneexpnew: top-code at the value returned by percentile_checker().
# In the recorded run this produced the "700 or more" category shown in the
# after-encoding table below.
percentile_99.5 <- percentile_checker("e_phoneexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_phoneexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_phoneexpnew. Household Phone Expense In The Last Monnth
## 0 5 10 11 12 15 20 21 22 25 30 35 38 40 45
## 1772 11 239 15 16 16 1056 13 9 103 648 33 14 364 16
## 50 55 60 66 70 80 84 88 90 93 95 98 99 100 105
## 4492 6 573 13 64 276 5 49 5 140 3 6 12 4731 6
## 108 109 116 120 130 140 145 148 149 150 170 179 180 189 190
## 13 57 8 77 16 10 5 6 24 565 5 8 45 7 4
## 198 199 200 210 250 289 300 350 399 400 450 500 600 700 716
## 5 119 1801 7 72 6 570 12 15 158 6 307 39 38 4
## 800 1000 1200 5730 <NA>
## 9 49 5 11 21879
## [1] "Frequency table after encoding"
## e_phoneexpnew. Household Phone Expense In The Last Monnth
## 0 5 10 11 12 15 20
## 1772 11 239 15 16 16 1056
## 21 22 25 30 35 38 40
## 13 9 103 648 33 14 364
## 45 50 55 60 66 70 80
## 16 4492 6 573 13 64 276
## 84 88 90 93 95 98 99
## 5 49 5 140 3 6 12
## 100 105 108 109 116 120 130
## 4731 6 13 57 8 77 16
## 140 145 148 149 150 170 179
## 10 5 6 24 565 5 8
## 180 189 190 198 199 200 210
## 45 7 4 5 119 1801 7
## 250 289 300 350 399 400 450
## 72 6 570 12 15 158 6
## 500 600 700 or more <NA>
## 307 39 116 21879
# e_alcoholexpnew: top-code at the value returned by percentile_checker().
# In the recorded run this produced the "3000 or more" category shown in the
# after-encoding table below.
percentile_99.5 <- percentile_checker("e_alcoholexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_alcoholexpnew",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_alcoholexpnew. Household Alocohol Exp In The Last Month
## 0 8 10 20 25 30 40 50 60 70 75 80 88 90 100
## 7689 12 20 196 41 217 209 1244 160 6 16 145 38 5 1979
## 108 110 120 125 130 150 160 175 180 200 240 250 280 300 350
## 11 5 67 69 8 605 13 6 6 1951 5 116 4 929 6
## 360 400 450 500 600 700 800 900 950 1000 1200 1208 1300 1500 1600
## 4 179 29 1425 146 31 67 80 6 522 42 7 4 184 6
## 2000 2500 3000 3500 4000 5000 <NA>
## 126 28 104 7 8 6 21879
## [1] "Frequency table after encoding"
## e_alcoholexpnew. Household Alocohol Exp In The Last Month
## 0 8 10 20 25 30 40
## 7689 12 20 196 41 217 209
## 50 60 70 75 80 88 90
## 1244 160 6 16 145 38 5
## 100 108 110 120 125 130 150
## 1979 11 5 67 69 8 605
## 160 175 180 200 240 250 280
## 13 6 6 1951 5 116 4
## 300 350 360 400 450 500 600
## 929 6 4 179 29 1425 146
## 700 800 900 950 1000 1200 1208
## 31 67 80 6 522 42 7
## 1300 1500 1600 2000 2500 3000 or more <NA>
## 4 184 6 126 28 125 21879
# e_recrexpnew: run percentile_checker() on the variable, then top-code at a
# manually chosen 1200 (the value stored in percentile_99.5 is not used here).
# The recorded tables below show the resulting "1200 or more" category.
percentile_99.5 <- percentile_checker("e_recrexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_recrexpnew",
  break_point = 1200,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_recrexpnew. Household Recr Exp In Last Month
## 0 10 20 25 30 34 35 40 41 50 60 70 75
## 8141 24 129 35 51 3 6 53 28 977 76 5 38
## 80 83 85 88 90 100 108 110 120 125 130 140 143
## 29 22 7 41 7 2412 4 5 86 4 5 19 4
## 150 160 166 167 170 180 200 208 210 220 238 250 260
## 761 5 8 4 7 18 2558 4 6 7 7 204 5
## 280 290 300 320 334 350 400 410 450 500 600 700 800
## 8 6 909 16 9 22 322 10 5 1036 168 21 23
## 900 1000 1200 1500 2000 2500 3000 5000 6000 7500 100020 <NA>
## 3 279 53 5 32 10 12 10 14 5 6 21879
## [1] "Frequency table after encoding"
## e_recrexpnew. Household Recr Exp In Last Month
## 0 10 20 25 30 34 35
## 8141 24 129 35 51 3 6
## 40 41 50 60 70 75 80
## 53 28 977 76 5 38 29
## 83 85 88 90 100 108 110
## 22 7 41 7 2412 4 5
## 120 125 130 140 143 150 160
## 86 4 5 19 4 761 5
## 166 167 170 180 200 208 210
## 8 4 7 18 2558 4 6
## 220 238 250 260 280 290 300
## 7 7 204 5 8 6 909
## 320 334 350 400 410 450 500
## 16 9 22 322 10 5 1036
## 600 700 800 900 1000 1200 or more <NA>
## 168 21 23 3 279 147 21879
# e_debtexpnew: run percentile_checker() on the variable, then top-code at a
# manually chosen 9000 (the value stored in percentile_99.5 is not used here).
# The recorded tables below show the resulting "9000 or more" category.
percentile_99.5 <- percentile_checker("e_debtexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_debtexpnew",
  break_point = 9000,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_debtexpnew. Household Debt Exp In Last Month
## 0 2 8 20 45 50 60 88 100 120 150 180 200 250 300
## 14906 9 10 7 5 45 23 80 133 4 10 3 275 79 176
## 325 333 350 400 500 600 625 640 650 672 700 750 800 1000 1200
## 6 8 14 103 970 24 6 3 6 4 49 18 55 599 18
## 1250 1300 1400 1500 1600 1700 1750 1800 1900 2000 2100 2200 2400 2500 3000
## 8 8 4 114 17 7 7 5 6 310 4 8 4 76 138
## 3500 4000 4600 4800 5000 6000 7000 7500 8000 9000 10000 16000 20000 50000 <NA>
## 5 44 7 5 160 33 20 22 25 10 66 8 13 7 21879
## [1] "Frequency table after encoding"
## e_debtexpnew. Household Debt Exp In Last Month
## 0 2 8 20 45 50 60
## 14906 9 10 7 5 45 23
## 88 100 120 150 180 200 250
## 80 133 4 10 3 275 79
## 300 325 333 350 400 500 600
## 176 6 8 14 103 970 24
## 625 640 650 672 700 750 800
## 6 3 6 4 49 18 55
## 1000 1200 1250 1300 1400 1500 1600
## 599 18 8 8 4 114 17
## 1700 1750 1800 1900 2000 2100 2200
## 7 7 5 6 310 4 8
## 2400 2500 3000 3500 4000 4600 4800
## 4 76 138 5 44 7 5
## 5000 6000 7000 7500 8000 9000 or more <NA>
## 160 33 20 22 25 104 21879
# e_otherexpnew: run percentile_checker() on the variable, then top-code at a
# manually chosen 500 (the value stored in percentile_99.5 is not used here).
# The recorded tables below show the resulting "500 or more" category.
percentile_99.5 <- percentile_checker("e_otherexpnew")
## Warning: Removed 21879 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_otherexpnew",
  break_point = 500,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_otherexpnew. Other Household Exp In The Last Month
## 0 4 8 50 88 100 200 300 500 800 1000 1500 4000 8000 10000
## 18609 5 13 5 12 16 31 13 25 13 8 4 3 5 6
## 12740 6e+05 <NA>
## 10 11 21879
## [1] "Frequency table after encoding"
## e_otherexpnew. Other Household Exp In The Last Month
## 0 4 8 50 88 100 200
## 18609 5 13 5 12 16 31
## 300 500 or more <NA>
## 13 85 21879
# e_landinc: top-code at the value returned by percentile_checker().
# In the recorded run this produced the "150000 or more" category shown in the
# after-encoding table below.
# NOTE(review): the table below shows 2091 records at the value 88, which may
# be a nonresponse code rather than an amount -- confirm, and check whether it
# should be declared through top_recode's `missing` argument.
percentile_99.5 <- percentile_checker("e_landinc")
## Warning: Removed 30701 rows containing non-finite values (stat_bin).
## Warning: Computation failed in `stat_bin()`:
## missing value where TRUE/FALSE needed
mydata <- top_recode(
  variable = "e_landinc",
  break_point = percentile_99.5,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_landinc. Earning From Land In Last Year
## 0 2 8 88 350 500 600 700 800 1000 1200 1500 2000
## 4044 6 5 2091 3 59 5 6 5 257 8 27 249
## 2500 3000 3200 3500 3600 4000 4500 5000 5200 5400 5500 6000 6700
## 18 243 6 19 5 231 32 702 6 7 13 105 8
## 7000 7500 8000 8500 9000 10000 12000 13000 14000 15000 16000 17000 18000
## 63 12 252 6 91 500 57 11 9 183 8 16 21
## 20000 24000 25000 26000 30000 40000 45000 50000 55000 60000 80000 1e+05 120000
## 244 6 12 14 18 29 5 61 3 48 18 31 9
## 150000 2e+05 250000 3e+05 <NA>
## 38 25 7 10 30701
## [1] "Frequency table after encoding"
## e_landinc. Earning From Land In Last Year
## 0 2 8 88 350 500
## 4044 6 5 2091 3 59
## 600 700 800 1000 1200 1500
## 5 6 5 257 8 27
## 2000 2500 3000 3200 3500 3600
## 249 18 243 6 19 5
## 4000 4500 5000 5200 5400 5500
## 231 32 702 6 7 13
## 6000 6700 7000 7500 8000 8500
## 105 8 63 12 252 6
## 9000 10000 12000 13000 14000 15000
## 91 500 57 11 9 183
## 16000 17000 18000 20000 24000 25000
## 8 16 21 244 6 12
## 26000 30000 40000 45000 50000 55000
## 14 18 29 5 61 3
## 60000 80000 1e+05 120000 150000 or more <NA>
## 48 18 31 9 80 30701
# HHincome_percapita: run percentile_checker() on the variable, then top-code
# with a manually chosen break point of 5000 (the value stored in
# percentile_99.5 is not used here).
percentile_99.5 <- percentile_checker("HHincome_percapita")
mydata <- top_recode(
  variable = "HHincome_percapita",
  break_point = 5000,
  missing = NA
)
## [1] "Frequency table before encoding"
## HHincome_percapita. Household Income Per Capita
## 0 0.28571429848671 1.71428573131561 5.7142858505249 6.76923084259033
## 19490 7 7 7 26
## 7.33333349227905 8.33333301544189 8.5714282989502 8.80000019073486 9.77777767181396
## 12 6 7 10 9
## 10 11 12 12.5 12.5714282989502
## 21 32 5 26 42
## 13.3333330154419 14.2857141494751 14.6666669845581 15 15.384614944458
## 15 7 6 10 13
## 16.25 16.6666660308838 17.6000003814697 18.75 20
## 8 18 20 16 26
## 20.8333339691162 21.4285717010498 22 25 25.1428565979004
## 12 14 32 38 35
## 27.7777786254883 28.5714282989502 29.3333339691162 30 31.25
## 9 28 36 10 16
## 32 33.3333320617676 35 35.2000007629395 37.5
## 5 30 10 20 8
## 37.7142868041992 40 41.6666679382324 42.8571434020996 43.75
## 7 25 6 7 8
## 44 50 50.2857131958008 51.3333320617676 54.75
## 6 48 7 12 4
## 55.5555572509766 58.6666679382324 60 61.1111106872559 62.5
## 9 3 5 9 16
## 65.3333358764648 66 66.6666641235352 68.1818161010742 68.4444427490234
## 9 4 30 11 9
## 71.4285736083984 72 72.8000030517578 75 76.6666641235352
## 28 5 5 20 6
## 76.9230804443359 80 81.25 83.3333358764648 85.7142868041992
## 26 15 8 36 14
## 87.5 88 90 90.9090881347656 93.75
## 8 5 10 22 16
## 98.1818161010742 100 102.5 105 107.142860412598
## 11 73 8 10 7
## 111.111114501953 112.777778625488 115 116.666664123535 118.333335876465
## 9 9 8 6 6
## 120 121.428573608398 123.75 125 128.571426391602
## 20 7 4 54 14
## 131.25 131.399993896484 133.33332824707 137.66667175293 138.888885498047
## 8 5 24 6 9
## 140 142 142.857147216797 146 147
## 15 5 91 6 4
## 150 157.142852783203 160 164.25 165
## 62 7 33 8 5
## 166.66667175293 168 169.230773925781 170 173.777770996094
## 78 7 13 8 9
## 175 176 178.571426391602 180 187.5
## 10 5 7 15 68
## 197.777770996094 200 204.399993896484 208.33332824707 208.75
## 9 117 9 12 8
## 210 214.285720825195 218.75 220 222.222229003906
## 19 91 8 5 18
## 225 228.125 233.33332824707 236.66667175293 238.888885498047
## 10 8 6 6 9
## 240 244.444442749023 245.454544067383 250 253.33332824707
## 15 9 11 176 6
## 255.33332824707 256.333343505859 257.142852783203 260 262.5
## 9 6 7 5 16
## 266.666656494141 268.333343505859 270 272.727264404297 275
## 27 6 4 33 4
## 277.777770996094 280 281.25 282 282.769226074219
## 27 5 32 5 13
## 283.333343505859 285.714294433594 287.5 288 290
## 6 49 4 5 5
## 291.666656494141 292 300 300.75 307.692321777344
## 6 18 103 4 26
## 311.111114501953 312.5 314.285705566406 320 321.428558349609
## 9 44 7 24 14
## 324 325 326.086944580078 327.777770996094 328.571441650391
## 5 20 23 9 7
## 329.166656494141 333.333343505859 340 342.857147216797 345
## 12 117 7 14 4
## 346.153839111328 350 350.399993896484 355.555541992188 356
## 13 33 12 9 5
## 357.142852783203 360 362.5 362.666656494141 363.636352539062
## 49 5 8 3 11
## 366 366.666656494141 367.5 375 375.428558349609
## 3 18 4 99 7
## 380 388.75 390.399993896484 391.666656494141 393.75
## 6 8 5 6 8
## 394.200012207031 400 404.545440673828 406.25 409.5
## 10 103 11 8 4
## 411.111114501953 412.5 413.636352539062 414.285705566406 416
## 9 8 11 21 5
## 416.666656494141 419 420 421.714294433594 425
## 51 4 11 7 6
## 428.571441650391 433.333343505859 434.333343505859 437.5 438
## 154 18 9 4 20
## 440 442.857147216797 444.444458007812 446 448
## 10 7 9 3 5
## 450 453.333343505859 454.545440673828 456.25 457.142852783203
## 64 6 11 8 7
## 458 458.333343505859 460 461.538452148438 466.666656494141
## 5 6 5 13 12
## 468 468.75 472.799987792969 472.857147216797 473.333343505859
## 5 16 10 14 6
## 474.285705566406 477.272735595703 479.333343505859 480 480.857147216797
## 7 11 6 35 7
## 483.333343505859 485.714294433594 488.888885498047 490 492.857147216797
## 15 7 9 5 7
## 493.75 497.833343505859 500 500.571441650391 506.666656494141
## 8 12 342 14 15
## 510 513.571411132812 514.285705566406 518.285705566406 519
## 5 7 7 7 10
## 520 525 530 530.400024414062 533.333312988281
## 15 23 5 5 9
## 535.714294433594 538.571411132812 540 541.666687011719 542.025024414062
## 63 7 22 12 8
## 542.857116699219 545.454528808594 547.5 550 555.555541992188
## 14 11 12 10 27
## 558 558.181823730469 558.333312988281 560 562.5
## 5 11 6 18 40
## 563.142883300781 564 566.666687011719 570.079956054688 571.428588867188
## 7 5 15 5 105
## 573.428588867188 575 577.777770996094 580 583.333312988281
## 7 8 9 12 39
## 584 585.714294433594 588.235290527344 591.666687011719 594.444458007812
## 6 7 17 6 9
## 600 602.5 602.857116699219 608.333312988281 608.571411132812
## 241 4 7 6 7
## 609.5 610 613.200012207031 613.636352539062 615.384643554688
## 8 5 10 11 13
## 616.666687011719 620 625 625.714294433594 628.571411132812
## 12 10 190 7 14
## 630 632 633.333312988281 636.363647460938 640
## 5 10 6 33 15
## 642.857116699219 643.333312988281 646 652 652.333312988281
## 35 9 6 10 6
## 656.25 656.666687011719 657 657.142883300781 658.333312988281
## 12 6 8 14 6
## 660 661.142883300781 661.538452148438 661.875 662.857116699219
## 15 7 13 8 7
## 666.666687011719 671.636352539062 675 678.571411132812 680
## 207 11 6 7 10
## 683.333312988281 685.714294433594 687.5 688.571411132812 690
## 27 21 32 7 5
## 692 692.222229003906 692.857116699219 695 700
## 5 9 7 6 20
## 700.799987792969 703 706 708.333312988281 712.5
## 5 5 9 24 8
## 712.666687011719 714.285705566406 716.666687011719 718.75 720
## 6 210 6 16 5
## 721.428588867188 721.799987792969 721.966613769531 725 725.714294433594
## 7 10 6 6 7
## 727.272705078125 728.571411132812 730 733.333312988281 734.5
## 44 7 18 30 8
## 735.714294433594 739.285705566406 740 741.666687011719 746.666687011719
## 7 7 15 12 6
## 750 755.555541992188 757.142883300781 758.333312988281 760
## 392 9 7 6 10
## 762.799987792969 764.571411132812 765 765.714294433594 766.666687011719
## 5 7 8 7 51
## 767.857116699219 768.75 770 770.833312988281 771.428588867188
## 14 8 18 12 14
## 773.333312988281 775 775.714294433594 777.777770996094 778.666687011719
## 15 44 7 45 9
## 780 785.384643554688 785.714294433594 788.400024414062 790
## 10 13 35 10 5
## 791.666687011719 795.454528808594 800 803 805.555541992188
## 24 11 174 6 9
## 807.692321777344 810 812.5 813.428588867188 817.142883300781
## 13 5 54 7 7
## 818.181823730469 821.428588867188 823 823.529418945312 825
## 22 7 10 17 20
## 827.777770996094 828.571411132812 830 831.428588867188 833.333312988281
## 9 21 5 7 210
## 835.714294433594 837.5 840 842.857116699219 843.75
## 7 4 6 14 8
## 845.454528808594 847.5 850 854.333312988281 854.545471191406
## 11 8 26 6 11
## 857.142883300781 860 861.111083984375 861.25 862.666687011719
## 189 10 9 8 6
## 863.333312988281 865 866.666687011719 871.111083984375 873.333312988281
## 6 6 33 9 15
## 875 876 880 883.333312988281 884.615356445312
## 66 15 20 9 13
## 885.666687011719 885.714294433594 888 888.888916015625 890
## 6 14 5 45 5
## 891.666687011719 892.857116699219 894.375 896.333312988281 900
## 12 14 8 6 105
## 902 903.333312988281 905.428588867188 906.333312988281 909.090881347656
## 7 6 7 6 22
## 910 912.5 912.666687011719 912.857116699219 914.285705566406
## 10 16 3 7 7
## 916.666687011719 918 920 923.076904296875 925
## 18 5 10 13 28
## 925.142883300781 927.777770996094 928.571411132812 930 931.333312988281
## 7 9 49 11 6
## 933.333312988281 935 937.5 941.666687011719 942.222229003906
## 51 10 36 12 9
## 942.857116699219 944 944.444458007812 949.571411132812 950
## 7 5 27 7 15
## 950.285705566406 958.333312988281 960 961.333312988281 963.599975585938
## 7 18 10 6 5
## 964.285705566406 966.666687011719 967.799987792969 969 969.230773925781
## 14 3 10 8 13
## 970 971.428588867188 975 975.200012207031 977.142883300781
## 10 7 17 5 7
## 977.777770996094 981.111083984375 987.5 990 996
## 9 9 8 4 5
## 996.666687011719 1000 1001 1001.66668701172 1007.14288330078
## 6 760 6 6 7
## 1008 1008.33331298828 1011.11108398438 1017.59997558594 1018.28570556641
## 5 6 9 5 7
## 1020 1024 1024.54541015625 1025 1026.31579589844
## 24 5 11 30 19
## 1028.57141113281 1030 1031.25 1031.66662597656 1033.33337402344
## 7 10 8 6 27
## 1035.71423339844 1037.5 1040 1041.66662597656 1045.45458984375
## 7 8 24 6 11
## 1050 1051.19995117188 1052.63159179688 1052.94116210938 1055.55554199219
## 24 5 19 17 9
## 1057.14282226562 1060 1062.5 1066.66662597656 1071.19995117188
## 21 10 12 15 5
## 1071.42858886719 1075 1076.92309570312 1080 1082.5
## 49 20 13 5 6
## 1083.33337402344 1085.71423339844 1086 1090 1090.90905761719
## 51 7 5 5 33
## 1092.85717773438 1093.55554199219 1093.75 1100 1101.66662597656
## 7 9 8 67 6
## 1103.33337402344 1105 1107.14282226562 1109 1111.11108398438
## 6 6 7 5 18
## 1115.38464355469 1116.66662597656 1120 1121.42858886719 1125
## 13 6 10 7 80
## 1127.59997558594 1130 1131.25 1132 1137.5
## 10 9 8 5 8
## 1138 1140 1141.42858886719 1142.85717773438 1150
## 4 18 7 112 34
## 1153.84619140625 1155.33337402344 1156.25 1157.14282226562 1162.5
## 13 6 16 7 8
## 1162.80004882812 1164.25 1166.66662597656 1168 1168.75
## 5 8 171 12 8
## 1170 1173.33337402344 1175 1178.57141113281 1180
## 5 15 14 14 15
## 1181.25 1183.33337402344 1183.57141113281 1184.21057128906 1187.5
## 8 18 14 19 20
## 1187.77783203125 1188 1188.66662597656 1190 1192.85717773438
## 9 8 3 10 14
## 1193.71423339844 1200 1205.59997558594 1207.5 1208.33337402344
## 7 270 5 10 24
## 1209.5 1210 1214.28576660156 1215.25 1217.59997558594
## 6 4 42 8 5
## 1220 1222.22216796875 1225 1225.59997558594 1226.40002441406
## 15 9 6 5 5
## 1227.27270507812 1227.66662597656 1228.57141113281 1230.76928710938 1232
## 11 6 21 26 6
## 1233.33337402344 1234.61999511719 1236.88891601562 1241.66662597656 1242
## 6 10 9 18 5
## 1243.75 1245 1245.45458984375 1246.71423339844 1249
## 8 4 22 7 6
## 1250 1250.28576660156 1250.75 1251.42858886719 1257.11108398438
## 304 7 16 14 9
## 1257.14282226562 1260 1262.5 1263.33337402344 1266.66662597656
## 7 10 12 3 30
## 1269.23071289062 1269.625 1271.33337402344 1272.72729492188 1273.52941894531
## 13 8 6 11 17
## 1275 1276.66662597656 1276.80004882812 1285.71423339844 1290.28576660156
## 8 6 15 63 7
## 1291.66662597656 1294 1294.44445800781 1297.5 1300
## 6 4 9 8 68
## 1300.80004882812 1310 1312.5 1313.33337402344 1314
## 10 4 18 9 10
## 1318 1320 1321.42858886719 1322.30773925781 1323.33337402344
## 10 15 7 13 6
## 1333.33337402344 1344 1345 1347.55554199219 1348.28576660156
## 243 2 10 9 7
## 1350 1357.14282226562 1358.33337402344 1360 1361.11108398438
## 31 14 6 10 9
## 1362.66662597656 1363.63635253906 1364 1365.59997558594 1366.66662597656
## 3 44 5 5 12
## 1373.33337402344 1375 1375.63635253906 1375.66662597656 1376
## 6 54 11 6 5
## 1380 1383 1383.33337402344 1386.66662597656 1387.5
## 5 8 6 3 8
## 1388.46154785156 1390 1390.83337402344 1392.07690429688 1393.33337402344
## 13 11 12 13 6
## 1400 1408.33337402344 1408.66662597656 1410 1412
## 213 6 6 11 7
## 1413 1414.28576660156 1416.66662597656 1418 1420
## 6 7 27 6 6
## 1421.875 1425 1427.14282226562 1428.57141113281 1428.85717773438
## 8 12 7 105 7
## 1433.33337402344 1434 1435 1436 1437.5
## 9 10 4 5 12
## 1438 1440 1442.85717773438 1444.44445800781 1446.66662597656
## 7 5 7 27 3
## 1454 1454.54541015625 1455 1455.71423339844 1457.33337402344
## 5 11 4 7 6
## 1458.2666015625 1458.33337402344 1460 1466.66662597656 1470
## 3 24 12 33 5
## 1470.57141113281 1472.22216796875 1475 1477.27270507812 1477.77783203125
## 7 9 4 11 9
## 1478.25 1481.25 1485.71423339844 1486.33337402344 1488.88891601562
## 8 8 21 6 9
## 1490.16662597656 1493.75 1500 1504 1505
## 12 8 390 5 6
## 1507.14282226562 1508.33337402344 1511 1513.33337402344 1516.5
## 7 6 6 21 4
## 1516.66662597656 1517.33337402344 1520 1522.22216796875 1525
## 6 15 5 9 4
## 1528.57141113281 1530 1531.25 1531.66662597656 1533
## 7 5 8 12 8
## 1533.33337402344 1534.88891601562 1535 1536 1540
## 24 9 4 5 6
## 1541.66662597656 1542.85717773438 1545.45458984375 1548 1550
## 24 7 22 5 26
## 1552 1552.28576660156 1552.33337402344 1555.55554199219 1561.11108398438
## 5 7 6 36 9
## 1562.5 1565.59997558594 1566.66662597656 1570 1571.11108398438
## 16 5 6 5 9
## 1571.42858886719 1573 1575 1575.55554199219 1576.92309570312
## 35 6 6 9 13
## 1580 1580.85717773438 1583.33337402344 1585.71423339844 1587.77783203125
## 6 7 27 7 18
## 1600 1600.66662597656 1600.90905761719 1607.14282226562 1607.69226074219
## 182 6 11 7 13
## 1608.33337402344 1611.11108398438 1613.75 1614 1615
## 12 9 8 4 3
## 1615.38464355469 1616.66662597656 1617.47497558594 1617.75 1620
## 13 12 8 8 5
## 1622.22216796875 1625 1625.33337402344 1629.15002441406 1632
## 9 52 6 8 5
## 1633.33337402344 1635.71423339844 1637.5 1642.85717773438 1644.33337402344
## 12 7 4 14 6
## 1645.83337402344 1650 1651.66662597656 1656 1656.66662597656
## 6 56 6 6 6
## 1660 1662 1662.5 1666.66662597656 1668
## 10 5 8 207 3
## 1672 1673.33337402344 1674 1675 1676.85717773438
## 5 9 5 4 7
## 1678.57141113281 1679 1680 1685.71423339844 1687.5
## 14 3 5 14 8
## 1688.88891601562 1690 1692.85717773438 1694.28576660156 1698.18176269531
## 9 5 7 7 11
## 1700 1700.80004882812 1704 1708.33337402344 1712.5
## 30 5 5 6 8
## 1714.28576660156 1716.66662597656 1718.75 1721.42858886719 1722
## 42 6 4 7 5
## 1722.16662597656 1725 1727.27270507812 1728.5 1728.57141113281
## 6 20 11 4 7
## 1730 1733.33337402344 1735 1740 1742.57141113281
## 19 24 5 15 7
## 1742.85717773438 1750 1751.42858886719 1752 1760
## 7 117 7 8 5
## 1762.5 1769.19995117188 1769.23071289062 1770.83337402344 1777.77783203125
## 16 10 13 6 27
## 1779.40002441406 1781 1784.66662597656 1785.71423339844 1791
## 10 5 6 21 5
## 1791.66662597656 1792 1793.75 1798 1800
## 6 6 8 5 95
## 1801.44006347656 1807.14282226562 1808.33337402344 1811.42858886719 1812.5
## 5 7 6 7 24
## 1813.75 1816.66662597656 1818.18176269531 1818.5 1819
## 8 6 22 4 5
## 1821.33337402344 1825 1833.33337402344 1839.59997558594 1840
## 12 4 24 5 20
## 1841.66662597656 1843.75 1844 1846.15380859375 1849.33337402344
## 12 8 5 13 6
## 1850 1857.14282226562 1866.66662597656 1868.53344726562 1871.42858886719
## 22 28 6 9 14
## 1875 1876 1876.66662597656 1883.33337402344 1899
## 74 5 6 6 6
## 1900 1903.33337402344 1909.09094238281 1914.28576660156 1916.66662597656
## 24 3 11 7 12
## 1918 1920 1930 1933.33337402344 1933.46154785156
## 12 10 4 6 13
## 1934.88891601562 1937.5 1940 1944.44445800781 1946.85717773438
## 9 4 5 9 7
## 1947.59997558594 1947.77783203125 1950 1953 1953.5
## 5 9 14 6 4
## 1955 1958.33337402344 1960 1968.75 1973.61535644531
## 6 12 5 16 13
## 1976.59997558594 1977.59997558594 1979.16662597656 1983.33337402344 1985.71423339844
## 4 5 12 12 7
## 1989.57141113281 1990 2000 2000.66662597656 2003.33337402344
## 7 4 324 6 3
## 2020 2025.59997558594 2027.14282226562 2028.57141113281 2033.33337402344
## 10 10 7 14 6
## 2038 2039 2041.66662597656 2050 2057.14282226562
## 5 6 6 10 7
## 2058.33325195312 2058.66674804688 2062.5 2062.80004882812 2071.25
## 12 6 8 5 4
## 2071.42846679688 2072 2075.60009765625 2077.66674804688 2080
## 14 6 5 6 10
## 2083.33325195312 2088.88891601562 2090 2093.75 2095
## 9 9 13 8 8
## 2100 2100.80004882812 2102 2105.5 2106
## 32 5 6 12 5
## 2111.11108398438 2111.25 2112.5 2114.28564453125 2115.38452148438
## 9 8 4 7 13
## 2116.66674804688 2125 2133.33325195312 2134 2137.5
## 6 12 6 5 8
## 2140 2142.85717773438 2150 2157 2164
## 5 28 18 4 5
## 2166.66674804688 2174.28564453125 2175 2177.14282226562 2180
## 18 7 8 14 5
## 2187.5 2192.85717773438 2200 2205 2208.33325195312
## 12 7 35 6 12
## 2222.22216796875 2225 2230 2230.76928710938 2231.33325195312
## 18 6 5 13 6
## 2233.33325195312 2240 2241.66674804688 2250 2254
## 15 5 12 34 5
## [ reached getOption("max.print") -- omitted 267 entries ]
## [1] "Frequency table after encoding"
## HHincome_percapita. Household Income Per Capita
## 0 0.28571429848671 1.71428573131561 5.7142858505249 6.76923084259033
## 19490 7 7 7 26
## 7.33333349227905 8.33333301544189 8.5714282989502 8.80000019073486 9.77777767181396
## 12 6 7 10 9
## 10 11 12 12.5 12.5714282989502
## 21 32 5 26 42
## 13.3333330154419 14.2857141494751 14.6666669845581 15 15.384614944458
## 15 7 6 10 13
## 16.25 16.6666660308838 17.6000003814697 18.75 20
## 8 18 20 16 26
## 20.8333339691162 21.4285717010498 22 25 25.1428565979004
## 12 14 32 38 35
## 27.7777786254883 28.5714282989502 29.3333339691162 30 31.25
## 9 28 36 10 16
## 32 33.3333320617676 35 35.2000007629395 37.5
## 5 30 10 20 8
## 37.7142868041992 40 41.6666679382324 42.8571434020996 43.75
## 7 25 6 7 8
## 44 50 50.2857131958008 51.3333320617676 54.75
## 6 48 7 12 4
## 55.5555572509766 58.6666679382324 60 61.1111106872559 62.5
## 9 3 5 9 16
## 65.3333358764648 66 66.6666641235352 68.1818161010742 68.4444427490234
## 9 4 30 11 9
## 71.4285736083984 72 72.8000030517578 75 76.6666641235352
## 28 5 5 20 6
## 76.9230804443359 80 81.25 83.3333358764648 85.7142868041992
## 26 15 8 36 14
## 87.5 88 90 90.9090881347656 93.75
## 8 5 10 22 16
## 98.1818161010742 100 102.5 105 107.142860412598
## 11 73 8 10 7
## 111.111114501953 112.777778625488 115 116.666664123535 118.333335876465
## 9 9 8 6 6
## 120 121.428573608398 123.75 125 128.571426391602
## 20 7 4 54 14
## 131.25 131.399993896484 133.33332824707 137.66667175293 138.888885498047
## 8 5 24 6 9
## 140 142 142.857147216797 146 147
## 15 5 91 6 4
## 150 157.142852783203 160 164.25 165
## 62 7 33 8 5
## 166.66667175293 168 169.230773925781 170 173.777770996094
## 78 7 13 8 9
## 175 176 178.571426391602 180 187.5
## 10 5 7 15 68
## 197.777770996094 200 204.399993896484 208.33332824707 208.75
## 9 117 9 12 8
## 210 214.285720825195 218.75 220 222.222229003906
## 19 91 8 5 18
## 225 228.125 233.33332824707 236.66667175293 238.888885498047
## 10 8 6 6 9
## 240 244.444442749023 245.454544067383 250 253.33332824707
## 15 9 11 176 6
## 255.33332824707 256.333343505859 257.142852783203 260 262.5
## 9 6 7 5 16
## 266.666656494141 268.333343505859 270 272.727264404297 275
## 27 6 4 33 4
## 277.777770996094 280 281.25 282 282.769226074219
## 27 5 32 5 13
## 283.333343505859 285.714294433594 287.5 288 290
## 6 49 4 5 5
## 291.666656494141 292 300 300.75 307.692321777344
## 6 18 103 4 26
## 311.111114501953 312.5 314.285705566406 320 321.428558349609
## 9 44 7 24 14
## 324 325 326.086944580078 327.777770996094 328.571441650391
## 5 20 23 9 7
## 329.166656494141 333.333343505859 340 342.857147216797 345
## 12 117 7 14 4
## 346.153839111328 350 350.399993896484 355.555541992188 356
## 13 33 12 9 5
## 357.142852783203 360 362.5 362.666656494141 363.636352539062
## 49 5 8 3 11
## 366 366.666656494141 367.5 375 375.428558349609
## 3 18 4 99 7
## 380 388.75 390.399993896484 391.666656494141 393.75
## 6 8 5 6 8
## 394.200012207031 400 404.545440673828 406.25 409.5
## 10 103 11 8 4
## 411.111114501953 412.5 413.636352539062 414.285705566406 416
## 9 8 11 21 5
## 416.666656494141 419 420 421.714294433594 425
## 51 4 11 7 6
## 428.571441650391 433.333343505859 434.333343505859 437.5 438
## 154 18 9 4 20
## 440 442.857147216797 444.444458007812 446 448
## 10 7 9 3 5
## 450 453.333343505859 454.545440673828 456.25 457.142852783203
## 64 6 11 8 7
## 458 458.333343505859 460 461.538452148438 466.666656494141
## 5 6 5 13 12
## 468 468.75 472.799987792969 472.857147216797 473.333343505859
## 5 16 10 14 6
## 474.285705566406 477.272735595703 479.333343505859 480 480.857147216797
## 7 11 6 35 7
## 483.333343505859 485.714294433594 488.888885498047 490 492.857147216797
## 15 7 9 5 7
## 493.75 497.833343505859 500 500.571441650391 506.666656494141
## 8 12 342 14 15
## 510 513.571411132812 514.285705566406 518.285705566406 519
## 5 7 7 7 10
## 520 525 530 530.400024414062 533.333312988281
## 15 23 5 5 9
## 535.714294433594 538.571411132812 540 541.666687011719 542.025024414062
## 63 7 22 12 8
## 542.857116699219 545.454528808594 547.5 550 555.555541992188
## 14 11 12 10 27
## 558 558.181823730469 558.333312988281 560 562.5
## 5 11 6 18 40
## 563.142883300781 564 566.666687011719 570.079956054688 571.428588867188
## 7 5 15 5 105
## 573.428588867188 575 577.777770996094 580 583.333312988281
## 7 8 9 12 39
## 584 585.714294433594 588.235290527344 591.666687011719 594.444458007812
## 6 7 17 6 9
## 600 602.5 602.857116699219 608.333312988281 608.571411132812
## 241 4 7 6 7
## 609.5 610 613.200012207031 613.636352539062 615.384643554688
## 8 5 10 11 13
## 616.666687011719 620 625 625.714294433594 628.571411132812
## 12 10 190 7 14
## 630 632 633.333312988281 636.363647460938 640
## 5 10 6 33 15
## 642.857116699219 643.333312988281 646 652 652.333312988281
## 35 9 6 10 6
## 656.25 656.666687011719 657 657.142883300781 658.333312988281
## 12 6 8 14 6
## 660 661.142883300781 661.538452148438 661.875 662.857116699219
## 15 7 13 8 7
## 666.666687011719 671.636352539062 675 678.571411132812 680
## 207 11 6 7 10
## 683.333312988281 685.714294433594 687.5 688.571411132812 690
## 27 21 32 7 5
## 692 692.222229003906 692.857116699219 695 700
## 5 9 7 6 20
## 700.799987792969 703 706 708.333312988281 712.5
## 5 5 9 24 8
## 712.666687011719 714.285705566406 716.666687011719 718.75 720
## 6 210 6 16 5
## 721.428588867188 721.799987792969 721.966613769531 725 725.714294433594
## 7 10 6 6 7
## 727.272705078125 728.571411132812 730 733.333312988281 734.5
## 44 7 18 30 8
## 735.714294433594 739.285705566406 740 741.666687011719 746.666687011719
## 7 7 15 12 6
## 750 755.555541992188 757.142883300781 758.333312988281 760
## 392 9 7 6 10
## 762.799987792969 764.571411132812 765 765.714294433594 766.666687011719
## 5 7 8 7 51
## 767.857116699219 768.75 770 770.833312988281 771.428588867188
## 14 8 18 12 14
## 773.333312988281 775 775.714294433594 777.777770996094 778.666687011719
## 15 44 7 45 9
## 780 785.384643554688 785.714294433594 788.400024414062 790
## 10 13 35 10 5
## 791.666687011719 795.454528808594 800 803 805.555541992188
## 24 11 174 6 9
## 807.692321777344 810 812.5 813.428588867188 817.142883300781
## 13 5 54 7 7
## 818.181823730469 821.428588867188 823 823.529418945312 825
## 22 7 10 17 20
## 827.777770996094 828.571411132812 830 831.428588867188 833.333312988281
## 9 21 5 7 210
## 835.714294433594 837.5 840 842.857116699219 843.75
## 7 4 6 14 8
## 845.454528808594 847.5 850 854.333312988281 854.545471191406
## 11 8 26 6 11
## 857.142883300781 860 861.111083984375 861.25 862.666687011719
## 189 10 9 8 6
## 863.333312988281 865 866.666687011719 871.111083984375 873.333312988281
## 6 6 33 9 15
## 875 876 880 883.333312988281 884.615356445312
## 66 15 20 9 13
## 885.666687011719 885.714294433594 888 888.888916015625 890
## 6 14 5 45 5
## 891.666687011719 892.857116699219 894.375 896.333312988281 900
## 12 14 8 6 105
## 902 903.333312988281 905.428588867188 906.333312988281 909.090881347656
## 7 6 7 6 22
## 910 912.5 912.666687011719 912.857116699219 914.285705566406
## 10 16 3 7 7
## 916.666687011719 918 920 923.076904296875 925
## 18 5 10 13 28
## 925.142883300781 927.777770996094 928.571411132812 930 931.333312988281
## 7 9 49 11 6
## 933.333312988281 935 937.5 941.666687011719 942.222229003906
## 51 10 36 12 9
## 942.857116699219 944 944.444458007812 949.571411132812 950
## 7 5 27 7 15
## 950.285705566406 958.333312988281 960 961.333312988281 963.599975585938
## 7 18 10 6 5
## 964.285705566406 966.666687011719 967.799987792969 969 969.230773925781
## 14 3 10 8 13
## 970 971.428588867188 975 975.200012207031 977.142883300781
## 10 7 17 5 7
## 977.777770996094 981.111083984375 987.5 990 996
## 9 9 8 4 5
## 996.666687011719 1000 1001 1001.66668701172 1007.14288330078
## 6 760 6 6 7
## 1008 1008.33331298828 1011.11108398438 1017.59997558594 1018.28570556641
## 5 6 9 5 7
## 1020 1024 1024.54541015625 1025 1026.31579589844
## 24 5 11 30 19
## 1028.57141113281 1030 1031.25 1031.66662597656 1033.33337402344
## 7 10 8 6 27
## 1035.71423339844 1037.5 1040 1041.66662597656 1045.45458984375
## 7 8 24 6 11
## 1050 1051.19995117188 1052.63159179688 1052.94116210938 1055.55554199219
## 24 5 19 17 9
## 1057.14282226562 1060 1062.5 1066.66662597656 1071.19995117188
## 21 10 12 15 5
## 1071.42858886719 1075 1076.92309570312 1080 1082.5
## 49 20 13 5 6
## 1083.33337402344 1085.71423339844 1086 1090 1090.90905761719
## 51 7 5 5 33
## 1092.85717773438 1093.55554199219 1093.75 1100 1101.66662597656
## 7 9 8 67 6
## 1103.33337402344 1105 1107.14282226562 1109 1111.11108398438
## 6 6 7 5 18
## 1115.38464355469 1116.66662597656 1120 1121.42858886719 1125
## 13 6 10 7 80
## 1127.59997558594 1130 1131.25 1132 1137.5
## 10 9 8 5 8
## 1138 1140 1141.42858886719 1142.85717773438 1150
## 4 18 7 112 34
## 1153.84619140625 1155.33337402344 1156.25 1157.14282226562 1162.5
## 13 6 16 7 8
## 1162.80004882812 1164.25 1166.66662597656 1168 1168.75
## 5 8 171 12 8
## 1170 1173.33337402344 1175 1178.57141113281 1180
## 5 15 14 14 15
## 1181.25 1183.33337402344 1183.57141113281 1184.21057128906 1187.5
## 8 18 14 19 20
## 1187.77783203125 1188 1188.66662597656 1190 1192.85717773438
## 9 8 3 10 14
## 1193.71423339844 1200 1205.59997558594 1207.5 1208.33337402344
## 7 270 5 10 24
## 1209.5 1210 1214.28576660156 1215.25 1217.59997558594
## 6 4 42 8 5
## 1220 1222.22216796875 1225 1225.59997558594 1226.40002441406
## 15 9 6 5 5
## 1227.27270507812 1227.66662597656 1228.57141113281 1230.76928710938 1232
## 11 6 21 26 6
## 1233.33337402344 1234.61999511719 1236.88891601562 1241.66662597656 1242
## 6 10 9 18 5
## 1243.75 1245 1245.45458984375 1246.71423339844 1249
## 8 4 22 7 6
## 1250 1250.28576660156 1250.75 1251.42858886719 1257.11108398438
## 304 7 16 14 9
## 1257.14282226562 1260 1262.5 1263.33337402344 1266.66662597656
## 7 10 12 3 30
## 1269.23071289062 1269.625 1271.33337402344 1272.72729492188 1273.52941894531
## 13 8 6 11 17
## 1275 1276.66662597656 1276.80004882812 1285.71423339844 1290.28576660156
## 8 6 15 63 7
## 1291.66662597656 1294 1294.44445800781 1297.5 1300
## 6 4 9 8 68
## 1300.80004882812 1310 1312.5 1313.33337402344 1314
## 10 4 18 9 10
## 1318 1320 1321.42858886719 1322.30773925781 1323.33337402344
## 10 15 7 13 6
## 1333.33337402344 1344 1345 1347.55554199219 1348.28576660156
## 243 2 10 9 7
## 1350 1357.14282226562 1358.33337402344 1360 1361.11108398438
## 31 14 6 10 9
## 1362.66662597656 1363.63635253906 1364 1365.59997558594 1366.66662597656
## 3 44 5 5 12
## 1373.33337402344 1375 1375.63635253906 1375.66662597656 1376
## 6 54 11 6 5
## 1380 1383 1383.33337402344 1386.66662597656 1387.5
## 5 8 6 3 8
## 1388.46154785156 1390 1390.83337402344 1392.07690429688 1393.33337402344
## 13 11 12 13 6
## 1400 1408.33337402344 1408.66662597656 1410 1412
## 213 6 6 11 7
## 1413 1414.28576660156 1416.66662597656 1418 1420
## 6 7 27 6 6
## 1421.875 1425 1427.14282226562 1428.57141113281 1428.85717773438
## 8 12 7 105 7
## 1433.33337402344 1434 1435 1436 1437.5
## 9 10 4 5 12
## 1438 1440 1442.85717773438 1444.44445800781 1446.66662597656
## 7 5 7 27 3
## 1454 1454.54541015625 1455 1455.71423339844 1457.33337402344
## 5 11 4 7 6
## 1458.2666015625 1458.33337402344 1460 1466.66662597656 1470
## 3 24 12 33 5
## 1470.57141113281 1472.22216796875 1475 1477.27270507812 1477.77783203125
## 7 9 4 11 9
## 1478.25 1481.25 1485.71423339844 1486.33337402344 1488.88891601562
## 8 8 21 6 9
## 1490.16662597656 1493.75 1500 1504 1505
## 12 8 390 5 6
## 1507.14282226562 1508.33337402344 1511 1513.33337402344 1516.5
## 7 6 6 21 4
## 1516.66662597656 1517.33337402344 1520 1522.22216796875 1525
## 6 15 5 9 4
## 1528.57141113281 1530 1531.25 1531.66662597656 1533
## 7 5 8 12 8
## 1533.33337402344 1534.88891601562 1535 1536 1540
## 24 9 4 5 6
## 1541.66662597656 1542.85717773438 1545.45458984375 1548 1550
## 24 7 22 5 26
## 1552 1552.28576660156 1552.33337402344 1555.55554199219 1561.11108398438
## 5 7 6 36 9
## 1562.5 1565.59997558594 1566.66662597656 1570 1571.11108398438
## 16 5 6 5 9
## 1571.42858886719 1573 1575 1575.55554199219 1576.92309570312
## 35 6 6 9 13
## 1580 1580.85717773438 1583.33337402344 1585.71423339844 1587.77783203125
## 6 7 27 7 18
## 1600 1600.66662597656 1600.90905761719 1607.14282226562 1607.69226074219
## 182 6 11 7 13
## 1608.33337402344 1611.11108398438 1613.75 1614 1615
## 12 9 8 4 3
## 1615.38464355469 1616.66662597656 1617.47497558594 1617.75 1620
## 13 12 8 8 5
## 1622.22216796875 1625 1625.33337402344 1629.15002441406 1632
## 9 52 6 8 5
## 1633.33337402344 1635.71423339844 1637.5 1642.85717773438 1644.33337402344
## 12 7 4 14 6
## 1645.83337402344 1650 1651.66662597656 1656 1656.66662597656
## 6 56 6 6 6
## 1660 1662 1662.5 1666.66662597656 1668
## 10 5 8 207 3
## 1672 1673.33337402344 1674 1675 1676.85717773438
## 5 9 5 4 7
## 1678.57141113281 1679 1680 1685.71423339844 1687.5
## 14 3 5 14 8
## 1688.88891601562 1690 1692.85717773438 1694.28576660156 1698.18176269531
## 9 5 7 7 11
## 1700 1700.80004882812 1704 1708.33337402344 1712.5
## 30 5 5 6 8
## 1714.28576660156 1716.66662597656 1718.75 1721.42858886719 1722
## 42 6 4 7 5
## 1722.16662597656 1725 1727.27270507812 1728.5 1728.57141113281
## 6 20 11 4 7
## 1730 1733.33337402344 1735 1740 1742.57141113281
## 19 24 5 15 7
## 1742.85717773438 1750 1751.42858886719 1752 1760
## 7 117 7 8 5
## 1762.5 1769.19995117188 1769.23071289062 1770.83337402344 1777.77783203125
## 16 10 13 6 27
## 1779.40002441406 1781 1784.66662597656 1785.71423339844 1791
## 10 5 6 21 5
## 1791.66662597656 1792 1793.75 1798 1800
## 6 6 8 5 95
## 1801.44006347656 1807.14282226562 1808.33337402344 1811.42858886719 1812.5
## 5 7 6 7 24
## 1813.75 1816.66662597656 1818.18176269531 1818.5 1819
## 8 6 22 4 5
## 1821.33337402344 1825 1833.33337402344 1839.59997558594 1840
## 12 4 24 5 20
## 1841.66662597656 1843.75 1844 1846.15380859375 1849.33337402344
## 12 8 5 13 6
## 1850 1857.14282226562 1866.66662597656 1868.53344726562 1871.42858886719
## 22 28 6 9 14
## 1875 1876 1876.66662597656 1883.33337402344 1899
## 74 5 6 6 6
## 1900 1903.33337402344 1909.09094238281 1914.28576660156 1916.66662597656
## 24 3 11 7 12
## 1918 1920 1930 1933.33337402344 1933.46154785156
## 12 10 4 6 13
## 1934.88891601562 1937.5 1940 1944.44445800781 1946.85717773438
## 9 4 5 9 7
## 1947.59997558594 1947.77783203125 1950 1953 1953.5
## 5 9 14 6 4
## 1955 1958.33337402344 1960 1968.75 1973.61535644531
## 6 12 5 16 13
## 1976.59997558594 1977.59997558594 1979.16662597656 1983.33337402344 1985.71423339844
## 4 5 12 12 7
## 1989.57141113281 1990 2000 2000.66662597656 2003.33337402344
## 7 4 324 6 3
## 2020 2025.59997558594 2027.14282226562 2028.57141113281 2033.33337402344
## 10 10 7 14 6
## 2038 2039 2041.66662597656 2050 2057.14282226562
## 5 6 6 10 7
## 2058.33325195312 2058.66674804688 2062.5 2062.80004882812 2071.25
## 12 6 8 5 4
## 2071.42846679688 2072 2075.60009765625 2077.66674804688 2080
## 14 6 5 6 10
## 2083.33325195312 2088.88891601562 2090 2093.75 2095
## 9 9 13 8 8
## 2100 2100.80004882812 2102 2105.5 2106
## 32 5 6 12 5
## 2111.11108398438 2111.25 2112.5 2114.28564453125 2115.38452148438
## 9 8 4 7 13
## 2116.66674804688 2125 2133.33325195312 2134 2137.5
## 6 12 6 5 8
## 2140 2142.85717773438 2150 2157 2164
## 5 28 18 4 5
## 2166.66674804688 2174.28564453125 2175 2177.14282226562 2180
## 18 7 8 14 5
## 2187.5 2192.85717773438 2200 2205 2208.33325195312
## 12 7 35 6 12
## 2222.22216796875 2225 2230 2230.76928710938 2231.33325195312
## 18 6 5 13 6
## 2233.33325195312 2240 2241.66674804688 2250 2254
## 15 5 12 34 5
## [ reached getOption("max.print") -- omitted 237 entries ]
# Top-code household size at 20: per the tables below, the values 23 and 30
# (53 records in total) are released only as "20 or more".
mydata <- top_recode(
  variable = "HHsize",
  break_point = 20,
  missing = NA
)
## [1] "Frequency table before encoding"
## HHsize. Number Of Members In The Household
## 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 70 843 3244 7165 8496 6601 4496 2547 2050 1496 1236 836 574 720 80 68 36 57
## 23 30
## 23 30
## [1] "Frequency table after encoding"
## HHsize. Number Of Members In The Household
## 2 3 4 5 6 7 8 9
## 70 843 3244 7165 8496 6601 4496 2547
## 10 11 12 13 14 15 16 17
## 2050 1496 1236 836 574 720 80 68
## 18 19 20 or more
## 36 57 53
# Top-code the baseline household-member count at 20, mirroring HHsize: the
# values 23 and 30 (53 records) become "20 or more"; <NA> rows are unchanged.
mydata <- top_recode(
  variable = "b_numfamily",
  break_point = 20,
  missing = NA
)
## [1] "Frequency table before encoding"
## b_numfamily. Number Of Household Members
## 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 40 390 1612 3745 4548 3521 2376 1458 1160 781 708 533 308 390 80
## 17 18 19 23 30 <NA>
## 68 51 57 23 30 18789
## [1] "Frequency table after encoding"
## b_numfamily. Number Of Household Members
## 2 3 4 5 6 7 8 9
## 40 390 1612 3745 4548 3521 2376 1458
## 10 11 12 13 14 15 16 17
## 1160 781 708 533 308 390 80 68
## 18 19 20 or more <NA>
## 51 57 53 18789
# percentile_checker("b_advamt", missing = 88888)
# Top-code the migration advance amount at 10000. 88888 is passed as the
# missing code; in the recoded table it remains its own category and is not
# folded into "10000 or more".
mydata <- top_recode(
  variable = "b_advamt",
  break_point = 10000,
  missing = 88888
)
## [1] "Frequency table before encoding"
## b_advamt. Amt. Advance For Migration
## 200 300 500 1000 1100 1200 1800 2000 3000 4000 5000 6000 7000 8000 9000
## 1 1 6 9 1 1 1 8 6 6 14 3 3 6 1
## 10000 12000 15000 80000 88888 <NA>
## 11 1 2 1 2 40584
## [1] "Frequency table after encoding"
## b_advamt. Amt. Advance For Migration
## 200 300 500 1000 1100 1200
## 1 1 6 9 1 1
## 1800 2000 3000 4000 5000 6000
## 1 8 6 6 14 3
## 7000 8000 9000 10000 or more 88888 <NA>
## 3 6 1 15 2 40584
# percentile_checker("e_cellpone")
# Top-code the number of cellphones at 10: the sparse values from 10 up to 150
# (70 records) are released as "10 or more".
mydata <- top_recode(
  variable = "e_cellpone",
  break_point = 10,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_cellpone. # Of Cellphone
## 0 1 2 3 4 5 6 7 10 11 20 31 50 84 100
## 1466 11522 4057 1091 404 121 43 15 13 7 7 10 7 13 8
## 150 <NA>
## 5 21879
## [1] "Frequency table after encoding"
## e_cellpone. # Of Cellphone
## 0 1 2 3 4 5 6 7
## 1466 11522 4057 1091 404 121 43 15
## 10 or more <NA>
## 70 21879
# Top-code the number of computers at 2 ("2 or more" = 47 records).
# NOTE(review): 41 records carry the value 8 against only 6 with the value 2.
# Confirm that 8 is a genuine count and not a non-response code before it is
# folded into the top category.
mydata <- top_recode(
  variable = "e_computer",
  break_point = 2,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_computer. # Of Computer
## 0 1 2 8 <NA>
## 18682 60 6 41 21879
## [1] "Frequency table after encoding"
## e_computer. # Of Computer
## 0 1 2 or more <NA>
## 18682 60 47 21879
# Top-code the number of bicycles at 2: values 2, 3, 4, 8 and 10 (317 records)
# are released as "2 or more".
mydata <- top_recode(
  variable = "e_bicycle",
  break_point = 2,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_bicycle. # Of Bicycle
## 0 1 2 3 4 8 10 <NA>
## 11167 7305 258 26 15 12 6 21879
## [1] "Frequency table after encoding"
## e_bicycle. # Of Bicycle
## 0 1 2 or more <NA>
## 11167 7305 317 21879
# Top-code the number of motorcycles/mopeds/scooters at 2: values 2, 4 and 8
# (243 records) are released as "2 or more".
mydata <- top_recode(
  variable = "e_motorcycle",
  break_point = 2,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_motorcycle. # Of Motorcycle/Moped/Scooter
## 0 1 2 4 8 <NA>
## 14119 4427 218 14 11 21879
## [1] "Frequency table after encoding"
## e_motorcycle. # Of Motorcycle/Moped/Scooter
## 0 1 2 or more <NA>
## 14119 4427 243 21879
# Top-code the number of cars/jeeps at 1, which leaves only "0" versus
# "1 or more" (188 records) in the released variable.
mydata <- top_recode(
  variable = "e_car",
  break_point = 1,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_car. # Of Car/Jeep
## 0 1 3 8 <NA>
## 18601 168 14 6 21879
## [1] "Frequency table after encoding"
## e_car. # Of Car/Jeep
## 0 1 or more <NA>
## 18601 188 21879
# Top-code the number of TVs at 2: values 2, 3 and 8 (64 records) are released
# as "2 or more".
mydata <- top_recode(
  variable = "e_tv",
  break_point = 2,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_tv. # Of Tv
## 0 1 2 3 8 <NA>
## 16492 2233 45 14 5 21879
## [1] "Frequency table after encoding"
## e_tv. # Of Tv
## 0 1 2 or more <NA>
## 16492 2233 64 21879
# Top-code the number of cows at 50: values 51, 62 and 200 (20 records) are
# released as "50 or more"; everything up to 40 keeps its exact value.
mydata <- top_recode(
  variable = "e_cow",
  break_point = 50,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_cow. # Of Cows
## 0 1 2 3 4 5 6 7 8 9 10 12 14 16 20
## 8908 4321 3312 1016 573 282 147 64 43 5 21 44 6 14 7
## 40 51 62 200 <NA>
## 6 4 9 7 21879
## [1] "Frequency table after encoding"
## e_cow. # Of Cows
## 0 1 2 3 4 5 6 7
## 8908 4321 3312 1016 573 282 147 64
## 8 9 10 12 14 16 20 40
## 43 5 21 44 6 14 7 6
## 50 or more <NA>
## 20 21879
# Top-code the number of bullocks at 10: values 12 and 20 (13 records) are
# released as "10 or more".
mydata <- top_recode(
  variable = "e_bullocks",
  break_point = 10,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_bullocks. # Of Bullocks
## 0 1 2 3 4 5 6 8 12 20 <NA>
## 9157 1519 7086 526 299 85 43 61 3 10 21879
## [1] "Frequency table after encoding"
## e_bullocks. # Of Bullocks
## 0 1 2 3 4 5 6 8
## 9157 1519 7086 526 299 85 43 61
## 10 or more <NA>
## 13 21879
# Top-code the number of buffalo at 10: values 10, 12 and 15 (19 records) are
# released as "10 or more".
# NOTE(review): the value 8 has 79 records while 7 has only 8; confirm that 8
# is a genuine count and not a non-response code (it is released as-is).
mydata <- top_recode(
  variable = "e_buffalo",
  break_point = 10,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_buffalo. # Of Buffalo
## 0 1 2 3 4 5 6 7 8 10 12 15 <NA>
## 17601 489 377 110 60 16 30 8 79 6 6 7 21879
## [1] "Frequency table after encoding"
## e_buffalo. # Of Buffalo
## 0 1 2 3 4 5 6 7
## 17601 489 377 110 60 16 30 8
## 8 10 or more <NA>
## 79 19 21879
# Top-code the number of sheep and goats at 10: values from 10 up to 28
# (99 records) are released as "10 or more".
mydata <- top_recode(
  variable = "e_sheep",
  break_point = 10,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_sheep. # Of Sheep And Goats
## 0 1 2 3 4 5 6 7 8 9 10 12 13 15 17
## 4568 4712 4797 1787 1423 791 321 142 78 71 41 24 5 15 6
## 28 <NA>
## 8 21879
## [1] "Frequency table after encoding"
## e_sheep. # Of Sheep And Goats
## 0 1 2 3 4 5 6 7
## 4568 4712 4797 1787 1423 791 321 142
## 8 9 10 or more <NA>
## 78 71 99 21879
# NOTE(review): a second, identical top_recode() call for "e_sheep" was removed
# here. It re-applied the break point of 10 to the already top-coded variable,
# and its "before" and "after" frequency tables were identical, so it changed
# nothing. If a different livestock variable was meant to be top-coded at this
# point, add that call here.
# Top-code the number of chickens at 20: values 20 and 22 (21 records) are
# released as "20 or more".
mydata <- top_recode(
  variable = "e_chicken",
  break_point = 20,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_chicken. # Of Chickens
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 15
## 15162 805 992 330 285 546 209 65 117 40 131 6 37 11 32
## 20 22 <NA>
## 9 12 21879
## [1] "Frequency table after encoding"
## e_chicken. # Of Chickens
## 0 1 2 3 4 5 6 7
## 15162 805 992 330 285 546 209 65
## 8 9 10 11 12 13 15 20 or more
## 117 40 131 6 37 11 32 21
## <NA>
## 21879
# Remove the columns that single out small religious minorities.
dropvars <- c("b_rel3", "e_rel3", "e_rel4")
mydata <- mydata[!(names(mydata) %in% dropvars)]

# Collapse religion of household head into four groups. Per the cross-tab
# printed below, the break points merge original codes 3 and 6 into "Other"
# and leave code 8 as "Unsure".
break_rel <- c(1, 2, 3, 8)
labels_rel <- c(Hinduism = 1, Islam = 2, Other = 3, Unsure = 4)
mydata <- ordinal_recode(
  variable = "rel",
  break_points = break_rel,
  missing = 999999,
  value_labels = labels_rel
)
## [1] "Frequency table before encoding"
## rel. Religion Of Household Head
## Hinduism Islam
## 35316 5234
## Christianity Jainism/Parsee/Jewish or Other
## 68 35
## Unsure <NA>
## 9 6
## recoded
## [1,2) [2,3) [3,8) [8,1e+06)
## 1 35316 0 0 0
## 2 0 5234 0 0
## 3 0 0 68 0
## 6 0 0 35 0
## 8 0 0 0 9
## [1] "Frequency table after encoding"
## rel. Religion Of Household Head
## Hinduism Islam Other Unsure <NA>
## 35316 5234 103 9 6
## [1] "Inspect value labels and relabel as necessary"
## Hinduism Islam Other Unsure
## 1 2 3 4
# Collapse baseline marital status: per the cross-tab printed below, original
# codes 4, 5 and 6 (Widowed, Divorced, Separated) are merged into one category.
# break_mar and labels_mar are reused for the endline variable further down.
break_mar <- c(1, 2, 3, 4)
labels_mar <- c(
  "Never Married" = 1,
  "Currently married" = 2,
  "Currently married but no guana" = 3,
  "Widowed/Divorced/Separated" = 4
)
mydata <- ordinal_recode(
  variable = "b_marstat",
  break_points = break_mar,
  missing = 999999,
  value_labels = labels_mar
)
## [1] "Frequency table before encoding"
## b_marstat. Marital Status
## Never married Currently married
## 11615 9248
## Currently married but no guana Widowed
## 273 719
## Divorced Separated
## 8 16
## <NA>
## 18789
## recoded
## [1,2) [2,3) [3,4) [4,1e+06)
## 1 11615 0 0 0
## 2 0 9248 0 0
## 3 0 0 273 0
## 4 0 0 0 719
## 5 0 0 0 8
## 6 0 0 0 16
## [1] "Frequency table after encoding"
## b_marstat. Marital Status
## Never Married Currently married
## 11615 9248
## Currently married but no guana Widowed/Divorced/Separated
## 273 743
## <NA>
## 18789
## [1] "Inspect value labels and relabel as necessary"
## Never Married Currently married
## 1 2
## Currently married but no guana Widowed/Divorced/Separated
## 3 4
# Collapse endline marital status with the same breaks and labels used for
# b_marstat. The result must be assigned back to `mydata`: the sdcMicro object
# and every later step in this script read `mydata`, so storing the recode in
# a separate object (previously `mydata2`) left the small Divorced/Separated
# cells of e_marstat in the data being released.
# NOTE(review): confirm nothing further down relies on a `mydata2` object.
mydata <- ordinal_recode(
  variable = "e_marstat",
  break_points = break_mar,
  missing = 999999,
  value_labels = labels_mar
)
## [1] "Frequency table before encoding"
## e_marstat. Marital Status
## Never married Currently married
## 9851 8141
## Currently married but no guana Widowed
## 171 610
## Divorced Separated
## 6 10
## <NA>
## 21879
## recoded
## [1,2) [2,3) [3,4) [4,1e+06)
## 1 9851 0 0 0
## 2 0 8141 0 0
## 3 0 0 171 0
## 4 0 0 0 610
## 5 0 0 0 6
## 6 0 0 0 10
## [1] "Frequency table after encoding"
## e_marstat. Marital Status
## Never Married Currently married
## 9851 8141
## Currently married but no guana Widowed/Divorced/Separated
## 171 626
## <NA>
## 21879
## [1] "Inspect value labels and relabel as necessary"
## Never Married Currently married
## 1 2
## Currently married but no guana Widowed/Divorced/Separated
## 3 4
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# The vector lists the interviewer Q4 items 2-8, four single endline
# variables, and the impairment variable with its eight numbered flags.
indirect_PII <- c(
  paste0("b_Interviewer_Q4_", 2:8),
  "e_nenroll_masked",
  "e_seas_2",
  "e_natl_masked",
  "e_disab",
  "e_impairment",
  paste0("e_impairment_", 1:8)
)
# Presumably tabulates each listed variable for manual review; see the helper
# functions file for the exact output.
capture_tables(indirect_PII)
# Recode those with very specific values.
# Only the first membership flag of each series is kept; it is relabelled with
# the generic description below, and the position-specific flags (2 onwards)
# are dropped afterwards.
legis_mem1_label <- "Member of gram panchayat, other local, state or national legislative body or school management committee"
var_label(mydata$b_legis_mem1) <- legis_mem1_label
var_label(mydata$b_W2_legis1) <- legis_mem1_label
var_label(mydata$e_legis_mem1) <- legis_mem1_label
# The generic label belongs on e_W2_legis1, the flag that is retained.
# e_W2_legis2 is in the drop list below, so labelling it had no lasting effect
# and left e_W2_legis1 with its original label.
if ("e_W2_legis1" %in% names(mydata)) {
  var_label(mydata$e_W2_legis1) <- legis_mem1_label
} else {
  warning(
    "e_W2_legis1 not found in mydata; generic legislature label not applied",
    call. = FALSE
  )
}
# Drop specific flags for type of government position held as strong identifier
dropvars <- c(
  paste0("b_legis_mem", 2:6),
  paste0("b_W2_legis", 2:5),
  paste0("e_legis_mem", 2:6),
  paste0("e_W2_legis", 2:5)
)
mydata <- mydata[!(names(mydata) %in% dropvars)]
# Drop specific disabilities as strong identifier, together with the
# interviewer Q4 items 2-7.
# NOTE(review): e_impairment_8 and b_Interviewer_Q4_8 are tabulated with the
# other indirect PII variables but are not dropped here; confirm that keeping
# them is intentional.
dropvars <- c(
  "e_impairment",
  paste0("e_impairment_", 1:7),
  paste0("b_Interviewer_Q4_", 2:7)
)
mydata <- mydata[!(names(mydata) %in% dropvars)]
# Choose the variables for the sdcMicro object after inspecting the dictionary.
# Guidance: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# Every name used here must match a column name in the data file.

# Categorical key variables: gender, occupation/education and age.
selectedKeyVars <- c("gender", "age_masked", "edu_control") ## !!! Replace with candidate categorical demo vars

# Weight variable (add if available).
# selectedWeightVar <- c("projwt") ## !!! Replace with weight var

# Household id variable (cluster).
selectedHouseholdID <- c("hhid") ## !!! Replace with household id

# Build the sdcMicro object from the variables chosen above.
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars,
  hhId = selectedHouseholdID
)

# Print the risk summary: no records violate 2-anonymity.
sdcInitial
## The input dataset consists of 40668 rows and 758 variables.
## --> Categorical key variables: gender, age_masked, edu_control
## --> Cluster/Household-Id variable: hhid
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## gender 3 (3) 20322.500 (20322.500) 20159 (20159)
## age_masked 5 (5) 8133.600 (8133.600) 2773 (2773)
## edu_control 2 (2) 20334.000 (20334.000) 3166 (3166)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 0 (0.000%)
## - 3-anonymity: 0 (0.000%)
## - 5-anonymity: 0 (0.000%)
##
## ----------------------------------------------------------------------
# !!! Identify open-end variables here:
# Free-text variables handed to report_open(); the comment that follows this
# step refers to a "verbatims.csv" file that is reviewed by hand.
open_ends <- c(
  "b_migpurp",
  "b_BBA_specify_masked",
  paste0("b_Fr", 1:5, "_topic"),
  "b_W2_rnd_work_sch_why",
  "b_Interviewer_Q2_masked"
)
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and
# their row number.
#
# Each statement below overwrites one open-end column at the listed row
# positions of `mydata` with a single redacted replacement text. Rows are
# addressed by position, so `mydata` must still be in the row order that was
# used when the verbatims were reviewed.

# b_BBA_specify_masked: responses naming a specific location
mydata$b_BBA_specify_masked[c(
  35727, 35729, 35731, 35732, 35736, 35737,
  35738, 35739, 35740, 35742, 35743
)] <- "In Panchayat bhawan and [location name redacted]."

mydata$b_BBA_specify_masked[c(
  19084, 19086, 19088, 19091, 19092, 19093, 19097
)] <- "I've seen it written on the wall in [location name redacted]"

mydata$b_BBA_specify_masked[c(
  15372, 15373, 15375, 15380, 15381, 15385
)] <- "Have seen a procession towards [location name redacted]"

# b_migpurp: responses naming a location or an individual
mydata$b_migpurp[13630] <- "Went to [location redacted] to stay"
mydata$b_migpurp[c(25055, 25511, 25518)] <- "[Individual's name redacted]"
mydata$b_migpurp[35158] <-
  "[Individual's name redacted] has gone for education"

# b_Interviewer_Q2_masked: responses consisting of a location name
mydata$b_Interviewer_Q2_masked[c(
  19178, 19179, 19180, 19181, 19182, 19183, 19188, 19192
)] <- "[Location name redacted]"
# !!! No GPS data
# Write `mydata` to disk in Stata (.dta) and SPSS (.sav) formats.
# Adds "_PU" (Public Use) to the end of the name
# (The line above was previously missing its leading "#", which made R stop
# with a parse error before either file was written.)
haven::write_dta(mydata, paste0(filename, "_PU.dta"))
haven::write_sav(mydata, paste0(filename, "_PU.sav"))
# Add report title dynamically: a fixed prefix followed by the dataset name
# held in `filename`, so the title follows whatever file is being processed.
title_var <- paste0(
  "DOL-ILAB SDC - ",
  filename
)