# Start from a clean workspace: remove every object, including dot-prefixed
# (hidden) names, from the current environment.
# `all.names = TRUE` must be written out in full: lowercase `t` is the base
# transpose function, not a logical, and `all` only reaches `all.names` through
# partial argument matching.
rm(list = ls(all.names = TRUE))

# Set up filenames, data, and helper functions, and create the dictionary for dataset review

# Per-dataset configuration.
# !!!Update both values for each new dataset / helper-functions version.
functions_vers <- "functions_1.7.R" # helper functions file, sourced below
filename <- "Malawi_HHHead_Public Use" # dataset name; presumably read by the helpers -- TODO confirm

# Load the helper functions used throughout the rest of this script.
source(functions_vers)

# Visually inspect the variables in "dictionary.csv" and flag each one for disclosure risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

# Direct PII: variables to be removed

# !!!Include any Direct PII variables
# Columns holding direct identifiers. Any column of `mydata` whose name exactly
# matches an entry below is dropped; all other columns are kept as they are.
dropvars <- c(
  "b_edunames_",
  "b_empname_",
  "b_else_members_",
  "b_tob_members_",
  "b_rel_names_"
)
mydata <- mydata[!(names(mydata) %in% dropvars)]

# Direct PII-team: Encode field team names

# !!!No Direct PII-Team

# Small locations: Encode locations with pop <100,000 using random large numbers
# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variables (community and traditional-authority fields) whose values
# are replaced by numeric codes via the encode_location() helper.
# NOTE(review): 999999 is passed as the helper's `missing` code -- confirm how
# the helper treats it.
locvars <- c(
  "community", "comm",
  "b_treat", "b_comm",
  "e_community", "e_comm",
  "b_ta", "e_ta"
)
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## community. Community
##                    Chaola  Chazim'bobo     Chikho 2     Chinyata      Choumba     Kakoloha      Kanongo       Luwira       Mafuta 
##        27926          363          312         1145          658          627          288          845          237          345 
##     Mkombezi       Mlambe      Mzokoto     Nanzomba       Ndaula     Nyongani      Pondani Tamanimwendo    Waliranji 
##          553          496          625          584          728           73          597          552          811 
## [1] "Frequency table after encoding"
## community. Community
##   841   842   843   844   845   846   847   848   849   850   851   852   853   854   855   856   857   858   859 
##   811 27926   363   552   237   496   584    73   553   288  1145   845   312   627   728   345   658   597   625 
## [1] "Frequency table before encoding"
## comm. Numeric Values For Community
##     1     2     3     4     5     6     7     8     9    10    11    12    13    14    15    16    17    18  <NA> 
##   658   345   728    73   552   363   312   584   597   625   627   845   811  1145   496   288   237   553 27926 
## [1] "Frequency table after encoding"
## comm. Numeric Values For Community
##  1001  1002  1003  1004  1005  1006  1007  1008  1009  1010  1011  1012  1013  1014  1015  1016  1017  1018  <NA> 
##   496   584   288   728  1145   552   312   845   625   658   811   627   237    73   553   345   597   363 27926 
## [1] "Frequency table before encoding"
## b_treat. b_Community Name (Lower)
##                      chaola   chazim'bobo      chikho 2      chinyata       choumba      kakoloha       kanongo        luwira 
##         19563           211           535          2672          1131          1091           517          1555           441 
##        mafuta      mkombezi        mlambe       mzokoto      nanzomba        ndaula      nyongani       pondani tamani mwendo 
##           675          1094           802          1162          1056          1338           115          1200          1001 
##     waliranji 
##          1606 
## [1] "Frequency table after encoding"
## b_treat. b_Community Name (Lower)
##   757   758   759   760   761   762   763   764   765   766   767   768   769   770   771   772   773   774   775 
##   115   802  1056  1606   535  1162  1555  2672  1338 19563  1131   441   211   675   517  1200  1001  1094  1091 
## [1] "Frequency table before encoding"
## b_comm. b_Community
##                      CHAOLA   CHAZIM'BOBO      CHIKHO 2      CHINYATA       CHOUMBA      KAKOLOHA       KANONGO        LUWIRA 
##         19563           211           535          2672          1131          1091           517          1555           441 
##        MAFUTA      MKOMBEZI        MLAMBE       MZOKOTO      NANZOMBA        NDAULA      NYONGANI       PONDANI TAMANI MWENDO 
##           675          1094           802          1162          1056          1338           115          1200          1001 
##     WALIRANJI 
##          1606 
## [1] "Frequency table after encoding"
## b_comm. b_Community
##   687   688   689   690   691   692   693   694   695   696   697   698   699   700   701   702   703   704   705 
##  1555  1091  1162  1131   675   517  1001   802   535   115   211  1056  1606  1338   441 19563  1094  1200  2672 
## [1] "Frequency table before encoding"
## e_community. e_Community
##                    Chaola  Chazim'bobo     Chikho 2     Chinyata      Choumba     Kakoloha      Kanongo       Luwira       Mafuta 
##        18202          705          600         2270         1277         1213          601         1707          479          704 
##     Mkombezi       Mlambe      Mzokoto     Nanzomba       Ndaula     Nyongani      Pondani Tamanimwendo    Waliranji 
##         1142          947         1302         1145         1411          138         1174         1106         1642 
## [1] "Frequency table after encoding"
## e_community. e_Community
##   700   701   702   703   704   705   706   707   708   709   710   711   712   713   714   715   716   717   718 
##  1411   705  1142   947  1145  1277  1106  1213   600  2270   704  1707  1174   601  1642  1302   138 18202   479 
## [1] "Frequency table before encoding"
## e_comm. e_Numeric Values For Community
##     1     2     3     4     5     6     7     8     9    10    11    12    13    14    15    16    17    18  <NA> 
##  1277   704  1411   138  1106   705   600  1145  1174  1302  1213  1707  1642  2270   947   601   479  1142 18202 
## [1] "Frequency table after encoding"
## e_comm. e_Numeric Values For Community
##   834   835   836   837   838   839   840   841   842   843   844   845   846   847   848   849   850   851  <NA> 
##   947  1213  1277   600  1106   138   704  1302  1411  2270   601   705  1174   479  1707  1142  1642  1145 18202 
## [1] "Frequency table before encoding"
## b_ta. b_Traditional Authority
##                    KASAKULA       MAVWERE MWANKHUNIKILA 
##         19563          6476          8512          3214 
## [1] "Frequency table after encoding"
## b_ta. b_Traditional Authority
##   915   916   917   918 
## 19563  6476  8512  3214 
## [1] "Frequency table before encoding"
## e_ta. e_Traditional Authority Name:
##                              ,mavwere                  1            3514007            Asakula             Bokosi 
##              18202                  5                  3                  6                  4                  6 
##           Chigunda             Chikho             CHIKHO           CHIKHO 2         Chinthumba           Chinyata 
##                  8                322                108                  4                  6                 22 
##           Jasakula           Kabudula            Kachiza          Kamtsukwa           Kasakuka           kasakula 
##                  5                  5                  3                  5                  3                 15 
##           Kasakula           KASAKULA Kasakula 2(Lufeyo)           Kasakule          Kasakulla           Kasakulu 
##               5757                361                  3                  4                  4                  4 
##          KASAUKULA            Kaskula           Kasskula           Kasukula           Kasula 1          Katsakula 
##                  3                  6                  7                 10                  3                150 
##            MABVERE           Maliketi            Mamvere            Mamwere           Manvwere             Mavere 
##                 87                 10                  5                 11                  5                  5 
##            Mavwele            Mavwera            mavwere            Mavwere            MAVWERE           Mavwerer 
##                 72                  4                 15               8675                 62                  3 
##            Mawvere            Mchinji             Mlambe       Mwakhunikila           Mwamvele      MWANHKUNIKIRA 
##                150                  6                  8                  4                  9                 11 
##       Mwanhunikira          Mwankhuni      Mwankhunikala       Mwankhunikil      Mwankhunikila      mwankhunikira 
##                  4                  6                 33                  5               1032                 43 
##      Mwankhunikira      MWANKHUNIKIRA      Mwankhunikjra       Mwankhunikra      Mwankhunikura    Mwankhuninikira 
##               1234               1079                  6                 23                  3                  5 
##        Mwankhunira       Mwankhunkila            Navwere      NWANKHUNIKIRA            Pondani    Rosalina Josamu 
##                  7                  7                  4                  7                  6                  4 
##             SAKULA        TA Kasakula         TA Mavwele    Vesinati Mayeso       Victor Phiri          Waliranji 
##                  3                 23                  8                  4                  4                 14 
##       Wankhunikila      Wmankhunikila 
##                  9                  6 
## [1] "Frequency table after encoding"
## e_ta. e_Traditional Authority Name:
##   478   479   480   481   482   483   484   485   486   487   488   489   490   491   492   493   494   495   496   497   498 
##     4     6    11   361    10    11    23     3    87     6 18202    43     4     4     3    23  1079     8     5     6     4 
##   499   500   501   502   503   504   505   506   507   508   509   510   511   512   513   514   515   516   517   518   519 
##     6   150     4     5     9     7     7  1234     4     3     8     9     4     6     8    72     5    15     5    10    15 
##   520   521   522   523   524   525   526   527   528   529   530   531   532   533   534   535   536   537   538   539   540 
##     4     6     4  5757     3     4     6     5     7     6    62     5     3     3    14   108     4    22     5     6   150 
##   541   542   543   544   545   546   547   548   549   550   551 
##  8675  1032     7     5     3     5     4     3   322     3    33

# Indirect PII - Ordinal: Global recode or top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Top-code household size: households reporting 10 or more people are pooled
# into a single "10 or more" category.
mydata <- top_recode(
  "num_people",
  break_point = 10,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## num_people. Can You Please Tell Me How Many People Live In This Household, Including Yoursel
##     2     3     4     5     6     7     8     9    10    11    12    13    14  <NA> 
##    77   391  1195  2072  2381  1872  1024   474   189   115    37     4     8 27926

## [1] "Frequency table after encoding"
## num_people. Can You Please Tell Me How Many People Live In This Household, Including Yoursel
##          2          3          4          5          6          7          8          9 10 or more       <NA> 
##         77        391       1195       2072       2381       1872       1024        474        353      27926

# Top-code the `b_` household-member count: 10 or more members are pooled into
# a single "10 or more" category.
mydata <- top_recode(
  "b_hhcount",
  break_point = 10,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## b_hhcount. b_Number Of Household Members
##     1     2     3     4     5     6     7     8     9    10    11    12    13    14  <NA> 
##     5   190   914  2679  4003  3843  3071  1844  1010   387   137    65    43    11 19563

## [1] "Frequency table after encoding"
## b_hhcount. b_Number Of Household Members
##          1          2          3          4          5          6          7          8          9 10 or more       <NA> 
##          5        190        914       2679       4003       3843       3071       1844       1010        643      19563

# Top-code the `e_` household-member count: 10 or more members are pooled into
# a single "10 or more" category.
mydata <- top_recode(
  "e_hhcount",
  break_point = 10,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## e_hhcount. e_Number Of Household Members
##     2     3     4     5     6     7     8     9    10    11    12    13    14  <NA> 
##   154   910  2678  4169  4500  3508  1995   942   387   210    88    10    12 18202

## [1] "Frequency table after encoding"
## e_hhcount. e_Number Of Household Members
##          2          3          4          5          6          7          8          9 10 or more       <NA> 
##        154        910       2678       4169       4500       3508       1995        942        707      18202

# Top-code the `e_` household size: households reporting 10 or more people are
# pooled into a single "10 or more" category.
mydata <- top_recode(
  "e_num_people",
  break_point = 10,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## e_num_people. e_Can You Please Tell Me How Many People Live In This Household, Including Yours
##     2     3     4     5     6     7     8     9    10    11    12    13    14  <NA> 
##   154   910  2678  4169  4500  3508  1995   942   387   210    88    10    12 18202

## [1] "Frequency table after encoding"
## e_num_people. e_Can You Please Tell Me How Many People Live In This Household, Including Yours
##          2          3          4          5          6          7          8          9 10 or more       <NA> 
##        154        910       2678       4169       4500       3508       1995        942        707      18202

# Top code number of loans taken in the last year

# Top-code the `e_` number of loans taken in the last year: 10 or more loans
# are pooled into "10 or more". 88 is passed as the missing code and stays a
# separate category.
mydata <- top_recode(
  "e_loannum",
  break_point = 10,
  missing = 88
)
## [1] "Frequency table before encoding"
## e_loannum. e_Number Of Loans Taken In Last Year
##     0     1     2     3     4     5     6     7     8     9    10    11    12    13    15    88  <NA> 
##  1456  4335  2657  1756   743   482   221    88    65    34    86    12    35     9     4    83 25699

## [1] "Frequency table after encoding"
## e_loannum. e_Number Of Loans Taken In Last Year
##          0          1          2          3          4          5          6          7          8          9 10 or more 
##       1456       4335       2657       1756        743        482        221         88         65         34        146 
##         88       <NA> 
##         83      25699

# Top-code the number of loans taken in the last year: 10 or more loans are
# pooled into "10 or more". 88 is passed as the missing code and stays a
# separate category.
mydata <- top_recode(
  "loannum",
  break_point = 10,
  missing = 88
)
## [1] "Frequency table before encoding"
## loannum. Number Of Loans Taken In Last Year
##     0     1     2     3     4     5     6     7     8     9    10    11    12    13    15    88  <NA> 
##  4471  2209  1330   892   371   250   106    47    35    16    46     5    17     2     2    40 27926

## [1] "Frequency table after encoding"
## loannum. Number Of Loans Taken In Last Year
##          0          1          2          3          4          5          6          7          8          9 10 or more 
##       4471       2209       1330        892        371        250        106         47         35         16         72 
##         88       <NA> 
##         40      27926

# Top-code the `b_` number of loans taken in the last year: 10 or more loans
# are pooled into "10 or more". 88 is passed as the missing code.
mydata <- top_recode(
  "b_loannum",
  break_point = 10,
  missing = 88
)
## [1] "Frequency table before encoding"
## b_loannum. b_Number Of Loans Taken In Last Year
##          0          1          2          3          4          5          6          7          8          9         10 
##        178       2849       1518       1678        650        397        222         39         76         28         92 
##         12         15         19 Don't know       <NA> 
##         18          4          4         32      29980

## [1] "Frequency table after encoding"
## b_loannum. b_Number Of Loans Taken In Last Year
##          0          1          2          3          4          5          6          7          8          9 10 or more 
##        178       2849       1518       1678        650        397        222         39         76         28        118 
## Don't know       <NA> 
##         32      29980

# Top-code typical weekly earnings from tobacco work: amounts of 5000 or more
# are pooled into "5000 or more". 66666 and 88888 are passed as missing codes.
mydata <- top_recode(
  "b_tobearn_",
  break_point = 5000,
  missing = c(66666, 88888)
)
## [1] "Frequency table before encoding"
## b_tobearn_. b_(Tobearn) In A Typical Week, How Much Did <<Name>> Earn From Working In Tobacc
##       None          5         21         30         50        100        120        150        200        250        300 
##         44          1          1          1          2          3          1          6         27          2         12 
##        350        400        450        500        550        600        700        750        800       1000       1050 
##          1          5          2         15          2          5          3          2          6         34          1 
##       1200       1250       1400       1500       1800       2000       2500       2800       3000       3200       3500 
##          1          1          8          6          2         14          1          1          9          1          4 
##       4000       4500       4900       5000       6000       7000       8000       9000      10000      15000      20000 
##          7          1          1          8          3          3          1          1          1          1          1 
##      30000      50000    Ik kind Don't know       <NA> 
##          2          2         35         16      37458

## [1] "Frequency table after encoding"
## b_tobearn_. b_(Tobearn) In A Typical Week, How Much Did <<Name>> Earn From Working In Tobacc
##         None            5           21           30           50          100          120          150          200          250 
##           44            1            1            1            2            3            1            6           27            2 
##          300          350          400          450          500          550          600          700          750          800 
##           12            1            5            2           15            2            5            3            2            6 
##         1000         1050         1200         1250         1400         1500         1800         2000         2500         2800 
##           34            1            1            1            8            6            2           14            1            1 
##         3000         3200         3500         4000         4500         4900 5000 or more      Ik kind   Don't know         <NA> 
##            9            1            4            7            1            1           23           35           16        37458

# Top-code adult wage/salary earnings: amounts of 50000 or more are pooled
# into "50000 or more". 66666 and 88888 are passed as missing codes.
mydata <- top_recode(
  "b_adultearn_",
  break_point = 50000,
  missing = c(66666, 88888)
)
## [1] "Frequency table before encoding"
## b_adultearn_. b_(Adultearn) How Much Did You/<<Name>> Earn In Wages, Salary, Commission Or Pay
##          0          2          7         10         20         50         88        100        150        200        250 
##         43          1          1          1          2          1          1         11          4         18          4 
##        300        350        400        450        500        600        700        750        800        900       1000 
##         14          3         13          2         89         14          7          4         14          4        172 
##       1050       1060       1100       1150       1200       1300       1400       1500       1600       1650       1700 
##          1          2          1          1         17          2          1         93          2          1          5 
##       1800       2000       2200       2250       2400       2500       2800       3000       3100       3200       3250 
##          7        227          1          1          2         77          3        195          2          1          1 
##       3300       3500       3600       3700       3750       3800       4000       4250       4500       4800       5000 
##          1         38          3          1          1          1        103          1          8          1        171 
##       5500       6000       6500       6666       7000       7200       7500       7800       8000       8500       9000 
##          7         46          5          3         55          6          2          1         33          1         11 
##      10000      11000      11600      12000      12500      13000      13500      14000      14400      14500      15000 
##        102          3          1         20          1          6          1          7          4          1         37 
##      15550      16000      17000      17500      18000      19000      20000      21000      22000      24000      25000 
##          1          6          3          1         15          3         46          4          3          1          8 
##      27000      28000      28700      30000      32000      35000      36000      40000      40600      43000      45000 
##          1          1          1         28          1          4          1          8          1          1          3 
##      50000      54000      56000      60000      65000    In kind      70000      80000      85000      87000 Don't know 
##         14          1          1          9          1        202          1          5          2          1        147 
##      90000       <NA> 
##          1      35491

## [1] "Frequency table after encoding"
## b_adultearn_. b_(Adultearn) How Much Did You/<<Name>> Earn In Wages, Salary, Commission Or Pay
##             0             2             7            10            20            50            88           100           150 
##            43             1             1             1             2             1             1            11             4 
##           200           250           300           350           400           450           500           600           700 
##            18             4            14             3            13             2            89            14             7 
##           750           800           900          1000          1050          1060          1100          1150          1200 
##             4            14             4           172             1             2             1             1            17 
##          1300          1400          1500          1600          1650          1700          1800          2000          2200 
##             2             1            93             2             1             5             7           227             1 
##          2250          2400          2500          2800          3000          3100          3200          3250          3300 
##             1             2            77             3           195             2             1             1             1 
##          3500          3600          3700          3750          3800          4000          4250          4500          4800 
##            38             3             1             1             1           103             1             8             1 
##          5000          5500          6000          6500          6666          7000          7200          7500          7800 
##           171             7            46             5             3            55             6             2             1 
##          8000          8500          9000         10000         11000         11600         12000         12500         13000 
##            33             1            11           102             3             1            20             1             6 
##         13500         14000         14400         14500         15000         15550         16000         17000         17500 
##             1             7             4             1            37             1             6             3             1 
##         18000         19000         20000         21000         22000         24000         25000         27000         28000 
##            15             3            46             4             3             1             8             1             1 
##         28700         30000         32000         35000         36000         40000         40600         43000         45000 
##             1            28             1             4             1             8             1             1             3 
## 50000 or more       In kind    Don't know          <NA> 
##            36           202           147         35491

# Top code high savings amounts at the 99.5th percentile

# Top-code last-month household savings: amounts of 70000 or more are pooled
# into "70000 or more". 88888 is passed as the missing code.
# NOTE(review): this break point is hard-coded, whereas `saveall` further down
# derives its break point with percentile_checker() -- confirm 70000 is the
# intended 99.5th-percentile cut-off.
mydata <- top_recode(
  variable = "save1",
  break_point = 70000,
  missing = 88888
)
## [1] "Frequency table before encoding"
## save1. Household Savings Amount In The Last Month
##      0    100    200    300    400    500    600    800   1000   1200   1300   1400   1500   1600   1800   2000   2200   2300 
##   7699      5     52     10     45     82     32     15    261     15      2      3     48      2      6    279      4      5 
##   2400   2500   2800   3000   3200   3500   3600   3700   3800   4000   4500   4600   5000   5500   5600   6000   7000   7500 
##      4     32      2    144      4     21      6      3      3    121      6      1    241      3      1     45     26      3 
##   8000   9000   9500  10000  11000  11500  11600  12000  13000  14000  15000  16000  17500  18000  19000  20000  21000  24000 
##     37      6      3    114      8      4      1      6     12      6     42      4      2      4      2    101      3     14 
##  25000  26000  27000  30000  35000  36000  40000  42000  44000  45000  50000  52000  60000  70000  72000  75000  80000  88888 
##     10      3      6     47     23      1     10      2      1      3     50      4      7      7      2      1      2     16 
##  1e+05 120000 121000 130000 150000  2e+05  5e+05   <NA> 
##     21      4      1      3      7      6      2  27926

## [1] "Frequency table after encoding"
## save1. Household Savings Amount In The Last Month
##             0           100           200           300           400           500           600           800          1000 
##          7699             5            52            10            45            82            32            15           261 
##          1200          1300          1400          1500          1600          1800          2000          2200          2300 
##            15             2             3            48             2             6           279             4             5 
##          2400          2500          2800          3000          3200          3500          3600          3700          3800 
##             4            32             2           144             4            21             6             3             3 
##          4000          4500          4600          5000          5500          5600          6000          7000          7500 
##           121             6             1           241             3             1            45            26             3 
##          8000          9000          9500         10000         11000         11500         11600         12000         13000 
##            37             6             3           114             8             4             1             6            12 
##         14000         15000         16000         17500         18000         19000         20000         21000         24000 
##             6            42             4             2             4             2           101             3            14 
##         25000         26000         27000         30000         35000         36000         40000         42000         44000 
##            10             3             6            47            23             1            10             2             1 
##         45000         50000         52000         60000 70000 or more         88888          <NA> 
##             3            50             4             7            56            16         27926

# Derive the top-code threshold for total household savings with the
# percentile_checker() helper, then top-code `saveall` at that threshold.
# 88888 is passed as the missing code to both helpers.
# NOTE(review): percentile_checker() presumably returns the 99.5th percentile
# of the non-missing values -- confirm against the helper's definition.
percentile_99.5 <- percentile_checker("saveall", missing = 88888)
mydata <- top_recode(
  variable = "saveall",
  break_point = percentile_99.5,
  missing = 88888
)
## [1] "Frequency table before encoding"
## saveall. Total Household Savings
##       0       1       9     100     200     300     400     450     500     600     800     900    1000    1200    1300    1400 
##    7097       5       4       3      41      11      25       2      69      31      19       8     185      11       2      12 
##    1500    1600    1800    2000    2200    2300    2400    2500    2600    2800    2900    3000    3200    3400    3500    3600 
##      69       5       9     253       4       5       4      29       3       6       2     151       7       6      35       5 
##    3800    4000    4300    4500    5000    5500    5600    5800    6000    6500    7000    7500    7600    7700    8000    8500 
##       3      82       3       6     269       3       2       2      63       5      43       8       1       3      59       3 
##    8888    9000    9500   10000   10500   11000   11500   11600   12000   12800   13000   13600   14000   14500   15000   16000 
##       1      17       3     210       4      18       6       1      40       2      24       1      15       3     105       9 
##   17200   17500   18000   19000   19300   20000   21000   22000   24000   25000   26000   27000   30000   31000   32000   33500 
##       3       2      14       2       3     135       7       4       6      27       2       6      83       1       6       1 
##   34000   35000   36000   37000   39000   40000   42000   43000   44000   45000   46000   50000   52000   60000   65000   70000 
##       1      17       1       3       3      26       2       3       4      13       6      62       4      23       2       9 
##   74000   80000   85000   88888   89000   90000   94000   95000   1e+05  106000  108000  120000  121000  125000  130000  140000 
##       3       8       3      36       2       2       3       3      47       4       4       9       1       4       3       4 
##  142000  144000  150000  160000  172000  175000   2e+05  250000   3e+05  320000  350000  450000   5e+05  566000   6e+05   8e+05 
##       4       3      29       1       2       2       4       8       5       2       2       4       8       3       3       1 
##   1e+06 1500000 2500000   3e+06    <NA> 
##       1       2       3       3   27926

## [1] "Frequency table after encoding"
## saveall. Total Household Savings
##              0              1              9            100            200            300            400            450 
##           7097              5              4              3             41             11             25              2 
##            500            600            800            900           1000           1200           1300           1400 
##             69             31             19              8            185             11              2             12 
##           1500           1600           1800           2000           2200           2300           2400           2500 
##             69              5              9            253              4              5              4             29 
##           2600           2800           2900           3000           3200           3400           3500           3600 
##              3              6              2            151              7              6             35              5 
##           3800           4000           4300           4500           5000           5500           5600           5800 
##              3             82              3              6            269              3              2              2 
##           6000           6500           7000           7500           7600           7700           8000           8500 
##             63              5             43              8              1              3             59              3 
##           8888           9000           9500          10000          10500          11000          11500          11600 
##              1             17              3            210              4             18              6              1 
##          12000          12800          13000          13600          14000          14500          15000          16000 
##             40              2             24              1             15              3            105              9 
##          17200          17500          18000          19000          19300          20000          21000          22000 
##              3              2             14              2              3            135              7              4 
##          24000          25000          26000          27000          30000          31000          32000          33500 
##              6             27              2              6             83              1              6              1 
##          34000          35000          36000          37000          39000          40000          42000          43000 
##              1             17              1              3              3             26              2              3 
##          44000          45000          46000          50000          52000          60000          65000          70000 
##              4             13              6             62              4             23              2              9 
##          74000          80000          85000          88888          89000          90000          94000          95000 
##              3              8              3             36              2              2              3              3 
##          1e+05         106000         108000         120000         121000         125000         130000         140000 
##             47              4              4              9              1              4              3              4 
##         142000         144000         150000         160000         172000 175000 or more           <NA> 
##              4              3             29              1              2             51          27926

# Top-code loanval (Value Of All Loans Taken In Last Year).
# The break point comes from percentile_checker(); presumably this is the
# 99.5th percentile, going by the variable name -- confirm in the helper file.
# 88888 and 888888 are passed as missing codes to both helpers.
percentile_99.5 <- percentile_checker("loanval", missing = c(88888, 888888))

# top_recode() prints a frequency table before and after encoding. Values at
# or above the break point are collapsed into a single "... or more" category,
# while a listed missing code above the break point (888888) stays separate.
mydata <- top_recode(
  variable = "loanval",
  break_point = percentile_99.5,
  missing = c(88888, 888888)
)
## [1] "Frequency table before encoding"
## loanval. Value Of All Loans Taken In Last Year
##       0       1       2       3       4       5       6       7      10      11      16      60      75      88     100     110 
##    4471      12      15       7       7       2       1       3       4       3       4       2       7       2       8       1 
##     200     250     300     350     400     500     600     700     750     760     800    1000    1200    1400    1500    1700 
##      14       6       9       6      10      56       2       7      10       6       5     128       5       4      43       4 
##    1750    2000    2200    2270    2300    2400    2500    2600    2700    3000    3500    3600    4000    4500    4600    4700 
##       1     223       6       1       2       3      15       3       3     149      12       2     111      12       2       3 
##    4800    5000    5200    5500    5800    6000    6500    7000    7500    7680    8000    8400    8500    9000    9500    9720 
##       6     296       4       4       3     143       1      77       6       1     114       3       2      86      18       1 
##   10000   10500   10800   11000   11200   11300   11500   12000   12500   12600   13000   13500   13600   14000   14400   14500 
##     510      17       2      20       4       3       3     122       4       4      38       3       4      43       4       3 
##   15000   15500   15600   16000   16500   16800   17000   17400   17500   18000   18400   18500   19000   19700   20000   20500 
##     249       9       4      34       5       2      40       4       7      46       2       4      11       4     361       1 
##   21000   22000   22500   22700   23000   23500   24000   24020   25000   25500   26000   27000   28000   29000   29400   30000 
##      24      29       2       3      41       2      39       1      87       3      10      17      27       4       3     233 
##   30500   31000   32000   32400   33000   34000   35000   36000   36800   37000   38000   38200   39000   40000   41000   42000 
##       8       8      30       3      20       4      63      13       2       4      19       4       8     162       2       5 
##   42200   43500   44000   45000   45500   46000   47000   47500   48000   48400   49000   50000   52000   53000   54000   55000 
##       3       3       8      31       3       3       7       2      18       3      13     245       7       5       5      17 
##   56000   57000   58000   59000   60000   60600   61800   62000   62500   63000   64000   64600   65000   66000   66980   67000 
##      10       3       1       4      91       4       3       6       2       4       4       3      17       8       4       5 
##   68000   69000   70000   71000   73000   74000   75000   76000   78000   80000   82000   84000   84500   85000   88000   88888 
##       4       2      56       2       9       3      29       3       6      56       3       5       1      17       3       4 
##   89000   90000   90500   93000   96000   97000   99000   1e+05  104000  105000  108000  112000  115000  119000  120000  124000 
##       2      22       2       2       2       5       4     128       4       3       3       1       2       3      25       3 
##  125000  126000  128000  136000  138000  139000  140000  143000  147000  150000  157000  160000  165000  173000  195000  196000 
##       4       5       2       3       2       4      11       3       1      21       2       5       2       1       4       5 
##   2e+05  219000  220000  230000  240000  250000  260000  270000  280000   3e+05  316000  320000  330000  350000   4e+05  450000 
##      22       5       2       8       1      13       3       2       2      19       2       5       1       6      10       2 
##   5e+05  520000   6e+05  640000  650000   7e+05  888888   1e+06 1011000 1345000 1400000 1500000 1600000 1610000   3e+06    <NA> 
##       2       2       9       1       1       8      40       4       2       2       2       2       3       2       1   27926

## [1] "Frequency table after encoding"
## loanval. Value Of All Loans Taken In Last Year
##             0             1             2             3             4             5             6             7            10 
##          4471            12            15             7             7             2             1             3             4 
##            11            16            60            75            88           100           110           200           250 
##             3             4             2             7             2             8             1            14             6 
##           300           350           400           500           600           700           750           760           800 
##             9             6            10            56             2             7            10             6             5 
##          1000          1200          1400          1500          1700          1750          2000          2200          2270 
##           128             5             4            43             4             1           223             6             1 
##          2300          2400          2500          2600          2700          3000          3500          3600          4000 
##             2             3            15             3             3           149            12             2           111 
##          4500          4600          4700          4800          5000          5200          5500          5800          6000 
##            12             2             3             6           296             4             4             3           143 
##          6500          7000          7500          7680          8000          8400          8500          9000          9500 
##             1            77             6             1           114             3             2            86            18 
##          9720         10000         10500         10800         11000         11200         11300         11500         12000 
##             1           510            17             2            20             4             3             3           122 
##         12500         12600         13000         13500         13600         14000         14400         14500         15000 
##             4             4            38             3             4            43             4             3           249 
##         15500         15600         16000         16500         16800         17000         17400         17500         18000 
##             9             4            34             5             2            40             4             7            46 
##         18400         18500         19000         19700         20000         20500         21000         22000         22500 
##             2             4            11             4           361             1            24            29             2 
##         22700         23000         23500         24000         24020         25000         25500         26000         27000 
##             3            41             2            39             1            87             3            10            17 
##         28000         29000         29400         30000         30500         31000         32000         32400         33000 
##            27             4             3           233             8             8            30             3            20 
##         34000         35000         36000         36800         37000         38000         38200         39000         40000 
##             4            63            13             2             4            19             4             8           162 
##         41000         42000         42200         43500         44000         45000         45500         46000         47000 
##             2             5             3             3             8            31             3             3             7 
##         47500         48000         48400         49000         50000         52000         53000         54000         55000 
##             2            18             3            13           245             7             5             5            17 
##         56000         57000         58000         59000         60000         60600         61800         62000         62500 
##            10             3             1             4            91             4             3             6             2 
##         63000         64000         64600         65000         66000         66980         67000         68000         69000 
##             4             4             3            17             8             4             5             4             2 
##         70000         71000         73000         74000         75000         76000         78000         80000         82000 
##            56             2             9             3            29             3             6            56             3 
##         84000         84500         85000         88000         88888         89000         90000         90500         93000 
##             5             1            17             3             4             2            22             2             2 
##         96000         97000         99000         1e+05        104000        105000        108000        112000        115000 
##             2             5             4           128             4             3             3             1             2 
##        119000        120000        124000        125000        126000        128000        136000        138000        139000 
##             3            25             3             4             5             2             3             2             4 
##        140000        143000        147000        150000        157000        160000        165000        173000        195000 
##            11             3             1            21             2             5             2             1             4 
##        196000         2e+05        219000        220000        230000        240000        250000        260000        270000 
##             5            22             5             2             8             1            13             3             2 
##        280000         3e+05        316000        320000        330000        350000 4e+05 or more        888888          <NA> 
##             2            19             2             5             1             6            53            40         27926

# Top-code b_totalsave (b_Total Household Savings).
# The break point comes from percentile_checker(); presumably this is the
# 99.5th percentile, going by the variable name -- confirm in the helper file.
# 88888 and 888888 are passed as missing codes to both helpers.
percentile_99.5 <- percentile_checker("b_totalsave", missing = c(88888, 888888))

# top_recode() prints a frequency table before and after encoding. Values at
# or above the break point are collapsed into a single "... or more" category,
# while a listed missing code above the break point (888888) stays separate.
mydata <- top_recode(
  variable = "b_totalsave",
  break_point = percentile_99.5,
  missing = c(88888, 888888)
)
## [1] "Frequency table before encoding"
## b_totalsave. b_Total Household Savings
##       0      15      20      30      50      73      88      99     200     300     400     500     720     800    1000    1200 
##    2494       1       2       1       1       1       8       2       3       1       1      15       1       1      34       2 
##    1400    1500    1600    1800    2000    2500    2530    3000    3200    3500    3600    3800    4000    4500    4800    5000 
##       1      14       1       1      42       9       1      35       2       4       2       1      34       5       2      95 
##    5500    6000    6500    6700    6800    7000    7400    7500    8000    8500    9000    9200    9400    9500    9600   10000 
##       2      27       1       1       1      18       1       1      29       2      11       1       2       1       2     117 
##   10100   11000   11250   11400   12000   12500   13000   14000   14500   15000   15200   15500   16000   16500   17000   17500 
##       1       7       1       1      21       1      11       5       1      80       1       1       8       1       9       1 
##   18000   19000   19500   20000   20800   21000   21500   22000   23000   23500   24000   24500   25000   25200   26000   26400 
##      15       7       2     106       1       8       1       4       5       1       9       1      27       1       5       1 
##   27000   28000   29000   30000   31000   32000   34000   35000   36000   38000   40000   41000   42000   43000   45000   48000 
##       5       5       4      66       3       3       6      12       4       4      41       2       1       3       6       6 
##   49000   50000   52000   53000   54000   55000   56000   57000   59000   60000   62000   64000   65000   67500   68000   69000 
##       1      56       4       1       2       1       1       2       1      26       1       2       3       1       3       2 
##   70000   70200   72000   75000   80000   82000   85000   88888   90000   92000   95000   1e+05  101000  108000  115000  117000 
##      10       1       2       4       9       1       3      18       5       1       1      21       1       2       1       1 
##  119000  120000  121000  122000  123000  125000  130000  145000  150000  152000  160000  168000  175000  180000  190000   2e+05 
##       1       6       1       1       1       1       1       1      17       1       2       1       1       2       1      14 
##  204000  228000  250000  260000  270000  280000   3e+05  315000  325000  340000  350000  375000   4e+05  420000  450000  480000 
##       1       1       2       1       1       1       6       1       1       3       2       1       3       1       2       1 
##   5e+05  504000   6e+05  620000  650000   7e+05  780000   8e+05  888888   9e+05   1e+06 1200000 1500000   2e+06 2500000   3e+06 
##       6       1       4       1       2       2       1       3      11       1       2       1       1       3       1       2 
##   1e+07 1.1e+07   3e+07 1.2e+08    <NA> 
##       1       1       1       1   33902

## [1] "Frequency table after encoding"
## b_totalsave. b_Total Household Savings
##             0            15            20            30            50            73            88            99           200 
##          2494             1             2             1             1             1             8             2             3 
##           300           400           500           720           800          1000          1200          1400          1500 
##             1             1            15             1             1            34             2             1            14 
##          1600          1800          2000          2500          2530          3000          3200          3500          3600 
##             1             1            42             9             1            35             2             4             2 
##          3800          4000          4500          4800          5000          5500          6000          6500          6700 
##             1            34             5             2            95             2            27             1             1 
##          6800          7000          7400          7500          8000          8500          9000          9200          9400 
##             1            18             1             1            29             2            11             1             2 
##          9500          9600         10000         10100         11000         11250         11400         12000         12500 
##             1             2           117             1             7             1             1            21             1 
##         13000         14000         14500         15000         15200         15500         16000         16500         17000 
##            11             5             1            80             1             1             8             1             9 
##         17500         18000         19000         19500         20000         20800         21000         21500         22000 
##             1            15             7             2           106             1             8             1             4 
##         23000         23500         24000         24500         25000         25200         26000         26400         27000 
##             5             1             9             1            27             1             5             1             5 
##         28000         29000         30000         31000         32000         34000         35000         36000         38000 
##             5             4            66             3             3             6            12             4             4 
##         40000         41000         42000         43000         45000         48000         49000         50000         52000 
##            41             2             1             3             6             6             1            56             4 
##         53000         54000         55000         56000         57000         59000         60000         62000         64000 
##             1             2             1             1             2             1            26             1             2 
##         65000         67500         68000         69000         70000         70200         72000         75000         80000 
##             3             1             3             2            10             1             2             4             9 
##         82000         85000         88888         90000         92000         95000         1e+05        101000        108000 
##             1             3            18             5             1             1            21             1             2 
##        115000        117000        119000        120000        121000        122000        123000        125000        130000 
##             1             1             1             6             1             1             1             1             1 
##        145000        150000        152000        160000        168000        175000        180000        190000         2e+05 
##             1            17             1             2             1             1             2             1            14 
##        204000        228000        250000        260000        270000        280000         3e+05        315000        325000 
##             1             1             2             1             1             1             6             1             1 
##        340000        350000        375000         4e+05        420000        450000        480000         5e+05        504000 
##             3             2             1             3             1             2             1             6             1 
##         6e+05        620000        650000 7e+05 or more        888888          <NA> 
##             4             1             2            21            11         33902

# Top-code b_loanval (b_Value Of All Loans Taken In Last Year) at a fixed
# break point of 10,000,000.
# NOTE(review): unlike the neighbouring variables, the break point is
# hard-coded rather than taken from percentile_checker(); the reason is not
# visible in this section -- confirm.
# Missing codes: the frequency table for this variable contains the 7-digit
# code 8888888 but no 8-digit 88888888, so 8888888 is listed as well.
# 88888888 is kept so the call still covers everything it covered before.
mydata <- top_recode(
  variable = "b_loanval",
  break_point = 10000000,
  missing = c(88888, 888888, 8888888, 88888888)
)
## [1] "Frequency table before encoding"
## b_loanval. b_Value Of All Loans Taken In Last Year
##          0          1          2          3          5          6          9         10         12         14         18 
##        178          6          5          4          5          8          5          5         11          4          7 
##         21         25         30         45         75         80         88        250        300        350        500 
##          7          6          6          2          6          4          7          4          3          3         32 
##       1000       1200       1400       1500       2000       2500       2600       2800       3000       3500       3800 
##        121         10          5         45        168         22          6          3        246         17          9 
##       4000       4500       4800       5000       5500       5800       6000       6400       6500       7000       7500 
##        121         10          5        422         10          6        164          3         40        136         17 
##       8000       8300       8500       9000       9500      10000      10500      11000      11500      11900      12000 
##        145          6          5         83         11        691         12         36         13          7        142 
##      12200      12500      13000      14000      15000      15500      15750      16000      16300      16500      17000 
##          3          8         65         45        356          3          7         46          4          6         29 
##      17500      18000      18500      19000      19500      20000      20200      21000      21500      21800      22000 
##         15         74          5         12          4        615          7         30          3          5         59 
##      22500      23000      24000      25000      26000      27000      28000      29000      30000      31000      31800 
##          4         60         47        125         14         17         34          3        385         26          7 
##      32000      33000      34000      35000      35500      36000      36500      37000      37500      38000      39000 
##         29          6         12         91          3         20          5         22          4         32         14 
##      40000      41000      42000      43000      44000      45000      46000      47000      48000      49000      50000 
##        278         12         14          5         15         74          5         17         15         12        416 
##      52000      53000      54000      55000      56000      56888      58000      59000      60000      61000      62000 
##         19          6         23         34         24          7          7          7         80          4          5 
##      62500      63000      64000      65000      67000      68000      69000      70000      72000      73000      75000 
##          5          2         11         27         13          7          5         79          4          7         16 
##      76000      78000      80000      83000      85000      88888      90000      94000      95000      96000      97000 
##          4          6        110          5         21         23         28          4         17          6          5 
##      98000      1e+05     105000     110000     112000     114000     118000     120000     123000     125000     126000 
##          5        212          7         16          4          3          3         52          3         17          3 
##     130000     134000     140000     150000     160000     170000     171000     175000     180000     183000     187000 
##         20          8         15         65          5          3          6          3         34          5          6 
##     190000      2e+05     215000     218000     230000     240000     250000     260000     270000     280000      3e+05 
##          5         70          3          3          8         13         30          5          7         11         29 
##     340000     351000     365000      4e+05     420350     441000     498000      5e+05     550000      8e+05     888888 
##          6          6          4         20          2          6          9         20          3          4         33 
##      9e+05      1e+06    1100000    1150000    1200000      2e+06    2005000      3e+06    8888888    1.1e+07   11500000 
##          9         10          4          4          4          8          3         11          4          6          6 
##    1.3e+07 Don't know       <NA> 
##          7         17      29980

## [1] "Frequency table after encoding"
## b_loanval. b_Value Of All Loans Taken In Last Year
##             0             1             2             3             5             6             9            10            12 
##           178             6             5             4             5             8             5             5            11 
##            14            18            21            25            30            45            75            80            88 
##             4             7             7             6             6             2             6             4             7 
##           250           300           350           500          1000          1200          1400          1500          2000 
##             4             3             3            32           121            10             5            45           168 
##          2500          2600          2800          3000          3500          3800          4000          4500          4800 
##            22             6             3           246            17             9           121            10             5 
##          5000          5500          5800          6000          6400          6500          7000          7500          8000 
##           422            10             6           164             3            40           136            17           145 
##          8300          8500          9000          9500         10000         10500         11000         11500         11900 
##             6             5            83            11           691            12            36            13             7 
##         12000         12200         12500         13000         14000         15000         15500         15750         16000 
##           142             3             8            65            45           356             3             7            46 
##         16300         16500         17000         17500         18000         18500         19000         19500         20000 
##             4             6            29            15            74             5            12             4           615 
##         20200         21000         21500         21800         22000         22500         23000         24000         25000 
##             7            30             3             5            59             4            60            47           125 
##         26000         27000         28000         29000         30000         31000         31800         32000         33000 
##            14            17            34             3           385            26             7            29             6 
##         34000         35000         35500         36000         36500         37000         37500         38000         39000 
##            12            91             3            20             5            22             4            32            14 
##         40000         41000         42000         43000         44000         45000         46000         47000         48000 
##           278            12            14             5            15            74             5            17            15 
##         49000         50000         52000         53000         54000         55000         56000         56888         58000 
##            12           416            19             6            23            34            24             7             7 
##         59000         60000         61000         62000         62500         63000         64000         65000         67000 
##             7            80             4             5             5             2            11            27            13 
##         68000         69000         70000         72000         73000         75000         76000         78000         80000 
##             7             5            79             4             7            16             4             6           110 
##         83000         85000         88888         90000         94000         95000         96000         97000         98000 
##             5            21            23            28             4            17             6             5             5 
##         1e+05        105000        110000        112000        114000        118000        120000        123000        125000 
##           212             7            16             4             3             3            52             3            17 
##        126000        130000        134000        140000        150000        160000        170000        171000        175000 
##             3            20             8            15            65             5             3             6             3 
##        180000        183000        187000        190000         2e+05        215000        218000        230000        240000 
##            34             5             6             5            70             3             3             8            13 
##        250000        260000        270000        280000         3e+05        340000        351000        365000         4e+05 
##            30             5             7            11            29             6             6             4            20 
##        420350        441000        498000         5e+05        550000         8e+05        888888         9e+05         1e+06 
##             2             6             9            20             3             4            33             9            10 
##       1100000       1150000       1200000         2e+06       2005000         3e+06       8888888 1e+07 or more    Don't know 
##             4             4             4             8             3            11             4            19            17 
##          <NA> 
##         29980

# Top-code e_save1: obtain a break point from percentile_checker(), then hand
# it to top_recode(). Judging by the printed tables, values at or above the
# break point end up in a single "<break point> or more" category, while the
# codes passed as `missing` stay as their own category.
# NOTE(review): the name percentile_99.5 suggests a 99.5th-percentile cut-off;
# confirm against the helper functions file.
target_var <- "e_save1"
missing_codes <- 88888 # same codes go to both helpers so they cannot drift
percentile_99.5 <- percentile_checker(target_var, missing = missing_codes)

mydata <- top_recode(
  variable = target_var,
  break_point = percentile_99.5,
  missing = missing_codes
)
## [1] "Frequency table before encoding"
## e_save1. e_Household Savings Amount In The Last Month
##      0    100    200    300    400    500    600    800   1000   1200   1300   1400   1500   1600   1800   2000   2200   2300 
##  15219      8    116     19     84    152     64     37    523     29      5      4    106      3     11    568      6      8 
##   2400   2500   2800   3000   3200   3500   3600   3700   3800   4000   4500   4600   5000   5500   5600   6000   7000   7500 
##      6     66      5    278      8     46      9      5      5    250     14      3    490      5      4     97     51      6 
##   8000   9000   9500  10000  11000  11500  11600  12000  13000  14000  15000  16000  17500  18000  19000  20000  21000  24000 
##     67     11      5    240     15      6      2     14     20     13     88     13      4     13      5    212      5     27 
##  25000  26000  27000  30000  35000  36000  40000  42000  44000  45000  50000  52000  60000  70000  72000  75000  80000  88888 
##     22      6      8     98     38      4     19      5      4      6    100      7     17     13      4      3      9     33 
##  1e+05 120000 121000 130000 150000  2e+05  5e+05   <NA> 
##     46      9      5      6     13     13      5  18202

## [1] "Frequency table after encoding"
## e_save1. e_Household Savings Amount In The Last Month
##             0           100           200           300           400           500           600           800          1000 
##         15219             8           116            19            84           152            64            37           523 
##          1200          1300          1400          1500          1600          1800          2000          2200          2300 
##            29             5             4           106             3            11           568             6             8 
##          2400          2500          2800          3000          3200          3500          3600          3700          3800 
##             6            66             5           278             8            46             9             5             5 
##          4000          4500          4600          5000          5500          5600          6000          7000          7500 
##           250            14             3           490             5             4            97            51             6 
##          8000          9000          9500         10000         11000         11500         11600         12000         13000 
##            67            11             5           240            15             6             2            14            20 
##         14000         15000         16000         17500         18000         19000         20000         21000         24000 
##            13            88            13             4            13             5           212             5            27 
##         25000         26000         27000         30000         35000         36000         40000         42000         44000 
##            22             6             8            98            38             4            19             5             4 
##         45000         50000         52000         60000         70000         72000         75000 80000 or more         88888 
##             6           100             7            17            13             4             3           106            33 
##          <NA> 
##         18202

# Top-code e_saveall: obtain a break point from percentile_checker(), then
# hand it to top_recode() together with the same missing-value codes.
# NOTE(review): the frequency table also shows the value 8888 (3 cases), which
# is not listed as a missing code here; confirm whether it is a genuine amount.
target_var <- "e_saveall"
missing_codes <- 88888 # same codes go to both helpers so they cannot drift
percentile_99.5 <- percentile_checker(target_var, missing = missing_codes)

mydata <- top_recode(
  variable = target_var,
  break_point = percentile_99.5,
  missing = missing_codes
)
## [1] "Frequency table before encoding"
## e_saveall. e_Total Household Savings
##       0       1       9     100     200     300     400     450     500     600     800     900    1000    1200    1300    1400 
##   13984       8       7       5      86      24      45       5     131      59      38      15     382      21       5      21 
##    1500    1600    1800    2000    2200    2300    2400    2500    2600    2800    2900    3000    3200    3400    3500    3600 
##     140       8      11     499       6       8       6      56       6      11       5     310      13      14      75       8 
##    3800    4000    4300    4500    5000    5500    5600    5800    6000    6500    7000    7500    7600    7700    8000    8500 
##       5     165       8      11     553       5       4       4     130       9      77      18       3       5     121       5 
##    8888    9000    9500   10000   10500   11000   11500   11600   12000   12800   13000   13600   14000   14500   15000   16000 
##       3      32       5     428       9      38      12       2      82       4      46       4      34       5     211      22 
##   17200   17500   18000   19000   19300   20000   21000   22000   24000   25000   26000   27000   30000   31000   32000   33500 
##       5       4      33       4       6     273      15       6      17      59       5       8     164       5      13       3 
##   34000   35000   36000   37000   39000   40000   42000   43000   44000   45000   46000   50000   52000   60000   65000   70000 
##       3      31       4       6       7      56       5       6       6      34      11     143       7      47       5      21 
##   74000   80000   85000   88888   89000   90000   94000   95000   1e+05  106000  108000  120000  121000  125000  130000  140000 
##       5      21       5      83       5       3       5       5      92       6       7      29       5       7       8       6 
##  142000  144000  150000  160000  172000  175000   2e+05  250000   3e+05  320000  350000  450000   5e+05  566000   6e+05   8e+05 
##       7       6      47       5       4       4      12      15      12       4       6      10      18       6       6       3 
##   1e+06 1500000 2500000   3e+06    <NA> 
##       4       5       5       8   18202

## [1] "Frequency table after encoding"
## e_saveall. e_Total Household Savings
##              0              1              9            100            200            300            400            450 
##          13984              8              7              5             86             24             45              5 
##            500            600            800            900           1000           1200           1300           1400 
##            131             59             38             15            382             21              5             21 
##           1500           1600           1800           2000           2200           2300           2400           2500 
##            140              8             11            499              6              8              6             56 
##           2600           2800           2900           3000           3200           3400           3500           3600 
##              6             11              5            310             13             14             75              8 
##           3800           4000           4300           4500           5000           5500           5600           5800 
##              5            165              8             11            553              5              4              4 
##           6000           6500           7000           7500           7600           7700           8000           8500 
##            130              9             77             18              3              5            121              5 
##           8888           9000           9500          10000          10500          11000          11500          11600 
##              3             32              5            428              9             38             12              2 
##          12000          12800          13000          13600          14000          14500          15000          16000 
##             82              4             46              4             34              5            211             22 
##          17200          17500          18000          19000          19300          20000          21000          22000 
##              5              4             33              4              6            273             15              6 
##          24000          25000          26000          27000          30000          31000          32000          33500 
##             17             59              5              8            164              5             13              3 
##          34000          35000          36000          37000          39000          40000          42000          43000 
##              3             31              4              6              7             56              5              6 
##          44000          45000          46000          50000          52000          60000          65000          70000 
##              6             34             11            143              7             47              5             21 
##          74000          80000          85000          88888          89000          90000          94000          95000 
##              5             21              5             83              5              3              5              5 
##          1e+05         106000         108000         120000         121000         125000         130000         140000 
##             92              6              7             29              5              7              8              6 
##         142000         144000         150000         160000         172000         175000          2e+05 250000 or more 
##              7              6             47              5              4              4             12            102 
##           <NA> 
##          18202

# Top-code e_loanval: obtain a break point from percentile_checker(), then
# hand it to top_recode() together with the same missing-value codes.
# NOTE(review): the frequency table also shows the value 88 (4 cases), which
# is not listed as a missing code here; confirm whether it is a genuine amount.
target_var <- "e_loanval"
missing_codes <- c(88888, 888888) # shared by both helper calls
percentile_99.5 <- percentile_checker(target_var, missing = missing_codes)

mydata <- top_recode(
  variable = target_var,
  break_point = percentile_99.5,
  missing = missing_codes
)
## [1] "Frequency table before encoding"
## e_loanval. e_Value Of All Loans Taken In Last Year
##       1       2       3       4       5       6       7      10      11      16      60      75      88     100     110     200 
##      32      33      13      12       6       4       5       9       6       7       4      14       4      13       4      25 
##     250     300     350     400     500     600     700     750     760     800    1000    1200    1400    1500    1700    1750 
##      12      18       9      18     102       6      17      20       8      10     224      10       7      79       9       2 
##    2000    2200    2270    2300    2400    2500    2600    2700    3000    3500    3600    4000    4500    4600    4700    4800 
##     441      11       3       4       7      29       6       5     275      23       6     210      32       6       5      10 
##    5000    5200    5500    5800    6000    6500    7000    7500    7680    8000    8400    8500    9000    9500    9720   10000 
##     573       6       9       4     289       2     157      15       4     222       7       6     154      33       5    1017 
##   10500   10800   11000   11200   11300   11500   12000   12500   12600   13000   13500   13600   14000   14400   14500   15000 
##      29       6      43       6       4       6     231      10       5      78       5       7      87       6       4     495 
##   15500   15600   16000   16500   16800   17000   17400   17500   18000   18400   18500   19000   19700   20000   20500   21000 
##      13       7      75      13       6      79       6      16      87       4       8      19       6     744       3      46 
##   22000   22500   22700   23000   23500   24000   24020   25000   25500   26000   27000   28000   29000   29400   30000   30500 
##      66       3       6      86       5      68       3     178       5      15      36      54       8       8     455      16 
##   31000   32000   32400   33000   34000   35000   36000   36800   37000   38000   38200   39000   40000   41000   42000   42200 
##      12      62       5      38      10     128      27       4       7      34       6      14     315       6       8       4 
##   43500   44000   45000   45500   46000   47000   47500   48000   48400   49000   50000   52000   53000   54000   55000   56000 
##       5      14      64       5       7      17       5      31       5      26     498      19       9       9      32      19 
##   57000   58000   59000   60000   60600   61800   62000   62500   63000   64000   64600   65000   66000   66980   67000   68000 
##       5       3       7     189       7       5      15       6      10       6       5      29      14       6       7       7 
##   69000   70000   71000   73000   74000   75000   76000   78000   80000   82000   84000   84500   85000   88000   88888   89000 
##       5     109       3      18       5      57       8      13     103       7      10       3      31       7       6       5 
##   90000   90500   93000   96000   97000   99000   1e+05  104000  105000  108000  112000  115000  119000  120000  124000  125000 
##      35       5       3       5       9       8     261       6       8       4       3       5       5      42       4       6 
##  126000  128000  136000  138000  139000  140000  143000  147000  150000  157000  160000  165000  173000  195000  196000   2e+05 
##       8       5       5       5       7      18       8       3      55       7      10       5       4       7       8      47 
##  219000  220000  230000  240000  250000  260000  270000  280000   3e+05  316000  320000  330000  350000   4e+05  450000   5e+05 
##       7       7      17       3      25       6       5       4      44       7       9       4      13      18       4      10 
##  520000   6e+05  640000  650000   7e+05  888888   1e+06 1011000 1345000 1400000 1500000 1600000 1610000   3e+06    <NA> 
##       5      17       4       5      18      81       9       4       5       5       4       6       5       8   27155

## [1] "Frequency table after encoding"
## e_loanval. e_Value Of All Loans Taken In Last Year
##             1             2             3             4             5             6             7            10            11 
##            32            33            13            12             6             4             5             9             6 
##            16            60            75            88           100           110           200           250           300 
##             7             4            14             4            13             4            25            12            18 
##           350           400           500           600           700           750           760           800          1000 
##             9            18           102             6            17            20             8            10           224 
##          1200          1400          1500          1700          1750          2000          2200          2270          2300 
##            10             7            79             9             2           441            11             3             4 
##          2400          2500          2600          2700          3000          3500          3600          4000          4500 
##             7            29             6             5           275            23             6           210            32 
##          4600          4700          4800          5000          5200          5500          5800          6000          6500 
##             6             5            10           573             6             9             4           289             2 
##          7000          7500          7680          8000          8400          8500          9000          9500          9720 
##           157            15             4           222             7             6           154            33             5 
##         10000         10500         10800         11000         11200         11300         11500         12000         12500 
##          1017            29             6            43             6             4             6           231            10 
##         12600         13000         13500         13600         14000         14400         14500         15000         15500 
##             5            78             5             7            87             6             4           495            13 
##         15600         16000         16500         16800         17000         17400         17500         18000         18400 
##             7            75            13             6            79             6            16            87             4 
##         18500         19000         19700         20000         20500         21000         22000         22500         22700 
##             8            19             6           744             3            46            66             3             6 
##         23000         23500         24000         24020         25000         25500         26000         27000         28000 
##            86             5            68             3           178             5            15            36            54 
##         29000         29400         30000         30500         31000         32000         32400         33000         34000 
##             8             8           455            16            12            62             5            38            10 
##         35000         36000         36800         37000         38000         38200         39000         40000         41000 
##           128            27             4             7            34             6            14           315             6 
##         42000         42200         43500         44000         45000         45500         46000         47000         47500 
##             8             4             5            14            64             5             7            17             5 
##         48000         48400         49000         50000         52000         53000         54000         55000         56000 
##            31             5            26           498            19             9             9            32            19 
##         57000         58000         59000         60000         60600         61800         62000         62500         63000 
##             5             3             7           189             7             5            15             6            10 
##         64000         64600         65000         66000         66980         67000         68000         69000         70000 
##             6             5            29            14             6             7             7             5           109 
##         71000         73000         74000         75000         76000         78000         80000         82000         84000 
##             3            18             5            57             8            13           103             7            10 
##         84500         85000         88000         88888         89000         90000         90500         93000         96000 
##             3            31             7             6             5            35             5             3             5 
##         97000         99000         1e+05        104000        105000        108000        112000        115000        119000 
##             9             8           261             6             8             4             3             5             5 
##        120000        124000        125000        126000        128000        136000        138000        139000        140000 
##            42             4             6             8             5             5             5             7            18 
##        143000        147000        150000        157000        160000        165000        173000        195000        196000 
##             8             3            55             7            10             5             4             7             8 
##         2e+05        219000        220000        230000        240000        250000        260000        270000        280000 
##            47             7             7            17             3            25             6             5             4 
##         3e+05        316000        320000        330000        350000         4e+05        450000         5e+05        520000 
##            44             7             9             4            13            18             4            10             5 
##         6e+05        640000        650000 7e+05 or more        888888          <NA> 
##            17             4             5            64            81         27155

# Top-code b_sav: obtain a break point from percentile_checker(), then hand it
# to top_recode() together with the same missing-value codes.
# NOTE(review): the frequency table also shows the value 88 (40 cases), which
# is not listed as a missing code here; confirm whether it is a genuine amount.
target_var <- "b_sav"
missing_codes <- c(88888, 888888888) # shared by both helper calls
percentile_99.5 <- percentile_checker(target_var, missing = missing_codes)

mydata <- top_recode(
  variable = target_var,
  break_point = percentile_99.5,
  missing = missing_codes
)
## [1] "Frequency table before encoding"
## b_sav. b_(Save) Now, I Have Some Questions About Your Household. How Much Did Your Hous
##         0         1        10        18        24        88       100       200       250       300       400       500       522 
##     10646         4         6         6         4        40        33        47         4        20        32       234         3 
##       600       700       720       750       800       900      1000      1008      1100      1200      1400      1500      1600 
##        27         9         5         6        15         7       582         4         3        11         6        93         5 
##      1750      1800      2000      2200      2400      2500      3000      3500      4000      4500      4600      5000      5200 
##         6        10       736         6        10       122       344        55       319        34         4       776         4 
##      5500      5600      6000      6200      6300      6500      7000      7400      7500      8000      8250      9000      9500 
##         8         5       206         5         4         7       125         6         7       203         6        62        10 
##     10000     10500     11000     11500     12000     12500     13000     14000     14400     15000     16000     17000     18000 
##       595        12        27         5        95         6        24        52         3       193        17        19        52 
##     19000     20000     21000     22000     23000     23500     24000     25000     26000     26500     27000     28000     30000 
##        11       503         6        26        18         5        32       152        13         4         5        12       218 
##     31000     32000     33000     35000     36000     39000     40000     42000     45000     48000     50000     51000     52000 
##         6         8         3        31         3        15       169         5        38         6       222         3         9 
##     55000     60000     62000     65000     69000     70000     80000     85000     86000     88888     90000     95000     1e+05 
##         7        76         4         5         6        19        39         4         3        26         4         6        59 
##    120000    123000    125000    130000    150000    152000    160000    178000    190000     2e+05    228000    230000    240000 
##        35         6         5        12        40         7         5         7         5        45         6         4         6 
##    250000    270000     3e+05    330000    340000    346000    350000     4e+05    450000     5e+05     6e+05    780000    850000 
##        14         3         6         7         4         5         3        18        10        18         3         6         6 
##   1200000   1900000   2500000     3e+06     5e+06   2.5e+07 888888888      <NA> 
##         7         4         9         9         7         3        64     19563

## [1] "Frequency table after encoding"
## b_sav. b_(Save) Now, I Have Some Questions About Your Household. How Much Did Your Hous
##             0             1            10            18            24            88           100           200           250 
##         10646             4             6             6             4            40            33            47             4 
##           300           400           500           522           600           700           720           750           800 
##            20            32           234             3            27             9             5             6            15 
##           900          1000          1008          1100          1200          1400          1500          1600          1750 
##             7           582             4             3            11             6            93             5             6 
##          1800          2000          2200          2400          2500          3000          3500          4000          4500 
##            10           736             6            10           122           344            55           319            34 
##          4600          5000          5200          5500          5600          6000          6200          6300          6500 
##             4           776             4             8             5           206             5             4             7 
##          7000          7400          7500          8000          8250          9000          9500         10000         10500 
##           125             6             7           203             6            62            10           595            12 
##         11000         11500         12000         12500         13000         14000         14400         15000         16000 
##            27             5            95             6            24            52             3           193            17 
##         17000         18000         19000         20000         21000         22000         23000         23500         24000 
##            19            52            11           503             6            26            18             5            32 
##         25000         26000         26500         27000         28000         30000         31000         32000         33000 
##           152            13             4             5            12           218             6             8             3 
##         35000         36000         39000         40000         42000         45000         48000         50000         51000 
##            31             3            15           169             5            38             6           222             3 
##         52000         55000         60000         62000         65000         69000         70000         80000         85000 
##             9             7            76             4             5             6            19            39             4 
##         86000         88888         90000         95000         1e+05        120000        123000        125000        130000 
##             3            26             4             6            59            35             6             5            12 
##        150000        152000        160000        178000        190000         2e+05        228000        230000        240000 
##            40             7             5             7             5            45             6             4             6 
##        250000        270000         3e+05        330000        340000        346000        350000 4e+05 or more     888888888 
##            14             3             6             7             4             5             3           100            64 
##          <NA> 
##         19563

# Top-code b_earn_a_: obtain a break point from percentile_checker(), then
# hand it to top_recode() together with the same missing-value codes.
# NOTE(review): in the printed "after encoding" table the top category
# ("20925 or more") contains a single case, so it is still unique after
# recoding; consider whether a lower break point is needed for this variable.
target_var <- "b_earn_a_"
missing_codes <- c(8888, 66666, 88888) # shared by both helper calls
percentile_99.5 <- percentile_checker(target_var, missing = missing_codes)

mydata <- top_recode(
  variable = target_var,
  break_point = percentile_99.5,
  missing = missing_codes
)
## [1] "Frequency table before encoding"
## b_earn_a_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##     0     2     3     8    17   100   120   150   200   300   350   400   500   550   600  1000  1500  1850  2000  2500  3000 
##   110     1     1     1     2     3     2     1     8     1     1     3     4     1     1     2     2     1     3     2     3 
##  3500  4000  5000  6000  8000  8888  9000 20000 25000 66666 88888  <NA> 
##     1     1     1     3     2     2     1     1     1     8     3 37588

## [1] "Frequency table after encoding"
## b_earn_a_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##             0             2             3             8            17           100           120           150           200 
##           110             1             1             1             2             3             2             1             8 
##           300           350           400           500           550           600          1000          1500          1850 
##             1             1             3             4             1             1             2             2             1 
##          2000          2500          3000          3500          4000          5000          6000          8000          8888 
##             3             2             3             1             1             1             3             2             2 
##          9000         20000 20925 or more         66666         88888          <NA> 
##             1             1             1             8             3         37588

# Top-code b_earn_b_: obtain a break point from percentile_checker(), then
# hand it to top_recode() together with the same missing-value codes.
# NOTE(review): in the printed "after encoding" table the top category
# ("5489 or more") contains a single case, so it is still unique after
# recoding; consider whether a lower break point is needed for this variable.
target_var <- "b_earn_b_"
missing_codes <- c(8888, 66666, 88888) # shared by both helper calls
percentile_99.5 <- percentile_checker(target_var, missing = missing_codes)

mydata <- top_recode(
  variable = target_var,
  break_point = percentile_99.5,
  missing = missing_codes
)
## [1] "Frequency table before encoding"
## b_earn_b_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##     0     3     5   200  1500  1800  2000  3000  3008  4000  5000  6000 66666 88888  <NA> 
##    85     2     1     2     1     1     4     3     1     1     1     1     7     3 37652

## [1] "Frequency table after encoding"
## b_earn_b_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            3            5          200         1500         1800         2000         3000         3008         4000 
##           85            2            1            2            1            1            4            3            1            1 
##         5000 5489 or more        66666        88888         <NA> 
##            1            1            7            3        37652

# Top-code b_earn_c_: obtain a break point from percentile_checker(), then
# hand it to top_recode() together with the same missing-value codes.
target_var <- "b_earn_c_"
# Shared by both helper calls so the two lists cannot drift apart.
missing_codes <- c(6666, 8888, 66666, 88888, 666666, 888888)
percentile_99.5 <- percentile_checker(target_var, missing = missing_codes)

mydata <- top_recode(
  variable = target_var,
  break_point = percentile_99.5,
  missing = missing_codes
)
## [1] "Frequency table before encoding"
## b_earn_c_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##      0      1      2      3      6      9     10     22     50    100    150    200    240    250    300    500    700    800 
##    841      4      2      1      3      3      1      1      1      5      1      8      1      4      2      8      2      2 
##   1000   1500   2000   3000   5000   6000  66666  88888 666666   <NA> 
##      3      1      1      3      1      1     17      7      1  36840

## [1] "Frequency table after encoding"
## b_earn_c_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            1            2            3            6            9           10           22           50          100 
##          841            4            2            1            3            3            1            1            1            5 
##          150          200          240          250          300          500          700          800         1000         1500 
##            1            8            1            4            2            8            2            2            3            1 
##         2000 2504 or more        66666        88888       666666         <NA> 
##            1            5           17            7            1        36840

# Top-code b_earn_d_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_d_",
  missing = c(6666, 8888, 66666, 88888, 666666, 888888)
)

mydata <- top_recode(
  variable = "b_earn_d_",
  break_point = percentile_99.5,
  missing = c(6666, 8888, 66666, 88888, 666666, 888888)
)
## [1] "Frequency table before encoding"
## b_earn_d_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##      0      1      6     50    100    150    300    500    550   1500   6666   8888  66666  88888 666666 888888   <NA> 
##    549      1      1      2      1      1      2      2      1      2      1      3     55     17      3      1  37123

## [1] "Frequency table after encoding"
## b_earn_d_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##           0           1           6          50         100         150         300         500 509 or more        6666 
##         549           1           1           2           1           1           2           2           3           1 
##        8888       66666       88888      666666      888888        <NA> 
##           3          55          17           3           1       37123

# Top-code b_earn_e_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_e_",
  missing = c(6666, 8888, 66666, 88888, 666666, 888888)
)

mydata <- top_recode(
  variable = "b_earn_e_",
  break_point = percentile_99.5,
  missing = c(6666, 8888, 66666, 88888, 666666, 888888)
)
## [1] "Frequency table before encoding"
## b_earn_e_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##     0     2    20   400  1000  1500  2500 66666  <NA> 
##    28     1     1     1     1     1     1     3 37728

## [1] "Frequency table after encoding"
## b_earn_e_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            2           20          400         1000         1500 2335 or more        66666         <NA> 
##           28            1            1            1            1            1            1            3        37728

# Top-code b_earn_f_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_f_",
  missing = c(6666, 8888, 66666, 88888, 666666, 888888)
)

mydata <- top_recode(
  variable = "b_earn_f_",
  break_point = percentile_99.5,
  missing = c(6666, 8888, 66666, 88888, 666666, 888888)
)
## [1] "Frequency table before encoding"
## b_earn_f_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##      0     20    100    200    300    600  66666  88888 666666   <NA> 
##    334      1      1      1      1      1     20      3      1  37402

## [1] "Frequency table after encoding"
## b_earn_f_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##           0          20         100         200 231 or more       66666       88888      666666        <NA> 
##         334           1           1           1           2          20           3           1       37402

# Top-code b_earn_h_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_h_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_h_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_h_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##         0         1         2         3         4         5         6         7        10        30        50       100       150 
##      3683        15         4         2         2         1         1         1         1         1         3         5         4 
##       200       250       300       400       500       600       800      1000      1500      2000      3000      6666      8888 
##         6         2         6         1        10         1         1         4         2         1         1         1         2 
##     66666     88888    666666 666666666      <NA> 
##       161        51         4         3     33785

## [1] "Frequency table after encoding"
## b_earn_h_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##           0           1           2           3           4           5           6           7          10          30 
##        3683          15           4           2           2           1           1           1           1           1 
##          50         100         150         200         250         300         400 500 or more        6666        8888 
##           3           5           4           6           2           6           1          20           1           2 
##       66666       88888      666666   666666666        <NA> 
##         161          51           4           3       33785

# Top-code b_earn_i_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_i_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_i_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_i_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##      0      1      2      3      4      5      8     30     50    100    150    200    300    400    500    600    650    700 
##    886      3      3      2      3      2      2      1      2      1      2      2      2      2     10      3      1      3 
##    800   1000   1200   1500   3000   8888  66666  88888 666666   <NA> 
##      1      4      1      1      1      1     53     20      1  36752

## [1] "Frequency table after encoding"
## b_earn_i_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            1            2            3            4            5            8           30           50          100 
##          886            3            3            2            3            2            2            1            2            1 
##          150          200          300          400          500          600          650          700          800 1000 or more 
##            2            2            2            2           10            3            1            3            1            7 
##         8888        66666        88888       666666         <NA> 
##            1           53           20            1        36752

# Top-code b_earn_j_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_j_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_j_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_j_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##     0     1     2   200   350   400   500   700   800   900  1200  1500  3500 66666 88888  <NA> 
##   354     1     1     2     1     1     9     1     2     1     1     1     1     7     2 37380

## [1] "Frequency table after encoding"
## b_earn_j_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            1            2          200          350          400          500          700          800          900 
##          354            1            1            2            1            1            9            1            2            1 
##         1200 1237 or more        66666        88888         <NA> 
##            1            2            7            2        37380

# Top-code b_earn_l_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_l_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_l_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_l_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##         0        40       150       200       300       350       400       500       600      1000      1500      2000      2500 
##       553         1         1         3         1         1         2         3         4         9         1         4         2 
##      3000      4000      5000      6000      7000      9000     66666     88888 666666666      <NA> 
##         4         1         1         2         1         3         5         4         5     37154

## [1] "Frequency table after encoding"
## b_earn_l_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0           40          150          200          300          350          400          500          600         1000 
##          553            1            1            3            1            1            2            3            4            9 
##         1500         2000         2500         3000         4000         5000         6000         7000 7039 or more        66666 
##            1            4            2            4            1            1            2            1            3            5 
##        88888    666666666         <NA> 
##            4            5        37154

# Top-code b_earn_o_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_o_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_o_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_o_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##     0     3   100   150   350  1000  2000  5000 66666  <NA> 
##    48     1     1     1     1     1     1     1     1 37709

## [1] "Frequency table after encoding"
## b_earn_o_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            3          100          150          350         1000         2000 4189 or more        66666         <NA> 
##           48            1            1            1            1            1            1            1            1        37709

# Top-code b_earn_p_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_p_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_p_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_p_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##     0     2    20    50   100   200   250   300   350   700  1000  1500  3000  7000 66666  <NA> 
##    21     1     1     1     1     1     2     1     2     1     1     1     1     1     5 37724

## [1] "Frequency table after encoding"
## b_earn_p_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            2           20           50          100          200          250          300          350          700 
##           21            1            1            1            1            1            2            1            2            1 
##         1000         1500         3000 6300 or more        66666         <NA> 
##            1            1            1            1            5        37724

# Top-code b_earn_q_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; the printed tables show those codes left as their own categories.
percentile_99.5 <- percentile_checker(
  "b_earn_q_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_q_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_q_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##      0      2      5     50    100    200    250    300    500  66666  88888 666666   <NA> 
##    586      2      1      1      2      4      1      1      2     34      5      1  37125

## [1] "Frequency table after encoding"
## b_earn_q_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##           0           2           5          50         100         200 250 or more       66666       88888      666666 
##         586           2           1           1           2           4           4          34           5           1 
##        <NA> 
##       37125

# Top-code b_earn_r_: the value returned by percentile_checker() (presumably
# the 99.5th percentile, going by the variable name) is used as the break
# point for top_recode(). The same codes are passed as `missing` to both
# calls; none of them occur in the printed tables for this variable.
percentile_99.5 <- percentile_checker(
  "b_earn_r_",
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)

mydata <- top_recode(
  variable = "b_earn_r_",
  break_point = percentile_99.5,
  missing = c(
    6666, 8888, 66666, 88888, 666666, 888888, 666666666, 888888888
  )
)
## [1] "Frequency table before encoding"
## b_earn_r_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##     0     4   100   250   400   500  1000  1500  3500  <NA> 
##   128     1     1     1     1     1     2     1     1 37628

## [1] "Frequency table after encoding"
## b_earn_r_. b_How Much Did <<Name>> Earn From <<Emp>>In A Last Week?
##            0            4          100          250          400          500         1000         1500 2139 or more         <NA> 
##          128            1            1            1            1            1            2            1            1        37628

# Household roster count: coerce the column to numeric, plot its distribution,
# then top-code at a fixed break point of 10 (the printed tables show values
# 10-14 collapsed into "10 or more").
mydata[["e_hhroster_count"]] <- as.numeric(mydata[["e_hhroster_count"]])
# hist() call left as written: its default title is built from this expression.
hist(mydata$e_hhroster_count)

mydata <- top_recode(
  variable = "e_hhroster_count",
  break_point = 10,
  missing = NA
)
## [1] "Frequency table before encoding"
## e_hhroster_count. 
##     2     3     4     5     6     7     8     9    10    11    12    13    14  <NA> 
##   154   910  2678  4169  4500  3508  1995   942   387   210    88    10    12 18202

## [1] "Frequency table after encoding"
## e_hhroster_count. 10
##          2          3          4          5          6          7          8          9 10 or more       <NA> 
##        154        910       2678       4169       4500       3508       1995        942        707      18202

# Eligible-child count: coerce the column to numeric, plot its distribution,
# then top-code at a fixed break point of 10 (the printed tables show values
# 10-12 collapsed into "10 or more").
mydata$e_eligiblechild_ <- as.numeric(mydata$e_eligiblechild_)
hist(mydata$e_eligiblechild_)

# Fix: the recoded data was previously stored in `mydata2`, leaving `mydata`
# with the un-top-coded values. Every other recode in this script writes back
# to `mydata`, so do the same here.
mydata <- top_recode(
  variable = "e_eligiblechild_",
  break_point = 10,
  missing = NA
)
# NOTE(review): `mydata2` is kept as an alias of the recoded data in case
# later code refers to it; remove once confirmed unused.
mydata2 <- mydata
## [1] "Frequency table before encoding"
## e_eligiblechild_. 
##     2     3     4     5     6     7     8     9    10    11    12  <NA> 
##   463  2472  2273  1493   808   332   117    39    18     9     3 29738

## [1] "Frequency table after encoding"
## e_eligiblechild_. 10
##          2          3          4          5          6          7          8          9 10 or more       <NA> 
##        463       2472       2273       1493        808        332        117         39         30      29738

Indirect PII - Categorical: Recode, encode, or top/bottom code extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

indirect_PII <- c("schdays_",
                  "ownfarm_",
                  "homehrs_",
                  "hrsun",
                  "hrmon",
                  "hrtues",
                  "hrwed",
                  "hrthur",
                  "hrfri",
                  "hrsat",
                  "bhrsun",
                  "bhrmon",
                  "bhrtues",
                  "bhrwed",
                  "bhrthur",
                  "bhrfri",
                  "bhrsat",
                  "chrsun",
                  "chrmon",
                  "chrtues",
                  "chrwed",
                  "chrthur",
                  "chrfri",
                  "chrsat",
                  "dhrsun",
                  "dhrmon",
                  "dhrtues",
                  "dhrwed",
                  "dhrthur",
                  "dhrfri",
                  "dhrsat",
                  "ehrsun",
                  "ehrmon",
                  "ehrtues",
                  "ehrwed",
                  "ehrthur",
                  "ehrfri",
                  "ehrsat",
                  "fhrsun",
                  "fhrmon",
                  "fhrtues",
                  "fhrwed",
                  "fhrthur",
                  "fhrfri",
                  "fhrsat",
                  "ghrsun",
                  "ghrmon",
                  "ghrtues",
                  "ghrwed",
                  "ghrthur",
                  "ghrfri",
                  "ghrsat",
                  "hhrsun",
                  "hhrmon",
                  "hhrtues",
                  "hhrwed",
                  "hhrthur",
                  "hhrfri",
                  "hhrsat",
                  "ihrsun",
                  "ihrmon",
                  "ihrtues",
                  "ihrwed",
                  "ihrthur",
                  "ihrfri",
                  "ihrsat",
                  "jhrsun",
                  "jhrmon",
                  "jhrtues",
                  "jhrwed",
                  "jhrthur",
                  "jhrfri",
                  "jhrsat",
                  "khrsun",
                  "khrmon",
                  "khrtues",
                  "khrwed",
                  "khrthur",
                  "khrfri",
                  "khrsat",
                  "lhrsun",
                  "lhrmon",
                  "lhrtues",
                  "lhrwed",
                  "lhrthur",
                  "lhrfri",
                  "lhrsat",
                  "mhrsun",
                  "mhrmon",
                  "mhrtues",
                  "mhrwed",
                  "mhrthur",
                  "mhrfri",
                  "mhrsat",
                  "nhrsun",
                  "nhrmon",
                  "nhrtues",
                  "nhrwed",
                  "nhrthur",
                  "nhrfri",
                  "nhrsat",
                  "ohrsun",
                  "ohrmon",
                  "ohrtues",
                  "ohrwed",
                  "ohrthur",
                  "ohrfri",
                  "ohrsat",
                  "phrsun",
                  "phrmon",
                  "phrtues",
                  "phrwed",
                  "phrthur",
                  "phrfri",
                  "phrsat",
                  "qhrsun",
                  "qhrmon",
                  "qhrtues",
                  "qhrwed",
                  "qhrthur",
                  "qhrfri",
                  "qhrsat",
                  "rhrsun",
                  "rhrmon",
                  "rhrtues",
                  "rhrwed",
                  "rhrthur",
                  "rhrfri",
                  "rhrsat",
                  "thrsun",
                  "thrmon",
                  "thrtues",
                  "thrwed",
                  "thrthur",
                  "thrfri",
                  "thrsat",
                  "othhaz_a_",
                  "exp_e_",
                  "religion_",
                  "yardwork",
                  "bird",
                  "firewood",
                  "alcohol",
                  "brick",
                  "hrsworked",
                  "latehrs",
                  "b_preprimary",
                  "b_yardwork",
                  "b_bird",
                  "b_bar",
                  "b_alcohol",
                  "b_hrsun",
                  "b_hrmon",
                  "b_hrtue",
                  "b_hrwed",
                  "b_hrthur",
                  "b_hrfri",
                  "b_hrsat",
                  "b_hrsworked",
                  "b_sex_",
                  "b_currentenroll",
                  "b_schdays_",
                  "b_sex_hh",
                  "e_preprimary",
                  "e_yardwork",
                  "e_bird",
                  "e_firewood",
                  "e_alcohol",
                  "e_brick",
                  "e_hrsworked",
                  "e_latehrs",
                  "e_schdays_",
                  "b_nenroll_",
                  "b_grade_",
                  "b_scheme_",
                  "b_schmtyp_",
                  "b_schmsrc_",
                  "b_emp_b_",
                  "b_emp_e_",
                  "b_emp_g_",
                  "b_emp_h_",
                  "b_emp_i_",
                  "b_emp_j_",
                  "b_emp_m_masked",
                  "b_emp_n_",
                  "b_emp_o_",
                  "b_emp_p_",
                  "b_emp_q_",
                  "b_emp_r_",
                  "b_ownfarm_",
                  "b_tobowntime_",
                  "b_tobactivity_a_",
                  "b_tobactivity_b_",
                  "b_tobactivity_c_",
                  "b_tobactivity_d_",
                  "b_tobactivity_e_",
                  "b_tobactivity_i_",
                  "b_homehrs_",
                  "b_tobhrsown_a_",
                  "b_tobhrsown_b_",
                  "b_expo_",
                  "b_time_",
                  "b_adulttob_",
                  "b_reli_",
                  "b_savings",
                  "b_savingsall",
                  "b_vslawho",
                  "b_vslause",
                  "b_loanwhere",
                  "b_loanuse",
                  "b_fs1",
                  "b_water",
                  "b_inc1a_wsh_masked",
                  "b_inc1a_wsh_masked",
                  "b_inc1a_pmp_masked",
                  "b_inc2",
                  "b_inc2_number",
                  "b_inc3a",
                  "b_els_a_",
                  "b_els_b_",
                  "b_els_c_",
                  "b_els_d_",
                  "b_els_e_",
                  "b_els_f_",
                  "b_els_g_",
                  "b_els_h_",
                  "b_els_i_",
                  "b_els_j_",
                  "b_els_k_",
                  "b_els_l_",
                  "b_els_m_",
                  "b_els_n_",
                  "b_els_o_",
                  "b_els_p_",
                  "b_els_q_",
                  "b_els_r_",
                  "b_time_hrs_sun_a_",
                  "b_time_hrs_sun_b_",
                  "b_time_hrs_sun_c_",
                  "b_time_hrs_sun_d_",
                  "b_time_hrs_sun_e_",
                  "b_time_hrs_sun_f_",
                  "b_time_hrs_sun_g_",
                  "b_time_hrs_sun_h_",
                  "b_time_hrs_sun_i_",
                  "b_time_hrs_sun_j_",
                  "b_time_hrs_sun_k_",
                  "b_time_hrs_sun_l_",
                  "b_time_hrs_sun_m_",
                  "b_time_hrs_sun_n_",
                  "b_time_hrs_sun_o_",
                  "b_time_hrs_sun_p_",
                  "b_time_hrs_sun_q_",
                  "b_time_hrs_sun_r_",
                  "b_time_min_sun_a_",
                  "b_time_min_sun_b_",
                  "b_time_min_sun_c_",
                  "b_time_min_sun_d_",
                  "b_time_min_sun_e_",
                  "b_time_min_sun_f_",
                  "b_time_min_sun_g_",
                  "b_time_min_sun_h_",
                  "b_time_min_sun_i_",
                  "b_time_min_sun_j_",
                  "b_time_min_sun_k_",
                  "b_time_min_sun_l_",
                  "b_time_min_sun_m_",
                  "b_time_min_sun_n_",
                  "b_time_min_sun_o_",
                  "b_time_min_sun_p_",
                  "b_time_min_sun_q_",
                  "b_time_min_sun_r_",
                  "b_time_hrs_mon_a_",
                  "b_time_hrs_mon_b_",
                  "b_time_hrs_mon_c_",
                  "b_time_hrs_mon_d_",
                  "b_time_hrs_mon_e_",
                  "b_time_hrs_mon_f_",
                  "b_time_hrs_mon_g_",
                  "b_time_hrs_mon_h_",
                  "b_time_hrs_mon_i_",
                  "b_time_hrs_mon_j_",
                  "b_time_hrs_mon_k_",
                  "b_time_hrs_mon_l_",
                  "b_time_hrs_mon_m_",
                  "b_time_hrs_mon_n_",
                  "b_time_hrs_mon_o_",
                  "b_time_hrs_mon_p_",
                  "b_time_hrs_mon_q_",
                  "b_time_hrs_mon_r_",
                  "b_time_min_mon_a_",
                  "b_time_min_mon_b_",
                  "b_time_min_mon_c_",
                  "b_time_min_mon_d_",
                  "b_time_min_mon_e_",
                  "b_time_min_mon_f_",
                  "b_time_min_mon_g_",
                  "b_time_min_mon_h_",
                  "b_time_min_mon_i_",
                  "b_time_min_mon_j_",
                  "b_time_min_mon_k_",
                  "b_time_min_mon_l_",
                  "b_time_min_mon_m_",
                  "b_time_min_mon_n_",
                  "b_time_min_mon_o_",
                  "b_time_min_mon_p_",
                  "b_time_min_mon_q_",
                  "b_time_min_mon_r_",
                  "b_time_hrs_tue_a_",
                  "b_time_hrs_tue_b_",
                  "b_time_hrs_tue_c_",
                  "b_time_hrs_tue_d_",
                  "b_time_hrs_tue_e_",
                  "b_time_hrs_tue_f_",
                  "b_time_hrs_tue_g_",
                  "b_time_hrs_tue_h_",
                  "b_time_hrs_tue_i_",
                  "b_time_hrs_tue_j_",
                  "b_time_hrs_tue_k_",
                  "b_time_hrs_tue_l_",
                  "b_time_hrs_tue_m_",
                  "b_time_hrs_tue_n_",
                  "b_time_hrs_tue_o_",
                  "b_time_hrs_tue_p_",
                  "b_time_hrs_tue_q_",
                  "b_time_hrs_tue_r_",
                  "b_time_min_tue_a_",
                  "b_time_min_tue_b_",
                  "b_time_min_tue_c_",
                  "b_time_min_tue_d_",
                  "b_time_min_tue_e_",
                  "b_time_min_tue_f_",
                  "b_time_min_tue_g_",
                  "b_time_min_tue_h_",
                  "b_time_min_tue_i_",
                  "b_time_min_tue_j_",
                  "b_time_min_tue_k_",
                  "b_time_min_tue_l_",
                  "b_time_min_tue_m_",
                  "b_time_min_tue_n_",
                  "b_time_min_tue_p_",
                  "b_time_min_tue_q_",
                  "b_time_min_tue_r_",
                  "b_time_hrs_wed_a_",
                  "b_time_hrs_wed_b_",
                  "b_time_hrs_wed_c_",
                  "b_time_hrs_wed_d_",
                  "b_time_hrs_wed_e_",
                  "b_time_hrs_wed_f_",
                  "b_time_hrs_wed_g_",
                  "b_time_hrs_wed_h_",
                  "b_time_hrs_wed_i_",
                  "b_time_hrs_wed_j_",
                  "b_time_hrs_wed_k_",
                  "b_time_hrs_wed_l_",
                  "b_time_hrs_wed_m_",
                  "b_time_hrs_wed_n_",
                  "b_time_hrs_wed_o_",
                  "b_time_hrs_wed_p_",
                  "b_time_hrs_wed_q_",
                  "b_time_hrs_wed_r_",
                  "b_time_min_wed_a_",
                  "b_time_min_wed_b_",
                  "b_time_min_wed_c_",
                  "b_time_min_wed_d_",
                  "b_time_min_wed_e_",
                  "b_time_min_wed_f_",
                  "b_time_min_wed_g_",
                  "b_time_min_wed_h_",
                  "b_time_min_wed_i_",
                  "b_time_min_wed_j_",
                  "b_time_min_wed_k_",
                  "b_time_min_wed_l_",
                  "b_time_min_wed_m_",
                  "b_time_min_wed_n_",
                  "b_time_min_wed_o_",
                  "b_time_min_wed_p_",
                  "b_time_min_wed_q_",
                  "b_time_min_wed_r_",
                  "b_time_hrs_thu_a_",
                  "b_time_hrs_thu_b_",
                  "b_time_hrs_thu_c_",
                  "b_time_hrs_thu_d_",
                  "b_time_hrs_thu_e_",
                  "b_time_hrs_thu_f_",
                  "b_time_hrs_thu_g_",
                  "b_time_hrs_thu_h_",
                  "b_time_hrs_thu_i_",
                  "b_time_hrs_thu_j_",
                  "b_time_hrs_thu_k_",
                  "b_time_hrs_thu_l_",
                  "b_time_hrs_thu_m_",
                  "b_time_hrs_thu_n_",
                  "b_time_hrs_thu_o_",
                  "b_time_hrs_thu_p_",
                  "b_time_hrs_thu_q_",
                  "b_time_hrs_thu_r_",
                  "b_time_min_thu_a_",
                  "b_time_min_thu_b_",
                  "b_time_min_thu_c_",
                  "b_time_min_thu_d_",
                  "b_time_min_thu_e_",
                  "b_time_min_thu_f_",
                  "b_time_min_thu_g_",
                  "b_time_min_thu_h_",
                  "b_time_min_thu_i_",
                  "b_time_min_thu_j_",
                  "b_time_min_thu_k_",
                  "b_time_min_thu_l_",
                  "b_time_min_thu_m_",
                  "b_time_min_thu_n_",
                  "b_time_min_thu_p_",
                  "b_time_min_thu_q_",
                  "b_time_min_thu_r_",
                  "b_time_hrs_fri_a_",
                  "b_time_hrs_fri_b_",
                  "b_time_hrs_fri_c_",
                  "b_time_hrs_fri_d_",
                  "b_time_hrs_fri_e_",
                  "b_time_hrs_fri_f_",
                  "b_time_hrs_fri_g_",
                  "b_time_hrs_fri_h_",
                  "b_time_hrs_fri_i_",
                  "b_time_hrs_fri_j_",
                  "b_time_hrs_fri_k_",
                  "b_time_hrs_fri_l_",
                  "b_time_hrs_fri_m_",
                  "b_time_hrs_fri_n_",
                  "b_time_hrs_fri_o_",
                  "b_time_hrs_fri_p_",
                  "b_time_hrs_fri_q_",
                  "b_time_hrs_fri_r_",
                  "b_time_min_fri_a_",
                  "b_time_min_fri_b_",
                  "b_time_min_fri_c_",
                  "b_time_min_fri_d_",
                  "b_time_min_fri_e_",
                  "b_time_min_fri_f_",
                  "b_time_min_fri_g_",
                  "b_time_min_fri_h_",
                  "b_time_min_fri_i_",
                  "b_time_min_fri_j_",
                  "b_time_min_fri_k_",
                  "b_time_min_fri_l_",
                  "b_time_min_fri_m_",
                  "b_time_min_fri_n_",
                  "b_time_min_fri_o_",
                  "b_time_min_fri_p_",
                  "b_time_min_fri_q_",
                  "b_time_min_fri_r_",
                  "b_time_hrs_sat_a_",
                  "b_time_hrs_sat_b_",
                  "b_time_hrs_sat_c_",
                  "b_time_hrs_sat_d_",
                  "b_time_hrs_sat_e_",
                  "b_time_hrs_sat_f_",
                  "b_time_hrs_sat_g_",
                  "b_time_hrs_sat_h_",
                  "b_time_hrs_sat_i_",
                  "b_time_hrs_sat_j_",
                  "b_time_hrs_sat_k_",
                  "b_time_hrs_sat_l_",
                  "b_time_hrs_sat_m_",
                  "b_time_hrs_sat_n_",
                  "b_time_hrs_sat_o_",
                  "b_time_hrs_sat_p_",
                  "b_time_hrs_sat_q_",
                  "b_time_hrs_sat_r_",
                  "b_time_min_sat_a_",
                  "b_time_min_sat_b_",
                  "b_time_min_sat_c_",
                  "b_time_min_sat_d_",
                  "b_time_min_sat_e_",
                  "b_time_min_sat_f_",
                  "b_time_min_sat_g_",
                  "b_time_min_sat_h_",
                  "b_time_min_sat_i_",
                  "b_time_min_sat_j_",
                  "b_time_min_sat_k_",
                  "b_time_min_sat_l_",
                  "b_time_min_sat_m_",
                  "b_time_min_sat_n_",
                  "b_time_min_sat_o_",
                  "b_time_min_sat_p_",
                  "b_time_min_sat_q_",
                  "b_time_min_sat_r_",
                  "b_earn_g_",
                  "b_earn_k_",
                  "b_earn_m_",
                  "b_earn_n_",
                  "e_live_",
                  "e_nenroll_",
                  "e_grade_",
                  "e_scheme_",
                  "e_schmtyp_",
                  "e_schmtyp_a_",
                  "e_schmtyp_g_",
                  "e_schmtyp_h_",
                  "e_schmsrc_",
                  "e_schmsrc_a_",
                  "e_schmsrc_c_",
                  "e_schmsrc_f_",
                  "e_schmsrc_g_",
                  "e_schmsrc_h_",
                  "e_emp_a_",
                  "e_emp_d_",
                  "e_emp_e_",
                  "e_emp_f_",
                  "e_emp_g_",
                  "e_emp_h_",
                  "e_emp_i_",
                  "e_emp_j_",
                  "e_emp_l_",
                  "e_emp_m_",
                  "e_emp_n_",
                  "e_emp_o_masked",
                  "e_emp_p_",
                  "e_emp_q_",
                  "e_emp_r_",
                  "e_ownfarm_",
                  "e_tobowntime_",
                  "e_tobactivity_b_",
                  "e_tobactivity_c_",
                  "e_tobactivity_f_",
                  "e_tobactivity_j_",
                  "e_homehrs_",
                  "e_hrsun",
                  "e_hrmon",
                  "e_hrtues",
                  "e_hrwed",
                  "e_hrthur",
                  "e_hrfri",
                  "e_hrsat",
                  "e_bhrsun",
                  "e_bhrmon",
                  "e_bhrtues",
                  "e_bhrwed",
                  "e_bhrthur",
                  "e_bhrfri",
                  "e_bhrsat",
                  "e_chrsun",
                  "e_chrmon",
                  "e_chrtues",
                  "e_chrwed",
                  "e_chrthur",
                  "e_chrfri",
                  "e_chrsat",
                  "e_dhrsun",
                  "e_dhrmon",
                  "e_dhrtues",
                  "e_dhrwed",
                  "e_dhrthur",
                  "e_dhrfri",
                  "e_dhrsat",
                  "e_ehrsun",
                  "e_ehrmon",
                  "e_ehrtues",
                  "e_ehrwed",
                  "e_ehrthur",
                  "e_ehrfri",
                  "e_ehrsat",
                  "e_fhrsun",
                  "e_fhrmon",
                  "e_fhrtues",
                  "e_fhrwed",
                  "e_fhrthur",
                  "e_fhrfri",
                  "e_fhrsat",
                  "e_ghrsun",
                  "e_ghrmon",
                  "e_ghrtues",
                  "e_ghrwed",
                  "e_ghrthur",
                  "e_ghrfri",
                  "e_ghrsat",
                  "e_hhrsun",
                  "e_hhrmon",
                  "e_hhrtues",
                  "e_hhrwed",
                  "e_hhrthur",
                  "e_hhrfri",
                  "e_hhrsat",
                  "e_ihrsun",
                  "e_ihrmon",
                  "e_ihrtues",
                  "e_ihrwed",
                  "e_ihrthur",
                  "e_ihrfri",
                  "e_ihrsat",
                  "e_jhrsun",
                  "e_jhrmon",
                  "e_jhrtues",
                  "e_jhrwed",
                  "e_jhrthur",
                  "e_jhrfri",
                  "e_jhrsat",
                  "e_khrsun",
                  "e_khrmon",
                  "e_khrtues",
                  "e_khrwed",
                  "e_khrthur",
                  "e_khrfri",
                  "e_khrsat",
                  "e_lhrsun",
                  "e_lhrmon",
                  "e_lhrtues",
                  "e_lhrwed",
                  "e_lhrthur",
                  "e_lhrfri",
                  "e_lhrsat",
                  "e_mhrsun",
                  "e_mhrmon",
                  "e_mhrtues",
                  "e_mhrwed",
                  "e_mhrthur",
                  "e_mhrfri",
                  "e_mhrsat",
                  "e_nhrsun",
                  "e_nhrmon",
                  "e_nhrtues",
                  "e_nhrwed",
                  "e_nhrthur",
                  "e_nhrfri",
                  "e_nhrsat",
                  "e_ohrsun",
                  "e_ohrmon",
                  "e_ohrtues",
                  "e_ohrwed",
                  "e_ohrthur",
                  "e_ohrfri",
                  "e_ohrsat",
                  "e_phrsun",
                  "e_phrmon",
                  "e_phrtues",
                  "e_phrwed",
                  "e_phrthur",
                  "e_phrfri",
                  "e_phrsat",
                  "e_qhrsun",
                  "e_qhrmon",
                  "e_qhrtues",
                  "e_qhrwed",
                  "e_qhrthur",
                  "e_qhrfri",
                  "e_qhrsat",
                  "e_rhrsun",
                  "e_rhrmon",
                  "e_rhrtues",
                  "e_rhrwed",
                  "e_rhrthur",
                  "e_rhrfri",
                  "e_rhrsat",
                  "e_thrsun",
                  "e_thrmon",
                  "e_thrtues",
                  "e_thrwed",
                  "e_thrthur",
                  "e_thrfri",
                  "e_thrsat",
                  "e_earn_a__masked",
                  "e_earn_b__masked",
                  "e_earn_c__masked",
                  "e_earn_d__masked",
                  "e_earn_e__masked",
                  "e_earn_f__masked",
                  "e_earn_g__masked",
                  "e_earn_h__masked",
                  "e_earn_i__masked",
                  "e_earn_j__masked",
                  "e_earn_l__masked",
                  "e_earn_m__masked",
                  "e_earn_n__masked",
                  "e_earn_p__masked",
                  "e_earn_q__masked",
                  "e_earn_r__masked",
                  "e_tobseas_",
                  "e_tobhrsown_a_",
                  "e_tobhrsown_b_",
                  "e_tobearn__masked",
                  "e_othhaz_",
                  "e_othhaz_a_",
                  "e_exp_",
                  "e_exp_e_",
                  "e_timeofday_",
                  "e_adulttob_",
                  "e_religion_",
                  "e_tribe_",
                  "e_savings",
                  "e_savingsall",
                  "e_vslawho",
                  "e_vslawho_e",
                  "e_vslawho_g",
                  "e_vslawho_h",
                  "e_vslawho_i",
                  "e_vslawho_j",
                  "e_vslalen_num",
                  "e_vslapay",
                  "e_vslause",
                  "e_vslause_g",
                  "e_vslause_i",
                  "e_vslause_j",
                  "e_loanwhere",
                  "e_loanuse",
                  "e_loanuse_g",
                  "e_inc1a_9_masked",
                  "e_inc1a_11_masked",
                  "e_inca_b",
                  "e_inc3a",
                  "b_tribe_")

# Pass the indirect-PII variable list to the capture_tables() helper.
# NOTE(review): the helper is not defined in this section; presumably it
# tabulates each listed variable for review -- confirm against the sourced
# functions file.
capture_tables (indirect_PII)


# Recode races or ethnicity to reduce risk of re-identification

haven_table("e_tribe_")
## e_tribe_. e_Tribe
##     1     2     3     4     5     6     7     8     9    10    11    12    13    77    88  <NA> 
## 15679     9   190  3170    86    22   230    15    21    58     6     5    17    51     4 18202
#val_labels(mydata$b_tribe_)

# The baseline (b_) and endline (e_) tribe columns get the same mapping:
# codes 2, 6, 8, 9, 11 and 12 (small counts in the table above) are folded
# into 77; every other listed code maps to itself.
mydata[c("b_tribe_", "e_tribe_")] <- lapply(
  mydata[c("b_tribe_", "e_tribe_")],
  function(tribe_col) {
    recode(
      tribe_col,
      `1` = 1L, `2` = 77L, `3` = 3L, `4` = 4L, `5` = 5L,
      `6` = 77L, `7` = 7L, `8` = 77L, `9` = 77L, `10` = 10L,
      `11` = 77L, `12` = 77L, `13` = 13L, `77` = 77L, `88` = 88L
    )
  }
)

# Recode those with very specific values. 

Matching and crosstabulations: Run automated PII check

# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# All variable names should correspond to the names in the data file
# selected categorical key variables: gender, occupation/education and age

# Build one categorical key per concept by pasting the baseline (b_) and
# endline (e_) values into a single string, the same way edu_summary is
# built below. cbind() must not be used here: it stores a two-column matrix
# inside the data frame, which createSdcObj() then reports as separate
# ".V1"/".V2" keys with vector-recycling warnings and inconsistent counts.
# Note that paste() turns a missing value into the literal string "NA".
mydata$sex_summary <- paste(mydata$b_sex_, mydata$e_sex_)
mydata$age_summary <- paste(mydata$b_age_masked, mydata$e_age_masked)

# For each row, keep the name of the education column holding the row
# maximum. ties.method = "first" makes the choice reproducible; the
# max.col() default ("random") breaks ties differently on every run.
educ_vars <- c("b_chprepri", "b_chpri", "b_chuppri", "b_chsec", "b_chhighersec")
mydata$edu_sum1 <- educ_vars[max.col(mydata[educ_vars], ties.method = "first")]

educ_vars <- c("e_chprepri", "e_chpri", "e_chuppri", "e_chsec", "e_chhighersec")
mydata$edu_sum2 <- educ_vars[max.col(mydata[educ_vars], ties.method = "first")]
mydata$edu_summary <- paste(mydata$edu_sum1, mydata$edu_sum2)

# Categorical key variables and household identifier for the sdcMicro object
selectedKeyVars <- c("sex_summary", "age_summary", "edu_summary")
selectedHouseholdID <- c("hhid")
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars,
  hhId = selectedHouseholdID
)
## Warning in cbind(reshier, unique(dataX[, 1])): number of rows of result is not a multiple of vector length (arg 1)
# Print the sdcMicro object summary; the recorded output below lists the
# key-variable statistics and the 2/3/5-anonymity violation counts.
sdcInitial
## The input dataset consists of 37765 rows and 1335 variables.
##   --> Categorical key variables: sex_summary, age_summary, edu_summary
##   --> Cluster/Household-Id variable: hhid
## ----------------------------------------------------------------------
## Warning in cbind(stats_rec[, 1], stats_rec[, 2], paste0("(", stats_o[, 2], : number of rows of result is not a multiple of vector
## length (arg 3)
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##    Key Variable Number of categories     Mean size            Size of smallest (>0)       
##  sex_summary.V1                    4 (4)  6067.000 (6067.000)                     3    (3)
##  sex_summary.V2                    3 (3)  9781.500 (9781.500)                  9547 (9547)
##  age_summary.V1                    2 (6)  8623.000 (1779.200)                  8623  (113)
##  age_summary.V2                   15 (4)  1389.714 (6067.000)                   457    (3)
##     edu_summary                   11 (3)  3433.182 (9781.500)                    51 (9547)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 0 (0.000%)
##   - 5-anonymity: 0 (0.000%)
## 
## ----------------------------------------------------------------------
# Remove the temporary key variables built for the sdcMicro check so they
# are not carried into the saved dataset.
dropvars <- c(
  "sex_summary", "age_summary", "edu_summary",
  "edu_sum1", "edu_sum2"
)
keep_cols <- !(names(mydata) %in% dropvars)
mydata <- mydata[keep_cols]

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here (baseline b_* first, then endline e_*):
open_ends <- c(
  "b_live_sp_", "b_othhaz_", "b_vsla_desc", "b_loanaccess_get",
  "b_klabwhy", "b_kidlaw_d",
  "e_loanaccess_where", "e_klabwhy", "e_kidlaw_d", "e_kidlaw_e", "e_type"
)

# Hand the list to the report_open() helper.
# NOTE(review): presumably this is what produces "verbatims.csv" -- confirm
# against the sourced functions file.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Redact the b_vsla_desc responses flagged during review by overwriting them
# with the placeholder "[Local Language]". The indices are positional row
# numbers, so they are only valid for the current row order of mydata.
mydata$b_vsla_desc[c(
  19578, 19912, 21634, 24756,
  26740, 27560, 29800, 29820,
  29823, 30504, 30975, 31626,
  33112, 34433, 35943, 35949
)] <- "[Local Language]"

GPS data: Displace

# Setup map
# !!!No GPS

Save processed data in Stata and SPSS format

Adds "_PU" (Public Use) to the end of the output file name

# Write the processed data in Stata (.dta) and SPSS (.sav) formats; each
# output name is the input filename followed by "_PU".
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the dataset filename at run time
title_var <- paste0("DOL-ILAB SDC - ", filename)