# Start from a clean workspace, including hidden (dot-prefixed) objects.
# `all.names = TRUE` is spelled out in full: the previous `all=t` handed the
# base function `t` (matrix transpose) to ls() where a logical is expected and
# relied on partial argument matching.
rm(list = ls(all.names = TRUE))
filename <- "Section_10" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file
# Load the helper script named above (presumably the source of top_recode(),
# which is called later but not defined in this file -- confirm).
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!No Direct PII
# !!!No Direct PII-team
# !!!No small locations
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top code high income to the 99.5 percentile
# Top-code s10q2: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q2)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q2. What is the total amount of the loan? If your household has had multiple loans f
## 0 80 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 13000 14000 15000
## 6 1 2 5 1 9 3 2 2 3 8 1 1 1 6
## 16000 18000 20000 22000 24000 25000 30000 32000 37000 40000 43000 45000 50000 60000 62000
## 1 2 5 1 1 3 6 1 2 4 1 1 1 2 1
## 70000 1e+05 150000 <NA>
## 1 2 1 2209
## [1] "Frequency table after encoding"
## s10q2. What is the total amount of the loan? If your household has had multiple loans f
## 0 80 2000 3000 4000 5000 6000
## 6 1 2 5 1 9 3
## 7000 8000 9000 10000 11000 13000 14000
## 2 2 3 8 1 1 1
## 15000 16000 18000 20000 22000 24000 25000
## 6 1 2 5 1 1 3
## 30000 32000 37000 40000 43000 45000 50000
## 6 1 2 4 1 1 1
## 60000 62000 70000 1e+05 128499 or more <NA>
## 2 1 1 2 1 2209
# Top-code s10q3: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q3)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q3",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q3. In the past 12 months, how much did your household pay in interest on these loan
## 0 50 60 100 200 220 225 270 320 450 480 500 512 600 700 750 760 800
## 11 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
## 900 960 1000 1300 1400 1440 1500 1600 2000 2360 2500 2800 3000 3200 3630 3920 3960 4000
## 4 1 3 1 2 1 2 2 6 1 3 1 4 2 1 1 1 1
## 4300 5000 7200 7500 7800 8000 10000 11100 14000 15000 18600 23000 30000 60000 75000 <NA>
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2215
## [1] "Frequency table after encoding"
## s10q3. In the past 12 months, how much did your household pay in interest on these loan
## 0 50 60 100 200 220 225
## 11 1 1 1 1 1 1
## 270 320 450 480 500 512 600
## 1 1 1 1 1 1 1
## 700 750 760 800 900 960 1000
## 1 1 1 2 4 1 3
## 1300 1400 1440 1500 1600 2000 2360
## 1 2 1 2 2 6 1
## 2500 2800 3000 3200 3630 3920 3960
## 3 1 4 2 1 1 1
## 4000 4300 5000 7200 7500 7800 8000
## 1 1 1 1 1 1 1
## 10000 11100 14000 15000 18600 23000 30000
## 1 1 1 1 1 1 1
## 60000 68999 or more <NA>
## 1 1 2215
# Top-code s10q5: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q5)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q5",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q5. What is the total amount of the loan? If your household has had multiple loans f
## 0 500 600 700 900 1000 1300 1500 1590 1750 1960 2000 2400 2500 2600
## 7 1 2 1 1 4 3 4 1 1 1 19 1 2 1
## 2640 2800 3000 3600 3996 4000 4400 4500 4800 5000 5928 6000 6500 7000 8000
## 1 1 45 2 1 31 1 1 1 122 1 52 1 28 35
## 9000 10000 10200 11000 12000 13000 13824 14000 15000 15900 16000 17000 18000 19200 20000
## 17 83 1 5 14 6 1 8 35 1 4 5 3 1 21
## 21000 22000 23000 25000 26000 30000 40000 42000 45000 50000 51760 55000 60000 120000 <NA>
## 1 2 1 2 2 9 2 2 2 6 1 1 2 1 1685
## [1] "Frequency table after encoding"
## s10q5. What is the total amount of the loan? If your household has had multiple loans f
## 0 500 600 700 900 1000 1300
## 7 1 2 1 1 4 3
## 1500 1590 1750 1960 2000 2400 2500
## 4 1 1 1 19 1 2
## 2600 2640 2800 3000 3600 3996 4000
## 1 1 1 45 2 1 31
## 4400 4500 4800 5000 5928 6000 6500
## 1 1 1 122 1 52 1
## 7000 8000 9000 10000 10200 11000 12000
## 28 35 17 83 1 5 14
## 13000 13824 14000 15000 15900 16000 17000
## 6 1 8 35 1 4 5
## 18000 19200 20000 21000 22000 23000 25000
## 3 1 21 1 2 1 2
## 26000 30000 40000 42000 45000 50000 51760
## 2 9 2 2 2 6 1
## 54838 or more <NA>
## 4 1685
# Top-code s10q6: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
# NOTE(review): the "before encoding" table printed below lists one -9999
# value; it looks like another missing/refusal code that is neither excluded
# here nor passed as `missing` -- confirm whether it should be.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q6)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q6",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q6. In the past 12 months, how much did your household pay in interest on these loan
## -9999 0 4 5 10 13 30 45 49 50 60 70 100 120 135 140 150 160
## 1 38 1 1 1 1 2 1 1 3 3 1 7 2 1 1 10 1
## 170 180 200 210 225 240 250 270 300 310 320 325 330 340 350 360 375 400
## 1 1 5 1 1 5 3 1 16 1 1 1 1 2 4 3 1 13
## 410 414 420 436 450 480 485 488 500 510 520 534 535 540 556 560 568 589
## 1 1 1 1 6 3 1 1 32 1 2 1 1 1 1 3 1 3
## 600 610 625 638 640 656 660 680 700 730 750 800 832 850 860 880 884 900
## 13 1 1 1 2 1 1 2 11 1 6 21 1 2 1 1 2 24
## 990 1000 1040 1050 1076 1085 1120 1160 1200 1210 1240 1250 1280 1300 1320 1330 1350 1360
## 1 33 1 2 1 1 3 1 13 2 3 2 2 4 2 1 1 2
## 1400 1440 1480 1500 1520 1560 1581 1600 1680 1700 1720 1760 1800 1840 1900 1920 1960 2000
## 5 2 3 33 1 1 1 5 1 2 1 1 10 1 4 1 1 30
## 2100 2160 2200 2240 2250 2320 2340 2360 2400 2440 2480 2488 2500 2700 2750 2800 3000 3144
## 1 2 1 2 3 1 1 1 2 1 3 1 3 2 1 2 14 1
## 3160 3200 3340 3360 3374 3430 3700 4000 4100 4125 4500 4720 4800 5000 5300 5360 5400 5750
## 1 3 1 2 1 1 1 7 1 1 3 1 1 2 1 1 1 1
## 6000 6600 6900 7000 7200 7320 7980 8000 8400 8600 8760 9000 10000 10760 10940 13130 13200 15000
## 2 2 1 2 1 1 1 4 1 1 1 3 2 1 1 1 1 3
## 18720 33000 37000 <NA>
## 1 1 1 1724
## [1] "Frequency table after encoding"
## s10q6. In the past 12 months, how much did your household pay in interest on these loan
## -9999 0 4 5 10 13 30
## 1 38 1 1 1 1 2
## 45 49 50 60 70 100 120
## 1 1 3 3 1 7 2
## 135 140 150 160 170 180 200
## 1 1 10 1 1 1 5
## 210 225 240 250 270 300 310
## 1 1 5 3 1 16 1
## 320 325 330 340 350 360 375
## 1 1 1 2 4 3 1
## 400 410 414 420 436 450 480
## 13 1 1 1 1 6 3
## 485 488 500 510 520 534 535
## 1 1 32 1 2 1 1
## 540 556 560 568 589 600 610
## 1 1 3 1 3 13 1
## 625 638 640 656 660 680 700
## 1 1 2 1 1 2 11
## 730 750 800 832 850 860 880
## 1 6 21 1 2 1 1
## 884 900 990 1000 1040 1050 1076
## 2 24 1 33 1 2 1
## 1085 1120 1160 1200 1210 1240 1250
## 1 3 1 13 2 3 2
## 1280 1300 1320 1330 1350 1360 1400
## 2 4 2 1 1 2 5
## 1440 1480 1500 1520 1560 1581 1600
## 2 3 33 1 1 1 5
## 1680 1700 1720 1760 1800 1840 1900
## 1 2 1 1 10 1 4
## 1920 1960 2000 2100 2160 2200 2240
## 1 1 30 1 2 1 2
## 2250 2320 2340 2360 2400 2440 2480
## 3 1 1 1 2 1 3
## 2488 2500 2700 2750 2800 3000 3144
## 1 3 2 1 2 14 1
## 3160 3200 3340 3360 3374 3430 3700
## 1 3 1 2 1 1 1
## 4000 4100 4125 4500 4720 4800 5000
## 7 1 1 3 1 1 2
## 5300 5360 5400 5750 6000 6600 6900
## 1 1 1 1 2 2 1
## 7000 7200 7320 7980 8000 8400 8600
## 2 1 1 1 4 1 1
## 8760 9000 10000 10760 10940 13130 13200
## 1 3 2 1 1 1 1
## 15000 15539 or more <NA>
## 3 3 1724
# Top-code s10q8: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q8)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q8",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q8. What is the total amount of the loan? If your household has had multiple loans f
## 0 20 30 50 60 64 80 100 115 144 150 180 190 200 250
## 3 1 1 1 1 1 1 21 1 1 7 1 1 27 2
## 300 370 380 400 450 500 600 620 700 750 800 858 900 1000 1200
## 16 1 1 4 1 65 4 1 6 1 4 1 1 89 6
## 1300 1400 1450 1500 1600 1700 1800 2000 2100 2110 2200 2300 2500 2800 2900
## 1 1 1 33 2 1 1 81 2 1 1 1 14 2 2
## 3000 3400 3500 4000 4500 4800 5000 5500 5700 6000 7000 7500 8000 8200 9000
## 61 1 5 15 2 1 65 1 1 12 12 1 5 1 2
## 10000 10500 12000 12100 13000 13200 13300 15000 15500 16000 18000 19000 20000 22000 25000
## 30 1 4 1 2 1 1 15 1 1 1 1 17 1 3
## 27000 28000 30000 35000 39000 40000 45000 49000 50000 55000 60000 80000 95000 1e+05 150000
## 2 3 6 2 1 3 1 1 4 1 1 4 1 3 1
## 180000 3e+05 <NA>
## 1 2 1579
## [1] "Frequency table after encoding"
## s10q8. What is the total amount of the loan? If your household has had multiple loans f
## 0 20 30 50 60 64 80
## 3 1 1 1 1 1 1
## 100 115 144 150 180 190 200
## 21 1 1 7 1 1 27
## 250 300 370 380 400 450 500
## 2 16 1 1 4 1 65
## 600 620 700 750 800 858 900
## 4 1 6 1 4 1 1
## 1000 1200 1300 1400 1450 1500 1600
## 89 6 1 1 1 33 2
## 1700 1800 2000 2100 2110 2200 2300
## 1 1 81 2 1 1 1
## 2500 2800 2900 3000 3400 3500 4000
## 14 2 2 61 1 5 15
## 4500 4800 5000 5500 5700 6000 7000
## 2 1 65 1 1 12 12
## 7500 8000 8200 9000 10000 10500 12000
## 1 5 1 2 30 1 4
## 12100 13000 13200 13300 15000 15500 16000
## 1 2 1 1 15 1 1
## 18000 19000 20000 22000 25000 27000 28000
## 1 1 17 1 3 2 3
## 30000 35000 39000 40000 45000 49000 50000
## 6 2 1 3 1 1 4
## 55000 60000 80000 95000 1e+05 120999 or more <NA>
## 1 1 4 1 3 4 1579
# Top-code s10q9: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q9)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q9",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q9. In the past 12 months, how much did your household pay in interest on these loa
## 0 10 38 45 50 75 80 90 100 120 125 150 175 200 210 225 240 250
## 486 1 1 1 3 2 1 1 17 2 1 6 1 27 1 1 1 4
## 300 350 360 400 500 600 650 680 700 750 800 840 900 960 1000 1050 1200 1400
## 18 2 1 11 18 10 1 1 1 2 4 1 3 1 20 2 4 3
## 1440 1500 1600 1700 1750 1800 2000 2400 2500 2600 3000 3250 3300 3500 3900 3920 4000 4200
## 1 8 1 1 1 5 5 1 2 1 2 1 1 1 1 1 4 1
## 4400 5000 5120 7000 7500 8000 10000 12000 14700 15000 18000 21600 24000 <NA>
## 1 2 1 2 1 2 1 1 1 1 1 1 1 1582
## [1] "Frequency table after encoding"
## s10q9. In the past 12 months, how much did your household pay in interest on these loa
## 0 10 38 45 50 75 80
## 486 1 1 1 3 2 1
## 90 100 120 125 150 175 200
## 1 17 2 1 6 1 27
## 210 225 240 250 300 350 360
## 1 1 1 4 18 2 1
## 400 500 600 650 680 700 750
## 11 18 10 1 1 1 2
## 800 840 900 960 1000 1050 1200
## 4 1 3 1 20 2 4
## 1400 1440 1500 1600 1700 1750 1800
## 3 1 8 1 1 1 5
## 2000 2400 2500 2600 3000 3250 3300
## 5 1 2 1 2 1 1
## 3500 3900 3920 4000 4200 4400 5000
## 1 1 1 4 1 1 2
## 5120 7000 7500 8000 10000 12000 14700
## 1 2 1 2 1 1 1
## 14830 or more <NA>
## 4 1582
# Top-code s10q11: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q11)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q11",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q11. What is the total amount of the loan? If your household has had multiple loans
## 0 750 900 1000 1100 1200 1400 1500 1600 2000 2200 2500 3000 4000 4500
## 1 1 1 4 1 2 1 1 1 7 1 1 6 4 1
## 4600 4700 4800 5000 5500 6000 6500 7000 7500 7730 8000 9000 10000 10747 12000
## 1 1 1 8 1 5 1 3 1 1 3 1 11 1 3
## 12400 14000 15000 15600 16000 18000 20000 21000 24160 30000 37000 40000 42400 50000 65000
## 1 1 6 1 2 1 5 2 1 6 1 1 1 1 1
## 70000 122000 160000 185000 3e+05 <NA>
## 1 1 1 1 1 2185
## [1] "Frequency table after encoding"
## s10q11. What is the total amount of the loan? If your household has had multiple loans
## 0 750 900 1000 1100 1200 1400
## 1 1 1 4 1 2 1
## 1500 1600 2000 2200 2500 3000 4000
## 1 1 7 1 1 6 4
## 4500 4600 4700 4800 5000 5500 6000
## 1 1 1 1 8 1 5
## 6500 7000 7500 7730 8000 9000 10000
## 1 3 1 1 3 1 11
## 10747 12000 12400 14000 15000 15600 16000
## 1 3 1 1 6 1 2
## 18000 20000 21000 24160 30000 37000 40000
## 1 5 2 1 6 1 1
## 42400 50000 65000 70000 122000 160000 185000
## 1 1 1 1 1 1 1
## 236750 or more <NA>
## 1 2185
# Top-code s10q12: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q12)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q12",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q12. In the past 12 months, how much did your household pay in interest on these loan
## 0 10 50 90 100 250 300 350 400 500 600 800 900 1000 1050
## 35 1 1 1 1 1 2 2 3 3 2 4 1 7 1
## 1125 1260 1400 1500 1600 1800 2000 2400 3000 4500 4700 4900 5000 6000 6500
## 1 1 2 3 1 1 6 1 4 1 1 1 2 2 1
## 7400 7800 8000 10000 10500 10600 12000 16500 21000 30000 57600 108000 2e+05 <NA>
## 1 1 1 2 1 1 1 1 1 1 1 1 1 2189
## [1] "Frequency table after encoding"
## s10q12. In the past 12 months, how much did your household pay in interest on these loan
## 0 10 50 90 100 250 300
## 35 1 1 1 1 1 2
## 350 400 500 600 800 900 1000
## 2 3 3 2 4 1 7
## 1050 1125 1260 1400 1500 1600 1800
## 1 1 1 2 3 1 1
## 2000 2400 3000 4500 4700 4900 5000
## 6 1 4 1 1 1 2
## 6000 6500 7400 7800 8000 10000 10500
## 2 1 1 1 1 2 1
## 10600 12000 16500 21000 30000 57600 108000
## 1 1 1 1 1 1 1
## 151239 or more <NA>
## 1 2189
# Top-code s10q14: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q14)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q14",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q14. What is the total amount of the loan? If your household has had multiple loans f
## 0 100 190 300 500 550 650 750 800 1000 1150 1200 1500 1600 1800 1900 2000 2500
## 3 1 1 1 5 1 1 1 1 16 1 3 7 1 1 1 16 3
## 3000 3200 3400 3500 3600 3800 4000 5000 5500 5800 6000 7000 7500 8000 8500 9000 10000 13000
## 21 1 1 2 1 1 12 19 1 1 4 3 1 3 1 1 8 1
## 15000 17000 20000 21000 24000 25000 40000 50000 60000 <NA>
## 2 2 7 1 3 1 1 1 2 2130
## [1] "Frequency table after encoding"
## s10q14. What is the total amount of the loan? If your household has had multiple loans f
## 0 100 190 300 500 550 650
## 3 1 1 1 5 1 1
## 750 800 1000 1150 1200 1500 1600
## 1 1 16 1 3 7 1
## 1800 1900 2000 2500 3000 3200 3400
## 1 1 16 3 21 1 1
## 3500 3600 3800 4000 5000 5500 5800
## 2 1 1 12 19 1 1
## 6000 7000 7500 8000 8500 9000 10000
## 4 3 1 3 1 1 8
## 13000 15000 17000 20000 21000 24000 25000
## 1 2 2 7 1 3 1
## 40000 50000 60000 or more <NA>
## 1 1 2 2130
# Top-code s10q15: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q15)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q15",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q15. In the past 12 months, how much did your household pay in interest on these loa
## 0 25 40 100 120 150 200 225 250 300 360 400 450 500 600 800 1000 1100
## 19 1 1 10 1 3 15 1 2 5 1 14 2 7 15 8 13 1
## 1200 1400 1500 1600 1800 1900 2000 2100 2500 2600 3000 3400 4000 4500 4800 5000 6000 6300
## 4 2 2 3 1 1 4 1 1 1 1 1 3 1 3 3 1 1
## 6400 8000 10000 12000 <NA>
## 1 2 1 2 2137
## [1] "Frequency table after encoding"
## s10q15. In the past 12 months, how much did your household pay in interest on these loa
## 0 25 40 100 120 150 200
## 19 1 1 10 1 3 15
## 225 250 300 360 400 450 500
## 1 2 5 1 14 2 7
## 600 800 1000 1100 1200 1400 1500
## 15 8 13 1 4 2 2
## 1600 1800 1900 2000 2100 2500 2600
## 3 1 1 4 1 1 1
## 3000 3400 4000 4500 4800 5000 6000
## 1 1 3 1 3 3 1
## 6300 6400 8000 10000 12000 or more <NA>
## 1 1 2 1 2 2137
# Top-code s10q17: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q17)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q17",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q17. What is the total amount of the loan? If your household has had multiple loans f
## 0 2 6 12 15 20 21 23 25 26 30 35 38 40 43 45 50 53
## 7 1 1 2 3 13 1 1 1 1 9 2 1 5 1 2 27 1
## 55 56 59 60 70 72 75 76 77 80 85 86 90 100 110 116 120 130
## 1 2 1 2 5 1 2 1 1 9 1 1 1 79 2 1 13 3
## 138 140 150 155 158 160 165 170 180 190 194 200 208 212 214 216 220 225
## 1 2 35 1 1 2 1 1 3 3 1 101 1 1 1 1 2 2
## 230 240 250 260 265 280 285 300 320 330 350 360 374 380 400 440 450 493
## 3 1 12 4 1 3 1 93 1 2 3 2 1 1 30 1 1 1
## 500 520 550 560 575 600 625 630 646 650 683 700 745 750 780 800 816 900
## 101 1 1 1 1 20 1 1 1 2 1 20 1 2 1 9 1 2
## 950 1000 1050 1085 1100 1200 1300 1375 1400 1440 1500 1600 1700 1800 1830 1900 2000 2200
## 1 59 1 1 3 20 6 1 3 1 26 2 2 1 1 1 24 1
## 2400 2500 2662 2800 3000 3500 3600 3700 4000 4300 4500 4800 5000 6000 7200 8000 8160 8640
## 5 3 1 1 26 1 8 1 7 1 2 5 7 8 4 2 1 1
## 9000 9600 10000 12000 14400 16200 18000 24000 25000 26000 33600 36000 36400 48000 57600 72000 <NA>
## 2 3 2 5 1 1 2 1 1 1 1 1 1 1 1 1 1340
## [1] "Frequency table after encoding"
## s10q17. What is the total amount of the loan? If your household has had multiple loans f
## 0 2 6 12 15 20 21
## 7 1 1 2 3 13 1
## 23 25 26 30 35 38 40
## 1 1 1 9 2 1 5
## 43 45 50 53 55 56 59
## 1 2 27 1 1 2 1
## 60 70 72 75 76 77 80
## 2 5 1 2 1 1 9
## 85 86 90 100 110 116 120
## 1 1 1 79 2 1 13
## 130 138 140 150 155 158 160
## 3 1 2 35 1 1 2
## 165 170 180 190 194 200 208
## 1 1 3 3 1 101 1
## 212 214 216 220 225 230 240
## 1 1 1 2 2 3 1
## 250 260 265 280 285 300 320
## 12 4 1 3 1 93 1
## 330 350 360 374 380 400 440
## 2 3 2 1 1 30 1
## 450 493 500 520 550 560 575
## 1 1 101 1 1 1 1
## 600 625 630 646 650 683 700
## 20 1 1 1 2 1 20
## 745 750 780 800 816 900 950
## 1 2 1 9 1 2 1
## 1000 1050 1085 1100 1200 1300 1375
## 59 1 1 3 20 6 1
## 1400 1440 1500 1600 1700 1800 1830
## 3 1 26 2 2 1 1
## 1900 2000 2200 2400 2500 2662 2800
## 1 24 1 5 3 1 1
## 3000 3500 3600 3700 4000 4300 4500
## 26 1 8 1 7 1 2
## 4800 5000 6000 7200 8000 8160 8640
## 5 7 8 4 2 1 1
## 9000 9600 10000 12000 14400 16200 18000
## 2 3 2 5 1 1 2
## 24000 25000 26000 33600 34140 or more <NA>
## 1 1 1 1 5 1340
# Top-code s10q18: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q18)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q18",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q18. In the past 12 months, how much did your household pay in interest on these loa
## 0 5 8 20 30 50 75 100 150 200 300 310 400 500 540 600 1000 3000 <NA>
## 917 1 1 1 2 12 1 3 1 2 2 1 1 6 1 1 1 1 1341
## [1] "Frequency table after encoding"
## s10q18. In the past 12 months, how much did your household pay in interest on these loa
## 0 5 8 20 30 50 75 100 150
## 917 1 1 1 2 12 1 3 1
## 200 300 310 400 500 or more <NA>
## 2 2 1 1 10 1341
# Top-code s10q20: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q20)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q20",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q20. What is the total amount of the loan? If your household has had multiple loans a
## 0 89 100 110 140 300 500 600 700 800 1000 1200 1400 1500 1800
## 3 1 1 1 1 1 1 2 1 1 2 1 1 1 1
## 2000 2500 2800 3000 4000 4300 5000 5500 6000 7000 8000 8140 9000 10000 12000
## 1 2 1 4 2 1 2 1 2 1 1 1 1 1 1
## 12500 13000 15000 20000 21000 23800 24000 27000 45600 60000 61000 64000 92000 1e+05 120000
## 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1
## 150000 <NA>
## 1 2236
## [1] "Frequency table after encoding"
## s10q20. What is the total amount of the loan? If your household has had multiple loans a
## 0 89 100 110 140 300 500
## 3 1 1 1 1 1 1
## 600 700 800 1000 1200 1400 1500
## 2 1 1 2 1 1 1
## 1800 2000 2500 2800 3000 4000 4300
## 1 1 2 1 4 2 1
## 5000 5500 6000 7000 8000 8140 9000
## 2 1 2 1 1 1 1
## 10000 12000 12500 13000 15000 20000 21000
## 1 1 1 2 1 2 1
## 23800 24000 27000 45600 60000 61000 64000
## 1 1 1 1 1 1 1
## 92000 1e+05 120000 141149 or more <NA>
## 1 2 1 1 2236
# Top-code s10q21: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q21)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q21",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q21. In the past 12 months, how much did your household pay in interest on these loan
## 0 8 35 60 70 200 250 285 360 450 464 480 800 1000 1400 1500 2000 2520 3600 4400 5824 6000
## 31 1 1 1 1 3 1 1 1 1 1 1 1 1 1 2 3 1 1 1 1 1
## <NA>
## 2239
## [1] "Frequency table after encoding"
## s10q21. In the past 12 months, how much did your household pay in interest on these loan
## 0 8 35 60 70 200 250 285
## 31 1 1 1 1 3 1 1
## 360 450 464 480 800 1000 1400 1500
## 1 1 1 1 1 1 1 2
## 2000 2520 3600 4400 5824 5950 or more <NA>
## 3 1 1 1 1 1 2239
# Top-code s10q23: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q23)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q23",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q23. How much do you owe these shops for items taken on credit? Magkano ang utang mo
## 60 69 75 100 117 120 129 130 150 159 160 180 200 225 230 250 255 270
## 1 3 1 5 1 5 1 2 4 1 2 2 9 1 1 2 1 1
## 300 308 320 330 352 400 450 479 500 700 800 1000 1040 1200 1300 1350 1400 1500
## 15 1 1 1 1 3 1 1 10 3 3 4 1 2 1 1 1 1
## 1600 1645 1800 2000 2200 2300 2415 2500 2800 2900 3000 3200 3400 3500 3600 3800 4000 4200
## 1 1 1 3 2 2 1 3 1 2 5 3 1 4 1 2 3 1
## 4500 4800 4900 5000 5500 5550 5600 5700 5800 5900 6000 6500 6700 7200 7650 8000 8729 8750
## 1 1 3 4 4 1 1 1 3 2 1 1 1 1 1 1 1 1
## 9000 10000 11400 13400 13848 14400 14500 15000 16000 16800 18000 18780 19200 20132 20400 21000 22860 23490
## 1 3 1 1 1 1 1 2 1 1 3 1 3 1 1 1 1 1
## 24000 24375 24500 26000 30000 30876 33000 35000 36000 38040 40000 42900 44700 45000 45360 45800 46000 49300
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 50000 52800 54000 56000 60000 64000 68400 69000 70000 71676 72000 80000 89700 91700 95700 96480 99000 <NA>
## 1 1 1 1 3 1 1 1 1 1 2 1 1 1 1 1 1 2073
## [1] "Frequency table after encoding"
## s10q23. How much do you owe these shops for items taken on credit? Magkano ang utang mo
## 60 69 75 100 117 120 129
## 1 3 1 5 1 5 1
## 130 150 159 160 180 200 225
## 2 4 1 2 2 9 1
## 230 250 255 270 300 308 320
## 1 2 1 1 15 1 1
## 330 352 400 450 479 500 700
## 1 1 3 1 1 10 3
## 800 1000 1040 1200 1300 1350 1400
## 3 4 1 2 1 1 1
## 1500 1600 1645 1800 2000 2200 2300
## 1 1 1 1 3 2 2
## 2415 2500 2800 2900 3000 3200 3400
## 1 3 1 2 5 3 1
## 3500 3600 3800 4000 4200 4500 4800
## 4 1 2 3 1 1 1
## 4900 5000 5500 5550 5600 5700 5800
## 3 4 4 1 1 1 3
## 5900 6000 6500 6700 7200 7650 8000
## 2 1 1 1 1 1 1
## 8729 8750 9000 10000 11400 13400 13848
## 1 1 1 3 1 1 1
## 14400 14500 15000 16000 16800 18000 18780
## 1 1 2 1 1 3 1
## 19200 20132 20400 21000 22860 23490 24000
## 3 1 1 1 1 1 1
## 24375 24500 26000 30000 30876 33000 35000
## 1 1 1 1 1 1 1
## 36000 38040 40000 42900 44700 45000 45360
## 1 1 1 1 1 1 1
## 45800 46000 49300 50000 52800 54000 56000
## 1 1 1 1 1 1 1
## 60000 64000 68400 69000 70000 71676 72000
## 3 1 1 1 1 1 2
## 80000 89700 91700 95700 96394 or more <NA>
## 1 1 1 1 2 2073
# Top-code s10q25: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q25)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q25",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q25. What is the total amount currently saved in these bank accounts by you and all m
## 0 50 100 200 430 500 800 900 1000 1200 1400 1500 1892 2000 2100 2300 2400 2496
## 39 1 4 1 1 7 3 1 7 2 1 1 1 6 1 1 2 1
## 2500 2700 3000 3400 3500 3600 4000 4700 5000 6000 7000 7200 7999 8000 8200 10000 13000 15000
## 1 1 3 1 1 1 2 1 7 3 4 1 1 3 1 7 1 3
## 20000 25000 50000 75000 <NA>
## 3 1 1 1 2168
## [1] "Frequency table after encoding"
## s10q25. What is the total amount currently saved in these bank accounts by you and all m
## 0 50 100 200 430 500 800
## 39 1 4 1 1 7 3
## 900 1000 1200 1400 1500 1892 2000
## 1 7 2 1 1 1 6
## 2100 2300 2400 2496 2500 2700 3000
## 1 1 2 1 1 1 3
## 3400 3500 3600 4000 4700 5000 6000
## 1 1 1 2 1 7 3
## 7000 7200 7999 8000 8200 10000 13000
## 4 1 1 3 1 7 1
## 15000 20000 25000 50000 59124 or more <NA>
## 3 3 1 1 1 2168
# Top-code s10q26: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q26)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q26",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q26. In the past 12 months, what is the total amount added to these bank accounts by
## 0 100 260 500 800 960 1000 1200 1920 2000 2400 2500 2880 3000 3200 3840 4600 4800
## 87 2 1 1 3 1 3 3 1 2 7 2 2 1 1 1 1 1
## 5000 6720 7200 9600 10000 11800 12900 14400 22000 <NA>
## 2 1 1 1 1 1 1 1 1 2166
## [1] "Frequency table after encoding"
## s10q26. In the past 12 months, what is the total amount added to these bank accounts by
## 0 100 260 500 800 960 1000
## 87 2 1 1 3 1 3
## 1200 1920 2000 2400 2500 2880 3000
## 3 1 2 7 2 2 1
## 3200 3840 4600 4800 5000 6720 7200
## 1 1 1 1 2 1 1
## 9600 10000 11800 12900 14400 17097 or more <NA>
## 1 1 1 1 1 1 2166
# Top-code s10q27: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q27)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q27",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q27. In the past 12 months, what is the total amount withdrawn from these accounts by
## 0 130 200 300 350 500 750 1000 1200 1400 1500 1600 1700 1800 1892
## 49 1 2 1 1 4 1 1 1 1 2 2 1 2 1
## 2000 2500 2800 2840 3000 4000 4500 4800 5000 5100 6000 6400 6900 8000 8200
## 2 1 1 1 6 3 1 1 3 1 2 2 1 8 1
## 9600 10000 10400 11500 12600 12800 13200 14000 14800 15000 15600 16000 16800 17100 18000
## 2 1 1 1 1 1 4 1 1 3 1 1 1 1 2
## 25000 140000 144000 150000 <NA>
## 2 1 1 1 2165
## [1] "Frequency table after encoding"
## s10q27. In the past 12 months, what is the total amount withdrawn from these accounts by
## 0 130 200 300 350 500 750
## 49 1 2 1 1 4 1
## 1000 1200 1400 1500 1600 1700 1800
## 1 1 1 2 2 1 2
## 1892 2000 2500 2800 2840 3000 4000
## 1 2 1 1 1 6 3
## 4500 4800 5000 5100 6000 6400 6900
## 1 1 3 1 2 2 1
## 8000 8200 9600 10000 10400 11500 12600
## 8 1 2 1 1 1 1
## 12800 13200 14000 14800 15000 15600 16000
## 1 4 1 1 3 1 1
## 16800 17100 18000 25000 140000 144000 146099 or more
## 1 1 2 2 1 1 1
## <NA>
## 2165
# Top-code s10q29: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q29)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q29",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q29. In the past 12 months, how much income did you earn from interest on these accou
## 0 1 3 4 6 7 8 10 11 20 40 50 57 60 100 130 200 240
## 3 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1
## 260 500 2000 59000 <NA>
## 1 1 1 1 2270
## [1] "Frequency table after encoding"
## s10q29. In the past 12 months, how much income did you earn from interest on these accou
## 0 1 3 4 6 7 8
## 3 1 1 1 1 1 1
## 10 11 20 40 50 57 60
## 1 1 2 1 2 1 1
## 100 130 200 240 260 500 2000
## 1 1 1 1 1 1 1
## 51875 or more <NA>
## 1 2270
# Top-code s10q31: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q31)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q31",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q31. What is the total amount currently saved with coops and MFIs by you and all memb
## 0 30 50 70 100 117 120 150 170 200 290 300 320 330 400 450 480 490
## 4 1 3 1 4 1 1 1 1 4 1 5 1 1 5 1 2 1
## 500 600 700 720 790 792 800 810 850 900 950 957 960 970 1000 1060 1100 1120
## 18 8 9 2 1 1 11 2 1 1 2 1 2 1 38 1 2 1
## 1148 1150 1157 1170 1200 1250 1270 1300 1400 1440 1450 1500 1508 1560 1567 1600 1655 1700
## 1 3 1 1 11 1 1 6 7 1 1 26 1 2 1 2 1 1
## 1750 1800 1820 1900 1911 1920 1930 1960 1998 2000 2100 2120 2160 2200 2267 2300 2379 2390
## 1 9 2 2 1 2 1 1 1 62 2 1 1 3 1 4 1 1
## 2400 2410 2490 2500 2600 2700 2800 2900 3000 3100 3200 3400 3500 3531 3535 3600 3637 3700
## 6 1 1 10 3 5 4 2 42 3 2 1 13 1 1 1 1 1
## 3900 4000 4100 4270 4400 4500 4600 4800 4980 5000 5200 5500 5900 5920 5970 6000 6300 6363
## 2 18 1 1 1 2 1 2 1 20 1 1 1 1 1 13 1 1
## 6364 6480 6500 6600 6610 6900 7000 7500 7700 8000 8400 9000 9720 10000 10080 12000 15000 17000
## 1 1 1 1 1 2 7 2 1 3 1 2 1 3 1 3 2 1
## 25000 34947 40000 50000 <NA>
## 1 1 1 1 1787
## [1] "Frequency table after encoding"
## s10q31. What is the total amount currently saved with coops and MFIs by you and all memb
## 0 30 50 70 100 117 120
## 4 1 3 1 4 1 1
## 150 170 200 290 300 320 330
## 1 1 4 1 5 1 1
## 400 450 480 490 500 600 700
## 5 1 2 1 18 8 9
## 720 790 792 800 810 850 900
## 2 1 1 11 2 1 1
## 950 957 960 970 1000 1060 1100
## 2 1 2 1 38 1 2
## 1120 1148 1150 1157 1170 1200 1250
## 1 1 3 1 1 11 1
## 1270 1300 1400 1440 1450 1500 1508
## 1 6 7 1 1 26 1
## 1560 1567 1600 1655 1700 1750 1800
## 2 1 2 1 1 1 9
## 1820 1900 1911 1920 1930 1960 1998
## 2 2 1 2 1 1 1
## 2000 2100 2120 2160 2200 2267 2300
## 62 2 1 1 3 1 4
## 2379 2390 2400 2410 2490 2500 2600
## 1 1 6 1 1 10 3
## 2700 2800 2900 3000 3100 3200 3400
## 5 4 2 42 3 2 1
## 3500 3531 3535 3600 3637 3700 3900
## 13 1 1 1 1 1 2
## 4000 4100 4270 4400 4500 4600 4800
## 18 1 1 1 2 1 2
## 4980 5000 5200 5500 5900 5920 5970
## 1 20 1 1 1 1 1
## 6000 6300 6363 6364 6480 6500 6600
## 13 1 1 1 1 1 1
## 6610 6900 7000 7500 7700 8000 8400
## 1 2 7 2 1 3 1
## 9000 9720 10000 10080 12000 15000 17000
## 2 1 3 1 3 2 1
## 25000 29575 or more <NA>
## 1 3 1787
# Top-code s10q32: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q32)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q32",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q32. In the past 12 months, what is the total amount added to these accounts by you a
## 0 1 5 10 20 21 30 40 50 60 62 70 80 100 112 132 140 150
## 306 1 1 2 2 1 1 1 8 1 1 4 3 11 1 1 1 3
## 160 164 200 240 252 300 320 330 400 450 475 480 500 600 750 800 810 850
## 2 1 9 5 1 4 1 1 5 1 1 4 9 3 1 3 1 1
## 880 1000 1120 1150 1200 1280 1385 1400 1440 1500 1600 1800 1820 1900 1920 2000 2120 2300
## 1 14 1 1 4 1 1 1 1 10 2 3 1 1 1 13 1 3
## 2400 2500 2600 2700 3000 3150 3200 3360 3500 3600 3800 4000 4800 5000 6000 6720 7000 7200
## 12 1 2 1 4 1 1 2 1 1 1 4 1 4 1 1 3 2
## 9600 9720 15000 24000 <NA>
## 1 1 1 1 1787
## [1] "Frequency table after encoding"
## s10q32. In the past 12 months, what is the total amount added to these accounts by you a
## 0 1 5 10 20 21 30 40
## 306 1 1 2 2 1 1 1
## 50 60 62 70 80 100 112 132
## 8 1 1 4 3 11 1 1
## 140 150 160 164 200 240 252 300
## 1 3 2 1 9 5 1 4
## 320 330 400 450 475 480 500 600
## 1 1 5 1 1 4 9 3
## 750 800 810 850 880 1000 1120 1150
## 1 3 1 1 1 14 1 1
## 1200 1280 1385 1400 1440 1500 1600 1800
## 4 1 1 1 1 10 2 3
## 1820 1900 1920 2000 2120 2300 2400 2500
## 1 1 1 13 1 3 12 1
## 2600 2700 3000 3150 3200 3360 3500 3600
## 2 1 4 1 1 2 1 1
## 3800 4000 4800 5000 6000 6720 7000 7200
## 1 4 1 4 1 1 3 2
## 9600 9655 or more <NA>
## 1 3 1787
# Top-code s10q33: the break point is the floored 99.5th percentile of the
# non-NA values, with the 999999 missing code left out of the calculation.
percentile_99.5 <- local({
  reported <- na.exclude(mydata$s10q33)
  floor(quantile(reported[reported != 999999], probs = 0.995))
})
# top_recode() is not defined in this file (presumably it comes from the
# sourced helper script); the printed tables that follow show values at or
# above the break point grouped as "<break point> or more".
mydata <- top_recode(
  variable = "s10q33",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q33. In the past 12 months, what is the total amount withdrawn from these accounts by
## 0 27 100 150 200 270 280 300 320 350 360 400 450 480 500 600 610 625
## 338 1 6 1 13 1 1 10 1 1 1 9 1 1 17 7 1 1
## 650 700 750 760 800 900 960 1000 1100 1200 1300 1400 1500 1600 1800 1900 2000 2040
## 1 3 1 1 7 3 1 15 1 3 1 2 5 1 3 1 9 1
## 2200 2300 2500 2600 2900 3000 3900 3910 4000 4200 4600 5000 6000 6050 7000 7600 8000 9000
## 1 1 2 2 1 4 1 1 5 1 1 8 1 1 2 1 3 1
## 9400 10000 13700 22000 25000 25400 41000 <NA>
## 1 4 1 1 1 1 1 1779
## [1] "Frequency table after encoding"
## s10q33. In the past 12 months, what is the total amount withdrawn from these accounts by
## 0 27 100 150 200 270 280
## 338 1 6 1 13 1 1
## 300 320 350 360 400 450 480
## 10 1 1 1 9 1 1
## 500 600 610 625 650 700 750
## 17 7 1 1 1 3 1
## 760 800 900 960 1000 1100 1200
## 1 7 3 1 15 1 3
## 1300 1400 1500 1600 1800 1900 2000
## 1 2 5 1 3 1 9
## 2040 2200 2300 2500 2600 2900 3000
## 1 1 1 2 2 1 4
## 3900 3910 4000 4200 4600 5000 6000
## 1 1 5 1 1 8 1
## 6050 7000 7600 8000 9000 9400 10000
## 1 2 1 3 1 1 4
## 13700 22000 23259 or more <NA>
## 1 1 3 1779
# Top-code s10q35: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q35)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q35",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q35. In the past 12 months, how much income did you earn from interest on these accou
## 0 1 2 3 4 5 6 7 8 9 11 12 13 15 17 18 20 21 22 23 24 28
## 3 2 1 2 1 1 3 4 2 2 3 1 1 2 2 3 6 1 1 1 2 2
## 29 30 32 34 36 37 38 40 43 50 54 55 58 60 64 75 80 84 90 100 104 108
## 1 2 1 1 2 2 2 2 1 4 1 1 1 2 1 1 2 1 1 7 1 1
## 110 115 117 120 125 126 130 132 150 157 161 175 180 200 250 252 268 300 365 450 500 600
## 1 1 1 5 1 1 1 1 1 1 1 2 2 11 1 1 1 3 1 1 4 2
## 800 1000 1200 1500 2000 <NA>
## 2 1 1 1 2 2161
## [1] "Frequency table after encoding"
## s10q35. In the past 12 months, how much income did you earn from interest on these accou
## 0 1 2 3 4 5 6 7
## 3 2 1 2 1 1 3 4
## 8 9 11 12 13 15 17 18
## 2 2 3 1 1 2 2 3
## 20 21 22 23 24 28 29 30
## 6 1 1 1 2 2 1 2
## 32 34 36 37 38 40 43 50
## 1 1 2 2 2 2 1 4
## 54 55 58 60 64 75 80 84
## 1 1 1 2 1 1 2 1
## 90 100 104 108 110 115 117 120
## 1 7 1 1 1 1 1 5
## 125 126 130 132 150 157 161 175
## 1 1 1 1 1 1 1 2
## 180 200 250 252 268 300 365 450
## 2 11 1 1 1 3 1 1
## 500 600 800 1000 1200 1500 2000 or more <NA>
## 4 2 2 1 1 1 2 2161
# Top-code s10q37: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q37)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q37",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q37. What is the total amount currently saved with ROSCAS by you and all members of y
## 0 140 200 300 350 400 420 450 500 600 700 800 840 900 1000 1070 1200 1600
## 5 1 3 2 1 1 1 1 2 1 3 2 1 1 6 1 3 1
## 1680 2000 2400 2500 3000 3600 4000 4200 4250 4600 5000 5200 5500 7000 10000 10500 11000 12000
## 1 2 1 1 1 1 2 1 1 1 4 1 1 1 3 1 1 2
## 13000 15000 16000 19000 20000 40000 <NA>
## 1 1 2 1 1 2 2226
## [1] "Frequency table after encoding"
## s10q37. What is the total amount currently saved with ROSCAS by you and all members of y
## 0 140 200 300 350 400 420
## 5 1 3 2 1 1 1
## 450 500 600 700 800 840 900
## 1 2 1 3 2 1 1
## 1000 1070 1200 1600 1680 2000 2400
## 6 1 3 1 1 2 1
## 2500 3000 3600 4000 4200 4250 4600
## 1 1 1 2 1 1 1
## 5000 5200 5500 7000 10000 10500 11000
## 4 1 1 1 3 1 1
## 12000 13000 15000 16000 19000 20000 40000 or more
## 2 1 1 2 1 1 2
## <NA>
## 2226
# Top-code s10q38: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q38)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q38",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q38. In the past 12 months, what is the total amount added to these accounts by you a
## 0 200 300 450 500 700 840 900 1000 1200 1800 2000 2500 3000 4200 4250 5250 5500
## 39 4 1 1 1 1 1 1 4 2 1 1 1 1 1 1 1 1
## 7000 10000 10500 12000 15000 16000 <NA>
## 1 2 1 1 1 1 2226
## [1] "Frequency table after encoding"
## s10q38. In the past 12 months, what is the total amount added to these accounts by you a
## 0 200 300 450 500 700 840
## 39 4 1 1 1 1 1
## 900 1000 1200 1800 2000 2500 3000
## 1 4 2 1 1 1 1
## 4200 4250 5250 5500 7000 10000 10500
## 1 1 1 1 1 2 1
## 12000 15000 15655 or more <NA>
## 1 1 1 2226
# Top-code s10q39: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q39)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q39",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q39. In the past 12 months, what is the total amount withdrawn from these accounts by
## 0 200 420 1000 1680 1900 2500 4000 4200 4500 4600 5200 5250 5400 7000 10000 12500 15000
## 41 1 1 3 1 1 2 1 1 1 1 1 1 1 2 1 1 3
## 16000 20000 30000 40000 <NA>
## 1 3 1 1 2226
## [1] "Frequency table after encoding"
## s10q39. In the past 12 months, what is the total amount withdrawn from these accounts by
## 0 200 420 1000 1680 1900 2500
## 41 1 1 3 1 1 2
## 4000 4200 4500 4600 5200 5250 5400
## 1 1 1 1 1 1 1
## 7000 10000 12500 15000 16000 20000 30000
## 2 1 1 3 1 3 1
## 36550 or more <NA>
## 1 2226
# Top-code s10q41: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
# NOTE(review): the frequency output recorded below shows only 8 non-missing
# responses and a top category holding a single respondent - confirm that
# top-coding alone is adequate for this variable.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q41)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q41",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q41. In the past 12 months, how much income did you earn from interest on these accou
## 20 40 150 200 250 600 1400 <NA>
## 1 1 1 2 1 1 1 2288
## [1] "Frequency table after encoding"
## s10q41. In the past 12 months, how much income did you earn from interest on these accou
## 20 40 150 200 250 600 1371 or more <NA>
## 1 1 1 2 1 1 1 2288
# Top-code s10q43: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q43)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q43",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q43. What is the current amount of these other savings? Ano ang kasalukuyang halaga
## 0 20 25 30 50 75 90 100 110 200 230 240 300 400 485 500 580 600
## 3 1 1 1 2 1 1 5 1 4 1 1 8 2 1 14 1 1
## 800 850 900 1000 1200 1300 1500 1600 1750 1800 2000 2500 2900 3000 3500 4000 5000 5200
## 2 1 1 13 1 2 2 1 1 2 14 1 1 6 1 3 9 1
## 5700 6000 6720 7000 7500 8000 10000 12000 13000 15000 17000 18000 20000 30000 60000 1e+05 <NA>
## 1 1 1 2 1 1 2 1 1 4 1 1 2 3 1 1 2161
## [1] "Frequency table after encoding"
## s10q43. What is the current amount of these other savings? Ano ang kasalukuyang halaga
## 0 20 25 30 50 75 90
## 3 1 1 1 2 1 1
## 100 110 200 230 240 300 400
## 5 1 4 1 1 8 2
## 485 500 580 600 800 850 900
## 1 14 1 1 2 1 1
## 1000 1200 1300 1500 1600 1750 1800
## 13 1 2 2 1 1 2
## 2000 2500 2900 3000 3500 4000 5000
## 14 1 1 6 1 3 9
## 5200 5700 6000 6720 7000 7500 8000
## 1 1 1 1 2 1 1
## 10000 12000 13000 15000 17000 18000 20000
## 2 1 1 4 1 1 2
## 30000 60000 73200 or more <NA>
## 3 1 1 2161
# Top-code s10q44: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q44)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q44",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q44. In the past 12 months, what is the total amount added to this savings by you and
## 0 50 90 100 200 400 500 700 800 1400 1536 1965 2000 3000 6720 15000 <NA>
## 120 3 1 2 1 1 1 1 2 1 1 1 1 2 1 1 2156
## [1] "Frequency table after encoding"
## s10q44. In the past 12 months, what is the total amount added to this savings by you and
## 0 50 90 100 200 400 500 700
## 120 3 1 2 1 1 1 1
## 800 1400 1536 1965 2000 3000 6720 9245 or more
## 2 1 1 1 1 2 1 1
## <NA>
## 2156
# Top-code s10q45: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q45)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q45",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q45. In the past 12 months, what is the total amount withdrawn from this savings by y
## 0 8 180 300 500 800 1965 2000 2500 3000 7000 8000 <NA>
## 125 1 1 1 3 1 1 5 1 1 1 1 2154
## [1] "Frequency table after encoding"
## s10q45. In the past 12 months, what is the total amount withdrawn from this savings by y
## 0 8 180 300 500 800 1965 2000
## 125 1 1 1 3 1 1 5
## 2500 3000 7000 7294 or more <NA>
## 1 1 1 1 2154
# Top-code s10q47: the break point is the floored 99.5th percentile of the
# non-NA values, leaving out 999999 (the code passed to top_recode as `missing`).
# NOTE(review): the frequency output recorded below shows only 7 non-missing
# responses and a top category holding a single respondent - confirm that
# top-coding alone is adequate for this variable.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s10q47)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "s10q47",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s10q47. In the past 12 months, how much income did you earn from interest on this saving
## 0 50 150 200 360 1200 4200 <NA>
## 1 1 1 1 1 1 1 2289
## [1] "Frequency table after encoding"
## s10q47. In the past 12 months, how much income did you earn from interest on this saving
## 0 50 150 200 360 1200 4109 or more <NA>
## 1 1 1 1 1 1 1 2289
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Variables flagged as indirect PII in this section, listed by question number;
# each name is "s10q" followed by the number.
indirect_PII <- paste0(
  "s10q",
  c(1, 4, 7, 10, 13, 16, 19, 22, 24, 28, 30, 34, 36, 40, 46)
)
# Pass the flagged variables to the sourced helper capture_tables().
capture_tables(indirect_PII)
# Recode those with very specific values.
# !!!No specific values
# !!! Insufficient demographic data
# !!! Identify open-end variables here:
# Open-ended follow-up variables: one "_why" column for each of questions 1 to 47
# (s10q1_why, s10q2_why, ..., s10q47_why), in question order.
open_ends <- paste0("s10q", 1:47, "_why")
# Pass the open-ended variables to the sourced helper report_open().
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# Manual redactions of individual open-ended responses, addressed by hard-coded
# row position in mydata.
# NOTE(review): rows 730 and 1071 presumably come from the "verbatims.csv" review;
# they only point at the intended respondents while mydata keeps the same row
# order - re-check them whenever the input data changes.
# Row 730 of s10q31_why: overwritten with text that carries a "[date]" placeholder.
mydata$s10q31_why[730] <- "Just started last week, payment for loans and savings will start this coming [date]"
# Row 1071 of s10q32_why: overwritten with text marking the amount as redacted.
mydata$s10q32_why[1071] <- "Weekly [amount redacted]"
# Row 1071 of s10q33_why: the whole response is replaced by the "[language]" placeholder.
mydata$s10q33_why[1071] <- "[language]"
# !!!No GPS data
# Export the processed data twice, as "<filename>_PU.dta" and "<filename>_PU.sav",
# using haven's writers.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)
# Build the report title: a fixed project prefix followed by the section filename.
title_var <- paste0(
  "DOL-ILAB SDC - ",
  filename
)