# Start from an empty workspace so objects left over from an earlier session
# cannot leak into this run. `all.names = TRUE` also lists dot-prefixed
# (hidden) objects for removal. Note that lowercase `t` is base R's transpose
# function, not a logical, so it must not be used as the flag value here.
rm(list = ls(all.names = TRUE))

# Setup filenames

# Name of the dataset section handled by this script.
# !!!Update filename
filename <- "Section_7"

# Helper-functions script that is passed to source() further down.
# !!!Update helper functions file
functions_vers <- "functions_1.8.R"

# Setup data, functions and create dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): `mydata`, `top_recode()` and `zap_labels()` are used below but
# not defined in this file -- presumably this script provides them; confirm.
source(file = functions_vers)

# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (population <100,000)
# Large Location: Location (population >100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary

# Direct PII: variables to be removed

# Replace household_id with the result of zap_labels() on that same column.
# NOTE(review): this section is titled "variables to be removed", yet the
# column is kept and only passed through zap_labels() -- confirm that no
# direct-PII variable needs to be dropped here.
mydata[["household_id"]] <- zap_labels(mydata[["household_id"]])

# Direct PII-team: Encode field team names

# !!!No Direct PII - team

# Small locations: Encode locations with pop <100,000 using random large numbers

# !!!Small locations

# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Top-code high income/expenses at the 99.5th percentile

# Top-code m_s6q3: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s6q3
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s6q3",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s6q3. sFq4: How much did you spend in total purchasing land in the last 12 months?   M
##   -998    600   4500   8000  15000  24000  25000  30000  33000  40000 250000   <NA> 
##      1      1      1      2      1      1      1      3      1      1      1   2271

## [1] "Frequency table after encoding"
## m_s6q3. sFq4: How much did you spend in total purchasing land in the last 12 months?   M
##           -998            600           4500           8000          15000          24000          25000          30000          33000          40000 236350 or more           <NA> 
##              1              1              1              2              1              1              1              3              1              1              1           2271

# Top-code m_s6q7: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s6q7
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s6q7",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s6q7. sFq10: If you were to purchase this land you received on the open market, how mu
##   -998  25000  30000  50000  1e+05 150000 250000  4e+05   <NA> 
##      6      1      1      2      2      2      1      1   2269

## [1] "Frequency table after encoding"
## m_s6q7. sFq10: If you were to purchase this land you received on the open market, how mu
##           -998          25000          30000          50000          1e+05         150000         250000 388750 or more           <NA> 
##              6              1              1              2              2              2              1              1           2269

# Top-code m_farm_expenses. The column is first coerced with as.numeric(); the
# break point is then the floor of the 99.5% quantile of the non-missing
# values, leaving out the 999999 code.
mydata[["m_farm_expenses"]] <- as.numeric(mydata[["m_farm_expenses"]])
percentile_99.5 <- local({
  values <- mydata$m_farm_expenses
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_farm_expenses",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_farm_expenses. 
##      0     20     40     80     90     93    100    120    180    200    220    250    271    300    400    432    450    500    525    550    565    574    600    800   1000   1008   1025 
##   1847      1      2      1      1      1      1      1      1      2      1      1      1      4      3      1      1      4      1      1      1      1      2      3      4      1      1 
##   1050   1085   1100   1150   1200   1310   1350   1400   1500   1520   1570   1600   1628   1730   1825   1900   1950   2000   2005   2050   2150   2400   2412   2450   2500   2600   2736 
##      1      1      2      1      3      1      2      1      2      1      1      3      1      1      1      1      1      9      1      2      1      1      1      1      1      1      1 
##   2840   3000   3040   3050   3200   3208   3300   3390   3420   3460   3500   3570   3600   3610   3700   3750   3760   3800   3900   3950   4000   4016   4050   4060   4200   4206   4300 
##      1      7      1      1      1      1      3      1      1      1      2      1      2      1      1      2      1      2      2      1      4      1      2      2      3      1      3 
##   4400   4480   4500   4600   4800   4810   4870   5000   5080   5100   5200   5390   5400   5450   5600   5876   5900   5990   6000   6100   6200   6264   6300   6600   6720   6736   6800 
##      1      1      1      1      2      1      1      3      1      1      1      1      1      1      2      1      1      1      4      1      1      1      2      2      1      1      2 
##   6850   7000   7115   7200   7316   7440   7500   7580   7600   7650   7828   7840   7900   8000   8020   8025   8080   8380   8400   8460   8730   8750   8850   8970   9000   9030   9150 
##      1      2      1      1      1      1      1      1      1      1      1      1      1      3      1      1      1      1      2      1      1      1      1      1      2      1      1 
##   9360   9500   9600  10000  10200  10250  10267  10300  10390  10700  10900  11000  11150  11500  11600  11840  12000  12100  12158  12200  12400  12500  12600  12608  12800  12900  12990 
##      1      1      1      2      1      1      1      2      1      2      1      4      1      1      2      1      6      1      1      1      1      1      1      1      1      1      1 
##  13013  13165  13280  13500  13600  13800  14000  14400  14800  15000  15115  15400  15690  15800  16000  16100  16300  16801  17000  17300  17316  17500  17700  17780  17800  18000  18100 
##      1      1      1      1      1      1      2      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1      2      2 
##  18400  18720  18860  19000  19100  19300  19400  19550  20000  20600  20695  20710  21000  21200  21500  21550  21600  21800  21990  22000  22300  22400  23000  23988  24000  24200  24400 
##      1      1      1      1      1      1      1      1      2      1      1      1      2      1      1      1      1      1      1      1      1      1      2      1      1      1      1 
##  24500  24600  25000  25250  25360  25400  25436  25500  26000  26200  26250  26600  27000  27400  27500  27600  28302  28400  28680  28800  28950  29158  29350  29600  29800  30000  30500 
##      1      1      3      1      1      1      1      1      2      1      1      2      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1 
##  31900  32000  33000  33800  34100  34500  35000  36000  37300  38800  40000  40200  41316  41600  43200  44350  44400  45200  46400  46718  48400  49000  51000  51800  56900  57000  61800 
##      1      1      1      1      1      1      1      2      1      1      1      1      1      1      1      1      1      2      1      1      1      1      1      1      1      1      1 
##  63000  64400  65150  65800  68000  68675  77000  81725  87460  87700  88600  91000 102000 132160 138000 158200 163600 282600   <NA> 
##      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1      1     58

## [1] "Frequency table after encoding"
## m_farm_expenses. 75917
##             0            20            40            80            90            93           100           120           180           200           220           250           271 
##          1847             1             2             1             1             1             1             1             1             2             1             1             1 
##           300           400           432           450           500           525           550           565           574           600           800          1000          1008 
##             4             3             1             1             4             1             1             1             1             2             3             4             1 
##          1025          1050          1085          1100          1150          1200          1310          1350          1400          1500          1520          1570          1600 
##             1             1             1             2             1             3             1             2             1             2             1             1             3 
##          1628          1730          1825          1900          1950          2000          2005          2050          2150          2400          2412          2450          2500 
##             1             1             1             1             1             9             1             2             1             1             1             1             1 
##          2600          2736          2840          3000          3040          3050          3200          3208          3300          3390          3420          3460          3500 
##             1             1             1             7             1             1             1             1             3             1             1             1             2 
##          3570          3600          3610          3700          3750          3760          3800          3900          3950          4000          4016          4050          4060 
##             1             2             1             1             2             1             2             2             1             4             1             2             2 
##          4200          4206          4300          4400          4480          4500          4600          4800          4810          4870          5000          5080          5100 
##             3             1             3             1             1             1             1             2             1             1             3             1             1 
##          5200          5390          5400          5450          5600          5876          5900          5990          6000          6100          6200          6264          6300 
##             1             1             1             1             2             1             1             1             4             1             1             1             2 
##          6600          6720          6736          6800          6850          7000          7115          7200          7316          7440          7500          7580          7600 
##             2             1             1             2             1             2             1             1             1             1             1             1             1 
##          7650          7828          7840          7900          8000          8020          8025          8080          8380          8400          8460          8730          8750 
##             1             1             1             1             3             1             1             1             1             2             1             1             1 
##          8850          8970          9000          9030          9150          9360          9500          9600         10000         10200         10250         10267         10300 
##             1             1             2             1             1             1             1             1             2             1             1             1             2 
##         10390         10700         10900         11000         11150         11500         11600         11840         12000         12100         12158         12200         12400 
##             1             2             1             4             1             1             2             1             6             1             1             1             1 
##         12500         12600         12608         12800         12900         12990         13013         13165         13280         13500         13600         13800         14000 
##             1             1             1             1             1             1             1             1             1             1             1             1             2 
##         14400         14800         15000         15115         15400         15690         15800         16000         16100         16300         16801         17000         17300 
##             1             1             1             1             1             1             1             1             1             1             1             1             1 
##         17316         17500         17700         17780         17800         18000         18100         18400         18720         18860         19000         19100         19300 
##             1             1             1             1             1             2             2             1             1             1             1             1             1 
##         19400         19550         20000         20600         20695         20710         21000         21200         21500         21550         21600         21800         21990 
##             1             1             2             1             1             1             2             1             1             1             1             1             1 
##         22000         22300         22400         23000         23988         24000         24200         24400         24500         24600         25000         25250         25360 
##             1             1             1             2             1             1             1             1             1             1             3             1             1 
##         25400         25436         25500         26000         26200         26250         26600         27000         27400         27500         27600         28302         28400 
##             1             1             1             2             1             1             2             1             1             1             1             1             1 
##         28680         28800         28950         29158         29350         29600         29800         30000         30500         31900         32000         33000         33800 
##             1             1             1             1             1             1             1             1             1             1             1             1             1 
##         34100         34500         35000         36000         37300         38800         40000         40200         41316         41600         43200         44350         44400 
##             1             1             1             2             1             1             1             1             1             1             1             1             1 
##         45200         46400         46718         48400         49000         51000         51800         56900         57000         61800         63000         64400         65150 
##             2             1             1             1             1             1             1             1             1             1             1             1             1 
##         65800         68000         68675 75917 or more          <NA> 
##             1             1             1            12            58

# Top-code m_s6q12: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s6q12
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s6q12",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s6q12. sFq16: How much in total would it have cost you to purchase those inputs with yo
##  -998     0     2     5    10    15    20    25    30    35    40    45    50    55    60    70    75    80    90   100   120   150   180   200   220   250   260   290   300   400   450   458 
##    31    13     2     1    31     1    43     5    15     4    19     4    96     2    12     7     4     5     2   105     3    13     2    28     1     7     1     1    19     4     2     1 
##   500   600   650   700   750   800   850   900   998  1000  1100  1200  1250  1300  1400  1500  1600  1800  2000  2200  2250  2500  2652  3000  3600  3800  4000  4500  5000  5600  6000  6101 
##    29     4     1     6     2     3     1     2     1    14     1    11     2     1     1     3     1     2     7     1     1     3     1     6     1     1     1     3     7     1     3     1 
## 10000 13740  <NA> 
##     2     1  1676

## [1] "Frequency table after encoding"
## m_s6q12. sFq16: How much in total would it have cost you to purchase those inputs with yo
##         -998            0            2            5           10           15           20           25           30           35           40           45           50           55 
##           31           13            2            1           31            1           43            5           15            4           19            4           96            2 
##           60           70           75           80           90          100          120          150          180          200          220          250          260          290 
##           12            7            4            5            2          105            3           13            2           28            1            7            1            1 
##          300          400          450          458          500          600          650          700          750          800          850          900          998         1000 
##           19            4            2            1           29            4            1            6            2            3            1            2            1           14 
##         1100         1200         1250         1300         1400         1500         1600         1800         2000         2200         2250         2500         2652         3000 
##            1           11            2            1            1            3            1            2            7            1            1            3            1            6 
##         3600         3800         4000         4500         5000         5600         6000 6096 or more         <NA> 
##            1            1            1            3            7            1            3            4         1676

# Top-code m_s7q4: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s7q4
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s7q4",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s7q4. sGq7: How much did you spend?  Magkano po ang inyong nagastos?
##  -998     0  2000  3500  5000  6000  7000  7500  8000 10000 13000 14000 15000 16000 17500 18000 19000 20000 20400 21000 22000 24000 25000 27000 29000 30000 31000 35000 37000 44500 50000 70000 
##     2     1     1     1     4     1     2     1     2     4     2     1     5     2     1     2     2     4     1     2     3     1     3     1     1     4     1     1     1     1     1     1 
##  <NA> 
##  2225

## [1] "Frequency table after encoding"
## m_s7q4. sGq7: How much did you spend?  Magkano po ang inyong nagastos?
##          -998             0          2000          3500          5000          6000          7000          7500          8000         10000         13000         14000         15000 
##             2             1             1             1             4             1             2             1             2             4             2             1             5 
##         16000         17500         18000         19000         20000         20400         21000         22000         24000         25000         27000         29000         30000 
##             2             1             2             2             4             1             2             3             1             3             1             1             4 
##         31000         35000         37000         44500         50000 64099 or more          <NA> 
##             1             1             1             1             1             1          2225

# Top-code m_s7q7: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s7q7
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s7q7",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s7q7. sGq12: If you were to buy these livestock with your own resources, how much woul
##   -998     14    300   1200   2000   3000   5000   6000   7000   8000   9000  10000  12000  13000  15000  16000  17000  17500  18000  20000  21000  22000  23000  25000  27000  28000  29000 
##      4      1      1      1      1      1      7      2      3      7      1     21      3      3     30      4      2      1      8     37      1      1      1      8      1      1      1 
##  30000  33000  35000  37000  40000  45000  48000  50000  69500  71000  80000  90000 115000   <NA> 
##     11      1     10      1      8      2      1      9      1      1      2      1      1   2084

## [1] "Frequency table after encoding"
## m_s7q7. sGq12: If you were to buy these livestock with your own resources, how much woul
##          -998            14           300          1200          2000          3000          5000          6000          7000          8000          9000         10000         12000 
##             4             1             1             1             1             1             7             2             3             7             1            21             3 
##         13000         15000         16000         17000         17500         18000         20000         21000         22000         23000         25000         27000         28000 
##             3            30             4             2             1             8            37             1             1             1             8             1             1 
##         29000         30000         33000         35000         37000         40000         45000         48000         50000         69500         71000         80000 90000 or more 
##             1            11             1            10             1             8             2             1             9             1             1             2             2 
##          <NA> 
##          2084

# Top-code m_s7q11: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s7q11
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s7q11",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s7q11. sGq18: How much did you spend?  Magkano po ang inyong nagastos?
##  -998     0   500   900  1000  1200  1300  1400  1500  1600  1700  1800  1900  2000  2200  2300  2350  2400  2500  2600  2700  2800  2900  3000  3400  3500  3600  3700  3850  3900  4000  4200 
##     1     1     1     1     4     3     1     1    27     2     3     9     1    39     7     2     1     7    28     3     1     2     1    22     2     8     4     1     1     1    24     2 
##  4400  4500  4600  4800  4900  5000  5200  5500  5610  5700  6000  6050  6100  6200  6300  6400  6500  6650  6800  7000  7200  7457  7500  8000  8400  9000  9200 10000 10200 10400 11000 11300 
##     5     2     1     2     1    32     2     2     1     3     8     1     1     2     1     1     3     1     1     6     2     1     3     4     1     4     1    11     1     1     2     1 
## 11500 11600 12000 12500 13000 13750 14000 14112 14600 15000 15400 16200 17000 20000 21900 22000 24000 28000 42500 53000 60000 70000  <NA> 
##     1     1     2     2     1     1     1     1     1     3     1     1     1     2     1     1     2     1     1     1     1     1  1938

## [1] "Frequency table after encoding"
## m_s7q11. sGq18: How much did you spend?  Magkano po ang inyong nagastos?
##          -998             0           500           900          1000          1200          1300          1400          1500          1600          1700          1800          1900 
##             1             1             1             1             4             3             1             1            27             2             3             9             1 
##          2000          2200          2300          2350          2400          2500          2600          2700          2800          2900          3000          3400          3500 
##            39             7             2             1             7            28             3             1             2             1            22             2             8 
##          3600          3700          3850          3900          4000          4200          4400          4500          4600          4800          4900          5000          5200 
##             4             1             1             1            24             2             5             2             1             2             1            32             2 
##          5500          5610          5700          6000          6050          6100          6200          6300          6400          6500          6650          6800          7000 
##             2             1             3             8             1             1             2             1             1             3             1             1             6 
##          7200          7457          7500          8000          8400          9000          9200         10000         10200         10400         11000         11300         11500 
##             2             1             3             4             1             4             1            11             1             1             2             1             1 
##         11600         12000         12500         13000         13750         14000         14112         14600         15000         15400         16200         17000         20000 
##             1             2             2             1             1             1             1             1             3             1             1             1             2 
##         21900         22000         24000         28000         42500         53000 54889 or more          <NA> 
##             1             1             2             1             1             1             2          1938

# Top-code m_s7q14: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s7q14
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s7q14",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s7q14. sGq23: If you were to buy these livestock with your own resources, how much woul
##  -998   600   800  1000  1200  1300  1500  1600  1800  2000  2200  2300  2400  2500  2600  2800  3000  3200  3500  3600  3900  4000  4400  4500  4800  5000  5100  5200  6000  6200  6400  6600 
##     1     1     1     7     1     2    14     1     1    29     3     2     1    33     1     2    16     1     6     1     1     9     2     5     1    18     1     1    12     1     1     1 
##  7000  7200  7500  8000  9000  9200  9500 10000 10800 11000 12000 13500 14000 14400 15000 16000 16800 17000 17500 20000 22500 23000 24000 27500 27600 45000 69500 75000  <NA> 
##     2     1     6     6     3     1     1    13     2     2     3     1     1     1     1     3     1     1     2     6     1     1     1     1     1     1     1     1  2043

## [1] "Frequency table after encoding"
## m_s7q14. sGq23: If you were to buy these livestock with your own resources, how much woul
##          -998           600           800          1000          1200          1300          1500          1600          1800          2000          2200          2300          2400 
##             1             1             1             7             1             2            14             1             1            29             3             2             1 
##          2500          2600          2800          3000          3200          3500          3600          3900          4000          4400          4500          4800          5000 
##            33             1             2            16             1             6             1             1             9             2             5             1            18 
##          5100          5200          6000          6200          6400          6600          7000          7200          7500          8000          9000          9200          9500 
##             1             1            12             1             1             1             2             1             6             6             3             1             1 
##         10000         10800         11000         12000         13500         14000         14400         15000         16000         16800         17000         17500         20000 
##            13             2             2             3             1             1             1             1             3             1             1             2             6 
##         22500         23000         24000         27500         27600         45000 64477 or more          <NA> 
##             1             1             1             1             1             1             2          2043

# Top-code m_s7q18: the break point is the floor of the 99.5% quantile of the
# non-missing values, leaving out the 999999 code.
# NOTE(review): other codes present in the data (e.g. -998) are not excluded
# from the quantile -- confirm this is intended.
percentile_99.5 <- local({
  values <- mydata$m_s7q18
  floor(quantile(values[!is.na(values) & values != 999999], probs = 0.995))
})
# top_recode() is not defined in this file; presumably it caps values at
# break_point and treats `missing` as a missing-value code -- verify there.
mydata <- top_recode(
  variable = "m_s7q18",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s7q18. sGq29: How much did you spend?  Magkano po ang inyong nagastos?
##  -998     0    10    20    25    30    50    60    65    70    85    90   100   105   120   150   200   210   240   250   280   284   300   320   325   336   340   350   360   370   390   400 
##     7     4     1     1     1     1    12     1     1     1     1     1    22     1     1    17    45     1     6    10     1     1    44     1     1     1     1     3     3     1     1    16 
##   420   450   500   540   550   580   600   650   660   680   700   750   760   780   800   850   875   900  1000  1025  1180  1200  1500  1540  1584  1600  1790  1900  2000  2300  2500  2700 
##     1     5    39     1     1     1    14     1     1     1     6     2     1     1     6     1     1     4    28     1     1     3     8     1     1     1     1     1    11     1     3     2 
##  2940  3000  3300  4000  4500  5000  6000  6700  7000 10000 12000 25000  <NA> 
##     1     6     1     1     2     3     1     1     2     1     1     1  1905

## [1] "Frequency table after encoding"
## m_s7q18. sGq29: How much did you spend?  Magkano po ang inyong nagastos?
##          -998             0            10            20            25            30            50            60            65            70            85            90           100 
##             7             4             1             1             1             1            12             1             1             1             1             1            22 
##           105           120           150           200           210           240           250           280           284           300           320           325           336 
##             1             1            17            45             1             6            10             1             1            44             1             1             1 
##           340           350           360           370           390           400           420           450           500           540           550           580           600 
##             1             3             3             1             1            16             1             5            39             1             1             1            14 
##           650           660           680           700           750           760           780           800           850           875           900          1000          1025 
##             1             1             1             6             2             1             1             6             1             1             4            28             1 
##          1180          1200          1500          1540          1584          1600          1790          1900          2000          2300          2500          2700          2940 
##             1             3             8             1             1             1             1             1            11             1             3             2             1 
##          3000          3300          4000          4500          5000          6000          6700          7000         10000 10210 or more          <NA> 
##             6             1             1             2             3             1             1             2             1             2          1905

# Top code m_s7q21 at the floor of its 99.5th percentile.
# NA values and the 999999 missing code are left out of the percentile calculation.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$m_s7q21)
  floor(quantile(observed[observed != 999999], probs = c(0.995)))
})
mydata <- top_recode(
  variable = "m_s7q21",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## m_s7q21. sGq34: If you were to buy these birds with your own resources, how much would th
##  -998    20    30    50    60    75    80    90   100   120   130   150   165   175   180   200   210   225   240   250   260   265   270   300   308   340   350   360   370   400   440   450 
##    39     1     1     6     4     1     1     2    27    10     3    47     1     3     4    66     1     1     3    11     1     1     2    79     1     1    10     5     1    16     1    14 
##   465   480   500   525   550   560   570   600   650   655   700   720   750   760   800   820   840   850   860   880   900   910   930   960   990  1000  1030  1050  1100  1120  1150  1200 
##     1     2    67     1     2     1     1    35     4     1    13     1    13     1    18     1     1     1     1     1     9     1     1     1     1    68     1     2     4     1     1    16 
##  1250  1260  1300  1350  1400  1500  1540  1550  1600  1650  1700  1800  1860  1900  2000  2100  2150  2200  2250  2400  2500  2510  2550  2700  2850  3000  3100  3380  3500  3600  3750  4000 
##     3     1     5     3     1    30     1     1     2     1     1     6     1     1    26     2     1     3     1     2    11     1     1     1     2    16     1     2     4     2     1     5 
##  4500  4800  5000  5100  5500  6000  6150  7000  7500  8000  9000 10000 10500 12000 15000 16000 22800 25000 30000  <NA> 
##     2     1    14     1     1     7     1     1     2     1     1     3     2     2     1     1     1     1     1  1466

## [1] "Frequency table after encoding"
## m_s7q21. sGq34: If you were to buy these birds with your own resources, how much would th
##          -998            20            30            50            60            75            80            90           100           120           130           150           165 
##            39             1             1             6             4             1             1             2            27            10             3            47             1 
##           175           180           200           210           225           240           250           260           265           270           300           308           340 
##             3             4            66             1             1             3            11             1             1             2            79             1             1 
##           350           360           370           400           440           450           465           480           500           525           550           560           570 
##            10             5             1            16             1            14             1             2            67             1             2             1             1 
##           600           650           655           700           720           750           760           800           820           840           850           860           880 
##            35             4             1            13             1            13             1            18             1             1             1             1             1 
##           900           910           930           960           990          1000          1030          1050          1100          1120          1150          1200          1250 
##             9             1             1             1             1            68             1             2             4             1             1            16             3 
##          1260          1300          1350          1400          1500          1540          1550          1600          1650          1700          1800          1860          1900 
##             1             5             3             1            30             1             1             2             1             1             6             1             1 
##          2000          2100          2150          2200          2250          2400          2500          2510          2550          2700          2850          3000          3100 
##            26             2             1             3             1             2            11             1             1             1             2            16             1 
##          3380          3500          3600          3750          4000          4500          4800          5000          5100          5500          6000          6150          7000 
##             2             4             2             1             5             2             1            14             1             1             7             1             1 
##          7500          8000          9000         10000         10500         12000 14729 or more          <NA> 
##             2             1             1             3             2             2             5          1466

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Indirect-PII variables, built from their question numbers:
# section 6 questions first, then section 7 questions.
indirect_PII <- c(
  paste0("m_s6q", c(1L, 2L, 4L, 5L, 6L, 8L, 10L, 11L, 13L)),
  paste0(
    "m_s7q",
    c(1L, 2L, 3L, 5L, 6L, 8L, 9L, 10L, 12L, 13L, 15L, 16L, 17L, 19L, 20L)
  )
)

# Hand the variable list to the capture_tables() helper.
capture_tables(indirect_PII)

# Recode those with very specific values. 
# !!!No very specific values

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here:
open_ends <- c(
  "m_s6q4_other", "m_s6q6_other", "m_s6q8_warning", "m_s6q13_other",
  "m_endnote6",
  "m_s7q3_other", "m_s7q6_other", "m_s7q10_other", "m_s7q13_other",
  "m_s7q17_other", "m_s7q20_other",
  "m_endnote7"
)

# The open-end variables are also the list passed to capture_tables(),
# so indirect_PII is overwritten with the same names.
indirect_PII <- open_ends
capture_tables(indirect_PII)

# Pass the open-end variables to the report_open() helper.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify the variables to be deleted or redacted and their row numbers.

# Redact "other, specify" responses by row number.
# Rows that receive the same replacement text are assigned in one step.
mydata$m_s6q4_other[c(1003, 1294)] <- "[Tagalog]"

# NOTE(review): row 352 keeps the figure "(4500)" in its text - confirm this is intended.
mydata$m_s6q13_other[c(260, 346)] <- "[Tagalog]"
mydata$m_s6q13_other[352] <- "4 kls. Of Certified Seeds from Local Government of [municipality],(4500)"
mydata$m_s6q13_other[1468] <- "Department of Agriculture and UP [municipality]"

mydata$m_s7q6_other[c(423, 643, 1469)] <- "[Tagalog]"
mydata$m_s7q6_other[1046] <- "[municipality] municipal government"

mydata$m_s7q10_other[281] <- "[Tagalog]"
mydata$m_s7q10_other[536] <- "Fond of [Tagalog]"

mydata$m_s7q13_other[c(546, 1029, 1938)] <- "[Tagalog]"

# Row 268 is written twice; the second assignment determines its final value.
mydata$m_s7q20_other[c(268, 897, 1030)] <- "[Tagalog]"
mydata$m_s7q20_other[268] <- "[language], [name]"

# Redacted replacement text for m_endnote6, keyed by row number.
# Each name is the row of mydata whose note is overwritten with the paired string.
endnote6_redactions <- c(
  "16" = "Respondent said that even in baseline survey, they already spending [amount redacted] in cultivating crops. He can't recall that he's been asked regarding the matter.",
  "27" = "They plant last [date] spend [amount redacted] but they have not plant yet becausr they're waiting for the rain before they can plant",
  "28" = "They plant last [date] and harvested [date], they havent plant yet so thry have no idea yet how much will cost them, so they cant compare if the expensed decrease or increase",
  "53" = "They plant once a year, every July, last [date] they spend [amount redacted] and they know yet if their expense is increased because they have' nt plant yet for this year.",
  "62" = "They did'nt cultivate crops last 2016 because 2 of the household member got sick, they cultivate crops this year but corn only and spend [amount redacted] for seeds and fertilizer, they are waiting for the rain so that they can cultivate crops (rice grain)",
  "70" = "Bought seed at [amount redacted] only for  their owned consumption",
  "92" = "He told that during baseline survey their expenses in their crops is only [amount redacted], he include their not on crops in the [amount redacted] expenses last baseline.",
  "110" = "On the baseline survey their expenses is [amount redacted] while for Last 12 months their expenses estimated [amount redacted]",
  "156" = "The farm they cultivitang is owned by 4 household (4siblings), so every year, different household is cultivating the farm. Their turn to cultivate was last [date].",
  "173" = "[name] planted vegetables at their backyard for their consumption, she is not using any fertilizer, no expenses, as well as the seeds were given by friends and neighbors",
  "190" = "Bought the seed for croping, spend [amount redacted]",
  "215" = "He recieved half sack of fertilizer from DA  He spent [amount redacted] for 2 corn farm, and he spent [amount redacted] for the rice field last [date] which is not included in the baseline.",
  "218" = "Last 12 months they spend [amount redacted] in land cultivation  , the expenses increased on labor from [amount redacted] to [amount redacted] plus free meal. The money they spend half of it came from loan to a friend and half of it from their savings",
  "219" = "Respondent only spend [amount redacted] pesos for the seeds since they were not yet started cultivating the farm. She said, that they will just be starting on Suday.",
  "223" = "Respondent planted eggplant which she spent [amount redacted].",
  "236" = "They planted 'camote' last April and spent [amount redacted] for it.",
  "240" = "Respondent said that fro, [amount redacted], they already spend [amount redacted] pesos for cultivating crops.",
  "257" = "Seed- [amount redacted], fertilizers- [amount redacted], pesticides-[amount redacted], insecticides- [amount redacted]. Total [amount redacted]",
  "259" = "[amount redacted] seed of eggplant and sili",
  "261" = "[Language]",
  "262" = " During te baseline survey she remembered that shemention to the enumerator that thay have spent [amount redacted] in cultivating their crops, and for the last 12 months they have spent the same amount",
  "280" = "They planted rice even before the baseline interview but was not stated the amount they spent even though he said it to the interviewer based from him. They bought the land last 2002. For tbe last 12 mos they have spent [amount redacted]",
  "306" = "Fertilizer-[amount redacted], Rutubator-[amount redacted], Pagpapatanim- [amount redacted], Tagapag bunot-[amount redacted], Tagasuyod- [amount redacted], Pamatay nang damu- [amount redacted], Snail-[amount redacted], Crude- [amount redacted]  Total of [amount redacted] pesos.",
  "317" = "FERTILIZER- [amount redacted], SEEDS- [amount redacted], PESTICIDES-[amount redacted], LABOR [amount redacted] INSECTICIDES-[amount redacted] total=[amount redacted]",
  "327" = "She said that they spwnt [amount redacted] during the last year before basekine interview for cultivating plants. Last October they have spent [amount redacted] for the cultivation of their corn crop.",
  "337" = "Fertilizers=[amount redacted], Seed=[amount redacted], Pesticides=[amount redacted], Nag Araro=[amount redacted], Nagtanim=[amount redacted]. Total of [amount redacted].",
  "343" = "Seed [amount redacted], abono [amount redacted], pamatay nang damo [amount redacted], =[amount redacted]",
  "377" = "Abono=[amount redacted], patubig=[amount redacted], Pesticides= [amount redacted], Seeds= [amount redacted] TOTAL OF [amount redacted]. the seeds must be gave to them by Department of Agriculture.",
  "378" = "Abono=[amount redacted], fertilizer - [amount redacted], pesticides= [amount redacted], laborer [amount redacted], expenses in the sides of the farm (seeds given by the neigborhood) [amount redacted]  Total =[amount redacted].",
  "403" = "The seeds or plant they gather in the other farm is the what they used in planting, the amount of [amount redacted] in the sFq13 was the amount of the fertilizer.",
  "420" = "Last year the respondent cultivated [amount redacted] sq.m for his cassava plants. And now his lot rented for 3 years with [amount redacted]. He owns only backyard gardening.",
  "468" = "[Language]",
  "571" = "The household loaned the [amount redacted] and the saving of [amount redacted] in using to plant",
  "599" = " The DOLE gave them amounted [amount redacted] pesos that they sell, and the [amount redacted] pesos which  comes from their savings, with the total of [amount redacted] pesos",
  "617" = "[Language]",
  "646" = "Last year the household cultivated 2 ha. and spent [amount redacted] and now spent [amount redacted] for 1 ha",
  "651" = "Respondent said that they received a pack of seeds from Brgy. [barangay name] for them to plant.",
  "678" = "[amount redacted] from DSWD AAthat they reinvested in planting im the farm and [amount redacted] from micro credit loan and the rest is [amount redacted] from teir savings",
  "679" = "They gather wild plant and transfered from the farm to their backyard. [language]",
  "715" = "July 2016 start to plant on rented land",
  "937" = "She only purchased land with the amount [amount redacted], installment  of [amount redacted] every month. She already done for installment in 10 months.",
  "949" = "[Language]",
  "1062" = "[amount redacted] ung fertilizer, [amount redacted] seeds",
  "1064" = "Since last survey the household already has a [amount redacted] pesos expenses for 12 months.",
  "1126" = "The farm they used were owned by [name] and his siblings. So they shared in expenses and same with  the crops they harvest.",
  "1140" = "Last year they spend [amount redacted] pesosi order to cultivate crops. They spend the same in this year",
  "1283" = "Contradicting the data collected last year, the respondent said that they were spending [amount redacted] every year for all the inputs needed to cultivate crops.",
  "1303" = "The respondant told me the they spent roughly [amount redacted] pesos every year to cultivate crops and they spend stayed same in the last 12 months but in sFq12 stated that the household spent [amount redacted] in the previous year",
  "1307" = "The respondent spent roughly [amount redacted] in the last 12 months in cultivating crops ([amount redacted] in fertilizer and [amount redacted] in labor)and almost a half of it [amount redacted] pesos they spent before the baseline survey but in SFQ12 in baseline survey they spent [amount redacted] pesos",
  "1379" = "[amount redacted] was just her share to the expenses, because her children who already have their own family or household were also sharing to the expenses,",
  "1468" = "Their crops is organic, by the help of UP [UP name] they are planting vegetables. They gets free 4  bags of seeds in the department of Agriculture.",
  "2067" = "The cost was decreased because he has only [amount redacted]tupongs of land from the [amount redacted]hectares [language]",
  "2073" = "[amount redacted] for the 1year expenses"
)

# Row numbers are all distinct, so one vectorised assignment writes every note.
mydata$m_endnote6[as.integer(names(endnote6_redactions))] <- unname(endnote6_redactions)

# Drop the lookup so the workspace holds no extra object.
rm(endnote6_redactions)

# Redact m_endnote7 by row number: each listed row of mydata is overwritten
# with a version of the note in which names, amounts and non-English text are
# replaced by bracketed placeholders.
mydata$m_endnote7[16] <- "The [amount redacted] chicks were purchased by the twins, [name], using their salary from the hardware."
mydata$m_endnote7[18] <- "The household's goat gave birth to [amount redacted] new goats. And they also had a new chicks but respondent don't have an idea of the amount of the chicks."
mydata$m_endnote7[36] <- "Respondent said the rooster will cost [amount redacted] if they will bought it but when she asked her husband, she changed her response in favor of her husband's response. Even though, I still took the respondent's 1st response."
mydata$m_endnote7[80] <- "They had purchased [amount redacted] pigs but it was last June 2016."
mydata$m_endnote7[86] <- "They have [amount redacted] goats"
mydata$m_endnote7[110] <- "They have [amount redacted] chicken, but the goat they are care is not their ownd"
mydata$m_endnote7[112] <- "The household bought [amount redacted] piglets last November 2016 but they killed the [amount redacted] pigs last March 2017 and sold the meat."
mydata$m_endnote7[219] <- "Respondent has no idea of the amount of the new [amount redacted] native chicks."
mydata$m_endnote7[240] <- "Respondent can't tell the amount of the [amount redacted] native chicks because according to her, those were not been sold."
mydata$m_endnote7[287] <- "Baseline respondent said that the sales from the in kind of KASAMA (DOLE) was the main source, they purchase the pig worth [amount redacted]"
mydata$m_endnote7[306] <- "[amount redacted] is for 12 person she's the one who've got those piggery. It is an association so [amount redacted] piggery for 12 person belongs the sow and the piglet.   For ducks-[amount redacted], chickens-[amount redacted]."
mydata$m_endnote7[332] <- "They dont acquired any new animals. The cow that they have was have an arrangement, the owner give them [amount redacted] cow after giving birth as paid for them."
mydata$m_endnote7[349] <- "In livestock they have also additional [amount redacted] goat (kid) worth [amount redacted]"
mydata$m_endnote7[364] <- "[language]"
# The peso value is redacted here as well, matching how the other monetary
# amounts in these notes are handled.
mydata$m_endnote7[376] <- "Respondent said that they bought  2 pig worth [amount redacted] pesos, from the sales of the in kind transfer of Dole Integrated Livelihood and Emergency Program in which they are now tending."
mydata$m_endnote7[454] <- "3 is for [amount redacted], she buys [amount redacted] but the 2 of them died."
mydata$m_endnote7[475] <- "The large livestock is the carabao and the amount is [amount redacted], the price is low because the household head or [name] bought  it to his sibling"
mydata$m_endnote7[500] <- "[language]"
mydata$m_endnote7[872] <- "The worth of 2 chikens is [amount redacted]"
mydata$m_endnote7[1029] <- "[language]"
mydata$m_endnote7[1045] <- "Government transfer came mayor [name]"
mydata$m_endnote7[1050] <- "As per the respondent ,on his computation its only costing [amount redacted] pesos only the value of the pigs that they received from DOLE's livelihood program"
mydata$m_endnote7[1127] <- "Recently he bought [amount redacted] pcs. Of 45 days chicks."
mydata$m_endnote7[1213] <- "Even before the DOLE gives [amount redacted] piglets to them, the household already tending pigs."
mydata$m_endnote7[1245] <- "[language]"
mydata$m_endnote7[1253] <- "Their relatives gave the pig, they made it as patener(mother pig), then during the interview they have [amount redacted] piglets."
mydata$m_endnote7[1285] <- "[name] traded his quail to the mother Duck of his cousin, then the mother duck produces a (male) duck. But mostly of its eggs were stolen."
mydata$m_endnote7[1317] <- "[language]"
mydata$m_endnote7[1351] <- "[amount redacted] inakay, ducklings"
mydata$m_endnote7[1357] <- "They bought [amount redacted]pcs. Of 45days chicken last March 5, then cooked it last May for the fiesta."
mydata$m_endnote7[1362] <- "They have [amount redacted] ducks and [amount redacted] chicker. Her elder son takee care of them"
mydata$m_endnote7[1472] <- "The [amount redacted] goats were fro  another household, they only have an agreement that when it gave birth, it will be parted to each household."
mydata$m_endnote7[1486] <- "4Ps gave them [amount redacted] 'baby goat'"

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed data to "<filename>_PU.dta" (Stata) and "<filename>_PU.sav" (SPSS).
haven::write_dta(mydata, sprintf("%s_PU.dta", filename))
haven::write_sav(mydata, sprintf("%s_PU.sav", filename))

# Add report title dynamically
title_var <- sprintf("DOL-ILAB SDC - %s", filename)