# Start from a clean workspace before the review script runs.
# `all.names = TRUE` is spelled out in full: the earlier `all=t` relied on
# partial argument matching and passed the function `t` instead of the logical
# `TRUE`.
rm(list = ls(all.names = TRUE))

Set up filenames

# !!!Update helper functions file
# Name of the helper-functions script that is sourced further down.
functions_vers <- "functions_1.8.R"

# !!!Update filename
# Name of the dataset being reviewed.
filename <- "midline_bgy_captain_survey"

Set up data and functions, and create the dictionary for dataset review

# Run the helper script whose name is stored in `functions_vers`.
# NOTE(review): presumably this also creates `mydata` and the helpers used
# below (encode_location, top_recode) - confirm against the helper file.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (population <100,000)
# Large Location: Location (population >100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!No Direct PII 

Direct PII-team: Encode field team names

# !!!No Direct PII - team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variable(s) to encode; 999999 is passed as the missing-value code.
locvars <- "m_s0q4"
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## m_s0q4. SELECT MUNICIPALITY PROVIDED.
##                  Malinao                   Manito                 Polangui                   Abucay 
##                       12                        3                        4                        9 
##                Mariveles              San Nicolas                  Amulung                   Enrile 
##                       16                        2                        5                        5 
##                Calabanga                Camaligan                  Canaman          Jose Panganiban 
##                        5                        1                        3                        5 
##                     Labo                 Libmanan                  Magarao                Naga City 
##                        4                        1                        3                        2 
##                   Ocampo                  Pasacao                 Tinambac General Emilio Aguinaldo 
##                        1                        3                        1                        3 
##             Cauayan City                    Jones                Pagsanjan                     Pila 
##                        4                       18                        9                        1 
##                     Agno                     Anda                     Bani                 Bautista 
##                        4                        3                        2                        1 
##                 Bugallon                 Calasiao          San Carlos City                     Sual 
##                        4                        1                        1                        1 
##               Urbiztondo               Candelaria                 Sampaloc                Jala-Jala 
##                        1                        2                        4                        3 
##                  Pililla                San Mateo                    Tanay                    Pilar 
##                        3                        3                        4                        5 
##            Sorsogon City                     <NA> 
##                        2                        1 
## [1] "Frequency table after encoding"
## m_s0q4. SELECT MUNICIPALITY PROVIDED.
##  788  789  790  791  792  793  794  795  796  797  798  799  800  801  802  803  804  805  806  807  808  809 
##    4    4    4    3    2   12    3    9    3    3    5   18    1    1    4    9    5    3    2    3    2    1 
##  810  811  812  813  814  815  816  817  818  819  820  821  822  823  824  825  826  827  828 <NA> 
##    1    3    5    2    4    2    5    1   16    1    4    1    1    4    3    5    1    3    1    1

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 


# Top-code bcs_s1q2: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q2) & bcs_s1q2 != 999999
  floor(quantile(bcs_s1q2[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q2. How long have you been ${calc_1} in ${calc_barangay}?   Gaano na po kayo katag
##    1    2    3    4    5    6    7    8    9   10   12   13   14   17   18   20   24   36   42   45   46   47 
##    2    4    7    9    2    7    6    1    5    4    3    2    1    3    2    2    1    3    3    1    1    1 
##   48   55   60   72   74   76   79   80   84   96  108  114  115  117  118  120  144  150  204  216  276 <NA> 
##   22    1    2    5    1    2    1    3   27    1    2    1    1    2    2   14    1    1    3    1    1    1

## [1] "Frequency table after encoding"
## bcs_s1q2. How long have you been ${calc_1} in ${calc_barangay}?   Gaano na po kayo katag
##           1           2           3           4           5           6           7           8           9 
##           2           4           7           9           2           7           6           1           5 
##          10          12          13          14          17          18          20          24          36 
##           4           3           2           1           3           2           2           1           3 
##          42          45          46          47          48          55          60          72          74 
##           3           1           1           1          22           1           2           5           1 
##          76          79          80          84          96         108         114         115         117 
##           2           1           3          27           1           2           1           1           2 
##         118         120         144         150         204         216 227 or more        <NA> 
##           2          14           1           1           3           1           1           1

# Top-code bcs_s1q5: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q5) & bcs_s1q5 != 999999
  floor(quantile(bcs_s1q5[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q5",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q5. How long did you serve as ${calc_2}?  Gaano po kayo katagal naglingkod bilang \
##    1    3    4    5    6   10   12   13   14   15   19   24   36   48   60   72   84   96  102  108  120  132 
##    1    5    3    1    2    1    1    5    1    1    1    2   14    2    4   12    1    3    1    6    5    7 
##  156  180  216  288  300 <NA> 
##    5    2    1    1    1   76

## [1] "Frequency table after encoding"
## bcs_s1q5. How long did you serve as ${calc_2}?  Gaano po kayo katagal naglingkod bilang \
##           1           3           4           5           6          10          12          13          14 
##           1           5           3           1           2           1           1           5           1 
##          15          19          24          36          48          60          72          84          96 
##           1           1           2          14           2           4          12           1           3 
##         102         108         120         132         156         180         216         288 294 or more 
##           1           6           5           7           5           2           1           1           1 
##        <NA> 
##          76

# Top-code bcs_s1q7: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q7) & bcs_s1q7 != 999999
  floor(quantile(bcs_s1q7[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q7",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q7. How long would it take you to travel one-way from here to municipal government o
##    1    2    3    5   10   15   20   25   30   35   40   45   50   60   90  120  150 <NA> 
##    4   10   11   26   30   19   17    1   21    1    3    7    1    7    1    4    1    1

## [1] "Frequency table after encoding"
## bcs_s1q7. How long would it take you to travel one-way from here to municipal government o
##           1           2           3           5          10          15          20          25          30 
##           4          10          11          26          30          19          17           1          21 
##          35          40          45          50          60          90         120 125 or more        <NA> 
##           1           3           7           1           7           1           4           1           1

# Top-code bcs_s1q8: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q8) & bcs_s1q8 != 999999
  floor(quantile(bcs_s1q8[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q8",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q8. How many times have you made that trip in the last 12 months for official busine
##    4    5    6   10   12   15   24   30   32   36   40   44   48   50   52   56   60   70   84   96  100  104 
##    1    2    1    1    3    1   19    1    1   14    1    1   13    1    1    1   16    1    2    9    3    1 
##  108  120  124  128  130  132  140  144  150  156  160  174  180  182  192  200  240  252  300 <NA> 
##    1   28    1    1    1    1    4    6    2    1    1    1    2    1    3    7    6    1    2    1

## [1] "Frequency table after encoding"
## bcs_s1q8. How many times have you made that trip in the last 12 months for official busine
##           4           5           6          10          12          15          24          30          32 
##           1           2           1           1           3           1          19           1           1 
##          36          40          44          48          50          52          56          60          70 
##          14           1           1          13           1           1           1          16           1 
##          84          96         100         104         108         120         124         128         130 
##           2           9           3           1           1          28           1           1           1 
##         132         140         144         150         156         160         174         180         182 
##           1           4           6           2           1           1           1           2           1 
##         192         200         240         252 300 or more        <NA> 
##           3           7           6           1           2           1

# Top-code bcs_s1q9: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q9) & bcs_s1q9 != 999999
  floor(quantile(bcs_s1q9[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q9",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q9. How long would it take you to travel (one-way) from here to regional center gove
##    0    1    2    3    4    6    7    8   10   15   20   30   40   45   50   60   90  120  130  180  240  300 
##    1    7   16    6    2    2    1    2    1    6    2    8    3    1    1   24   12   23    1   27   10    5 
##  360  480 <NA> 
##    1    2    1

## [1] "Frequency table after encoding"
## bcs_s1q9. How long would it take you to travel (one-way) from here to regional center gove
##           0           1           2           3           4           6           7           8          10 
##           1           7          16           6           2           2           1           2           1 
##          15          20          30          40          45          50          60          90         120 
##           6           2           8           3           1           1          24          12          23 
##         130         180         240         300         360 480 or more        <NA> 
##           1          27          10           5           1           2           1

# Top-code bcs_s1q10: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table for this variable lists a -998 value,
# presumably a non-response code; it is not excluded from the percentile -
# confirm whether it should be.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q10) & bcs_s1q10 != 999999
  floor(quantile(bcs_s1q10[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q10",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q10. How many times have you made that trip in the last 12 months for official busine
## -998    1    2    3    4    5    6    8   10   12   15   18   20   24   28   36   40   48   60   72   96 <NA> 
##    1   48   30   19    7   15    5    1   12    8    1    1    3    3    1    3    1    1    2    1    1    1

## [1] "Frequency table after encoding"
## bcs_s1q10. How many times have you made that trip in the last 12 months for official busine
##       -998          1          2          3          4          5          6          8         10         12 
##          1         48         30         19          7         15          5          1         12          8 
##         15         18         20         24         28         36         40         48         60         72 
##          1          1          3          3          1          3          1          1          2          1 
## 76 or more       <NA> 
##          1          1

# Top-code bcs_s1q11: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table for this variable lists a -998 value,
# presumably a non-response code; it is not excluded from the percentile -
# confirm whether it should be.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q11) & bcs_s1q11 != 999999
  floor(quantile(bcs_s1q11[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q11",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q11. How many households are there in this barangay?  Ilan pong sambahayan ang mayroo
##  -998    97   105   118   122   126   148   150   152   161   178   195   198   200   202   204   211   212 
##     1     1     1     1     1     1     1     3     1     1     1     1     1     3     1     1     1     1 
##   216   225   228   230   242   251   252   260   264   266   280   284   286   288   291   300   303   312 
##     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1 
##   315   320   324   326   330   340   355   357   367   370   380   392   398   400   403   404   418   420 
##     1     1     1     1     1     1     1     1     1     1     1     1     1     2     1     1     2     3 
##   434   440   449   450   487   489   496   499   500   510   520   525   528   543   550   580   586   594 
##     1     1     1     1     1     1     1     1     2     1     1     1     1     1     1     1     1     1 
##   600   610   612   658   660   664   678   687   690   697   700   710   717   719   750   754   765   769 
##     2     1     1     1     1     1     1     1     1     1     1     1     1     1     2     1     1     1 
##   785   786   820   828   840   860   867   906   907   916   984  1002  1005  1025  1038  1074  1076  1100 
##     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1     1 
##  1150  1168  1200  1218  1333  1500  1505  1560  1582  1585  1587  1606  1750  1800  1976  1980  2000  2005 
##     2     2     3     1     1     1     1     1     1     1     1     1     1     1     1     1     2     1 
##  2027  2104  2200  2410  2500  2672  2716  2786  3000  3100  3900  3926  4000  5300  7000  7600 11400 15000 
##     1     1     1     1     1     1     1     1     2     1     1     1     2     1     1     1     1     1 
## 17000 30000  <NA> 
##     1     1     1

## [1] "Frequency table after encoding"
## bcs_s1q11. How many households are there in this barangay?  Ilan pong sambahayan ang mayroo
##          -998            97           105           118           122           126           148 
##             1             1             1             1             1             1             1 
##           150           152           161           178           195           198           200 
##             3             1             1             1             1             1             3 
##           202           204           211           212           216           225           228 
##             1             1             1             1             1             1             1 
##           230           242           251           252           260           264           266 
##             1             1             1             1             1             1             1 
##           280           284           286           288           291           300           303 
##             1             1             1             1             1             1             1 
##           312           315           320           324           326           330           340 
##             1             1             1             1             1             1             1 
##           355           357           367           370           380           392           398 
##             1             1             1             1             1             1             1 
##           400           403           404           418           420           434           440 
##             2             1             1             2             3             1             1 
##           449           450           487           489           496           499           500 
##             1             1             1             1             1             1             2 
##           510           520           525           528           543           550           580 
##             1             1             1             1             1             1             1 
##           586           594           600           610           612           658           660 
##             1             1             2             1             1             1             1 
##           664           678           687           690           697           700           710 
##             1             1             1             1             1             1             1 
##           717           719           750           754           765           769           785 
##             1             1             2             1             1             1             1 
##           786           820           828           840           860           867           906 
##             1             1             1             1             1             1             1 
##           907           916           984          1002          1005          1025          1038 
##             1             1             1             1             1             1             1 
##          1074          1076          1100          1150          1168          1200          1218 
##             1             1             1             2             2             3             1 
##          1333          1500          1505          1560          1582          1585          1587 
##             1             1             1             1             1             1             1 
##          1606          1750          1800          1976          1980          2000          2005 
##             1             1             1             1             1             2             1 
##          2027          2104          2200          2410          2500          2672          2716 
##             1             1             1             1             1             1             1 
##          2786          3000          3100          3900          3926          4000          5300 
##             1             2             1             1             1             2             1 
##          7000          7600         11400         15000         17000 19405 or more          <NA> 
##             1             1             1             1             1             1             1

# Top-code bcs_s1q12: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table for this variable lists -998 values,
# presumably a non-response code; they are not excluded from the percentile -
# confirm whether they should be.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q12) & bcs_s1q12 != 999999
  floor(quantile(bcs_s1q12[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q12",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q12. How many households in this Barangay are not engaged in agriculture.  Ilang samb
##  -998     1     2     5     8    10    15    17    20    21    27    30    32    33    34    39    40    43 
##     2     2     1     2     1     8     1     1     5     1     1     3     2     1     2     1     1     2 
##    45    48    50    56    57    58    60    61    63    66    71    75    79    80    89    90    99   100 
##     2     1    12     1     1     1     1     2     1     1     1     2     1     3     1     2     1     8 
##   101   103   115   116   130   137   143   150   153   158   160   175   194   200   210   230   236   240 
##     1     2     1     1     1     1     1     4     1     1     2     1     1     4     1     2     1     1 
##   243   261   271   298   300   317   350   364   375   400   405   500   544   580   600   640   700   800 
##     1     1     1     1     6     1     3     1     1     2     1     8     1     1     3     1     1     2 
##   864   900   999  1000  1100  1105  1400  1800  2004  2600  3000  3500  3800  6500  8000 16028  <NA> 
##     1     1     1     4     1     1     1     2     1     1     1     2     1     1     2     1     1

## [1] "Frequency table after encoding"
## bcs_s1q12. How many households in this Barangay are not engaged in agriculture.  Ilang samb
##         -998            1            2            5            8           10           15           17 
##            2            2            1            2            1            8            1            1 
##           20           21           27           30           32           33           34           39 
##            5            1            1            3            2            1            2            1 
##           40           43           45           48           50           56           57           58 
##            1            2            2            1           12            1            1            1 
##           60           61           63           66           71           75           79           80 
##            1            2            1            1            1            2            1            3 
##           89           90           99          100          101          103          115          116 
##            1            2            1            8            1            2            1            1 
##          130          137          143          150          153          158          160          175 
##            1            1            1            4            1            1            2            1 
##          194          200          210          230          236          240          243          261 
##            1            4            1            2            1            1            1            1 
##          271          298          300          317          350          364          375          400 
##            1            1            6            1            3            1            1            2 
##          405          500          544          580          600          640          700          800 
##            1            8            1            1            3            1            1            2 
##          864          900          999         1000         1100         1105         1400         1800 
##            1            1            1            4            1            1            1            2 
##         2004         2600         3000         3500         3800         6500         8000 9485 or more 
##            1            1            1            2            1            1            2            1 
##         <NA> 
##            1

# Top-code bcs_s1q13: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table for this variable lists a -998 value,
# presumably a non-response code; it is not excluded from the percentile -
# confirm whether it should be.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q13) & bcs_s1q13 != 999999
  floor(quantile(bcs_s1q13[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q13",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q13. How many households in this Barangay are considered poor?  Ilang sambahayan po s
##  -998     2    20    25    30    32    40    50    58    60    63    70    75    80    90    96    99   100 
##     1     1     6     2     3     1     3     3     1     3     1     1     1     2     2     1     1     7 
##   106   107   120   130   140   142   150   158   161   170   175   180   182   184   200   208   212   219 
##     1     1     1     1     1     1     8     1     1     1     1     1     1     1     6     1     1     1 
##   221   229   230   231   232   238   250   251   258   262   270   300   301   310   312   315   320   347 
##     1     1     1     1     1     1     4     1     1     1     1     5     1     1     1     1     3     1 
##   350   352   355   364   369   372   380   400   402   406   420   450   453   472   488   500   534   538 
##     5     1     1     1     1     1     2     7     1     1     1     4     1     1     1     3     1     1 
##   540   600   688   700   706   725   726   750   790   800   900   922   951  1000  1100  1336  1418  1500 
##     1     3     1     1     1     1     1     2     1     1     2     1     1     4     1     1     1     1 
##  1629  1782  3000  3500  6000 10000  <NA> 
##     1     1     3     1     1     1     1

## [1] "Frequency table after encoding"
## bcs_s1q13. How many households in this Barangay are considered poor?  Ilang sambahayan po s
##         -998            2           20           25           30           32           40           50 
##            1            1            6            2            3            1            3            3 
##           58           60           63           70           75           80           90           96 
##            1            3            1            1            1            2            2            1 
##           99          100          106          107          120          130          140          142 
##            1            7            1            1            1            1            1            1 
##          150          158          161          170          175          180          182          184 
##            8            1            1            1            1            1            1            1 
##          200          208          212          219          221          229          230          231 
##            6            1            1            1            1            1            1            1 
##          232          238          250          251          258          262          270          300 
##            1            1            4            1            1            1            1            5 
##          301          310          312          315          320          347          350          352 
##            1            1            1            1            3            1            5            1 
##          355          364          369          372          380          400          402          406 
##            1            1            1            1            2            7            1            1 
##          420          450          453          472          488          500          534          538 
##            1            4            1            1            1            3            1            1 
##          540          600          688          700          706          725          726          750 
##            1            3            1            1            1            1            1            2 
##          790          800          900          922          951         1000         1100         1336 
##            1            1            2            1            1            4            1            1 
##         1418         1500         1629         1782         3000         3500         6000 6740 or more 
##            1            1            1            1            3            1            1            1 
##         <NA> 
##            1

# Top-code bcs_s1q16: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q16) & bcs_s1q16 != 999999
  floor(quantile(bcs_s1q16[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q16",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q16. What percentage of households in this Barangay are connected to electric power? 
##    0    1    2    3    5   10   13   15   16   20   22   25   30   35   39   40   47   50   68   83   95   99 
##   28   14    8    8   38   24    1    1    1   15    2    3    6    1    2    3    1    1    1    1    1    1 
##  100 <NA> 
##    2    2

## [1] "Frequency table after encoding"
## bcs_s1q16. What percentage of households in this Barangay are connected to electric power? 
##           0           1           2           3           5          10          13          15          16 
##          28          14           8           8          38          24           1           1           1 
##          20          22          25          30          35          39          40          47          50 
##          15           2           3           6           1           2           3           1           1 
##          68          83          95          99 100 or more        <NA> 
##           1           1           1           1           2           2

# Top-code bcs_s1q17: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q17) & bcs_s1q17 != 999999
  floor(quantile(bcs_s1q17[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q17",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q17. What percentage of households in this Barangay own and use cellular phones?  Ila
##   10   30   50   60   70   80   85   90   93   95   98   99  100 <NA> 
##    1    1    3    3    2   16    4   30    1   19    5   16   63    1

## [1] "Frequency table after encoding"
## bcs_s1q17. What percentage of households in this Barangay own and use cellular phones?  Ila
##          10          30          50          60          70          80          85          90          93 
##           1           1           3           3           2          16           4          30           1 
##          95          98          99 100 or more        <NA> 
##          19           5          16          63           1

# Top-code bcs_s1q18: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table for this variable lists a -998 value,
# presumably a non-response code; it is not excluded from the percentile -
# confirm whether it should be.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_s1q18) & bcs_s1q18 != 999999
  floor(quantile(bcs_s1q18[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_s1q18",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_s1q18. What percentage of households in the Barangay can regularly access the internet?
## -998    0    1    2    3    5   10   15   20   23   25   30   38   40   45   50   59   60   70   75   80   90 
##    1    8    3    4    5   25   18    4   10    1    1   13    1    4    1   29    1    8    9    2   12    4 
## <NA> 
##    1

## [1] "Frequency table after encoding"
## bcs_s1q18. What percentage of households in the Barangay can regularly access the internet?
##       -998          0          1          2          3          5         10         15         20         23 
##          1          8          3          4          5         25         18          4         10          1 
##         25         30         38         40         45         50         59         60         70         75 
##          1         13          1          4          1         29          1          8          9          2 
##         80 90 or more       <NA> 
##         12          4          1

# Top-code bcs_ty_2: the cutoff is the 99.5th percentile (rounded down) of the
# values that are neither NA nor the 999999 missing code.
# NOTE(review): the frequency table for this variable lists a -998 value,
# presumably a non-response code; it is not excluded from the percentile -
# confirm whether it should be.
percentile_99.5 <- with(mydata, {
  is_valid <- !is.na(bcs_ty_2) & bcs_ty_2 != 999999
  floor(quantile(bcs_ty_2[is_valid], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_2. How many typhoons have directly impacted this Barangay in the last 12 months?  I
## -998    1    2    3    4 <NA> 
##    1   55   15   13    3   78

## [1] "Frequency table after encoding"
## bcs_ty_2. How many typhoons have directly impacted this Barangay in the last 12 months?  I
##      -998         1         2         3 4 or more      <NA> 
##         1        55        15        13         3        78

# Top-code bcs_ty_5 at the floored 99.5th percentile of its observed values.
# NAs and the 999999 missing code are left out of the percentile. Because
# quantile() interpolates between observations, the break point need not be
# a value that occurs in the data.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_5)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_5",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_5. How much would you estimate was the cost of this damage in total over the last 1
##   20000   25000   50000   80000   1e+05  150000   2e+05  250000   3e+05   5e+05   6e+05   7e+05  720000 
##       2       1       5       1       3       2       3       1       2       5       2       2       1 
##  750000   8e+05   9e+05   1e+06 1500000   2e+06 2500000   3e+06 3500000   4e+06   5e+06   1e+07   2e+07 
##       1       3       1       9       1       3       1      10       1       2      11       4       1 
##   5e+07   1e+08    <NA> 
##       2       1      84

## [1] "Frequency table after encoding"
## bcs_ty_5. How much would you estimate was the cost of this damage in total over the last 1
##            20000            25000            50000            80000            1e+05           150000 
##                2                1                5                1                3                2 
##            2e+05           250000            3e+05            5e+05            6e+05            7e+05 
##                3                1                2                5                2                2 
##           720000           750000            8e+05            9e+05            1e+06          1500000 
##                1                1                3                1                9                1 
##            2e+06          2500000            3e+06          3500000            4e+06            5e+06 
##                3                1               10                1                2               11 
##            1e+07            2e+07            5e+07 79999999 or more             <NA> 
##                4                1                2                1               84

# Top-code bcs_ty_7 at the floored 99.5th percentile of its observed values.
# NAs and the 999999 missing code are left out of the percentile.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_7)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_7",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_7. How many people have been killed as a result of typhoons in the last 12 months? 
##    1    5    6 <NA> 
##    2    1    1  161

## [1] "Frequency table after encoding"
## bcs_ty_7. How many people have been killed as a result of typhoons in the last 12 months? 
##         1 5 or more      <NA> 
##         2         2       161

# Top-code bcs_ty_10 at the floored 99.5th percentile of its observed values.
# NAs and the 999999 missing code are left out of the percentile.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_10)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_10",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_10. In total, for how many days was the barangay without power?  Sa kabuuan, ilang a
##    1    2    3    4    5    7   14   15   21   25   30   40   45   60   90 <NA> 
##    5    7    7    2    2   12    6    3    3    1   19    1    4   12    3   78

## [1] "Frequency table after encoding"
## bcs_ty_10. In total, for how many days was the barangay without power?  Sa kabuuan, ilang a
##          1          2          3          4          5          7         14         15         21         25 
##          5          7          7          2          2         12          6          3          3          1 
##         30         40         45         60 90 or more       <NA> 
##         19          1          4         12          3         78

# Top-code bcs_ty_12 at the floored 99.5th percentile of its observed values.
# NAs and the 999999 missing code are left out of the percentile. Because
# quantile() interpolates between observations, the break point need not be
# a value that occurs in the data.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$bcs_ty_12)
  floor(quantile(observed[observed != 999999], probs = 0.995))
})
mydata <- top_recode(
  variable = "bcs_ty_12",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## bcs_ty_12. In total, for how many days was transportation disrupted?  Sa kabuuan, ilang ara
##    1    2    3    4    5    7   14   21   30   60 <NA> 
##   18    8    2    4    2    8    1    1    2    1  118

## [1] "Frequency table after encoding"
## bcs_ty_12. In total, for how many days was transportation disrupted?  Sa kabuuan, ilang ara
##          1          2          3          4          5          7         14         21         30 53 or more 
##         18          8          2          4          2          8          1          1          2          1 
##       <NA> 
##        118

Indirect PII - Categorical: Recode, encode, or top/bottom code extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variables handed to capture_tables(), grouped by name prefix. Families that
# differ only by a number are generated with paste0(). The bcs_dev_* items
# come as (3, 5) pairs for suffixes 1 to 6, giving the order
# bcs_dev_3__1, bcs_dev_5__1, bcs_dev_3__2, bcs_dev_5__2, ...
indirect_PII <- c(
  "bcs_s0q21",
  paste0("bcs_s1q", c(1, 3, 4, 14, 15)),
  paste0("bcs_ty_", c(1, 3, 4, 6, 8, 9, 11)),
  paste0("bcs_vio_", c(5, 7, 9)),
  "bcs_qua_7",
  paste0("bcs_rec_", c(1, 4)),
  paste0("bcs_dev_", c(3, 5), "__", rep(1:6, each = 2))
)

capture_tables(indirect_PII)

# Recode those with very specific values.
#
# bcs_s1q1 (official role): keep "Barangay Captain" and "Counselor" and fold
# every other role into a single "Other" group.
#
# Only three break points are supplied so that every original code of 3 or
# above (including 99, "Other - specify") lands in one bin and therefore
# receives one recoded value. Giving each original code its own break point
# and repeating the label "Other" on several bins merges the groups by label
# only: judging by the crosstab the helper prints, each bin keeps its own
# numeric code, so the small Secretary / Treasurer / BHW / Tanod cells would
# remain separable in the released file.
# NOTE(review): re-knit the report after this change so the printed crosstab
# and value labels reflect the three-bin recode.
break_activity <- c(1, 2, 3)
labels_activity <- c("Barangay Captain" = 1,
                     "Counselor" = 2,
                     "Other" = 3)
mydata <- ordinal_recode(
  variable = "bcs_s1q1",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## bcs_s1q1. What is your official role in ${calc_barangay}?  Ano po ang inyong opisyal na p
##             Barangay Captain           Counselor /Kagawad                    Secretary 
##                           92                           36                           24 
##                    Treasurer Barangay Health Worker (BHW)               Barangay Tanod 
##                            6                            2                            2 
##              Other - specify                         <NA> 
##                            2                            1 
##     recoded
##      [1,2) [2,3) [3,4) [4,5) [5,6) [6,1e+06)
##   1     92     0     0     0     0         0
##   2      0    36     0     0     0         0
##   3      0     0    24     0     0         0
##   4      0     0     0     6     0         0
##   5      0     0     0     0     2         0
##   6      0     0     0     0     0         2
##   99     0     0     0     0     0         2
## [1] "Frequency table after encoding"
## bcs_s1q1. What is your official role in ${calc_barangay}?  Ano po ang inyong opisyal na p
## Barangay Captain        Counselor            Other             <NA> 
##               92               36               36                1 
## [1] "Inspect value labels and relabel as necessary"
## Barangay Captain        Counselor            Other            Other            Other            Other 
##                1                2                3                4                5                6
# bcs_s1q4 (previous official role): keep "Counselor" and label everything
# else "Other".
#
# Three break points are used so that all original codes of 3 or above
# (including 99, "Other - specify") share one bin and one recoded value,
# instead of four separate bins that merely carry the same "Other" label.
# NOTE(review): original code 1 (Barangay Captain, 3 respondents) sits below
# "Counselor", so it cannot share a bin with the codes above it when bins are
# contiguous ranges. It is labelled "Other" but presumably keeps its own
# numeric code -- merge it with the upper "Other" group by hand if that small
# cell must not be distinguishable.
# NOTE(review): re-knit the report after this change so the printed crosstab
# and value labels reflect the three-bin recode.
break_activity <- c(1, 2, 3)
labels_activity <- c("Other" = 1,
                     "Counselor" = 2,
                     "Other" = 3)
mydata <- ordinal_recode(
  variable = "bcs_s1q4",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## bcs_s1q4. What was this official role?  Ano po ang opisyal na posisyong ito?
##             Barangay Captain           Counselor /Kagawad                    Secretary 
##                            3                           65                            6 
##                    Treasurer Barangay Health Worker (BHW)               Barangay Tanod 
##                            2                            2                            3 
##              Other - specify                         <NA> 
##                            8                           76 
##     recoded
##      [1,2) [2,3) [3,4) [4,5) [5,6) [6,1e+06)
##   1      3     0     0     0     0         0
##   2      0    65     0     0     0         0
##   3      0     0     6     0     0         0
##   4      0     0     0     2     0         0
##   5      0     0     0     0     2         0
##   6      0     0     0     0     0         3
##   99     0     0     0     0     0         8
## [1] "Frequency table after encoding"
## bcs_s1q4. What was this official role?  Ano po ang opisyal na posisyong ito?
##     Other Counselor      <NA> 
##        24        65        76 
## [1] "Inspect value labels and relabel as necessary"
##     Other Counselor     Other     Other     Other     Other 
##         1         2         3         4         5         6
# bcs_s1q15 (main source of electric power): keep the two special codes
# (-999, -998) and the first two sources, and fold every source coded 3 or
# above into a single "Other" group.
#
# The last break point is 3, so all remaining sources fall into one bin and
# get one recoded value. Splitting them over two bins that are both labelled
# "Other" would merge them by label only while leaving two distinct numeric
# codes in the data.
# NOTE(review): re-knit the report after this change so the printed crosstab
# and value labels reflect the five-bin recode.
break_source <- c(-999, -998, 1, 2, 3)
labels_source <- c("Refused to answer" = 1,
                   "Don't know" = 2,
                   "Connected to Grid" = 3,
                   "Local Hydroelectric" = 4,
                   "Other" = 5)
mydata <- ordinal_recode(
  variable = "bcs_s1q15",
  break_points = break_source,
  missing = 999999,
  value_labels = labels_source
)

## [1] "Frequency table before encoding"
## bcs_s1q15. What is the main source of electric power?  Ano po ang pangunahing pinagkukunan 
## Connected to Grid        Generators              <NA> 
##               161                 2                 2 
##    recoded
##     [-999,-998) [-998,1) [1,2) [2,3) [3,4) [4,1e+06)
##   1           0        0   161     0     0         0
##   3           0        0     0     0     2         0
## [1] "Frequency table after encoding"
## bcs_s1q15. What is the main source of electric power?  Ano po ang pangunahing pinagkukunan 
## Connected to Grid             Other              <NA> 
##               161                 2                 2 
## [1] "Inspect value labels and relabel as necessary"
##   Refused to answer          Don't know   Connected to Grid Local Hydroelectric               Other 
##                   1                   2                   3                   4                   5 
##               Other 
##                   6

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here: 

# Free-text variables handed to report_open(). Families that differ only by
# a number are generated with paste0(). The bcs_dev_* items come as
# (2, 4, 6) triples for suffixes 1 to 6, giving the order
# bcs_dev_2__1, bcs_dev_4__1, bcs_dev_6__1, bcs_dev_2__2, ...
open_ends <- c(
  paste0("bcs_scq", c(2, 4)),
  "bcs_s1q1_other",
  "bcs_s1q4_other",
  paste0("bcs_political", 1:3, "other"),
  "bcs_s1q15other",
  "bcs_s1q19other",
  "bcs_calc_4",
  paste0("bcs_qua_", c(3:6, 8:9)),
  paste0("bcs_dev_", c(2, 4, 6), "__", rep(1:6, each = 3))
)

report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Responses are overwritten by row position in mydata, so the indices below
# are only valid for the dataset's current row order.

# "Other - specify" role descriptions are replaced with a generic value.
mydata$bcs_s1q1_other[c(56, 102)] <- "Other"
mydata$bcs_s1q4_other[c(5, 7, 8, 12, 98, 107, 131, 149)] <- "Other"

# Two responses keep their wording but have the place name masked.
mydata$bcs_political2other[c(151, 152)] <- c(
  "[small location redacted] is not covered by prov.council",
  "[small location redacted] is not covered buy prov.council"
)

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed dataset as "<filename>_PU" in Stata (.dta) and
# SPSS (.sav) formats.
haven::write_dta(mydata, path = sprintf("%s_PU.dta", filename))
haven::write_sav(mydata, path = sprintf("%s_PU.sav", filename))

# Build the report title from the dataset file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)