# Start from a clean workspace: remove every object in the current
# environment, including hidden (dot-prefixed) names.
# `all.names` has to be the logical TRUE: the bare symbol `t` is base R's
# transpose function, not a logical value, and `all=` only worked through
# partial argument matching.
rm(list = ls(all.names = TRUE))

Set up filenames

# Name of the section being processed.
# !!!Update filename
filename <- "Section_4"

# Script holding the helper functions; it is loaded with source() further down.
# !!!Update helper functions file
functions_vers <- "functions_1.8.R"

Set up data and functions, and create the dictionary for dataset review

# Run the helper-functions script named in `functions_vers`.
# NOTE(review): presumably this also loads `mydata` and writes the dictionary
# used below -- confirm against the helper file.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location (>100,000)
# Weight: weightVar
# Household ID:  hhId, 
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!No Direct PII

Direct PII-team: Encode field team names

# !!!No Direct PII-team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!No small locations

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Top-code high monetary amounts (s4q22 spending, s4q23 winnings) at the 99.5th percentile

# Top-code s4q22 at the 99.5th percentile (rounded down) of its observed
# values, i.e. ignoring NA and the 999999 missing-value code.
percentile_99.5 <- local({
  observed <- mydata$s4q22
  observed <- observed[!is.na(observed) & observed != 999999]
  floor(quantile(observed, probs = 0.995))
})
mydata <- top_recode(
  variable = "s4q22",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s4q22. In the past 12 months, how much did you or other members of your household spend
##     1     5     6     9    10    15    20    21    25    27    28    30    35    40    45    50    60    70 
##     2     6     2     1     2     2    33     1     1     1     1     6     1    18     1    31    11     3 
##    80   100   110   120   125   140   150   180   200   220   240   250   260   270   280   300   320   340 
##     2    45     1    14     1     2     4     7    26     1    19     3     1     1     1    23     1     1 
##   360   365   400   420   450   460   480   500   576   600   700   720   730   780   800   840   900   960 
##     3     1     4     1     1     1    14    36     1    14     2     5     1     1     1     1     3     8 
##  1000  1008  1056  1080  1095  1200  1440  1500  1560  1600  1650  1680  1800  1825  1920  2000  2080  2160 
##    24     1     1     2     1    14     4     3     2     3     1     1     7     1     3     3     1     1 
##  2400  2880  2980  3000  3360  3600  3640  3650  3840  4000  4200  4320  4800  4860  5000  5400  5760  6000 
##     8     2     1     3     2     6     1     1     1     2     1     2     2     1     2     3     1     1 
##  6480  6720  7200  7300 10000 10800 11000 12480 14400 21600 23520 24000 29120 36000 40000  <NA> 
##     1     2     3     1     3     2     1     1     1     2     1     2     1     1     1  1786

## [1] "Frequency table after encoding"
## s4q22. In the past 12 months, how much did you or other members of your household spend
##             1             5             6             9            10            15            20 
##             2             6             2             1             2             2            33 
##            21            25            27            28            30            35            40 
##             1             1             1             1             6             1            18 
##            45            50            60            70            80           100           110 
##             1            31            11             3             2            45             1 
##           120           125           140           150           180           200           220 
##            14             1             2             4             7            26             1 
##           240           250           260           270           280           300           320 
##            19             3             1             1             1            23             1 
##           340           360           365           400           420           450           460 
##             1             3             1             4             1             1             1 
##           480           500           576           600           700           720           730 
##            14            36             1            14             2             5             1 
##           780           800           840           900           960          1000          1008 
##             1             1             1             3             8            24             1 
##          1056          1080          1095          1200          1440          1500          1560 
##             1             2             1            14             4             3             2 
##          1600          1650          1680          1800          1825          1920          2000 
##             3             1             1             7             1             3             3 
##          2080          2160          2400          2880          2980          3000          3360 
##             1             1             8             2             1             3             2 
##          3600          3640          3650          3840          4000          4200          4320 
##             6             1             1             1             2             1             2 
##          4800          4860          5000          5400          5760          6000          6480 
##             2             1             2             3             1             1             1 
##          6720          7200          7300         10000         10800         11000         12480 
##             2             3             1             3             2             1             1 
##         14400         21600         23520         24000 26329 or more          <NA> 
##             1             2             1             2             3          1786

# Top-code s4q23 at the 99.5th percentile (rounded down) of its observed
# values, i.e. ignoring NA and the 999999 missing-value code.
percentile_99.5 <- local({
  observed <- mydata$s4q23
  observed <- observed[!is.na(observed) & observed != 999999]
  floor(quantile(observed, probs = 0.995))
})
mydata <- top_recode(
  variable = "s4q23",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s4q23. In the past 12 months, how much did you or other members of your household win f
##     0    10    30    35    40    50    60   100   150   180   195   200   250   280   300   360   400   500 
##   262     1     1     1     1     2     2     6     2     1     1     7     1     1     7     1     2     8 
##   600   700   750   800   860   900  1000  1050  1100  1200  1300  1320  1400  1500  1600  1700  1750  1800 
##     8     3     1     1     1     1    12     3     1    11     1     1     2    24     2     1     2     7 
##  1900  2000  2120  2200  2400  2450  2500  2800  3000  3200  3500  3800  4000  4200  4500  4800  5000  5200 
##     2     9     1     1     2     1     1     1    24     1     7     1     4     1     2     3     7     1 
##  5250  5400  5500  5600  6000  6400  7000  7200  7800  8000  9000  9450 10000 10750 11400 12000 13000 15000 
##     1     2     1     1    12     1     4     3     1     3     7     1     2     1     1     2     1     2 
## 16000 30000 64800 86000 1e+05  <NA> 
##     1     1     1     1     2  1786

## [1] "Frequency table after encoding"
## s4q23. In the past 12 months, how much did you or other members of your household win f
##             0            10            30            35            40            50            60 
##           262             1             1             1             1             2             2 
##           100           150           180           195           200           250           280 
##             6             2             1             1             7             1             1 
##           300           360           400           500           600           700           750 
##             7             1             2             8             8             3             1 
##           800           860           900          1000          1050          1100          1200 
##             1             1             1            12             3             1            11 
##          1300          1320          1400          1500          1600          1700          1750 
##             1             1             2            24             2             1             2 
##          1800          1900          2000          2120          2200          2400          2450 
##             7             2             9             1             1             2             1 
##          2500          2800          3000          3200          3500          3800          4000 
##             1             1            24             1             7             1             4 
##          4200          4500          4800          5000          5200          5250          5400 
##             1             2             3             7             1             1             2 
##          5500          5600          6000          6400          7000          7200          7800 
##             1             1            12             1             4             3             1 
##          8000          9000          9450         10000         10750         11400         12000 
##             3             7             1             2             1             1             2 
##         13000         15000         16000         30000         64800 74445 or more          <NA> 
##             1             2             1             1             1             3          1786

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variables flagged as indirect PII in this section. The whole vector is handed
# to capture_tables(), a helper defined outside this section (presumably it
# tabulates each variable for review -- confirm against the helper file).
indirect_PII <- c(
  "s4q1",
  "s4q3_1", "s4q3_2", "s4q3_3", "s4q3_4", "s4q3_5",
  "s4q4_1", "s4q4_2", "s4q4_3",
  "s4q5", "s4q6",
  "s4q13", "s4q14", "s4q15", "s4q16", "s4q17", "s4q18",
  "s4q24", "s4q25", "s4q26", "s4q27", "s4q28", "s4q29"
)

capture_tables(indirect_PII)


# Recode those with very specific values. 


# s4q1 (language spoken at home): each break point is the lower bound of a
# recode bin and the i-th label names the i-th bin. Codes 1, 2 and 4 keep their
# own category; code 3 and all codes from 5 upward share the label "Other".
break_language <- c(1, 2, 3, 4, 5)
labels_language <- setNames(
  c(1, 2, 3, 4, 5),
  c("Tagalog", "Bikol/Bicolano", "Other", "Ilocano", "Other")
)
mydata <- ordinal_recode(
  variable = "s4q1",
  break_points = break_language,
  missing = 999999,
  value_labels = labels_language
)

## [1] "Frequency table before encoding"
## s4q1. What language do you normally speak at home?  Ano ang wikang karaniwan mong sina
##                 1. Tagalog          2. Bikol/Bicolano             3. Kapampangan                 4. Ilocano 
##                        995                        650                          4                        455 
## 5. Masbate<f1>o/Masbatenon                 6. Cebuano                  7. Ibanag         8. Bisaya/Binisaya 
##                          1                          2                          3                          6 
##              9. Batangueno          10. Other Foreign                11. English                       <NA> 
##                          2                         13                          1                        164 
##     recoded
##      [1,2) [2,3) [3,4) [4,5) [5,1e+06)
##   1    995     0     0     0         0
##   2      0   650     0     0         0
##   3      0     0     4     0         0
##   4      0     0     0   455         0
##   5      0     0     0     0         1
##   6      0     0     0     0         2
##   7      0     0     0     0         3
##   8      0     0     0     0         6
##   9      0     0     0     0         2
##   10     0     0     0     0        13
##   11     0     0     0     0         1
## [1] "Frequency table after encoding"
## s4q1. What language do you normally speak at home?  Ano ang wikang karaniwan mong sina
##        Tagalog Bikol/Bicolano          Other        Ilocano           <NA> 
##            995            650             32            455            164 
## [1] "Inspect value labels and relabel as necessary"
##        Tagalog Bikol/Bicolano          Other        Ilocano          Other 
##              1              2              3              4              5
# s4q3_1 (household ethnicity, first response): break points are the lower
# bounds of the recode bins; the i-th label names the i-th bin. Codes 2, 3, 6
# and 11 keep their own category, every other bin is labelled "Other".
break_eth <- c(1, 2, 3, 4, 6, 7, 11, 12)
labels_eth <- setNames(
  as.numeric(1:8),
  c(
    "Other", "Bicolano", "Bisayan/Cebuano", "Other",
    "Ilocano", "Other", "Tagalog", "Other"
  )
)
mydata <- ordinal_recode(
  variable = "s4q3_1",
  break_points = break_eth,
  missing = 999999,
  value_labels = labels_eth
)

## [1] "Frequency table before encoding"
## s4q3_1. What is the ethnicity of this household?  Ano ang lahi ng kasambahay na ito? (#1
##         1. Aklanon        2. Bicolano 3. Bisayan/Cebuano       4. Chavacano      5. Hiligaynon 
##                  1                807                151                  1                  1 
##         6. Ilocano         7. Ilonggo         9. Maranao   10. Masbate<f1>o        11. Tagalog 
##                520                 26                  1                  6                596 
##         12. Tausug          13. Waray  15. Other Foreign               <NA> 
##                  1                 16                 14                155 
##     recoded
##      [1,2) [2,3) [3,4) [4,6) [6,7) [7,11) [11,12) [12,1e+06)
##   1      1     0     0     0     0      0       0          0
##   2      0   807     0     0     0      0       0          0
##   3      0     0   151     0     0      0       0          0
##   4      0     0     0     1     0      0       0          0
##   5      0     0     0     1     0      0       0          0
##   6      0     0     0     0   520      0       0          0
##   7      0     0     0     0     0     26       0          0
##   9      0     0     0     0     0      1       0          0
##   10     0     0     0     0     0      6       0          0
##   11     0     0     0     0     0      0     596          0
##   12     0     0     0     0     0      0       0          1
##   13     0     0     0     0     0      0       0         16
##   15     0     0     0     0     0      0       0         14
## [1] "Frequency table after encoding"
## s4q3_1. What is the ethnicity of this household?  Ano ang lahi ng kasambahay na ito? (#1
##           Other        Bicolano Bisayan/Cebuano         Ilocano         Tagalog            <NA> 
##              67             807             151             520             596             155 
## [1] "Inspect value labels and relabel as necessary"
##           Other        Bicolano Bisayan/Cebuano           Other         Ilocano           Other 
##               1               2               3               4               5               6 
##         Tagalog           Other 
##               7               8
# s4q3_2 (household ethnicity, second response): only codes 6 (Ilocano) and
# 11 (Tagalog) keep their own category; the bins below, between and above them
# are all labelled "Other". Overwrites the break/label vectors used for s4q3_1.
break_eth <- c(1, 6, 7, 11, 12)
labels_eth <- c(
  Other = 1,
  Ilocano = 2,
  Other = 3,
  Tagalog = 4,
  Other = 5
)
mydata <- ordinal_recode(
  variable = "s4q3_2",
  break_points = break_eth,
  missing = 999999,
  value_labels = labels_eth
)

## [1] "Frequency table before encoding"
## s4q3_2. What is the ethnicity of this household?  Ano ang lahi ng kasambahay na ito? (#2
##        2. Bicolano 3. Bisayan/Cebuano         6. Ilocano         7. Ilonggo       8. Kinaray-a 
##                  1                 29                 46                 13                  1 
##         9. Maranao   10. Masbate<f1>o        11. Tagalog          13. Waray  15. Other Foreign 
##                  1                  3                251                 51                  6 
##               <NA> 
##               1894 
##     recoded
##      [1,6) [6,7) [7,11) [11,12) [12,1e+06)
##   2      1     0      0       0          0
##   3     29     0      0       0          0
##   6      0    46      0       0          0
##   7      0     0     13       0          0
##   8      0     0      1       0          0
##   9      0     0      1       0          0
##   10     0     0      3       0          0
##   11     0     0      0     251          0
##   13     0     0      0       0         51
##   15     0     0      0       0          6
## [1] "Frequency table after encoding"
## s4q3_2. What is the ethnicity of this household?  Ano ang lahi ng kasambahay na ito? (#2
##   Other Ilocano Tagalog    <NA> 
##     105      46     251    1894 
## [1] "Inspect value labels and relabel as necessary"
##   Other Ilocano   Other Tagalog   Other 
##       1       2       3       4       5
# s4q4_1 (religion, first response): one bin per original code 1-7. Codes 2, 5
# and 6 share the label "Other Christian"; code 7 and above is labelled "Other".
break_religion <- as.numeric(1:7)
labels_religion <- setNames(
  as.numeric(1:7),
  c(
    "Roman Catholic", "Other Christian", "Aglipayan", "Iglesia ni Cristo",
    "Other Christian", "Other Christian", "Other"
  )
)
mydata <- ordinal_recode(
  variable = "s4q4_1",
  break_points = break_religion,
  missing = 999999,
  value_labels = labels_religion
)

## [1] "Frequency table before encoding"
## s4q4_1. What is your religion?  Ano ang iyong relihiyon? (#1/10)
##    1. Roman Catholic        2. Protestant         3. Aglipayan 4. Iglesia ni Cristo       5. Evangelical 
##                 1954                    4                   51                   68                    4 
##   6. Other Christian            7. Muslim                 <NA> 
##                  137                    3                   75 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,1e+06)
##   1  1954     0     0     0     0     0         0
##   2     0     4     0     0     0     0         0
##   3     0     0    51     0     0     0         0
##   4     0     0     0    68     0     0         0
##   5     0     0     0     0     4     0         0
##   6     0     0     0     0     0   137         0
##   7     0     0     0     0     0     0         3
## [1] "Frequency table after encoding"
## s4q4_1. What is your religion?  Ano ang iyong relihiyon? (#1/10)
##    Roman Catholic   Other Christian         Aglipayan Iglesia ni Cristo             Other              <NA> 
##              1954               145                51                68                 3                75 
## [1] "Inspect value labels and relabel as necessary"
##    Roman Catholic   Other Christian         Aglipayan Iglesia ni Cristo   Other Christian   Other Christian 
##                 1                 2                 3                 4                 5                 6 
##             Other 
##                 7
# s4q4_2 (religion, second response): one bin per original code 1-7, but codes
# 2 through 6 are all labelled "Other Christian"; code 7 and above is "Other".
# Overwrites the break/label vectors used for s4q4_1.
break_religion <- as.numeric(1:7)
labels_religion <- setNames(
  as.numeric(1:7),
  c("Roman Catholic", rep("Other Christian", 5), "Other")
)
mydata <- ordinal_recode(
  variable = "s4q4_2",
  break_points = break_religion,
  missing = 999999,
  value_labels = labels_religion
)

## [1] "Frequency table before encoding"
## s4q4_2. What is your religion?  Ano ang iyong relihiyon? (#2/10)
##         3. Aglipayan 4. Iglesia ni Cristo   6. Other Christian            7. Muslim                 <NA> 
##                    5                   13                   19                    2                 2257 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,1e+06)
##   3     0     0     5     0     0     0         0
##   4     0     0     0    13     0     0         0
##   6     0     0     0     0     0    19         0
##   7     0     0     0     0     0     0         2
## [1] "Frequency table after encoding"
## s4q4_2. What is your religion?  Ano ang iyong relihiyon? (#2/10)
## Other Christian           Other            <NA> 
##              37               2            2257 
## [1] "Inspect value labels and relabel as necessary"
##  Roman Catholic Other Christian Other Christian Other Christian Other Christian Other Christian 
##               1               2               3               4               5               6 
##           Other 
##               7
# s4q13 (main source of drinking water): codes 1-6 keep their own category,
# codes 7 and 8 fall into one bin labelled "Other", and code 9 and above becomes
# the eighth category, "Bottled Water".
break_water <- c(1, 2, 3, 4, 5, 6, 7, 9)
labels_water <- setNames(
  as.numeric(1:8),
  c(
    "Own Use Faucet, community water system",
    "Shared Faucet, community water system",
    "Own Use Tube or pipe Well",
    "Shared Tube or pipe well",
    "Dug Well",
    "Spring, River Stream",
    "Other",
    "Bottled Water"
  )
)
mydata <- ordinal_recode(
  variable = "s4q13",
  break_points = break_water,
  missing = 999999,
  value_labels = labels_water
)

## [1] "Frequency table before encoding"
## s4q13. What is the household's main source of drinking water?  Ano ang pangunahing pina
## 1. Own Use Faucet, community water system  2. Shared Faucet, community water system 
##                                       460                                       467 
##              3. Own Use Tube or pipe Well               4. Shared Tube or pipe well 
##                                       132                                       480 
##                               5. Dug Well                   6. Spring, River Stream 
##                                       255                                       252 
##                     7. Collected Rainfall               8. Peddler - rationed water 
##                                         1                                        29 
##                          9. Bottled Water                                      <NA> 
##                                       123                                        97 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,9) [9,1e+06)
##   1   460     0     0     0     0     0     0         0
##   2     0   467     0     0     0     0     0         0
##   3     0     0   132     0     0     0     0         0
##   4     0     0     0   480     0     0     0         0
##   5     0     0     0     0   255     0     0         0
##   6     0     0     0     0     0   252     0         0
##   7     0     0     0     0     0     0     1         0
##   8     0     0     0     0     0     0    29         0
##   9     0     0     0     0     0     0     0       123
## [1] "Frequency table after encoding"
## s4q13. What is the household's main source of drinking water?  Ano ang pangunahing pina
## Own Use Faucet, community water system  Shared Faucet, community water system 
##                                    460                                    467 
##              Own Use Tube or pipe Well               Shared Tube or pipe well 
##                                    132                                    480 
##                               Dug Well                   Spring, River Stream 
##                                    255                                    252 
##                                  Other                          Bottled Water 
##                                     30                                    123 
##                                   <NA> 
##                                     97 
## [1] "Inspect value labels and relabel as necessary"
## Own Use Faucet, community water system  Shared Faucet, community water system 
##                                      1                                      2 
##              Own Use Tube or pipe Well               Shared Tube or pipe well 
##                                      3                                      4 
##                               Dug Well                   Spring, River Stream 
##                                      5                                      6 
##                                  Other                          Bottled Water 
##                                      7                                      8
# s4q14 (main cooking fuel): codes 2 (LPG), 7 (Charcoal) and 8 (Wood) keep
# their own category; code 1, codes 3-6 and code 9 and above are labelled
# "Other".
break_fuel <- c(1, 2, 3, 7, 8, 9)
labels_fuel <- c(
  Other = 1,
  LPG = 2,
  Other = 3,
  Charcoal = 4,
  Wood = 5,
  Other = 6
)
mydata <- ordinal_recode(
  variable = "s4q14",
  break_points = break_fuel,
  missing = 999999,
  value_labels = labels_fuel
)

## [1] "Frequency table before encoding"
## s4q14. What type of fuel does your household mainly use for cooking?  Anoang mga uri ng
##          1. Electricity                  2. LPG          3. Natural Gas               4. Biogas 
##                      23                     270                       2                       2 
##             5. Kerosene        6. Coal, Lignite             7. Charcoal                 8. Wood 
##                      12                       3                     407                    1561 
## 9. Straw, Shrubs, Grass                    <NA> 
##                       9                       7 
##    recoded
##     [1,2) [2,3) [3,7) [7,8) [8,9) [9,1e+06)
##   1    23     0     0     0     0         0
##   2     0   270     0     0     0         0
##   3     0     0     2     0     0         0
##   4     0     0     2     0     0         0
##   5     0     0    12     0     0         0
##   6     0     0     3     0     0         0
##   7     0     0     0   407     0         0
##   8     0     0     0     0  1561         0
##   9     0     0     0     0     0         9
## [1] "Frequency table after encoding"
## s4q14. What type of fuel does your household mainly use for cooking?  Anoang mga uri ng
##    Other      LPG Charcoal     Wood     <NA> 
##       51      270      407     1561        7 
## [1] "Inspect value labels and relabel as necessary"
##    Other      LPG    Other Charcoal     Wood    Other 
##        1        2        3        4        5        6
# s4q15 (outer-wall construction material): one bin per original code 1-6.
# Codes 1-5 keep their labels; code 6 and above is labelled "Other".
break_material <- as.numeric(1:6)
labels_material <- setNames(
  as.numeric(1:6),
  c(
    "Strong Materials (Tile, Concrete, Brick, Stone, Wood, Plywood)",
    "Light Materials (Cogon, Nipa, Anahaw, Bamboo)",
    "Salvaged or Make Shift Materials",
    "Mixed, predominantly strong",
    "Mixed, predominantly light",
    "Other"
  )
)
mydata <- ordinal_recode(
  variable = "s4q15",
  break_points = break_material,
  missing = 999999,
  value_labels = labels_material
)

## [1] "Frequency table before encoding"
## s4q15. What type of construction materials are the outer walls made of?  Anong uri ng m
## 1. Strong Materials (Tile, Concrete, Brick, Stone, Wood, Plywood) 
##                                                               837 
##                  2. Light Materials (Cogon, Nipa, Anahaw, Bamboo) 
##                                                               657 
##                               3. Salvaged or Make Shift Materials 
##                                                                64 
##                                    4. Mixed, predominantly strong 
##                                                               418 
##                                     5. Mixed, predominantly light 
##                                                               300 
##                                  6. Mixed, predominantly salvaged 
##                                                                19 
##                                                              <NA> 
##                                                                 1 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,6) [6,1e+06)
##   1   837     0     0     0     0         0
##   2     0   657     0     0     0         0
##   3     0     0    64     0     0         0
##   4     0     0     0   418     0         0
##   5     0     0     0     0   300         0
##   6     0     0     0     0     0        19
## [1] "Frequency table after encoding"
## s4q15. What type of construction materials are the outer walls made of?  Anong uri ng m
## Strong Materials (Tile, Concrete, Brick, Stone, Wood, Plywood) 
##                                                            837 
##                  Light Materials (Cogon, Nipa, Anahaw, Bamboo) 
##                                                            657 
##                               Salvaged or Make Shift Materials 
##                                                             64 
##                                    Mixed, predominantly strong 
##                                                            418 
##                                     Mixed, predominantly light 
##                                                            300 
##                                                          Other 
##                                                             19 
##                                                           <NA> 
##                                                              1 
## [1] "Inspect value labels and relabel as necessary"
## Strong Materials (Tile, Concrete, Brick, Stone, Wood, Plywood) 
##                                                              1 
##                  Light Materials (Cogon, Nipa, Anahaw, Bamboo) 
##                                                              2 
##                               Salvaged or Make Shift Materials 
##                                                              3 
##                                    Mixed, predominantly strong 
##                                                              4 
##                                     Mixed, predominantly light 
##                                                              5 
##                                                          Other 
##                                                              6
# s4q16 (tenure status of the property): one bin per original code 1-7.
# Codes 1-6 keep their labels; code 7 and above is labelled "Other".
break_status <- as.numeric(1:7)
labels_status <- setNames(
  as.numeric(1:7),
  c(
    "Own House and Lot",
    "Rent house or room including lot",
    "Own house but rented lot",
    "Own house, rent-free lot with consent of owner",
    "Own house, rent-free lot without known consent of owner",
    "Rent-free house and lot with consent of owner",
    "Other"
  )
)
mydata <- ordinal_recode(
  variable = "s4q16",
  break_points = break_status,
  missing = 999999,
  value_labels = labels_status
)

## [1] "Frequency table before encoding"
## s4q16. What is the tenure status of the property occupied by the household?  Ano ang ko
##                                       1. Own House and Lot 
##                                                        717 
##                        2. Rent house or room including lot 
##                                                         50 
##                                3. Own house but rented lot 
##                                                         82 
##          4. Own house, rent-free lot with consent of owner 
##                                                       1051 
## 5. Own house, rent-free lot without known consent of owner 
##                                                         75 
##           6. Rent-free house and lot with consent of owner 
##                                                        302 
##        7. Rent-free house and lot without consent of owner 
##                                                          8 
##                                                       <NA> 
##                                                         11 
##    recoded
##     [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,1e+06)
##   1   717     0     0     0     0     0         0
##   2     0    50     0     0     0     0         0
##   3     0     0    82     0     0     0         0
##   4     0     0     0  1051     0     0         0
##   5     0     0     0     0    75     0         0
##   6     0     0     0     0     0   302         0
##   7     0     0     0     0     0     0         8
## [1] "Frequency table after encoding"
## s4q16. What is the tenure status of the property occupied by the household?  Ano ang ko
##                                       Own House and Lot 
##                                                     717 
##                        Rent house or room including lot 
##                                                      50 
##                                Own house but rented lot 
##                                                      82 
##          Own house, rent-free lot with consent of owner 
##                                                    1051 
## Own house, rent-free lot without known consent of owner 
##                                                      75 
##           Rent-free house and lot with consent of owner 
##                                                     302 
##                                                   Other 
##                                                       8 
##                                                    <NA> 
##                                                      11 
## [1] "Inspect value labels and relabel as necessary"
##                                       Own House and Lot 
##                                                       1 
##                        Rent house or room including lot 
##                                                       2 
##                                Own house but rented lot 
##                                                       3 
##          Own house, rent-free lot with consent of owner 
##                                                       4 
## Own house, rent-free lot without known consent of owner 
##                                                       5 
##           Rent-free house and lot with consent of owner 
##                                                       6 
##                                                   Other 
##                                                       7

Matching and crosstabulations: Run automated PII check

# !!! Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here: 
# Names of the open-ended (free-text) variables to be reviewed.
# The order of the entries is kept exactly as originally listed.
open_ends <- c(
  # Questions with both an `other` and a `whynoresponse` text field
  "s4q1other", "s4q1whynoresponse",
  "s4q3other", "s4q3whynoresponse",
  "s4q4other", "s4q4whynoresponse",
  "s4q5_other", "s4q5whynoresponse",
  # Questions with a `whynoresponse` text field only
  "s4q6whynoresponse", "s4q7whynoresponse", "s4q9whynoresponse",
  "s4q10whynoresponse", "s4q11whynoresponse", "s4q12whynoresponse",
  # Questions with both an `other` and a `whynoresponse` text field
  "s4q13other", "s4q13whynoresponse",
  "s4q14other", "s4q14whynoresponse",
  # Questions with a `whynoresponse` text field only
  "s4q15whynoresponse", "s4q16whynoresponse", "s4q17whynoresponse",
  "s4q18whynoresponse", "s4q19whynoresponse", "s4q20whynoresponse",
  "s4q21whynoresponse", "s4q22whynoresponse", "s4q23whynoresponse",
  "s4q24whynoresponse", "s4q25whynoresponse", "s4q26whynoresponse",
  "s4q27whynoresponse", "s4q28whynoresponse", "s4q29whynoresponse"
)

# Pass the open-end variable names to the helper that reports their responses.
# NOTE(review): presumably this is what produces "verbatims.csv" -- confirm
# against the helper functions file.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify the variables to be deleted or redacted, along with the row numbers of the affected responses.


# Redact the flagged s4q1 verbatims by overwriting them with "Other".
# The indices are positional row numbers in `mydata` (as identified during the
# "verbatims.csv" review), so they are only valid while the data keep that row
# order. A single vectorised assignment replaces the former one-line-per-row
# assignments; the set of rows (120) is unchanged.
mydata$s4q1other[c(
  58, 62, 63, 64, 67, 70, 73, 75, 76, 79,
  85, 92, 94, 98, 128, 130, 134, 135, 137, 139,
  140, 142, 143, 146, 147, 154, 162, 167, 171, 180,
  184, 189, 193, 198, 200, 201, 203, 204, 208, 209,
  210, 211, 212, 213, 215, 216, 218, 219, 222, 243,
  246, 247, 248, 251, 253, 255, 259, 260, 261, 265,
  266, 273, 276, 278, 323, 325, 326, 327, 330, 332,
  333, 334, 335, 336, 338, 339, 340, 341, 342, 344,
  345, 347, 348, 349, 350, 357, 366, 368, 369, 370,
  371, 372, 377, 388, 395, 396, 399, 400, 403, 456,
  632, 633, 638, 640, 641, 642, 687, 688, 689, 694,
  699, 703, 705, 706, 707, 708, 709, 712, 713, 1456
)] <- "Other"

# Single flagged no-response explanation for s4q1.
mydata$s4q1whynoresponse[697] <- "Other"

# Redact the flagged s4q3 verbatims by overwriting them with "Other".
# The indices are positional row numbers in `mydata` (as identified during the
# "verbatims.csv" review), so they are only valid while the data keep that row
# order. A single vectorised assignment per variable replaces the former
# one-line-per-row assignments; the set of rows (151 and 3) is unchanged.
mydata$s4q3other[c(
  64, 70, 71, 73, 75, 76, 94, 98, 127, 128,
  129, 130, 133, 134, 141, 142, 143, 144, 146, 147,
  148, 149, 154, 157, 163, 166, 167, 168, 170, 171,
  172, 173, 174, 175, 176, 177, 178, 180, 181, 186,
  187, 189, 190, 192, 193, 196, 200, 201, 203, 206,
  209, 211, 212, 213, 215, 216, 217, 218, 219, 222,
  224, 240, 242, 243, 245, 246, 248, 252, 253, 259,
  260, 261, 266, 276, 325, 327, 329, 330, 332, 333,
  334, 336, 338, 340, 341, 342, 344, 345, 347, 349,
  350, 361, 366, 369, 370, 372, 373, 374, 375, 376,
  377, 388, 395, 399, 400, 403, 452, 632, 633, 638,
  640, 641, 642, 673, 688, 695, 699, 703, 705, 706,
  707, 708, 709, 712, 779, 826, 841, 948, 1015, 1039,
  1053, 1054, 1077, 1083, 1084, 1088, 1096, 1100, 1101, 1107,
  1111, 1112, 1114, 1118, 1119, 1120, 1456, 1471, 1479, 1495,
  1500
)] <- "Other"

# Flagged no-response explanations for s4q3.
mydata$s4q3whynoresponse[c(1087, 1089, 1211)] <- "Other"

# Redact the flagged s4q4 verbatims by overwriting them with "Other".
# The indices are positional row numbers in `mydata` (as identified during the
# "verbatims.csv" review), so they are only valid while the data keep that row
# order. A single vectorised assignment per variable replaces the former
# one-line-per-row assignments; the set of rows (71 and 2) is unchanged.
mydata$s4q4other[c(
  10, 28, 30, 69, 71, 72, 85, 88, 89, 96,
  97, 98, 117, 125, 199, 236, 382, 387, 450, 508,
  511, 540, 544, 545, 549, 558, 560, 563, 564, 567,
  576, 585, 592, 600, 606, 612, 613, 620, 625, 632,
  634, 635, 640, 644, 645, 647, 648, 649, 668, 701,
  707, 712, 719, 726, 815, 849, 929, 932, 946, 981,
  995, 1002, 1034, 1152, 1222, 1260, 1348, 1352, 2071, 2091,
  2258
)] <- "Other"

# Flagged no-response explanations for s4q4.
mydata$s4q4whynoresponse[c(87, 657)] <- "Other"

# Redact the flagged s4q5 verbatims by overwriting them with "Other".
# The indices are positional row numbers in `mydata` (as identified during the
# "verbatims.csv" review), so they are only valid while the data keep that row
# order. A single vectorised assignment replaces the former one-line-per-row
# assignments; the set of rows (59) is unchanged.
mydata$s4q5_other[c(
  5, 29, 34, 43, 50, 73, 86, 178, 225, 316,
  402, 537, 582, 593, 709, 821, 843, 844, 849, 883,
  887, 889, 911, 913, 914, 1018, 1061, 1063, 1065, 1070,
  1071, 1073, 1076, 1083, 1094, 1095, 1100, 1102, 1104, 1116,
  1126, 1153, 1162, 1183, 1217, 1232, 1294, 1300, 1326, 1590,
  1735, 1745, 1806, 1811, 1884, 1983, 2059, 2105, 2190
)] <- "Other"

# Single flagged no-response explanation for s4q5.
mydata$s4q5whynoresponse[1110] <- "Other"

# Overwrite the flagged no-response explanations for s4q7, s4q9 and s4q10 with
# the placeholder "[time redacted]". Indices are positional row numbers in
# `mydata`.
mydata$s4q7whynoresponse[1103] <- "[time redacted]"
mydata$s4q9whynoresponse[1103] <- "[time redacted]"
mydata$s4q10whynoresponse[c(925, 1124)] <- "[time redacted]"


# Redact the flagged s4q13 and s4q14 verbatims. Indices are positional row
# numbers in `mydata`, so they are only valid while the data keep the row order
# used when the rows were identified. Vectorised assignments replace the former
# one-line-per-row assignments; rows and replacement values are unchanged.
mydata$s4q13other[c(716, 937, 943, 1042, 1045, 1521, 1979)] <- "Other"

# NOTE(review): "Tagalo" is kept exactly as originally written; it may be a
# truncation of "Tagalog" -- confirm the intended replacement value.
mydata$s4q13other[c(1377, 1420)] <- "Tagalo"

mydata$s4q14other[c(330, 1398, 1492, 1897, 2090, 2093)] <- "Other"

# Redact the flagged no-response explanations for s4q16 through s4q23.
# Indices are positional row numbers in `mydata`. Entries replaced by "Other"
# are dropped entirely; the remaining entries keep the response text with the
# sensitive parts replaced by bracketed placeholders.
mydata$s4q16whynoresponse[c(1320, 1823)] <- "Other"
mydata$s4q18whynoresponse[c(1060, 1842)] <- "Other"

mydata$s4q20whynoresponse[1080] <-
  "[name] together with her husband decide on those large or frequent purchases."

# Each index is paired with the replacement text at the same position.
mydata$s4q22whynoresponse[c(1230, 1257)] <- c(
  "Other",
  "Her husband are [work] and everyday he had only an income of [amount] pesos a day."
)

mydata$s4q23whynoresponse[c(776, 1230)] <- c(
  "She does not know because her son [situation]",
  "Other"
)

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed data to "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS) in the working directory.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Report title, built from the section filename.
title_var <- paste0("DOL-ILAB SDC - ", filename)