# Start from an empty workspace so objects left over from an earlier session
# cannot leak into this review. The original passed `all=t`, i.e. the base
# function `t` rather than the logical TRUE, through a partially matched
# argument name; spell out `all.names = TRUE` so dot-prefixed objects are
# cleared as well.
rm(list = ls(all.names = TRUE))

# Set up filenames ----

# Base name of the dataset under review; it is reused further down for the
# "_PU" export files and for the report title.
# !!! Update filename
filename <- "Section_3"

# Script holding the helper functions; it is loaded with source() below.
# !!! Update helper functions file
functions_vers <- "functions_1.8.R"

# Set up data and functions, and create the dictionary for dataset review ----

# Run the helper script named in `functions_vers`.
# NOTE(review): presumably this also loads `mydata` and the helpers used
# below (top_recode, report_open) -- confirm against the helper file.
source(file = functions_vers)

# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

# Direct PII: Variables to be removed ----

# !!!No Direct PII

# Direct PII-team: Encode field team names ----

# !!!No Direct PII-team

# Small locations: Encode locations with population <100,000 using random large numbers ----

# !!!No small locations

# Indirect PII - Ordinal: Global recode or top/bottom coding for extreme values ----

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Top code high income to the 99.5 percentile

# Break point for s3q2: the 99.5th percentile, rounded down, of the values
# that are neither NA nor the 999999 code passed to top_recode() as `missing`.
# local() keeps the intermediate vectors out of the workspace.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s3q2)
  valid <- observed[observed != 999999]
  floor(quantile(valid, probs = c(0.995)))
})
mydata <- top_recode(
  variable = "s3q2",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q2. What was the total value of these monetary gifts?   Magkano ang kabuuang halaga 
##     10     20     30     40     50     70     80    100    120    130    150    180    200    240    250 
##      1      6      2      3     25      2      1     62      1      1     18      2     60      1      7 
##    280    300    350    400    500    530    550    600    650    660    700    800    900   1000   1200 
##      1     39      1      9    105      1      1      6      1      1      9      9      2     75      5 
##   1250   1300   1400   1450   1500   1600   1800   2000   2100   2200   2500   2800   3000   3500   4000 
##      1      1      1      1     18      2      1     36      1      1      3      1     25      2      3 
##   5000   5300   5500   6000   6500   6850   7000   7500   8000   8200   9500  10000  11000  12000  15000 
##     11      1      1      3      1      1      1      1      3      1      1      6      2      2      2 
##  16000  27000  30000  52000  60000  70000 122244   <NA> 
##      1      1      1      1      1      1      1   1698

## [1] "Frequency table after encoding"
## s3q2. What was the total value of these monetary gifts?   Magkano ang kabuuang halaga 
##            10            20            30            40            50            70            80 
##             1             6             2             3            25             2             1 
##           100           120           130           150           180           200           240 
##            62             1             1            18             2            60             1 
##           250           280           300           350           400           500           530 
##             7             1            39             1             9           105             1 
##           550           600           650           660           700           800           900 
##             1             6             1             1             9             9             2 
##          1000          1200          1250          1300          1400          1450          1500 
##            75             5             1             1             1             1            18 
##          1600          1800          2000          2100          2200          2500          2800 
##             2             1            36             1             1             3             1 
##          3000          3500          4000          5000          5300          5500          6000 
##            25             2             3            11             1             1             3 
##          6500          6850          7000          7500          8000          8200          9500 
##             1             1             1             1             3             1             1 
##         10000         11000         12000         15000         16000         27000         30000 
##             6             2             2             2             1             1             1 
##         52000 52119 or more          <NA> 
##             1             3          1698

# Break point for s3q4: the 99.5th percentile, rounded down, of the values
# that are neither NA nor the 999999 code passed to top_recode() as `missing`.
# local() keeps the intermediate vectors out of the workspace.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s3q4)
  valid <- observed[observed != 999999]
  floor(quantile(valid, probs = c(0.995)))
})
mydata <- top_recode(
  variable = "s3q4",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q4. What was the total value of these non-monetary gifts?   Magkano ang kabuuang hal
##    20    30    35    37    40    45    48    50    60    70    75    80    85   100   110   120   130   150 
##     6     2     2     1     4     1     1    31     7     4     3     5     2   108     1     3     1    31 
##   158   160   165   167   170   175   180   200   220   230   240   250   300   308   330   350   374   400 
##     1     1     1     1     1     1     2    59     2     1     4    12    40     1     1     6     1     7 
##   450   500   530   550   600   680   700   800   900   950  1000  1100  1200  1300  1500  1700  2000  2400 
##     2    58     1     1     6     1     5     4     4     1    30     1     2     2     6     1     9     1 
##  2500  3000  3500  3600 10000  <NA> 
##     2     6     1     1     1  1795

## [1] "Frequency table after encoding"
## s3q4. What was the total value of these non-monetary gifts?   Magkano ang kabuuang hal
##           20           30           35           37           40           45           48           50 
##            6            2            2            1            4            1            1           31 
##           60           70           75           80           85          100          110          120 
##            7            4            3            5            2          108            1            3 
##          130          150          158          160          165          167          170          175 
##            1           31            1            1            1            1            1            1 
##          180          200          220          230          240          250          300          308 
##            2           59            2            1            4           12           40            1 
##          330          350          374          400          450          500          530          550 
##            1            6            1            7            2           58            1            1 
##          600          680          700          800          900          950         1000         1100 
##            6            1            5            4            4            1           30            1 
##         1200         1300         1500         1700         2000         2400         2500         3000 
##            2            2            6            1            9            1            2            6 
## 3250 or more         <NA> 
##            3         1795

# Break point for s3q6: the 99.5th percentile, rounded down, of the values
# that are neither NA nor the 999999 code passed to top_recode() as `missing`.
# local() keeps the intermediate vectors out of the workspace.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s3q6)
  valid <- observed[observed != 999999]
  floor(quantile(valid, probs = c(0.995)))
})
mydata <- top_recode(
  variable = "s3q6",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q6. What was the total value of these monetary gifts?   Magkano ang kabuuang halaga 
##     20     40     50     60     70     90     96    100    130    140    150    160    170    180    200 
##      2      2      5      3      1      2      1     32      1      1     10      1      2      1     47 
##    220    250    280    300    340    350    400    440    500    508    520    550    570    580    600 
##      1      4      1     30      1      7     14      1    145      1      1      1      1      1     16 
##    700    750    772    800    900   1000   1050   1100   1200   1300   1350   1400   1500   1600   1650 
##     16      2      1     13      5    157      2      4     13      2      1      2     58      5      1 
##   1700   1750   1800   2000   2030   2140   2200   2300   2380   2400   2500   2600   2800   2900   3000 
##      5      1      3     88      1      1      3      2      1      1     21      1      5      1     55 
##   3050   3100   3200   3400   3500   3600   3900   4000   4100   4300   4500   4800   5000   5200   5500 
##      1      1      4      2      5      1      1     19      1      2      4      1     46      1      2 
##   6000   6500   6700   7000   7200   7500   8000   8400   8700   8900   9000   9500   9600  10000  10200 
##     25      2      1      2      1      1      4      1      1      1      3      1      2     23      1 
##  10500  10800  11000  11100  12000  12360  12500  12800  14000  14400  14500  15000  15900  16000  17000 
##      3      1      1      1     28      1      1      2      1      1      1     12      1      1      1 
##  17500  18000  18400  19000  20000  21500  22000  24000  25000  28500  28800  30000  32000  33000  34000 
##      1      7      1      2      5      1      1     12      2      1      1      7      2      1      1 
##  35000  35800  36000  36500  37500  39000  40000  44000  45000  48000  48200  49500  50000  50800  51000 
##      1      1      3      1      1      1      1      1      2      4      1      1      5      1      1 
##  52200  56000  57600  59000  60000  60200  68000  72000  72500  73000  73500  77000  78000  84000  88000 
##      1      2      1      1      5      1      1      1      1      1      1      1      2      5      1 
##  90000  96000 108000 120000 126000 140000 144000 147000 156000 168000 180000 216000 240000   <NA> 
##      1      5      1      3      1      1      2      1      1      1      1      1      1   1158

## [1] "Frequency table after encoding"
## s3q6. What was the total value of these monetary gifts?   Magkano ang kabuuang halaga 
##             20             40             50             60             70             90             96 
##              2              2              5              3              1              2              1 
##            100            130            140            150            160            170            180 
##             32              1              1             10              1              2              1 
##            200            220            250            280            300            340            350 
##             47              1              4              1             30              1              7 
##            400            440            500            508            520            550            570 
##             14              1            145              1              1              1              1 
##            580            600            700            750            772            800            900 
##              1             16             16              2              1             13              5 
##           1000           1050           1100           1200           1300           1350           1400 
##            157              2              4             13              2              1              2 
##           1500           1600           1650           1700           1750           1800           2000 
##             58              5              1              5              1              3             88 
##           2030           2140           2200           2300           2380           2400           2500 
##              1              1              3              2              1              1             21 
##           2600           2800           2900           3000           3050           3100           3200 
##              1              5              1             55              1              1              4 
##           3400           3500           3600           3900           4000           4100           4300 
##              2              5              1              1             19              1              2 
##           4500           4800           5000           5200           5500           6000           6500 
##              4              1             46              1              2             25              2 
##           6700           7000           7200           7500           8000           8400           8700 
##              1              2              1              1              4              1              1 
##           8900           9000           9500           9600          10000          10200          10500 
##              1              3              1              2             23              1              3 
##          10800          11000          11100          12000          12360          12500          12800 
##              1              1              1             28              1              1              2 
##          14000          14400          14500          15000          15900          16000          17000 
##              1              1              1             12              1              1              1 
##          17500          18000          18400          19000          20000          21500          22000 
##              1              7              1              2              5              1              1 
##          24000          25000          28500          28800          30000          32000          33000 
##             12              2              1              1              7              2              1 
##          34000          35000          35800          36000          36500          37500          39000 
##              1              1              1              3              1              1              1 
##          40000          44000          45000          48000          48200          49500          50000 
##              1              1              2              4              1              1              5 
##          50800          51000          52200          56000          57600          59000          60000 
##              1              1              1              2              1              1              5 
##          60200          68000          72000          72500          73000          73500          77000 
##              1              1              1              1              1              1              1 
##          78000          84000          88000          90000          96000         108000         120000 
##              2              5              1              1              5              1              3 
##         126000         140000         144000 144945 or more           <NA> 
##              1              1              2              6           1158

# Break point for s3q8: the 99.5th percentile, rounded down, of the values
# that are neither NA nor the 999999 code passed to top_recode() as `missing`.
# local() keeps the intermediate vectors out of the workspace.
percentile_99.5 <- local({
  observed <- na.exclude(mydata$s3q8)
  valid <- observed[observed != 999999]
  floor(quantile(valid, probs = c(0.995)))
})
mydata <- top_recode(
  variable = "s3q8",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## s3q8. What was the total value of these non-monetary gifts?   Magkano ang kabuuang hal
##     1    10    20    22    25    30    50    53    54    60    65    70    75    80    90    99   100   108 
##     1     1     1     1     1     3    24     1     1     3     1     2     5     3     1     1   100     1 
##   120   130   150   151   160   180   194   200   210   250   280   300   350   360   380   400   450   480 
##    10     3    61     1     1     1     1    97     2    12     1    84     9     1     1    19     3     1 
##   500   525   550   600   650   700   710   750   800   900  1000  1050  1150  1200  1300  1350  1354  1400 
##   120     1     3    12     3    15     1     2    11     1    63     1     2     6     1     1     1     1 
##  1500  1700  2000  2400  2500  3000  3300  4000  4500  4800  5000  5200  6000  7000 10000 15000 19000  <NA> 
##    27     2    26     2     4    14     1     3     1     1    14     1     1     1     8     1     1  1484

## [1] "Frequency table after encoding"
## s3q8. What was the total value of these non-monetary gifts?   Magkano ang kabuuang hal
##             1            10            20            22            25            30            50 
##             1             1             1             1             1             3            24 
##            53            54            60            65            70            75            80 
##             1             1             3             1             2             5             3 
##            90            99           100           108           120           130           150 
##             1             1           100             1            10             3            61 
##           151           160           180           194           200           210           250 
##             1             1             1             1            97             2            12 
##           280           300           350           360           380           400           450 
##             1            84             9             1             1            19             3 
##           480           500           525           550           600           650           700 
##             1           120             1             3            12             3            15 
##           710           750           800           900          1000          1050          1150 
##             1             2            11             1            63             1             2 
##          1200          1300          1350          1354          1400          1500          1700 
##             6             1             1             1             1            27             2 
##          2000          2400          2500          3000          3300          4000          4500 
##            26             2             4            14             1             3             1 
##          4800          5000          5200          6000          7000 10000 or more          <NA> 
##             1            14             1             1             1            10          1484

# Indirect PII - Categorical: Recode, encode, or top/bottom coding for extreme values ----

# !!!No Indirect PII - Categorical 

# Matching and crosstabulations: Run automated PII check ----

# !!!Insufficient demographic data

# Open-ends: Review responses for any sensitive information, redact as necessary ----

# !!! Identify open-end variables here:
# The "why no response" free-text fields for questions s3q1 through s3q8,
# i.e. "s3q1whynoresponse", "s3q2whynoresponse", ..., "s3q8whynoresponse".
open_ends <- paste0("s3q", seq_len(8), "whynoresponse")

# Hand the open-end variable names to the report_open() helper.
# NOTE(review): presumably this produces the "verbatims.csv" reviewed in the
# next step -- confirm against the helper functions file.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Overwrite the row-1080 response of s3q8whynoresponse with a version in which
# the personal name is masked as "[name]".
mydata[["s3q8whynoresponse"]][1080] <- "[name] does not know the market value of those clothing that was given to her family."

# GPS data: Displace ----

# !!!No GPS data

# Save processed data in Stata and SPSS format ----

# Export the processed data twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS).
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Build the report title from the dataset name, e.g.
# "DOL-ILAB SDC - Section_3" when filename is "Section_3".
title_var <- paste("DOL-ILAB SDC -", filename)