# Start from an empty workspace so objects left over from a previous dataset
# review cannot leak into this run.
# The earlier form of this call passed `all=t`: `t` is base R's transpose
# function, not the logical TRUE, and `all` only reached `all.names` through
# partial argument matching. The argument is now spelled out, so objects whose
# names start with a dot are removed as well.
rm(list = ls(all.names = TRUE))

Set up filenames

# Helper-function script that is run by the source() call below.
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

# Base name of the dataset under review; the saved output files and the
# report title are built from it.
filename <- "Section_3" # !!!Update filename

Set up data and functions, and create the dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): `mydata` and the helpers used further down (top_recode,
# ordinal_recode, report_open) are not defined in this file, so they presumably
# come from this script - confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!No Direct PII 

Direct PII-team: Encode field team names

# !!!No Direct PII - team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!No Small locations

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Top code high treatment spending (eh_s3q14) at the 99.5th percentile

# Top-code eh_s3q14 at the floor of its 99.5th percentile.
#
# The percentile is computed on substantive answers only. The earlier filter
# dropped NA and -97, but the frequency table recorded for eh_s3q14 contains no
# -97 and 17 rows coded -998. That value looks like a non-response code (it is
# also the lowest code of eh_s3q5), so it was being counted as a real amount
# and pulled the cut-off down. Both codes are excluded now.
# The recorded tables below still show the earlier cut-off ("19610 or more")
# until the document is re-run.
percentile_99.5 <- local({
  spend <- mydata$eh_s3q14
  is_answer <- !is.na(spend) & spend != -97 & spend != -998
  floor(quantile(spend[is_answer], probs = 0.995))
})

# NOTE(review): top_recode() is defined outside this file, so how it uses
# `missing` is not visible here; the argument is left at -97 as before.
# Confirm whether it should be -998 for this dataset.
mydata <- top_recode(
  variable = "eh_s3q14",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## eh_s3q14. Q174: In the last 4 weeks, how much in total has been spent on treatment because
##  -998     0     1     2     3     4     5     6     7    10    12    13    14    15    16    18    19    20    21 
##    17    24     2     1     2     1    10     7     7    34    10     3     4    15     5    10     1    88     8 
##    22    24    25    27    28    30    32    33    34    35    36    38    39    40    42    44    45    46    47 
##     4    14    17     3     4    74     9     1     1    12     8     4     1    33     5     1     6     3     1 
##    48    50    51    52    54    55    56    57    58    60    62    63    64    65    66    67    70    72    74 
##     8   129     2     4     2     4     5     1     3    32     1     3     1     4     4     1    11     4     2 
##    75    76    78    79    80    81    82    83    84    85    88    89    90    93    94    95    96    98   100 
##    10     1     2     3    13     1     1     2     3     2     1     1    15     1     1     1     2     2   162 
##   102   103   105   106   108   109   110   113   115   116   118   120   124   125   126   128   130   135   136 
##     1     2     3     3     4     1     7     1     4     1     1    21     1     2     2     2     6     2     3 
##   139   140   141   144   145   147   148   150   153   156   158   159   160   165   166   170   175   180   182 
##     1     6     1     1     2     2     1    51     1     1     1     1     6     1     2     2     1    11     1 
##   185   186   188   190   195   200   203   206   208   210   211   215   220   222   224   225   230   231   235 
##     1     1     1     2     1   117     1     1     1     4     1     1     2     1     1     1     1     1     1 
##   240   241   245   250   256   260   265   270   275   280   283   286   287   290   300   310   314   318   320 
##     5     1     1    19     2     3     2     4     2     4     1     1     1     3    88     3     1     2     4 
##   330   335   340   345   350   360   361   363   365   368   371   375   378   380   384   390   400   410   420 
##     1     1     1     1    12     2     2     1     1     1     1     1     1     1     1     1    24     1     2 
##   448   450   456   460   462   466   467   480   498   500   501   510   517   520   529   534   550   560   570 
##     1     9     1     1     1     1     1     3     1   108     1     1     1     1     1     1     3     4     1 
##   600   608   610   620   630   635   640   650   690   700   710   720   733   740   750   760   770   772   800 
##    29     1     1     1     2     1     2     4     1    22     1     1     1     1     7     1     1     1    17 
##   850   862   896   900   930   950   960  1000  1005  1010  1035  1041  1050  1064  1075  1100  1150  1170  1200 
##     2     1     1     6     1     4     1    78     1     2     1     1     1     1     1     1     1     1    16 
##  1250  1270  1275  1280  1300  1316  1340  1342  1350  1373  1400  1420  1500  1540  1550  1600  1620  1700  1712 
##     1     1     1     1     6     1     1     1     2     1     2     1    44     1     1     8     1     3     1 
##  1720  1740  1741  1750  1760  1800  1805  1842  1890  1894  1900  2000  2040  2060  2100  2150  2200  2300  2400 
##     1     1     1     1     1     7     1     1     1     1     2    43     1     1     3     1     3     2     2 
##  2450  2456  2500  2600  2650  2696  2700  2740  2750  2800  2868  2880  2900  3000  3060  3100  3200  3450  3500 
##     1     1    16     2     2     1     2     1     1     4     1     1     1    32     1     1     1     1     2 
##  3600  3800  4000  4150  4220  4500  4600  5000  5140  5400  5430  5500  6000  6500  7000  7300  7980  8000  8800 
##     1     1    16     1     1     3     1    27     1     1     1     1     8     1     6     2     1     2     1 
##  9000 10000 11600 11730 12000 12748 13600 13650 15000 17000 19145 20000 22000 30000 35000 36000 40000 45000 50000 
##     2     8     1     1     4     1     1     1     5     1     1     2     1     1     2     1     1     1     1 
## 2e+05  <NA> 
##     1 12489

## [1] "Frequency table after encoding"
## eh_s3q14. Q174: In the last 4 weeks, how much in total has been spent on treatment because
##          -998             0             1             2             3             4             5             6 
##            17            24             2             1             2             1            10             7 
##             7            10            12            13            14            15            16            18 
##             7            34            10             3             4            15             5            10 
##            19            20            21            22            24            25            27            28 
##             1            88             8             4            14            17             3             4 
##            30            32            33            34            35            36            38            39 
##            74             9             1             1            12             8             4             1 
##            40            42            44            45            46            47            48            50 
##            33             5             1             6             3             1             8           129 
##            51            52            54            55            56            57            58            60 
##             2             4             2             4             5             1             3            32 
##            62            63            64            65            66            67            70            72 
##             1             3             1             4             4             1            11             4 
##            74            75            76            78            79            80            81            82 
##             2            10             1             2             3            13             1             1 
##            83            84            85            88            89            90            93            94 
##             2             3             2             1             1            15             1             1 
##            95            96            98           100           102           103           105           106 
##             1             2             2           162             1             2             3             3 
##           108           109           110           113           115           116           118           120 
##             4             1             7             1             4             1             1            21 
##           124           125           126           128           130           135           136           139 
##             1             2             2             2             6             2             3             1 
##           140           141           144           145           147           148           150           153 
##             6             1             1             2             2             1            51             1 
##           156           158           159           160           165           166           170           175 
##             1             1             1             6             1             2             2             1 
##           180           182           185           186           188           190           195           200 
##            11             1             1             1             1             2             1           117 
##           203           206           208           210           211           215           220           222 
##             1             1             1             4             1             1             2             1 
##           224           225           230           231           235           240           241           245 
##             1             1             1             1             1             5             1             1 
##           250           256           260           265           270           275           280           283 
##            19             2             3             2             4             2             4             1 
##           286           287           290           300           310           314           318           320 
##             1             1             3            88             3             1             2             4 
##           330           335           340           345           350           360           361           363 
##             1             1             1             1            12             2             2             1 
##           365           368           371           375           378           380           384           390 
##             1             1             1             1             1             1             1             1 
##           400           410           420           448           450           456           460           462 
##            24             1             2             1             9             1             1             1 
##           466           467           480           498           500           501           510           517 
##             1             1             3             1           108             1             1             1 
##           520           529           534           550           560           570           600           608 
##             1             1             1             3             4             1            29             1 
##           610           620           630           635           640           650           690           700 
##             1             1             2             1             2             4             1            22 
##           710           720           733           740           750           760           770           772 
##             1             1             1             1             7             1             1             1 
##           800           850           862           896           900           930           950           960 
##            17             2             1             1             6             1             4             1 
##          1000          1005          1010          1035          1041          1050          1064          1075 
##            78             1             2             1             1             1             1             1 
##          1100          1150          1170          1200          1250          1270          1275          1280 
##             1             1             1            16             1             1             1             1 
##          1300          1316          1340          1342          1350          1373          1400          1420 
##             6             1             1             1             2             1             2             1 
##          1500          1540          1550          1600          1620          1700          1712          1720 
##            44             1             1             8             1             3             1             1 
##          1740          1741          1750          1760          1800          1805          1842          1890 
##             1             1             1             1             7             1             1             1 
##          1894          1900          2000          2040          2060          2100          2150          2200 
##             1             2            43             1             1             3             1             3 
##          2300          2400          2450          2456          2500          2600          2650          2696 
##             2             2             1             1            16             2             2             1 
##          2700          2740          2750          2800          2868          2880          2900          3000 
##             2             1             1             4             1             1             1            32 
##          3060          3100          3200          3450          3500          3600          3800          4000 
##             1             1             1             1             2             1             1            16 
##          4150          4220          4500          4600          5000          5140          5400          5430 
##             1             1             3             1            27             1             1             1 
##          5500          6000          6500          7000          7300          7980          8000          8800 
##             1             8             1             6             2             1             2             1 
##          9000         10000         11600         11730         12000         12748         13600         13650 
##             2             8             1             1             4             1             1             1 
##         15000         17000         19145 19610 or more          <NA> 
##             5             1             1            11         12489

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# Recode eh_s3q5 so that codes 4 and 5 share one "4 or 5" category while every
# other code keeps a category of its own (see the recorded cross-tab below).
# NOTE(review): the objects carry an `_edu` suffix, but the recorded label of
# eh_s3q5 is a marital-status question.

# Value labels for the recoded categories, numbered 1 to 8 in code order.
labels_edu <- as.numeric(1:8)
names(labels_edu) <- c("-998", "1", "2", "4 or 5", "6", "7", "8", "99")

# Lower bounds of the recoded categories; 5 is absent so it falls in with 4.
break_edu <- c(-998, 1, 2, 4, 6, 7, 8, 99)

mydata <- ordinal_recode(
  variable = "eh_s3q5",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## eh_s3q5. Q117: What is 's marital status?  sBq40: Ano ang civil status ni \
## -998    1    2    4    5    6    7    8   99 
##    1 3677  163    8   84  314  956 9376    2 
##       recoded
##        [-998,1) [1,2) [2,4) [4,6) [6,7) [7,8) [8,99) [99,1e+06)
##   -998        1     0     0     0     0     0      0          0
##   1           0  3677     0     0     0     0      0          0
##   2           0     0   163     0     0     0      0          0
##   4           0     0     0     8     0     0      0          0
##   5           0     0     0    84     0     0      0          0
##   6           0     0     0     0   314     0      0          0
##   7           0     0     0     0     0   956      0          0
##   8           0     0     0     0     0     0   9376          0
##   99          0     0     0     0     0     0      0          2
## [1] "Frequency table after encoding"
## eh_s3q5. Q117: What is 's marital status?  sBq40: Ano ang civil status ni \
##   -998      1      2 4 or 5      6      7      8     99 
##      1   3677    163     92    314    956   9376      2 
## [1] "Inspect value labels and relabel as necessary"
##   -998      1      2 4 or 5      6      7      8     99 
##      1      2      3      4      5      6      7      8

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

# !!! Identify open-end variables here: 
# Open-ended (free-text) variables to hand to report_open() for review.
open_ends <- c("eh_s3q6", "eh_s3q10")

# report_open() is defined outside this file.
# NOTE(review): presumably this call produces the "verbatims.csv" that is
# reviewed in the next step - confirm.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row numbers

# Overwrite the two eh_s3q6 responses selected for redaction with the generic
# value "other". The indices are row positions in mydata.
mydata$eh_s3q6[c(8975, 13940)] <- "other"

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed data twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS).
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Report title, built from the dataset name so it follows `filename`.
title_var <- paste0("DOL-ILAB SDC - ", filename)