# Start from a clean workspace before processing this dataset.
# all.names = TRUE is spelled out so that objects whose names begin with a dot
# are removed too (a bare `t` is base R's transpose function, not TRUE).
rm(list = ls(all.names = TRUE))

# Set up filenames

# Base name of the dataset being processed; reused for the output file names
# and the report title.
filename <- "Section_1" # !!!Update filename
# Helper-functions script that is sourced before any recoding is done.
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

# Set up data and functions, and create the dictionary for dataset review

# Run the helper script named in functions_vers.
# NOTE(review): top_recode(), bottom_recode() and the `mydata` object used
# later are not defined in this file, so presumably this script provides
# them - confirm against the helper file.
source(functions_vers)

# Visually inspect the variables in "dictionary.csv" and flag each for risk, using the following flags:

# Direct PII: Respondent names, addresses, identification numbers, phone numbers
# Direct PII-team: Interviewer names, other field team names
# Indirect PII-ordinal: Date of birth, age, income, education, household composition
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, latitude
# Small Location: Location (population <100,000)
# Large Location: Location (population >100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary

# Direct PII: variables to be removed

# !!!No Direct PII

# Direct PII-team: Encode field team names

# !!!No Direct PII-team

# Small locations: Encode locations with population <100,000 using random large numbers

# !!!No Small locations

# Indirect PII - Ordinal: Global recode or top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Top-code the age variable eh_s1q7: per the frequency tables that follow,
# values of 84 and above are collapsed into a single "84 or more" category.
# NOTE(review): 888 and 999999 are presumably missing-value codes handled by
# the helper - confirm against the helper file.
mydata <- top_recode(
  "eh_s1q7",
  break_point = 84,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q7. Q64: How old is ?   Ilang taon si ?
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22 
##   30  205  283  266  298  309  319  324  387  474  458  479  592  600  626  624  625  663  593  514  466  388  343 
##   23   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
##  264  252  200  143  120  109   77   98   96   91  106  134  120  138  168  182  196  191  197  222  197  211  240 
##   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63   64   65   66   67   68 
##  169  220  177  186  148  154  142  126  126  120   97   82   79   72   73   50   46   41   31   23   19   24   30 
##   69   70   71   72   73   74   75   76   77   78   79   80   81   82   83   84   85   86   87   88   89   90   91 
##   29   23   16   17   15    8    9   13   12    9   11    8   10   11   10    6    8    3    5    4    1    2    1 
##   92   93   98  104 <NA> 
##    2    3    1    1  814

## [1] "Frequency table after encoding"
## eh_s1q7. Q64: How old is ?   Ilang taon si ?
##          0          1          2          3          4          5          6          7          8          9 
##         30        205        283        266        298        309        319        324        387        474 
##         10         11         12         13         14         15         16         17         18         19 
##        458        479        592        600        626        624        625        663        593        514 
##         20         21         22         23         24         25         26         27         28         29 
##        466        388        343        264        252        200        143        120        109         77 
##         30         31         32         33         34         35         36         37         38         39 
##         98         96         91        106        134        120        138        168        182        196 
##         40         41         42         43         44         45         46         47         48         49 
##        191        197        222        197        211        240        169        220        177        186 
##         50         51         52         53         54         55         56         57         58         59 
##        148        154        142        126        126        120         97         82         79         72 
##         60         61         62         63         64         65         66         67         68         69 
##         73         50         46         41         31         23         19         24         30         29 
##         70         71         72         73         74         75         76         77         78         79 
##         23         16         17         15          8          9         13         12          9         11 
##         80         81         82         83 84 or more       <NA> 
##          8         10         11         10         37        814

# Bottom-code the year-of-birth variable eh_s1q6: per the frequency tables
# that follow, years up to and including 1933 are collapsed into a single
# "1933 or less" category.
# NOTE(review): 888 and 999999 are presumably missing-value codes handled by
# the helper - confirm against the helper file.
mydata <- bottom_recode(
  "eh_s1q6",
  break_point = 1933,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q6. Q63: What is the year of birth of ?  Anong taon ipinanganak si $
## 1913 1920 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 
##    1    1    2    2    1    2    2    4    3    2    6    8    9   15    6   11    8   10    9   15   11    9    9 
## 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 
##   19   16   23   25   33   24   18   26   25   35   48   55   62   77   67   87   89  115  125  128  122  162  143 
## 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 
##  194  161  216  176  222  227  194  218  208  200  189  183  168  151  131  125  106  103  106   88   84  103  106 
## 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 
##  136  172  249  264  316  386  438  498  584  662  611  623  654  598  617  483  484  440  406  356  296  333  295 
## 2014 2015 2016 2017 2018 <NA> 
##  272  283  221   84    1  814

## [1] "Frequency table after encoding"
## eh_s1q6. Q63: What is the year of birth of ?  Anong taon ipinanganak si $
## 1933 or less         1934         1935         1936         1937         1938         1939         1940         1941 
##           34            9           15            6           11            8           10            9           15 
##         1942         1943         1944         1945         1946         1947         1948         1949         1950 
##           11            9            9           19           16           23           25           33           24 
##         1951         1952         1953         1954         1955         1956         1957         1958         1959 
##           18           26           25           35           48           55           62           77           67 
##         1960         1961         1962         1963         1964         1965         1966         1967         1968 
##           87           89          115          125          128          122          162          143          194 
##         1969         1970         1971         1972         1973         1974         1975         1976         1977 
##          161          216          176          222          227          194          218          208          200 
##         1978         1979         1980         1981         1982         1983         1984         1985         1986 
##          189          183          168          151          131          125          106          103          106 
##         1987         1988         1989         1990         1991         1992         1993         1994         1995 
##           88           84          103          106          136          172          249          264          316 
##         1996         1997         1998         1999         2000         2001         2002         2003         2004 
##          386          438          498          584          662          611          623          654          598 
##         2005         2006         2007         2008         2009         2010         2011         2012         2013 
##          617          483          484          440          406          356          296          333          295 
##         2014         2015         2016         2017         2018         <NA> 
##          272          283          221           84            1          814

# Top-code the age variable eh_s1q14: per the frequency tables that follow,
# values of 63 and above are collapsed into a single "63 or more" category.
# NOTE(review): 888 and 999999 are presumably missing-value codes handled by
# the helper - confirm against the helper file.
mydata <- top_recode(
  "eh_s1q14",
  break_point = 63,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q14. Q64: How old is ?   Ilang taon si ?
##     0     1     2     3     4     5     6     7     8     9    10    11    12    13    14    15    16    17    18 
##   182    42    23    20    15    12    10    14     7    10     7    11     3     7     8     7    10    17    22 
##    19    20    21    22    23    24    25    26    27    28    29    30    31    32    33    34    35    36    37 
##    22    33    17    37    24    37    16    10    26    16     4    12     6    10     6    11     5     3     3 
##    38    39    40    41    42    43    44    45    47    48    49    51    52    53    55    56    57    58    59 
##     8     4     3     6     4     3     1     1     4     2     1     1     2     4     2     2     4     1     3 
##    60    61    63    64    65    66    68    69    70    72    73    74    75    76    77    78    79    81    82 
##     1     2     2     1     1     2     1     1     1     2     2     1     2     2     1     1     1     1     3 
##    84    89    92    93    97  <NA> 
##     1     1     1     1     1 16091

## [1] "Frequency table after encoding"
## eh_s1q14. Q64: How old is ?   Ilang taon si ?
##          0          1          2          3          4          5          6          7          8          9 
##        182         42         23         20         15         12         10         14          7         10 
##         10         11         12         13         14         15         16         17         18         19 
##          7         11          3          7          8          7         10         17         22         22 
##         20         21         22         23         24         25         26         27         28         29 
##         33         17         37         24         37         16         10         26         16          4 
##         30         31         32         33         34         35         36         37         38         39 
##         12          6         10          6         11          5          3          3          8          4 
##         40         41         42         43         44         45         47         48         49         51 
##          3          6          4          3          1          1          4          2          1          1 
##         52         53         55         56         57         58         59         60         61 63 or more 
##          2          4          2          2          4          1          3          1          2         30 
##       <NA> 
##      16091

# Bottom-code the year-of-birth variable eh_s1q13: per the frequency tables
# that follow, years up to and including 1956 are collapsed into a single
# "1956 or less" category.
# NOTE(review): 888 and 999999 are presumably missing-value codes handled by
# the helper - confirm against the helper file.
mydata <- bottom_recode(
  "eh_s1q13",
  break_point = 1956,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q13. Q63: What is the year of birth of ?  Anong taon ipinanganak si ?
##  1920  1924  1925  1928  1933  1935  1936  1939  1940  1942  1943  1944  1945  1947  1949  1950  1951  1952  1954 
##     1     1     1     1     1     2     2     1     2     2     3     1     3     1     1     1     1     2     1 
##  1955  1956  1957  1958  1959  1960  1961  1962  1963  1964  1965  1966  1967  1969  1970  1971  1972  1973  1974 
##     2     2     1     3     1     2     4     1     1     3     2     1     1     3     2     2     1     1     2 
##  1975  1976  1977  1978  1979  1980  1981  1982  1983  1984  1985  1986  1987  1988  1989  1990  1991  1992  1993 
##     4     5     3     4     6     5     3     5    13     3     8     9    13     3    15    20    14    18    33 
##  1994  1995  1996  1997  1998  1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  2010  2011  2012 
##    24    26    31    23    29    23    18    14     6     8     7     4    10    10     5     8    15     8    14 
##  2013  2014  2015  2016  2017  2018  <NA> 
##    14    20    16    36   153    45 16091

## [1] "Frequency table after encoding"
## eh_s1q13. Q63: What is the year of birth of ?  Anong taon ipinanganak si ?
## 1956 or less         1957         1958         1959         1960         1961         1962         1963         1964 
##           32            1            3            1            2            4            1            1            3 
##         1965         1966         1967         1969         1970         1971         1972         1973         1974 
##            2            1            1            3            2            2            1            1            2 
##         1975         1976         1977         1978         1979         1980         1981         1982         1983 
##            4            5            3            4            6            5            3            5           13 
##         1984         1985         1986         1987         1988         1989         1990         1991         1992 
##            3            8            9           13            3           15           20           14           18 
##         1993         1994         1995         1996         1997         1998         1999         2000         2001 
##           33           24           26           31           23           29           23           18           14 
##         2002         2003         2004         2005         2006         2007         2008         2009         2010 
##            6            8            7            4           10           10            5            8           15 
##         2011         2012         2013         2014         2015         2016         2017         2018         <NA> 
##            8           14           14           20           16           36          153           45        16091

# Indirect PII - Categorical: Recode, encode, or top/bottom coding for extreme values

# !!!No Indirect PII - Categorical

# Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

# Open-ends: Review responses for any sensitive information, redact as necessary

# !!!No Open-Ends

# GPS data: Displace

# !!!No GPS data

# Save processed data in Stata and SPSS format

# Write the recoded dataset to disk twice: once as a Stata .dta file and once
# as an SPSS .sav file. Both names are `filename` plus the "_PU" suffix.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the dataset name.
# NOTE(review): title_var is not read anywhere in the visible code; presumably
# the report template consumes it - confirm.
title_var <- paste0("DOL-ILAB SDC - ", filename)