# Start from a clean workspace before anything else is defined.
# `all.names = TRUE` also removes objects whose names begin with a dot.
# The earlier form `ls(all=t)` passed the base function `t` instead of a
# logical value and relied on partial matching of `all` to `all.names`.
rm(list = ls(all.names = TRUE))

# Base name of this section; reused below for output file names and the title.
filename <- "Section_1" # !!!Update filename

# Helper-functions file to load for this run.
functions_vers <- "functions_1.8.R" # !!!Update helper functions file
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!No Direct PII
# !!!No Direct PII-team
# !!!No Small locations
# Focus on variables with a "Lowest Freq" in dictionary of 30 or less.
# Top-code age (eh_s1q7) at 84: the output table below shows values of 84 and
# above collapsed into a single "84 or more" category. 888 and 999999 are
# supplied as the `missing` codes.
mydata <- top_recode(
  "eh_s1q7",
  break_point = 84,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q7. Q64: How old is ? Ilang taon si ?
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 30 205 283 266 298 309 319 324 387 474 458 479 592 600 626 624 625 663 593 514 466 388 343
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## 264 252 200 143 120 109 77 98 96 91 106 134 120 138 168 182 196 191 197 222 197 211 240
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
## 169 220 177 186 148 154 142 126 126 120 97 82 79 72 73 50 46 41 31 23 19 24 30
## 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
## 29 23 16 17 15 8 9 13 12 9 11 8 10 11 10 6 8 3 5 4 1 2 1
## 92 93 98 104 <NA>
## 2 3 1 1 814
## [1] "Frequency table after encoding"
## eh_s1q7. Q64: How old is ? Ilang taon si ?
## 0 1 2 3 4 5 6 7 8 9
## 30 205 283 266 298 309 319 324 387 474
## 10 11 12 13 14 15 16 17 18 19
## 458 479 592 600 626 624 625 663 593 514
## 20 21 22 23 24 25 26 27 28 29
## 466 388 343 264 252 200 143 120 109 77
## 30 31 32 33 34 35 36 37 38 39
## 98 96 91 106 134 120 138 168 182 196
## 40 41 42 43 44 45 46 47 48 49
## 191 197 222 197 211 240 169 220 177 186
## 50 51 52 53 54 55 56 57 58 59
## 148 154 142 126 126 120 97 82 79 72
## 60 61 62 63 64 65 66 67 68 69
## 73 50 46 41 31 23 19 24 30 29
## 70 71 72 73 74 75 76 77 78 79
## 23 16 17 15 8 9 13 12 9 11
## 80 81 82 83 84 or more <NA>
## 8 10 11 10 37 814
# Bottom-code year of birth (eh_s1q6) at 1933: the output table below shows
# years up to and including 1933 collapsed into "1933 or less". 888 and
# 999999 are supplied as the `missing` codes.
mydata <- bottom_recode(
  "eh_s1q6",
  break_point = 1933,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q6. Q63: What is the year of birth of ? Anong taon ipinanganak si $
## 1913 1920 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944
## 1 1 2 2 1 2 2 4 3 2 6 8 9 15 6 11 8 10 9 15 11 9 9
## 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967
## 19 16 23 25 33 24 18 26 25 35 48 55 62 77 67 87 89 115 125 128 122 162 143
## 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990
## 194 161 216 176 222 227 194 218 208 200 189 183 168 151 131 125 106 103 106 88 84 103 106
## 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
## 136 172 249 264 316 386 438 498 584 662 611 623 654 598 617 483 484 440 406 356 296 333 295
## 2014 2015 2016 2017 2018 <NA>
## 272 283 221 84 1 814
## [1] "Frequency table after encoding"
## eh_s1q6. Q63: What is the year of birth of ? Anong taon ipinanganak si $
## 1933 or less 1934 1935 1936 1937 1938 1939 1940 1941
## 34 9 15 6 11 8 10 9 15
## 1942 1943 1944 1945 1946 1947 1948 1949 1950
## 11 9 9 19 16 23 25 33 24
## 1951 1952 1953 1954 1955 1956 1957 1958 1959
## 18 26 25 35 48 55 62 77 67
## 1960 1961 1962 1963 1964 1965 1966 1967 1968
## 87 89 115 125 128 122 162 143 194
## 1969 1970 1971 1972 1973 1974 1975 1976 1977
## 161 216 176 222 227 194 218 208 200
## 1978 1979 1980 1981 1982 1983 1984 1985 1986
## 189 183 168 151 131 125 106 103 106
## 1987 1988 1989 1990 1991 1992 1993 1994 1995
## 88 84 103 106 136 172 249 264 316
## 1996 1997 1998 1999 2000 2001 2002 2003 2004
## 386 438 498 584 662 611 623 654 598
## 2005 2006 2007 2008 2009 2010 2011 2012 2013
## 617 483 484 440 406 356 296 333 295
## 2014 2015 2016 2017 2018 <NA>
## 272 283 221 84 1 814
# Top-code age (eh_s1q14) at 63: the output table below shows values of 63 and
# above collapsed into a single "63 or more" category. 888 and 999999 are
# supplied as the `missing` codes.
mydata <- top_recode(
  "eh_s1q14",
  break_point = 63,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q14. Q64: How old is ? Ilang taon si ?
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 182 42 23 20 15 12 10 14 7 10 7 11 3 7 8 7 10 17 22
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
## 22 33 17 37 24 37 16 10 26 16 4 12 6 10 6 11 5 3 3
## 38 39 40 41 42 43 44 45 47 48 49 51 52 53 55 56 57 58 59
## 8 4 3 6 4 3 1 1 4 2 1 1 2 4 2 2 4 1 3
## 60 61 63 64 65 66 68 69 70 72 73 74 75 76 77 78 79 81 82
## 1 2 2 1 1 2 1 1 1 2 2 1 2 2 1 1 1 1 3
## 84 89 92 93 97 <NA>
## 1 1 1 1 1 16091
## [1] "Frequency table after encoding"
## eh_s1q14. Q64: How old is ? Ilang taon si ?
## 0 1 2 3 4 5 6 7 8 9
## 182 42 23 20 15 12 10 14 7 10
## 10 11 12 13 14 15 16 17 18 19
## 7 11 3 7 8 7 10 17 22 22
## 20 21 22 23 24 25 26 27 28 29
## 33 17 37 24 37 16 10 26 16 4
## 30 31 32 33 34 35 36 37 38 39
## 12 6 10 6 11 5 3 3 8 4
## 40 41 42 43 44 45 47 48 49 51
## 3 6 4 3 1 1 4 2 1 1
## 52 53 55 56 57 58 59 60 61 63 or more
## 2 4 2 2 4 1 3 1 2 30
## <NA>
## 16091
# Bottom-code year of birth (eh_s1q13) at 1956: the output table below shows
# years up to and including 1956 collapsed into "1956 or less". 888 and
# 999999 are supplied as the `missing` codes.
mydata <- bottom_recode(
  "eh_s1q13",
  break_point = 1956,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## eh_s1q13. Q63: What is the year of birth of ? Anong taon ipinanganak si ?
## 1920 1924 1925 1928 1933 1935 1936 1939 1940 1942 1943 1944 1945 1947 1949 1950 1951 1952 1954
## 1 1 1 1 1 2 2 1 2 2 3 1 3 1 1 1 1 2 1
## 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1969 1970 1971 1972 1973 1974
## 2 2 1 3 1 2 4 1 1 3 2 1 1 3 2 2 1 1 2
## 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993
## 4 5 3 4 6 5 3 5 13 3 8 9 13 3 15 20 14 18 33
## 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
## 24 26 31 23 29 23 18 14 6 8 7 4 10 10 5 8 15 8 14
## 2013 2014 2015 2016 2017 2018 <NA>
## 14 20 16 36 153 45 16091
## [1] "Frequency table after encoding"
## eh_s1q13. Q63: What is the year of birth of ? Anong taon ipinanganak si ?
## 1956 or less 1957 1958 1959 1960 1961 1962 1963 1964
## 32 1 3 1 2 4 1 1 3
## 1965 1966 1967 1969 1970 1971 1972 1973 1974
## 2 1 1 3 2 2 1 1 2
## 1975 1976 1977 1978 1979 1980 1981 1982 1983
## 4 5 3 4 6 5 3 5 13
## 1984 1985 1986 1987 1988 1989 1990 1991 1992
## 3 8 9 13 3 15 20 14 18
## 1993 1994 1995 1996 1997 1998 1999 2000 2001
## 33 24 26 31 23 29 23 18 14
## 2002 2003 2004 2005 2006 2007 2008 2009 2010
## 6 8 7 4 10 10 5 8 15
## 2011 2012 2013 2014 2015 2016 2017 2018 <NA>
## 8 14 14 20 16 36 153 45 16091
# !!!No Indirect PII - Categorical
# !!!Insufficient demographic data
# !!!No Open-Ends
# !!!No GPS data
# Export the recoded data twice under the "<filename>_PU" stem:
# once as a Stata .dta file and once as an SPSS .sav file.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)
# Build the report title from the section's file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)