# Start from a clean workspace, including dot-prefixed (hidden) objects.
# The previous call passed `t` -- base R's transpose function -- where the
# logical TRUE was intended, and relied on partial matching of `all`, so the
# "include hidden objects" request was not honoured.
rm(list = ls(all.names = TRUE))

Set up filenames

# Base name of the section being processed. !!!Update filename
filename <- "Section_1"
# Script holding the helper functions. !!!Update helper functions file
functions_vers <- "functions_1.8.R"

Set up data and functions, and create the dictionary for dataset review

# Load the helper functions from the script named in `functions_vers`.
# NOTE(review): `mydata` is not created anywhere in the visible code;
# presumably the sourced script loads it -- confirm.
source(functions_vers)

# Bottom-code year of birth (m_s1q28). Per the frequency tables printed below,
# years up to and including 1935 are pooled into "1935 or less" and -999 stays
# a category of its own.
mydata <- bottom_recode("m_s1q28", 1935, -999)
## [1] "Frequency table before encoding"
## m_s1q28. sBq34: What is 's year of birth?  Anong taon ipinanganak si ${calc
## -999 1920 1922 1924 1925 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 
##   15    1    1    1    2    2    2    2    2    1    4    7    5   13    7    9    9    7    6   12   12    5    9   19   13   17   28   31   21   16   24   22   31 
## 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 
##   47   47   58   74   60   82   87  111  116  125  121  158  140  181  158  204  178  207  217  183  188  198  199  169  182  152  142  118  114   96   93   84   75 
## 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 <NA> 
##   63   67   82  102  109  175  188  232  272  337  420  495  611  554  612  630  583  594  473  449  430  382  348  280  307  275  227  235  216   83 1310

## [1] "Frequency table after encoding"
## m_s1q28. sBq34: What is 's year of birth?  Anong taon ipinanganak si ${calc
##         -999 1935 or less         1936         1937         1938         1939         1940         1941         1942         1943         1944         1945         1946 
##           15           43            7            9            9            7            6           12           12            5            9           19           13 
##         1947         1948         1949         1950         1951         1952         1953         1954         1955         1956         1957         1958         1959 
##           17           28           31           21           16           24           22           31           47           47           58           74           60 
##         1960         1961         1962         1963         1964         1965         1966         1967         1968         1969         1970         1971         1972 
##           82           87          111          116          125          121          158          140          181          158          204          178          207 
##         1973         1974         1975         1976         1977         1978         1979         1980         1981         1982         1983         1984         1985 
##          217          183          188          198          199          169          182          152          142          118          114           96           93 
##         1986         1987         1988         1989         1990         1991         1992         1993         1994         1995         1996         1997         1998 
##           84           75           63           67           82          102          109          175          188          232          272          337          420 
##         1999         2000         2001         2002         2003         2004         2005         2006         2007         2008         2009         2010         2011 
##          495          611          554          612          630          583          594          473          449          430          382          348          280 
##         2012         2013         2014         2015         2016         2017         <NA> 
##          307          275          227          235          216           83         1310

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!No Direct PII 

Direct PII-team: Encode field team names

# !!!No Direct PII - team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variables to encode; here only the current municipality.
locvars <- "m_s1q15"
# Replace every category with a numeric code (see the "after encoding" table
# below). Note that "Don't know" and "Other municipality - specify" also
# receive codes of their own.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## m_s1q15. sBq21: What is 's current municipality?  Nasaang munisipyo po sa
##                   Don't know                      Malinao                       Manito                     Polangui                       Abucay 
##                          164                           23                            6                           12                           13 
##                    Mariveles                  San Nicolas                      Amulung                       Enrile                    Calabanga 
##                           85                            4                            8                           10                           13 
##                      Canaman              Jose Panganiban                         Labo                     Libmanan                      Magarao 
##                           11                           17                           12                            1                            6 
##                    Naga City                       Ocampo                      Pasacao                     Tinambac     General Emilio Aguinaldo 
##                           22                            2                            9                            5                           14 
##                 Cauayan City                        Jones                    Pagsanjan                         Pila                         Agno 
##                           16                           69                           25                            1                            1 
##                         Anda                         Bani                     Bautista                     Bugallon                     Calasiao 
##                            8                            7                            6                           12                            9 
##              San Carlos City                         Sual                   Urbiztondo                   Candelaria                     Sampaloc 
##                            1                            2                            2                            1                           15 
##                    Jala-Jala                      Pililla                    San Mateo                        Tanay                        Pilar 
##                           11                           13                            6                           12                           13 
##                Sorsogon City Other municipality - specify                         Lian                         <NA> 
##                            4                          486                            1                        14703 
## [1] "Frequency table after encoding"
## m_s1q15. sBq21: What is 's current municipality?  Nasaang munisipyo po sa
##   384   385   386   387   388   389   390   391   392   393   394   395   396   397   398   399   400   401   402   403   404   405   406   407   408   409   410   411 
##     9     6    23     6    10    69     2     1     1     4     5     9     6    12    25     8    16   486    22    13    13    17     7     2    85    15     1    12 
##   412   413   414   415   416   417   418   419   420   421   422   423   424   425   426  <NA> 
##     6    13    11   164     1     1    14     8     4    12     2     1    13    12    11 14703

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Indirect-PII variables whose frequency tables are captured for review.
# Order is kept exactly as originally listed.
indirect_PII <- c(
  "m_s1q27", "m_s1q29", "m_s1q34", "m_s1q38",
  "m_s1q42", "m_s1q43", "m_s1q44", "m_s1q45",
  "m_s1q48", "m_s1q49", "m_s1q46", "m_s1q47",
  "m_s1q50", "m_s1q51", "m_s1q52", "m_s1q53",
  "m_s1q54", "m_s1q55", "m_s1q56", "m_s1q57",
  "m_s1q64", "m_s1q65", "m_s1q65_two",
  "m_s1q66", "m_s1q67", "m_s1q67_two"
)

# Hand the indirect-PII variable list to the capture_tables() helper.
capture_tables(indirect_PII)

# Recode those with very specific values. 

# Marital status (m_s1q34): pool sparse categories with ordinal_recode().
# NOTE: the "_edu" suffix is a leftover name; these objects describe marital
# status, not education. The names are kept so any later reference still works.
#
# As the cross-tabulation printed by ordinal_recode() shows, consecutive break
# points form left-closed intervals [lower, upper), and the i-th interval gets
# the i-th entry of labels_edu. Source codes seen in the data:
#   -998 Don't know, 1 Married living with spouse, 2 Married not living with
#   spouse, 4 Divorced/Annulled, 5 Separated, 6 Widow/Widower,
#   7 Not married but committed, 8 Single, 99 Other.
#
# The break after 2 must be 4, not 5. With 5, code 4 (Divorced/Annulled) fell
# into [2,5) and was relabelled "Married Not Living with Spouse", while
# "Divorced/Annulled or Separated" contained only code 5. With 4, the interval
# [4,6) pools codes 4 and 5, which is what the label describes.
# NOTE(review): the frequency tables recorded after this call were produced
# with the old break point and must be regenerated.
break_edu <- c(-999, -998, 1, 2, 4, 6, 7, 8, 99)
labels_edu <- c(
  "Refused to answer" = 1,
  "Don't know" = 2,
  "Married Living with Spouse" = 3,
  "Married Not Living with Spouse" = 4,
  "Divorced/Annulled or Separated" = 5,
  "Widow/Widower" = 6,
  "Not married but committed" = 7,
  "Single" = 8,
  "Other" = 9
)
mydata <- ordinal_recode(
  variable = "m_s1q34",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## m_s1q34. sBq40: What is 's marital status?  Ano ang civil status ni ${calc_
##                     Don't know     Married Living with Spouse Married Not Living with Spouse              Divorced/Annulled                      Separated 
##                              1                           3694                            140                              2                            114 
##                  Widow/Widower      Not married but committed                         Single                          Other                           <NA> 
##                            300                            864                           9414                             22                           1310 
##       recoded
##        [-999,-998) [-998,1) [1,2) [2,5) [5,6) [6,7) [7,8) [8,99) [99,1e+06)
##   -998           0        1     0     0     0     0     0      0          0
##   1              0        0  3694     0     0     0     0      0          0
##   2              0        0     0   140     0     0     0      0          0
##   4              0        0     0     2     0     0     0      0          0
##   5              0        0     0     0   114     0     0      0          0
##   6              0        0     0     0     0   300     0      0          0
##   7              0        0     0     0     0     0   864      0          0
##   8              0        0     0     0     0     0     0   9414          0
##   99             0        0     0     0     0     0     0      0         22
## [1] "Frequency table after encoding"
## m_s1q34. sBq40: What is 's marital status?  Ano ang civil status ni ${calc_
##                     Don't know     Married Living with Spouse Married Not Living with Spouse Divorced/Annulled or Separated                  Widow/Widower 
##                              1                           3694                            142                            114                            300 
##      Not married but committed                         Single                          Other                           <NA> 
##                            864                           9414                             22                           1310 
## [1] "Inspect value labels and relabel as necessary"
##              Refused to answer                     Don't know     Married Living with Spouse Married Not Living with Spouse Divorced/Annulled or Separated 
##                              1                              2                              3                              4                              5 
##                  Widow/Widower      Not married but committed                         Single                          Other 
##                              6                              7                              8                              9
# !!! Dropped: both variables contain sensitive information, and another
# variable carries the same information at a more aggregated level.
mydata <- mydata[!names(mydata) %in% c("m_s1q65_two", "m_s1q67_two")]

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here: 
# Free-text (open-end) variables whose responses are exported for review.
open_ends <- c(
  "m_s1q10", "m_s1q14_other", "m_s1q16", "m_s1q34_other",
  "m_s1q36_other", "m_s1q65_other", "m_s1q67_other"
)

# Pass the open-end variable list to the report_open() helper.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Manual redaction of m_s1q10 open-end responses. Each statement overwrites one
# response, addressed by row position, with a version in which identifying
# details are replaced by placeholders ([name], [small location],
# [specific disease], [language]).
# The row numbers are hard-coded, so they are only valid for the row order the
# data had when the responses were reviewed; re-check them if the input or its
# sort order changes.
# Fixed in this revision: rows 3847 and 4283 still carried a respondent's first
# name in the replacement text; both are now replaced with [name].
mydata$m_s1q10[1] <- "Shes work abroad in [small location]"
mydata$m_s1q10[142] <- "Transfer to his mother house in [small location]"
mydata$m_s1q10[341] <- "She is now in Australia for treatment of [specific disease]"
mydata$m_s1q10[346] <- "With his live in partner in [small location] Ilocos Sur"
mydata$m_s1q10[563] <- "Moved to house of mother [name]"
mydata$m_s1q10[565] <- "Moved to house of mother name [name]"
mydata$m_s1q10[670] <- "Moved out to [small location],treatment for [specific disease]"
mydata$m_s1q10[765] <- "Accompanied by [name]"
mydata$m_s1q10[1246] <- "Son of [name] and [name]"
mydata$m_s1q10[1249] <- "Son of [name] and [name]"
mydata$m_s1q10[1250] <- "Son of [name] and [name]"
mydata$m_s1q10[1405] <- "With his mother [name] in Manila"
mydata$m_s1q10[1478] <- "With her mother [name]"
mydata$m_s1q10[1550] <- "Since [name] were at the age of 1year old,her Aunt [name] took her and been cared until now."
mydata$m_s1q10[1638] <- "[name] workjnb"
# NOTE(review): "roden" may be a leftover name fragment -- confirm against verbatims.csv.
mydata$m_s1q10[1730] <- "[name] roden at [name]"
mydata$m_s1q10[1742] <- "[name] was [name] and [name]s son"
mydata$m_s1q10[1747] <- "At the other province living with her husband [name]"
mydata$m_s1q10[1749] <- "At the other province living with his wife [name]"
mydata$m_s1q10[1933] <- "[name] is living with her mother ([name]) in [small location] Rizal."
mydata$m_s1q10[2074] <- "[name] is sick and now at [small location] more 1 month and  staying until recover from sickness."
mydata$m_s1q10[2667] <- "She was the daughter of [name] and [name]"
mydata$m_s1q10[2670] <- "Living in the other house having their own family, she is [name] wife"
mydata$m_s1q10[2671] <- "Living in another house having his own family, [name] was the husband of [name] They are not married"
mydata$m_s1q10[2722] <- "[name] was the son of [name] and [name], they were together"
mydata$m_s1q10[3083] <- "[name]s child"
mydata$m_s1q10[3124] <- "[name] was adopted by the other family since she was  5 months old, During the baseline interview she is still living with them."
mydata$m_s1q10[3394] <- "[language]"
mydata$m_s1q10[3427] <- "Moved for other household (the same household with [name])"
mydata$m_s1q10[3429] <- "Moved for other household (the same household with [name])"
mydata$m_s1q10[3664] <- "She and her son and daughter goes to her husband in [small location], her husband is working there."
mydata$m_s1q10[3672] <- "[name] was the daughter of [name] an [name]"
mydata$m_s1q10[3674] <- "Is the son of [name] and [name]"
mydata$m_s1q10[3789] <- "Respondent said that [name]s mother adapted [name] six months ago til now, [name] is expected to become a permanent member of the respondent's mother household."
# Second mention of the child's first name is now redacted as well.
mydata$m_s1q10[3847] <- "[name] was the garandchild of the respondent, since [name] is 1 year old the resondent is the one who took care of her , then now shes at her mother since june, of year 2016"
mydata$m_s1q10[3870] <- "[name] is [name]s brother, Now he is living in the other household, based on the respondent he is not working because he is senior citizen already."
mydata$m_s1q10[4025] <- "Live with parents [name]"
mydata$m_s1q10[4027] <- "Live with his parents [name]"
mydata$m_s1q10[4136] <- "Work in [small location]"
mydata$m_s1q10[4138] <- "Live separately with his family. Husband of [name]"
mydata$m_s1q10[4141] <- "Transfered and live  in [small location]"
mydata$m_s1q10[4143] <- "Transfered and live in [small location]"
mydata$m_s1q10[4155] <- "[name] is living with a girlfriend (lesbian)"
mydata$m_s1q10[4258] <- "[name]"
# Leading first name is now redacted.
mydata$m_s1q10[4283] <- "[name] is living with her husband and soon to be married"
mydata$m_s1q10[4288] <- "[name] is [name]s wife"
mydata$m_s1q10[4290] <- "[name] was [name]s wife"
mydata$m_s1q10[5112] <- "[name] is together with [name], They are husband and wife."
# NOTE(review): "go" may be a leftover surname fragment -- confirm against verbatims.csv.
mydata$m_s1q10[5839] <- "Her father and [name]  go was migrated already"
mydata$m_s1q10[5892] <- "[name] is the wife of [name] they are together"
mydata$m_s1q10[5981] <- "[name] was with [name]"
mydata$m_s1q10[5987] <- "[language]"
mydata$m_s1q10[5988] <- "Daughter of [name]"
mydata$m_s1q10[5989] <- "Independent [language]"
mydata$m_s1q10[6149] <- "Live with his parents in [small location]"
mydata$m_s1q10[6317] <- "Got separated with [name]"
mydata$m_s1q10[6389] <- "[name] ran away"
mydata$m_s1q10[6390] <- "Vacation in [small location] since april"
mydata$m_s1q10[6396] <- "[name] is the wife of [name] and they lived in the same household"
mydata$m_s1q10[6397] <- "[name] was the wife of [name] and they lived in the same house"
mydata$m_s1q10[6399] <- "[name] is the son of [name] and [name] and they  are together"
mydata$m_s1q10[6607] <- "Moved with her husband [name]"
mydata$m_s1q10[6612] <- "Daughter of [name] and [name]"
mydata$m_s1q10[6660] <- "[name] took care of [name] while her mother was in abroad, and now [name] is in his mother costudy."
mydata$m_s1q10[6694] <- "Former livein partner of [name]"
mydata$m_s1q10[6708] <- "Live sepately with [name]"
mydata$m_s1q10[6711] <- "Back to their own house in Surigao del Sur with his father ([name])"
mydata$m_s1q10[6716] <- "Back to their own house in Surigao with his father  ([name])"
mydata$m_s1q10[6768] <- "Separated with [name]. Has his own family"
mydata$m_s1q10[6770] <- "Wife of [name]"
mydata$m_s1q10[6954] <- "Transfer to [name]"
mydata$m_s1q10[7032] <- "Want to be with her mother, [name], and to look for a job."
mydata$m_s1q10[7039] <- "Also, to be with her mother, [name]"
mydata$m_s1q10[7040] <- "To be with his grandmother/guardian, [name], because his mother passed away aft3r giving birth to him. His father is [name]"
mydata$m_s1q10[7156] <- "[language]"
mydata$m_s1q10[7276] <- "[name] took [name] with her."
mydata$m_s1q10[7277] <- "[name] took [name] with her."
mydata$m_s1q10[7287] <- "Live together with his dad [name]"
mydata$m_s1q10[7499] <- "[language]"
mydata$m_s1q10[7504] <- "Son of [name] moved household"
mydata$m_s1q10[8069] <- "She broke up with [name]"
mydata$m_s1q10[8153] <- "[language]"
mydata$m_s1q10[8889] <- "Her husband live in brgy. [small location]"
mydata$m_s1q10[8900] <- "Husband of [name]  they live with [name] parents"
mydata$m_s1q10[9214] <- "Visit her children to her 1st husband in [small location]"
mydata$m_s1q10[9432] <- "[name] is child of [name] and [name] and he move to other location because the work of his father."
mydata$m_s1q10[9433] <- "[name] is child of [name] and [name] and he move to other location because the work of his father."
mydata$m_s1q10[9795] <- "She Stayed with her father in [small location]"
mydata$m_s1q10[9796] <- "He worked in [small location]"
mydata$m_s1q10[9967] <- "Caretaker"
mydata$m_s1q10[9981] <- "Move out to [small location]"
mydata$m_s1q10[10940] <- "Hd went home to his hometown in [small location], negros oriental"
mydata$m_s1q10[11026] <- "[name] bring his younger brother([name]) in Manila to See doctor for his follow up medications."
mydata$m_s1q10[11138] <- "[language] sa other relative"
mydata$m_s1q10[12050] <- "Daugther of [name] shes living with her mother in [small location]"
mydata$m_s1q10[12192] <- "[name] stays with his parents"
mydata$m_s1q10[12228] <- "[name] live with her mother again"
mydata$m_s1q10[12439] <- "[language]"
mydata$m_s1q10[12643] <- "[language]"
mydata$m_s1q10[12790] <- "[name](granchild)hes staying already to his own mother ."
mydata$m_s1q10[12795] <- "Hes staying already to his mother..([name] is granchild of the respo"
mydata$m_s1q10[12836] <- "[name] transfer to another household"
mydata$m_s1q10[12838] <- "[name] transfer to another household"
mydata$m_s1q10[12839] <- "He returns back to his mother([name])"
mydata$m_s1q10[12906] <- "[name] was fetch by her auntie and brought to laguna."
mydata$m_s1q10[12941] <- "They have there on house in the Barangay [small location]"
mydata$m_s1q10[13133] <- "Fallow her husband [name]"
mydata$m_s1q10[13230] <- "[small location]"
mydata$m_s1q10[13280] <- "Son of [name] and [name]"
mydata$m_s1q10[13284] <- "Daughter of [name] and [name]"
mydata$m_s1q10[13857] <- "[language]"
mydata$m_s1q10[13974] <- "[language]"
mydata$m_s1q10[15542] <- "[name] & [name] baby"
mydata$m_s1q10[15544] <- "She's with [name]"
mydata$m_s1q10[15546] <- "With her husband [name]"

# Rows of m_s1q14_other whose free-text answer is replaced wholesale by the
# small-location placeholder.
mydata$m_s1q14_other[c(1524, 2357, 7892, 12219)] <- "[small location]"

# !!! Dropped: each of these variables contains sensitive information, and
# another variable carries the same information at a more aggregated level.
mydata <- mydata[!names(mydata) %in% c("m_s1q16", "m_s1q65_other", "m_s1q67_other")]

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed data twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS).
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the section name.
title_var <- paste0("DOL-ILAB SDC - ", filename)