# Start from a clean workspace, including hidden (dot-prefixed) objects.
# The flag is spelled out as `all.names = TRUE`: the earlier `all = t` handed
# base R's transpose function `t` to `ls()` instead of a logical value and
# relied on partial argument matching, so hidden objects were not requested.
rm(list = ls(all.names = TRUE))

Setup filenames

# Per-dataset configuration; both values must be reviewed for every new file.
# `filename` is the stem used for the exported files and the report title.
filename <- "Section_0" # !!!Update filename
# `functions_vers` is the helper-functions script that gets sourced below.
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

Setup data, functions and create dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): `mydata` and the helpers used later (encode_location,
# capture_tables, report_open) are presumably created by this script - confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# Strip the value labels from the household identifier column; the column
# itself is kept in the dataset.
mydata[["household_id"]] <- zap_labels(mydata[["household_id"]])

Direct PII-team: Encode field team names

# !!!No Direct PII - team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!List the location variables to encode here, after confirming that each
# location's population is below 100,000.
locvars <- c("m_s0q4", "m_s0q11", "m_s0q12")

# Replace the location values with codes via the project helper; a
# before/after frequency table is printed for each variable.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## m_s0q4. sAq8: SELECT MUNICIPALITY PROVIDED.
##                  Malinao                   Manito                 Polangui                   Abucay                Mariveles              San Nicolas                  Amulung 
##                      167                       42                       56                      124                      224                       28                       69 
##                   Enrile                Calabanga                Camaligan                  Canaman          Jose Panganiban                     Labo                 Libmanan 
##                       70                       70                       14                       42                       70                       56                       14 
##                  Magarao                Naga City                   Ocampo                  Pasacao                 Tinambac General Emilio Aguinaldo             Cauayan City 
##                       42                       27                       14                       41                       14                       42                       56 
##                    Jones                Pagsanjan                     Pila                     Agno                     Anda                     Bani                 Bautista 
##                      250                      125                       14                       56                       42                       28                       14 
##                 Bugallon                 Calasiao          San Carlos City                     Sual               Urbiztondo               Candelaria                 Sampaloc 
##                       56                       14                       13                       14                       14                       28                       56 
##                Jala-Jala                  Pililla                San Mateo                    Tanay                    Pilar            Sorsogon City 
##                       41                       42                       42                       56                       70                       28 
## [1] "Frequency table after encoding"
## m_s0q4. sAq8: SELECT MUNICIPALITY PROVIDED.
## 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 
##  56  56  14  41  28 167  42 125 250  14  42  42  70  14  56 124  42  42  28  56  28  14  14  42  70  27  56  28  69  13 224  70  56  14  14  56  41  70  14  42  14 
## [1] "Frequency table before encoding"
## m_s0q11. sAq19: SELECT ACTUAL MUNICIPALITY
##                     Polangui                       Abucay                    Mariveles                       Enrile                    Calabanga                         Labo 
##                            1                            2                           11                            1                            2                            1 
##                    Naga City                        Jones                         Anda                     Bugallon                        Tanay Other municipality - specify 
##                            1                            2                            1                            1                            1                            9 
##                         <NA> 
##                         2252 
## [1] "Frequency table after encoding"
## m_s0q11. sAq19: SELECT ACTUAL MUNICIPALITY
##  572  573  574  575  576  577  578  579  580  581  582  583 <NA> 
##    1    1   11    2    1    1    2    1    2    9    1    1 2252 
## [1] "Frequency table before encoding"
## m_s0q12. sAq20: WRITE-IN OTHER MUNICIPALITY
##                          Aroroy         Balanga Lagonoy cam.sur           Libon     Quezon City        Santiago      Sta. Maria          Taytay      Valenzuela 
##            2276               1               1               1               1               1               1               1               1               1 
## [1] "Frequency table after encoding"
## m_s0q12. sAq20: WRITE-IN OTHER MUNICIPALITY
##  876  877  878  879  880  881  882  883  884  885 
##    1    1 2276    1    1    1    1    1    1    1

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# !!!No Indirect PII - Ordinal

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!List the relevant variables below (Indirect PII - Categorical, plus
# Ordinal ones that have not been processed yet).
indirect_PII <- "m_s0q21"

# Pass the flagged variables to the project helper for tabulation.
capture_tables(indirect_PII)

# Recode those with very specific values.
# !!!No specific values

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!!Identify the open-ended (free-text) variables here:
open_ends <- c("m_s0q18", "m_endnote0")

# Hand the open-ended variables to the project helper for review.
# NOTE(review): presumably this is what writes "verbatims.csv" - confirm.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify the variables to be deleted or redacted and
# the row number of each affected response.

# !!!Redacted: these m_s0q18 responses contain sensitive information, and some
# of the text is in Tagalog. Each assignment overwrites one response, addressed
# by its row position in `mydata`, so the row order must match the order used
# when the verbatims were reviewed. Bracketed tokens such as [city] or [name]
# are the redaction placeholders; the rest is the original response text,
# original spelling included.
mydata$m_s0q18[1] <- "Shes working abroad in [city]"
mydata$m_s0q18[237] <- "[situation] since November last year"
mydata$m_s0q18[269] <- "Went to [city] to take care of her sick daughter"
mydata$m_s0q18[295] <- "Went to a Health Center in [city] with her sick child"
mydata$m_s0q18[944] <- "At [municipality] and no definite time of arrival"
mydata$m_s0q18[988] <- "He rad [name]' work related"
mydata$m_s0q18[1062] <- "[place], christening"
mydata$m_s0q18[1091] <- "Worke abroad -[date]"
mydata$m_s0q18[1138] <- "[Tagalog]"
mydata$m_s0q18[1278] <- "Abroad [country]"
mydata$m_s0q18[1433] <- "[name] didnt know where is [name]"
mydata$m_s0q18[1435] <- "[name] told that he is busy and lots of work to be done for thw whole week. He designate his wife to be the respondent since she is knowledgeable for answering the questions."
mydata$m_s0q18[1466] <- "Baseline respondent is in the hospital , in no definite date of back home, husband of [name] is in mountain with no definite time of arrival, the only adult is 20 yrs old child."
mydata$m_s0q18[1489] <- "She brought her husband to a hospital due to [illness]"

# Redactions for the interviewer end-note variable m_endnote0. Each assignment
# overwrites one response, addressed by its row position in `mydata`, so the
# row order must match the order used when the verbatims were reviewed.
# Bracketed tokens such as [name] or [address] are the redaction placeholders;
# the rest is the original note text, original spelling included.
mydata$m_endnote0[11] <- "[name] not [name]"
mydata$m_endnote0[36] <- "Baseline Respondent last name is [last name] since she's already married. She said that she mistakenly gave her maiden name to the baseline surveyor."
mydata$m_endnote0[39] <- "Interview conducted in a waiting shed near [Highschool], because respondent is attending a meeting in the highschool. Respondent's last name is [last name] not [last name]."
mydata$m_endnote0[63] <- "Baseline respondent, [name], is busy working in [city], and no definite time of coming home. She left last June 2. [name]'s children were left to their grandmother's care."
mydata$m_endnote0[126] <- "Respondent's house is too far from the hiway and has a muddy road going there, so we had to rent a serviced tricycle from the [market] to his house and vise versa."
mydata$m_endnote0[128] <- "Baseline respondent moved out with his son [name] and gone with another woman."
mydata$m_endnote0[145] <- "She understand and speak Tagalog because she is from [city]"
mydata$m_endnote0[148] <- "Respondent is currently at her another house in brgy. [address]. I need to be accompanied by the tricycle driver to passed a steep woods and a toe-watered-river."
mydata$m_endnote0[149] <- "I also had to crossed a river to reached the respondent's house. According to the respondent, during rainy season, when the river overflows, they have to took a longer route in brgy. [address] in order for them to reach the highway."
mydata$m_endnote0[151] <- "Contact number is owned by [name], aunt and neighbor of the respondent because they don't have any."
mydata$m_endnote0[156] <- "Respondent is currently at her shop at [site], in her Aunt [name]'s frontyard. So, the interview needs to be conducted here at her [shop]."
mydata$m_endnote0[239] <- "Baseline respondent everydays goes to work from [time] because its raining season which means a lot of work in the farm. Contact numbers were owned by [name], their daughter who adopted by Aunt/neighbor."
mydata$m_endnote0[270] <- "[name] was currently at work and the wife is not sure of what time he'll be back. So I decided to interview his wife."
mydata$m_endnote0[350] <- "Her name is [name]NOT [name]."
mydata$m_endnote0[486] <- "[name]"
mydata$m_endnote0[580] <- "The baseline respondent name is [name]."
mydata$m_endnote0[681] <- "[name]"
mydata$m_endnote0[722] <- "Brgy. [name of barangay] not [name]"
mydata$m_endnote0[822] <- "[name]"
mydata$m_endnote0[829] <- "The respondent surname is [surname] (n is enye) not [surname]"
mydata$m_endnote0[830] <- "The correct spelling of the respondent name is [name]"
mydata$m_endnote0[844] <- "The correct spelling of the respondent is [name]"
mydata$m_endnote0[850] <- "[address]"
mydata$m_endnote0[866] <- "[name]"
mydata$m_endnote0[896] <- "[name] is hr right name"
mydata$m_endnote0[917] <- "[name]"
mydata$m_endnote0[922] <- "[Tagalog]"
mydata$m_endnote0[949] <- "Correct spelling ([name])"
mydata$m_endnote0[959] <- "[site]"
mydata$m_endnote0[975] <- "She is [native group], its hard for us to communicate because she don't know mucb of tagalog"
mydata$m_endnote0[987] <- "[name] is dead due to [illness]"
mydata$m_endnote0[1020] <- "[name] true spelling"
mydata$m_endnote0[1026] <- "[address]"
mydata$m_endnote0[1027] <- "With her [person]"
mydata$m_endnote0[1030] <- "[Tagalog]"
mydata$m_endnote0[1037] <- "The correct spelling of the respondent's name is [name]"
mydata$m_endnote0[1067] <- "[name] moved to brgy. [address] because her youngest daughter can not longer leave  here becausr of the hot surroundings and no elctricity. Also the school of their children were way far from here."
mydata$m_endnote0[1076] <- "[name] the baseline respondent is at work and the husband is available and knowledgeable the economic activity ofthe family"
mydata$m_endnote0[1119] <- "House is owned by [name] who is known in the sitio and not the respondents name."
mydata$m_endnote0[1147] <- "[name] moved out to another place"
mydata$m_endnote0[1175] <- "Basesline respondent not available because of work in [site]"
mydata$m_endnote0[1231] <- "[name] the wife of [name] says that she is the person who participated on the baseline survey last year not her husband."
mydata$m_endnote0[1243] <- "[name]"
mydata$m_endnote0[1275] <- "[name] is in manila and only the husband is available"
mydata$m_endnote0[1285] <- "[Tagalog]"
mydata$m_endnote0[1306] <- "[name] died on [date]"
mydata$m_endnote0[1332] <- "[Tagalog]"
mydata$m_endnote0[1379] <- "Interview conducted at Councilor [name]'s front yard, because respondent is working there."
mydata$m_endnote0[1386] <- "Interview conducted with my observer [names]"
mydata$m_endnote0[1404] <- "His youngest daughter just died [date]"
mydata$m_endnote0[1406] <- "Respondent is available with observe of [names]"
mydata$m_endnote0[1441] <- "Baseline respondent is available she moved out to [municipality]"
mydata$m_endnote0[1452] <- "[name] is the respondents name  be specific because there is 2 other [name]"
mydata$m_endnote0[1459] <- "The house is located near the respondent [name]'s house, before the [name] farm"
mydata$m_endnote0[1472] <- "Interview conducted outside their [address] because her house is far."
mydata$m_endnote0[1481] <- "Her surname is [surname], [address] is her sitio address."
mydata$m_endnote0[1486] <- "Interview conducted at [address] in his tricycle because he's driving it untik 6pm, but we're still alone and far from other people."
mydata$m_endnote0[1490] <- "Respondent's name is [name], not [name]."
mydata$m_endnote0[1518] <- "Respondent is moved to another barangay([barangay name]) because they experience fire"
mydata$m_endnote0[1549] <- "Their previous address is in Brgy. [barangay name]. Now they migrated there whole family in [address]"
mydata$m_endnote0[1565] <- "Their previous address is in [barangay].Now they migrated in Brgy. [address]."
mydata$m_endnote0[1608] <- "Correction of spelling of surname. It's [surname] not [surname]"
mydata$m_endnote0[1633] <- "The contact no.is care of [name]"
mydata$m_endnote0[1660] <- "The main Respondent([name])is not available always at work late to go home.I interview is his wife([name]).."
mydata$m_endnote0[1699] <- "Together with the S.F.O(observed by [name])"
mydata$m_endnote0[1786] <- "Correction of the surname. Its [surname] not [surname]."
mydata$m_endnote0[1847] <- "[name] is not available..shes in manila thats why her husband is my respondent"
mydata$m_endnote0[1894] <- "The contact no.is care of [name]"
mydata$m_endnote0[1923] <- "Daughter of [name]"
mydata$m_endnote0[1987] <- "Her name is [name] "
mydata$m_endnote0[2036] <- "His name is [name]"
mydata$m_endnote0[2085] <- "[name] is always going home from work very late(around [hours])in the eve"
mydata$m_endnote0[2161] <- "The contanct # is care off [name] grand child of respondent."
mydata$m_endnote0[2196] <- "I interviewed the respondent in her new house in barangay [barangay]. They have transferred there because their house in [site] was down due to typhoon but she reconstructing it."

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Export the processed data twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS).
haven::write_dta(data = mydata, path = sprintf("%s_PU.dta", filename))
haven::write_sav(data = mydata, path = sprintf("%s_PU.sav", filename))

# Build the report title from the dataset file name.
title_var <- sprintf("DOL-ILAB SDC - %s", filename)