# Start from a clean workspace: remove every object in the global environment,
# including hidden (dot-prefixed) ones.
# `all.names = TRUE` is spelled out in full: a bare lowercase `t` is base R's
# transpose function rather than a logical, so it never requested hidden names.
rm(list = ls(all.names = TRUE))

Setup filenames

# Base name of the section being processed; the output file names and the
# report title are built from it.
# !!!Update filename
filename <- "Section_2"
# Script holding the helper functions; it is sourced further down.
# !!!Update helper functions file
functions_vers <- "functions_1.8.R"

Setup data, functions and create dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): presumably this also loads `mydata` and the recode helpers
# used below - confirm against the helper file.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!Include any Direct PII variables
# Every column named in `dropvars` is removed from the dataset; columns whose
# name has no match in `dropvars` are kept, in their original order.
dropvars <- c("name")
mydata <- mydata[is.na(match(names(mydata), dropvars))]

Direct PII-team: Encode field team names

# !!!No Direct PII-team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!No small locations

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Recode education attainment of adults to reduce risk of re-identification 

# Break points for the education recode: one bin per code from 0 up to 17,
# then two wider bins starting at 17 and at 20, with the last break at 26.
break_edu <- c(0:17, 20, 26)

# Value labels for the recoded variable. Codes 3 and 9 carry no label here
# (the "before" cross-tabulation shows no observations in those bins).
labels_edu <- setNames(
  c(1, 2, 4:8, 10:19),
  c(
    "Pre-Kinder",
    "Kinder",
    "1st Grade",
    "2nd Grade",
    "3rd Grade",
    "4th Grade",
    "5th Grade",
    "6th Grade",
    "7th Grade",
    "8th Grade",
    "9th Grade",
    "10th Grade",
    "11th Grade",
    "12th Grade",
    "High School Graduate",
    "Vocational training or associates degree",
    "Some college or higher"
  )
)

# Apply the recode to s2q7 (highest educational level completed).
mydata <- ordinal_recode(
  variable = "s2q7",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## s2q7. What is 's highest educational level completed?  Ano ang pinaka-mataas na antas 
##                                        Pre-Kinder                                            Kinder 
##                                                 3                                                 4 
##                                         1st Grade                                         2nd Grade 
##                                                11                                                12 
##                                         3rd Grade                                         4th Grade 
##                                                18                                                17 
##                                         5th Grade                                         6th Grade 
##                                                22                                               103 
##                                         7th Grade                                         8th Grade 
##                                                30                                                44 
##                                         9th Grade                                        10th Grade 
##                                                31                                                10 
##                                        11th Grade                              High School Graduate 
##                                                 2                                               185 
## 1st Year Vocational training or associates degree 2nd Year Vocational training or associates degree 
##                                                 8                                                 8 
## Vocational training or associates degree graduate                               1st year of college 
##                                                15                                                20 
##                               2nd year of college                               3rd year of college 
##                                                13                                                 4 
##                     4th year of college or higher                                  College graduate 
##                                                 2                                                22 
##                          Education beyond college                                              <NA> 
##                                                 2                                                87 
##     recoded
##      [0,1) [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,10) [10,11) [11,12) [12,13) [13,14) [14,15) [15,16)
##   0      3     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   1      0     4     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   3      0     0     0    11     0     0     0     0     0      0       0       0       0       0       0       0
##   4      0     0     0     0    12     0     0     0     0      0       0       0       0       0       0       0
##   5      0     0     0     0     0    18     0     0     0      0       0       0       0       0       0       0
##   6      0     0     0     0     0     0    17     0     0      0       0       0       0       0       0       0
##   7      0     0     0     0     0     0     0    22     0      0       0       0       0       0       0       0
##   9      0     0     0     0     0     0     0     0     0    103       0       0       0       0       0       0
##   10     0     0     0     0     0     0     0     0     0      0      30       0       0       0       0       0
##   11     0     0     0     0     0     0     0     0     0      0       0      44       0       0       0       0
##   12     0     0     0     0     0     0     0     0     0      0       0       0      31       0       0       0
##   13     0     0     0     0     0     0     0     0     0      0       0       0       0      10       0       0
##   14     0     0     0     0     0     0     0     0     0      0       0       0       0       0       2       0
##   16     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   17     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   18     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   19     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   20     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   21     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   22     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   23     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   24     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##   25     0     0     0     0     0     0     0     0     0      0       0       0       0       0       0       0
##     recoded
##      [16,17) [17,20) [20,26) [26,1e+06)
##   0        0       0       0          0
##   1        0       0       0          0
##   3        0       0       0          0
##   4        0       0       0          0
##   5        0       0       0          0
##   6        0       0       0          0
##   7        0       0       0          0
##   9        0       0       0          0
##   10       0       0       0          0
##   11       0       0       0          0
##   12       0       0       0          0
##   13       0       0       0          0
##   14       0       0       0          0
##   16     185       0       0          0
##   17       0       8       0          0
##   18       0       8       0          0
##   19       0      15       0          0
##   20       0       0      20          0
##   21       0       0      13          0
##   22       0       0       4          0
##   23       0       0       2          0
##   24       0       0      22          0
##   25       0       0       2          0
## [1] "Frequency table after encoding"
## s2q7. What is 's highest educational level completed?  Ano ang pinaka-mataas na antas 
##                               Pre-Kinder                                   Kinder 
##                                        3                                        4 
##                                1st Grade                                2nd Grade 
##                                       11                                       12 
##                                3rd Grade                                4th Grade 
##                                       18                                       17 
##                                5th Grade                                6th Grade 
##                                       22                                      103 
##                                7th Grade                                8th Grade 
##                                       30                                       44 
##                                9th Grade                               10th Grade 
##                                       31                                       10 
##                               11th Grade                     High School Graduate 
##                                        2                                      185 
## Vocational training or associates degree                   Some college or higher 
##                                       31                                       63 
##                                     <NA> 
##                                       87 
## [1] "Inspect value labels and relabel as necessary"
##                               Pre-Kinder                                   Kinder 
##                                        1                                        2 
##                                1st Grade                                2nd Grade 
##                                        4                                        5 
##                                3rd Grade                                4th Grade 
##                                        6                                        7 
##                                5th Grade                                6th Grade 
##                                        8                                       10 
##                                7th Grade                                8th Grade 
##                                       11                                       12 
##                                9th Grade                               10th Grade 
##                                       13                                       14 
##                               11th Grade                               12th Grade 
##                                       15                                       16 
##                     High School Graduate Vocational training or associates degree 
##                                       17                                       18 
##                   Some college or higher 
##                                       19
# Top-code age (s2q3) at 70; the printed "after" table shows the top group
# labelled "70 or more". 888 and 999999 are passed as the missing codes.
mydata <- top_recode(
  "s2q3",
  break_point = 70,
  missing = c(888, 999999)
)
## [1] "Frequency table before encoding"
## s2q3. What is 's age in years?  Ano ang edad ni  sa taon?
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22   23 
##   11    7   12    5    7    8    5    3    6    3    4    5    6    9   15   11   34   50   38   43   39   32   29 
##   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45   46 
##   26   21   24   19   11    5   12    6   10    5    8   10    6   11   11    7    6    5    3    4    3    6    3 
##   47   48   49   50   52   54   55   56   58   59   60   64   65   67   69   70   73   74   76   78   83   85   86 
##    4    2    5    4    5    1    2    3    2    3    4    3    2    1    1    1    1    1    3    3    1    1    1 
##   87   88 <NA> 
##    1    1   13

## [1] "Frequency table after encoding"
## s2q3. What is 's age in years?  Ano ang edad ni  sa taon?
##          1          2          3          4          5          6          7          8          9         10 
##         11          7         12          5          7          8          5          3          6          3 
##         11         12         13         14         15         16         17         18         19         20 
##          4          5          6          9         15         11         34         50         38         43 
##         21         22         23         24         25         26         27         28         29         30 
##         39         32         29         26         21         24         19         11          5         12 
##         31         32         33         34         35         36         37         38         39         40 
##          6         10          5          8         10          6         11         11          7          6 
##         41         42         43         44         45         46         47         48         49         50 
##          5          3          4          3          6          3          4          2          5          4 
##         52         54         55         56         58         59         60         64         65         67 
##          5          1          2          3          2          3          4          3          2          1 
##         69 70 or more       <NA> 
##          1         14         13

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Indirect-PII variables to tabulate: s2q4, s2q5 and s2q7 through s2q11.
indirect_PII <- paste0("s2q", c(4, 5, 7:11))

# Pass the whole list to the helper in one call.
capture_tables(indirect_PII)


# Recode those with very specific values. 

# Recode marital status (s2q5) into broader groups.
# The helper's printed cross-tabulation shows the raw codes binned into
# [-999,-888), [-888,1), [1,2), [2,7), [7,8) and [8,1e+06).
break_mstatus <- c(-999, -888, 1, 2, 7, 8)

# Value labels for the recoded variable, keyed 1-6.
# NOTE(review): the keys look like the position of each bin above - confirm
# against ordinal_recode(). "Other" is deliberately used for two keys (2 and 4).
# "committed" is spelled as in the source label ("Not married but committed")
# so the published value label does not carry a typo.
labels_mstatus <- c(
  "No Response" = 1,
  "Other" = 2,
  "Married Living with Spouse" = 3,
  "Other" = 4,
  "Not married but committed" = 5,
  "Single" = 6
)

mydata <- ordinal_recode(
  variable = "s2q5",
  break_points = break_mstatus,
  missing = 999999,
  value_labels = labels_mstatus
)

## [1] "Frequency table before encoding"
## s2q5. What is 's marital status?  Ano ang estado ng tungkol sa kasal ni ?
##     Married Living with Spouse Married Not Living with Spouse                       Divorced 
##                            133                             32                              1 
##                      Seperated                          Widow      Not married but committed 
##                             27                             20                            125 
##                         Single                           <NA> 
##                            330                              5 
##    recoded
##     [-999,-888) [-888,1) [1,2) [2,7) [7,8) [8,1e+06)
##   1           0        0   133     0     0         0
##   2           0        0     0    32     0         0
##   4           0        0     0     1     0         0
##   5           0        0     0    27     0         0
##   6           0        0     0    20     0         0
##   7           0        0     0     0   125         0
##   8           0        0     0     0     0       330
## [1] "Frequency table after encoding"
## s2q5. What is 's marital status?  Ano ang estado ng tungkol sa kasal ni ?
##                      Other Married Living with Spouse   Not married but comitted                     Single 
##                         80                        133                        125                        330 
##                       <NA> 
##                          5 
## [1] "Inspect value labels and relabel as necessary"
##                No Response                      Other Married Living with Spouse                      Other 
##                          1                          2                          3                          4 
##   Not married but comitted                     Single 
##                          5                          6
# Recode over-specific activities with few members for adults.
# First tabulate s2q8 (principal usual activity) as it stands before recoding.
haven_table("s2q8")
## s2q8. What was 's principal usual activity when they were last living in this househol
##                                                                                  Sugarcane Farming 
##                                                                                                  7 
##                                                                                     Banana Farming 
##                                                                                                  2 
##                                                                                    Coconut Farming 
##                                                                                                  6 
##                                                                                      Other Farming 
##                                                                                                 72 
##                                                                                    Poultry Farmers 
##                                                                                                  8 
##                                                                Inland And Coastal Waters Fishermen 
##                                                                                                  5 
##                                                                                 Deep-Sea Fishermen 
##                                                                                                 11 
##                                                     Mining And Quarrying Including Gold Extraction 
##                                                                                                  3 
##                                                                         Manufacturing Pyrotechnics 
##                                                                                                  1 
##                                                                                       Construction 
##                                                                                                 32 
##                                                                                      Domestic Work 
##                                                                                                 43 
##                                                       Street Work Including Scavenging And Begging 
##                                                                                                  1 
##                                                                            Scavenging In Dumpsites 
##                                                                                                  1 
##                                                                                           Plumbers 
##                                                                                                  1 
##                                                                       Vulcanizing (rubber workers) 
##                                                                                                  1 
##                                                 Heavy Equipment Operator (ie., bulldozer operator) 
##                                                                                                  1 
##                                                                                              Guard 
##                                                                                                  4 
##                                                                      Hairdresser/Barber/Beautician 
##                                                                                                  2 
##                                                                            Consumer store operator 
##                                                                                                  6 
##                                                               Cashiers, Tellers And Related Clerks 
##                                                                                                  6 
##                                                                Charcoal Makers And Related Workers 
##                                                                                                  1 
##                                                           Cleaners, Launderers And Related Workers 
##                                                                                                  4 
##                                                         Food Processing and Related Trades Workers 
##                                                                                                  3 
##                        Handicraft Workers In Wood, Textile, Leather, Chemicals And Related Workers 
##                                                                                                  7 
##                                                 Hotel Housekeepers And Restaurant Services Workers 
##                                                                                                  5 
##                                            Machinery Mechanics, Fitters And Related Trades Workers 
##                                                                                                  4 
##                                           Market Stall Vendors, Street Vendors And Related Workers 
##                                                                                                 23 
##                                               Messengers, Porters, Doorkeepers And Related Workers 
##                                                                                                  3 
## Metal Molders, Welders, Sheet-Metal Workers, Structural-Metal Preparers And Related Trades Workers 
##                                                                                                  2 
##                                                                              Motor Vehicle Drivers 
##                                                                                                  7 
##                                                                Painters And Related Trades Workers 
##                                                                                                  1 
##                                                        Textile, Garment And Related Trades Workers 
##                                                                                                  2 
##                                           Wood Treaters, Cabinet Makers And Related Trades Workers 
##                                                                                                  2 
##                                                                                       Rice Farming 
##                                                                                                  4 
##                                                                                            Student 
##                                                                                                 68 
##                  Principally performs chores and other unpaid household services for own household 
##                                                                                                108 
##                                                                                               <NA> 
##                                                                                                216
# Collapse over-specific activity codes (s2q8) for adults only.
# A row is touched only when age (s2q3) is known and above 17 and the activity
# itself is not missing; every other row keeps its original code.
# Written as whole-column masks so it also works on a zero-row dataset and
# does not create an index vector that shadows base::range().
is_adult_with_activity <- !is.na(mydata$s2q3) & mydata$s2q3 > 17 &
  !is.na(mydata$s2q8)

# Step 1: fold activity codes 1 to 6 into code 7.
fold_into_7 <- is_adult_with_activity & mydata$s2q8 %in% 1:6
mydata$s2q8[fold_into_7] <- 7

# Step 2: any adult code other than 7, 15, 16 and 91 becomes -888.
# This runs after step 1 on purpose, so rows just folded into 7 are kept.
fold_into_other <- is_adult_with_activity &
  !(mydata$s2q8 %in% c(7, 15, 16, 91))
mydata$s2q8[fold_into_other] <- -888

# Tabulate s2q8 again to inspect the result of the recode.
haven_table("s2q8")
## s2q8. What was 's principal usual activity when they were last living in this househol
##                                                                   Other:  Specify 
##                                                                               129 
##                                                                 Sugarcane Farming 
##                                                                                 1 
##                                                                   Coconut Farming 
##                                                                                 2 
##                                                                     Other Farming 
##                                                                                84 
##                                                                   Poultry Farmers 
##                                                                                 2 
##                                                                      Construction 
##                                                                                32 
##                                                                     Domestic Work 
##                                                                                43 
##                                                           Consumer store operator 
##                                                                                 2 
##       Handicraft Workers In Wood, Textile, Leather, Chemicals And Related Workers 
##                                                                                 2 
##                          Market Stall Vendors, Street Vendors And Related Workers 
##                                                                                 2 
##                                                             Motor Vehicle Drivers 
##                                                                                 1 
##                                       Textile, Garment And Related Trades Workers 
##                                                                                 1 
##                                                                           Student 
##                                                                                48 
## Principally performs chores and other unpaid household services for own household 
##                                                                               108 
##                                                                              <NA> 
##                                                                               216

Matching and crosstabulations: Run automated PII check

# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# All variable names should correspond to the names in the data file
# selected categorical key variables: gender, occupation/education and age
# Categorical key variables handed to sdcMicro.
##!!! Replace with candidate categorical demo vars
selectedKeyVars <- c("s2q3", "s2q4", "s2q7")

# Build the sdcMicro object from the recoded data with those key variables,
# then print it to show the category summary and k-anonymity counts.
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars
)
sdcInitial
## The input dataset consists of 673 rows and 35 variables.
##   --> Categorical key variables: s2q3, s2q4, s2q7
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories      Mean size           Size of smallest (>0)      
##          s2q3                   63 (63)    10.645  (10.645)                     1   (1)
##          s2q4                    2  (2)   336.500 (336.500)                   329 (329)
##          s2q7                   17 (17)    36.625  (36.625)                     2   (2)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 0 (0.000%)
##   - 5-anonymity: 0 (0.000%)
## 
## ----------------------------------------------------------------------

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here: 
# Open-ended variables, in questionnaire order: the "whynoresponse" follow-up
# for s2q2 through s2q16, plus the "_other" free-text fields for s2q7, s2q8
# and s2q11 placed just before the matching "whynoresponse" entry.
open_ends <- c(
  paste0("s2q", 2:6, "whynoresponse"),
  "s2q7_other", "s2q7whynoresponse",
  "s2q8_other", "s2q8whynoresponse",
  paste0("s2q", 9:10, "whynoresponse"),
  "s2q11_other", "s2q11whynoresponse",
  paste0("s2q", 12:16, "whynoresponse")
)

# Hand the list to the open-end report helper.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Manual redactions of open-end responses, one lookup table per variable.
# Each entry maps a row number (the element name) to the replacement text;
# bracketed tokens such as [name] or [city] stand in for the removed detail.

# s2q8_other
redact_s2q8_other <- c(
  "29" = "[name] stays at [city] for almost 2yrs from now. [name]is having a vacation twice a month in their house here in [site].",
  "44" = "[work]",
  "45" = "farmer",
  "48" = "driver",
  "108" = "She is studying in [college]",
  "147" = "farming",
  "156" = "Professor",
  "162" = "cutting",
  "163" = "Sister of [name]",
  "189" = "driver",
  "205" = "[work]",
  "207" = "Cashier",
  "227" = "Factory worker ",
  "239" = "Factory worker ",
  "249" = "Installation ",
  "313" = "conductor",
  "324" = "[work]",
  "339" = "[work]",
  "366" = "dealer",
  "461" = "[object] making",
  "477" = "driver",
  "496" = "vendor",
  "545" = "Farming",
  "548" = "[work]",
  "582" = "[work]",
  "650" = "[work]"
)
mydata$s2q8_other[as.integer(names(redact_s2q8_other))] <-
  unname(redact_s2q8_other)

# s2q9whynoresponse (single row)
mydata$s2q9whynoresponse[304] <- "[situation]"

# s2q11_other
redact_s2q11_other <- c(
  "86" = "Factory worker in [city]",
  "97" = "[Tagalo]",
  "98" = "[work]",
  "108" = "She is studying in [college]",
  "118" = "[Tagalo]",
  "162" = "[work]",
  "249" = "Installation",
  "330" = "[work]",
  "412" = "[vehicle]helper(logistic)",
  "419" = "[situation]",
  "422" = "[vehicle] helper",
  "464" = "[level] Teacher",
  "537" = "[work]",
  "545" = "[work]",
  "555" = "[Tagalo]",
  "582" = "Barangay [worker]",
  "627" = "[work]",
  "646" = "[vehicle] driver",
  "655" = "[vehicle] conductor"
)
mydata$s2q11_other[as.integer(names(redact_s2q11_other))] <-
  unname(redact_s2q11_other)

# s2q11whynoresponse
redact_s2q11whynoresponse <- c(
  "122" = "At [province]",
  "256" = "[name] is not aware of his parent in law's whereabouts.",
  "304" = "Not working, currently [situation]",
  "562" = "[name] is currently [situation]"
)
mydata$s2q11whynoresponse[as.integer(names(redact_s2q11whynoresponse))] <-
  unname(redact_s2q11whynoresponse)

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Write the processed dataset twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS).
haven::write_dta(data = mydata, path = paste0(filename, "_PU", ".dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU", ".sav"))

# Report title, built from the same base file name.
title_var <- paste("DOL-ILAB SDC -", filename)