# Start from a clean workspace before running the disclosure-control steps.
# The original call passed `all=t`: `t` is base R's transpose function, not the
# logical TRUE, and `all` only reached `all.names` through partial argument
# matching, so the request to include hidden (dot-prefixed) objects was never
# actually expressed. Spell the argument out in full and pass a real logical.
rm(list = ls(all.names = TRUE))

Set up filenames

# Base name of the dataset; it is reused further down to build the output file
# names and the report title. !!!Update filename
filename <- "DOLE"
# Script with the helper functions, loaded via source() further down.
# !!!Update helper functions file
functions_vers <- "functions_1.8.R"

Set up data and functions, and create dictionary for dataset review

# Run the helper script named in functions_vers. The recode/encode helpers
# called later in this script are not defined here, so they presumably come
# from that file -- TODO confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!No Direct PII 

Direct PII-team: Encode field team names

# !!!No Direct PII - team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location column(s) to be replaced by codes.
locvars <- "Municipality"
# encode_location() is defined outside this file; its return value replaces
# mydata. 999999 is handed over as the `missing` argument.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## Municipality. Municipality
##                   Abucay                     Agno                  Amulung                     Anda 
##                       70                       56                       56                       28 
##                 Bugallon                Calabanga                 Calasiao                  Canaman 
##                       28                       28                       14                       14 
##               Candelaria             Cauayan City                   Enrile General Emilio Aguinaldo 
##                       14                       28                       28                       14 
##                Jala-Jala                    Jones          Jose Panganiban                     Labo 
##                       28                       98                       42                       14 
##                  Magarao                  Malinao                   Manito                Mariveles 
##                       28                       98                       14                       98 
##                Naga City                Pagsanjan                  Pasacao                     Pila 
##                       14                       70                       28                       14 
##                    Pilar                  Pililla                 Polangui                 Sampaloc 
##                       56                       28                       28                       14 
##          San Carlos City                San Mateo              San Nicolas            Sorsogon City 
##                       14                       14                       14                       14 
##                    Tanay                 Tinambac 
##                       28                       14 
## [1] "Frequency table after encoding"
## Municipality. Municipality
## 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 
##  56  28  98  28  14  14  28  14  14  42  28  98  28  56  28  70  14  70  14  14  98  28  28  28  14  56  14 
## 487 488 489 490 491 492 493 
##  14  14  28  14  14  14  28

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Top code high asset values (Assetvalue) at the 99.5th percentile

# Coerce the asset value column to numeric; entries that cannot be parsed
# become NA (see the warning printed below).
mydata$Assetvalue <- as.numeric(mydata$Assetvalue)
## Warning: NAs introduced by coercion
# Break point: the 99.5th percentile of the values that are neither NA nor the
# -97 code, rounded down to a whole number.
percentile_99.5 <- floor(
  quantile(
    with(mydata, Assetvalue[!is.na(Assetvalue) & Assetvalue != -97]),
    probs = 0.995
  )
)
# top_recode() is defined outside this file; its return value replaces mydata.
mydata <- top_recode(
  variable = "Assetvalue",
  break_point = percentile_99.5,
  missing = -97
)
## [1] "Frequency table before encoding"
## Assetvalue. 
##     1181   9167.5     9295     9378     9494   9498.5   9514.3     9535     9624     9700     9728     9773 
##        1        1        1        1        3        1        1        1        2        1        1        1 
##   9786.5     9795     9810  9811.75     9815     9820   9826.5  9838.25     9846   9857.5     9859     9867 
##       10        1        1       13        1        3        2        1        1        1        2        1 
##     9875   9883.3     9885     9887     9890   9895.5     9896     9900     9902     9910     9917   9919.5 
##        2        2        1        1        4        3        2        2        1        2        1        1 
##     9920   9920.8  9921.25     9925  9926.32     9930     9934     9936     9937   9938.5     9943     9944 
##        1        1        2        1        1        3        3       28        1        2        1        1 
##     9945   9945.2   9945.4     9949     9950     9960     9964   9967.5   9972.5     9974   9974.5  9977.65 
##        1        2        1        1       97        2        1        3        1        1        3        2 
##     9978     9986     9990  9991.25   9993.7     9994     9995  9999.65  9999.75    10000 10000.65    10001 
##        1        2        2        1        1        2        2        8        2      625        1        1 
##    10003    10004    10005  10007.5    10009    10010    10011    10012 10012.25  10019.5    10020    10021 
##        1        3        1        1        3        1        1       26        1        2        1        1 
##    10022  10023.9    10024    10029 10029.86    10032    10034    10035    10045    10050    10057  10057.5 
##        2        1        4        1        1        1        1        1        1        2        2        1 
##    10059    10060 10067.28    10071  10072.5    10075    10082    10085    10090  10091.1 10094.75  10095.3 
##        1        1        1        1        1        1        1        1        4        1       11        1 
##    10100    10101    10121    10128  10130.5    10132 10137.75    10150    10157    10171    10199    10220 
##        1       34        2        1        5        1        1        1        1        1        1        4 
##    10223    10254    10257    10280  10318.5 10325.45    10336    10353    10395    10400 10408.04    10412 
##        1        1        1        1        1        1        1        1        1        9        1        1 
##    10420  10429.5    10450  10450.5    10475    10560 10577.64  10607.9    10631    10691    10702    10737 
##        4        1        1        1        1        1        1        1        1        1        1        1 
##    10745    10746    10813    10932  10934.5  10938.9    10961    11575    11700  11787.2    11935  12556.4 
##        1        1        1        1        1        1        1        2        3        1        2        1 
##    13997    14260    19500     <NA> 
##        1        2        1       73

## [1] "Frequency table after encoding"
## Assetvalue. 11935
##          1181        9167.5          9295          9378          9494        9498.5        9514.3 
##             1             1             1             1             3             1             1 
##          9535          9624          9700          9728          9773        9786.5          9795 
##             1             2             1             1             1            10             1 
##          9810       9811.75          9815          9820        9826.5       9838.25          9846 
##             1            13             1             3             2             1             1 
##        9857.5          9859          9867          9875        9883.3          9885          9887 
##             1             2             1             2             2             1             1 
##          9890        9895.5          9896          9900          9902          9910          9917 
##             4             3             2             2             1             2             1 
##        9919.5          9920        9920.8       9921.25          9925       9926.32          9930 
##             1             1             1             2             1             1             3 
##          9934          9936          9937        9938.5          9943          9944          9945 
##             3            28             1             2             1             1             1 
##        9945.2        9945.4          9949          9950          9960          9964        9967.5 
##             2             1             1            97             2             1             3 
##        9972.5          9974        9974.5       9977.65          9978          9986          9990 
##             1             1             3             2             1             2             2 
##       9991.25        9993.7          9994          9995       9999.65       9999.75         10000 
##             1             1             2             2             8             2           625 
##      10000.65         10001         10003         10004         10005       10007.5         10009 
##             1             1             1             3             1             1             3 
##         10010         10011         10012      10012.25       10019.5         10020         10021 
##             1             1            26             1             2             1             1 
##         10022       10023.9         10024         10029      10029.86         10032         10034 
##             2             1             4             1             1             1             1 
##         10035         10045         10050         10057       10057.5         10059         10060 
##             1             1             2             2             1             1             1 
##      10067.28         10071       10072.5         10075         10082         10085         10090 
##             1             1             1             1             1             1             4 
##       10091.1      10094.75       10095.3         10100         10101         10121         10128 
##             1            11             1             1            34             2             1 
##       10130.5         10132      10137.75         10150         10157         10171         10199 
##             5             1             1             1             1             1             1 
##         10220         10223         10254         10257         10280       10318.5      10325.45 
##             4             1             1             1             1             1             1 
##         10336         10353         10395         10400      10408.04         10412         10420 
##             1             1             1             9             1             1             4 
##       10429.5         10450       10450.5         10475         10560      10577.64       10607.9 
##             1             1             1             1             1             1             1 
##         10631         10691         10702         10737         10745         10746         10813 
##             1             1             1             1             1             1             1 
##         10932       10934.5       10938.9         10961         11575         11700       11787.2 
##             1             1             1             1             2             3             1 
## 11935 or more          <NA> 
##             7            73

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Variables flagged as indirect PII whose frequency tables are to be reviewed.
# The names are kept exactly as spelled here (including "Training3Decription");
# presumably they mirror the dataset's column names -- verify against the data.
indirect_PII <- c(
  # asset type and compliance
  "Assettype", "Compliance",
  # training descriptions
  "Training1Description", "Training2Description", "Training3Decription",
  # cleaned / categorised asset variables
  "assettype_clean", "assetcat", "assets_exp"
)

# capture_tables() is defined outside this file; it is called here with the
# list of flagged variable names and its return value is not stored.
capture_tables(indirect_PII)

# Recode those with very specific values. 

# Break points for the asset category recode: every code from 1 to 25 except
# 9 and 19, stored as doubles.
break_activity <- as.numeric(c(1:8, 10:18, 20:25))

# Value labels for the 23 recoded groups (label text = group number). Four
# groups keep a descriptive label; every other group is labelled "Others".
kept_groups <- c(
  "Sari Sari business/equipment" = 1,
  "Prepared-Food business/equipment (rice, vegetables, fish, etc)" = 2,
  "Fishing business/equipment" = 3,
  "Tools and equipment" = 17
)
labels_activity <- setNames(
  as.numeric(1:23),
  replace(rep("Others", 23), kept_groups, names(kept_groups))
)
rm(kept_groups)
# Recode assetcat with the break points and value labels defined above.
# The original stored the result in `mydata2`, an object nothing else in this
# script reads: the later column drops and the write_dta()/write_sav() calls
# all use `mydata`, so the recoded assetcat never reached the exported files.
# Assign back to `mydata`, as the other recode steps in this script do.
# NOTE(review): the value labels map "Others" to many different codes; confirm
# that ordinal_recode() (defined outside this file) also collapses the
# underlying codes, otherwise the exported numeric values may still
# distinguish the small categories.
mydata <- ordinal_recode(
  variable = "assetcat",
  break_points = break_activity,
  missing = 999999,
  value_labels = labels_activity
)

## [1] "Frequency table before encoding"
## assetcat. Asset category (KASAMA)
##                                                              0 
##                                                             66 
##                                   Sari Sari business/equipment 
##                                                            355 
## Prepared-Food business/equipment (rice, vegetables, fish, etc) 
##                                                            380 
##                                     Fishing business/equipment 
##                                                             66 
##                        Merienda/ streetfood business/equipment 
##                                                             17 
##                                     Welding business/equipment 
##                                                              5 
##                                   Carpentry business/equipment 
##                                                             11 
##                       Product manufacturing business/equipment 
##                                                              8 
##                                 Beauty care business/equipment 
##                                                              5 
##                           Livestock raising business/equipment 
##                                                             17 
##                             Poultry raising business/equipment 
##                                                              2 
##                                    Dry good business/equipment 
##                                                              4 
##                                   Tailoring business/equipment 
##                                                             16 
##                                  Automotive business/equipment 
##                                                              3 
##                  Farming or farming-support business/equipment 
##                                                             25 
##                             Vehicle driving business/equipment 
##                                                             10 
##                     Masonry or construction business/equipment 
##                                                              3 
##                                            Tools and equipment 
##                                                            138 
##                                                        Laundry 
##                                                              2 
##                                                    Retail wear 
##                                                              9 
##                                                Ambulant vendor 
##                                                              1 
##                                                     Paint shop 
##                                                              1 
##                                 Prepaid phone loading business 
##                                                              1 
##                                                            Kit 
##                                                              3 
##     recoded
##      [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,10) [10,11) [11,12) [12,13) [13,14) [14,15) [15,16) [16,17)
##   0      0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   1    355     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   2      0   380     0     0     0     0     0      0       0       0       0       0       0       0       0
##   3      0     0    66     0     0     0     0      0       0       0       0       0       0       0       0
##   4      0     0     0    17     0     0     0      0       0       0       0       0       0       0       0
##   5      0     0     0     0     5     0     0      0       0       0       0       0       0       0       0
##   6      0     0     0     0     0    11     0      0       0       0       0       0       0       0       0
##   7      0     0     0     0     0     0     8      0       0       0       0       0       0       0       0
##   8      0     0     0     0     0     0     0      5       0       0       0       0       0       0       0
##   10     0     0     0     0     0     0     0      0      17       0       0       0       0       0       0
##   11     0     0     0     0     0     0     0      0       0       2       0       0       0       0       0
##   12     0     0     0     0     0     0     0      0       0       0       4       0       0       0       0
##   13     0     0     0     0     0     0     0      0       0       0       0      16       0       0       0
##   14     0     0     0     0     0     0     0      0       0       0       0       0       3       0       0
##   15     0     0     0     0     0     0     0      0       0       0       0       0       0      25       0
##   16     0     0     0     0     0     0     0      0       0       0       0       0       0       0      10
##   17     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   18     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   20     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   21     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   22     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   23     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   24     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##   25     0     0     0     0     0     0     0      0       0       0       0       0       0       0       0
##     recoded
##      [17,18) [18,20) [20,21) [21,22) [22,23) [23,24) [24,25) [25,1e+06)
##   0        0       0       0       0       0       0       0          0
##   1        0       0       0       0       0       0       0          0
##   2        0       0       0       0       0       0       0          0
##   3        0       0       0       0       0       0       0          0
##   4        0       0       0       0       0       0       0          0
##   5        0       0       0       0       0       0       0          0
##   6        0       0       0       0       0       0       0          0
##   7        0       0       0       0       0       0       0          0
##   8        0       0       0       0       0       0       0          0
##   10       0       0       0       0       0       0       0          0
##   11       0       0       0       0       0       0       0          0
##   12       0       0       0       0       0       0       0          0
##   13       0       0       0       0       0       0       0          0
##   14       0       0       0       0       0       0       0          0
##   15       0       0       0       0       0       0       0          0
##   16       0       0       0       0       0       0       0          0
##   17       3       0       0       0       0       0       0          0
##   18       0     138       0       0       0       0       0          0
##   20       0       0       2       0       0       0       0          0
##   21       0       0       0       9       0       0       0          0
##   22       0       0       0       0       1       0       0          0
##   23       0       0       0       0       0       1       0          0
##   24       0       0       0       0       0       0       1          0
##   25       0       0       0       0       0       0       0          3
## [1] "Frequency table after encoding"
## assetcat. Asset category (KASAMA)
##                                   Sari Sari business/equipment 
##                                                            355 
## Prepared-Food business/equipment (rice, vegetables, fish, etc) 
##                                                            380 
##                                     Fishing business/equipment 
##                                                             66 
##                                                         Others 
##                                                            143 
##                                            Tools and equipment 
##                                                            138 
##                                                           <NA> 
##                                                             66 
## [1] "Inspect value labels and relabel as necessary"
##                                   Sari Sari business/equipment 
##                                                              1 
## Prepared-Food business/equipment (rice, vegetables, fish, etc) 
##                                                              2 
##                                     Fishing business/equipment 
##                                                              3 
##                                                         Others 
##                                                              4 
##                                                         Others 
##                                                              5 
##                                                         Others 
##                                                              6 
##                                                         Others 
##                                                              7 
##                                                         Others 
##                                                              8 
##                                                         Others 
##                                                              9 
##                                                         Others 
##                                                             10 
##                                                         Others 
##                                                             11 
##                                                         Others 
##                                                             12 
##                                                         Others 
##                                                             13 
##                                                         Others 
##                                                             14 
##                                                         Others 
##                                                             15 
##                                                         Others 
##                                                             16 
##                                            Tools and equipment 
##                                                             17 
##                                                         Others 
##                                                             18 
##                                                         Others 
##                                                             19 
##                                                         Others 
##                                                             20 
##                                                         Others 
##                                                             21 
##                                                         Others 
##                                                             22 
##                                                         Others 
##                                                             23
# Drop both detailed asset-type columns in a single column filter.
mydata <- mydata[!names(mydata) %in% c("Assettype", "assettype_clean")]
# !!!Removed, as it contains sensitive information and there is another variable that contains this information at a more aggregated level.

Matching and crosstabulations: Run automated PII check

# !!!Insufficient demographic data

Open-ends: review responses for any sensitive information, redact as necessary

# !!!No Open-ends

GPS data: Displace

# !!!No GPS data

Save processed data in Stata and SPSS format

# Export the processed dataset twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS), to paths relative to the working directory.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Report title, built from a fixed prefix followed by the dataset base name.
title_var <- paste0(
  "DOL-ILAB SDC - ",
  filename
)