# Start from a clean workspace so objects left over from an earlier session
# cannot leak into this run. `all.names = TRUE` is spelled out in full so that
# dot-prefixed (hidden) objects are removed too; a bare `t` is the transpose
# function, not a logical, and must not be used as shorthand for TRUE.
rm(list = ls(all.names = TRUE))

Setup and create dictionary

Start RStudio by double-clicking on "PII_remover_1.5.R". Make sure the following files are in the same folder: "PII_remover_1.5.R", "functions_1.5.R", and the dataset to be processed (e.g. "Nepal Round 3_FinalClean.dta").

# Base name of the dataset to process; it is reused at the end of the script
# to build the output file names. !!!Update filename
filename <- "Nepal_HT_Study_Round2_062316"

# Run the companion script in the current session.
# NOTE(review): presumably this defines the helper functions and loads
# `mydata` using `filename` -- confirm against functions_1.5.R.
source(file = "functions_1.5.R")

#mydata <- mydata [1:10,] # remove '#' from #mydata if you want to conduct a fast check on 10 rows. 

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

#!!!Save flagged dictionary in .xlsx format, add "DatasetReview" to name and continue processing data with subset of flagged variables

Direct PII: variables to be removed

# !!!No Direct PII

Direct PII-team: Encode interviewer names, which may be useful for analysis of interviewer effects

# !!!No Direct PII-team

Small locations: Encode locations with population <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location identifiers to encode: VDC code ("VDC") and ward number ("ID_4").
locvars <- c(
  "VDC",
  "ID_4"
)

# Replace `mydata` with the version returned by encode_location(), which
# prints a frequency table for each variable before and after encoding.
# NOTE(review): encode_location() is not defined in this file (presumably it
# comes from functions_1.5.R), and the meaning of `missing = 999999` is not
# visible here -- confirm before changing it.
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## VDC. VDC code
##         Barahathawa        Dhungrekhola           Dhurkauli            Lalbandi      Malangawa N.P. 
##                  63                  63                  59                  61                  63 
##           Netraganj            Raniganj           Sankarpur      Bhimeswor N.P.               Bocha 
##                  64                  64                  61                  61                  61 
##         Dandakharka               Fasku            Katakuti            Lamidada              Melung 
##                  62                  62                  63                  62                  64 
##              Pawati             Badegau         Talramarang        BhoteNamlang               Irkhu 
##                  63                  63                  64                  65                  62 
##               Ichok            Kadambas           Langarche            Melamchi             Anaikot 
##                  62                  63                  62                  62                  63 
##    BaluwapatiDeupur   ChalalGaneshsthan    KalatiBhumidanda       MahankalChaur           Methinkot 
##                  62                  67                  62                  61                  62 
##          Patalekhet             RaviOpi              Balkot       Changunarayan            Chitapol 
##                  64                  62                  59                  62                  63 
##             Duwakot               Gundu  Madhyapur Thimi NP             Nankhel             Sirutar 
##                  63                  63                  66                  61                  58 
##             Baireni              Dhussa               Khari           Kiranchok             Naubise 
##                  62                  64                  62                  63                  63 
##           Salyantar        SunaulaBazar              Thakre            Chitlang          Churiyamai 
##                  63                  62                  63                  61                  63 
##              Fakhel           Kulekhani           Nibuwatar        Padampokhari   ShreepurChhatiwan 
##                  62                  62                  60                  65                  62 
## SisneriMahadevsthan       Birendranagar             Jutpani              Kathar          Khairahani 
##                  63                  63                  62                  63                  63 
##            Padampur          Parbatipur               Piple          Shaktikhor       Chhayachhetra 
##                  62                  60                  65                  62                  62 
##           Damachaur            Devsthal            Dhanwang           Phalawang           Sibaratha 
##                  63                  65                  62                  64                  62 
##          Siddheswar             Tribeni            Baijapur             Binauna           Chisapani 
##                  63                  61                  63                  62                  65 
##           Khaskusma            Kohalpur           Nepalgunj             Rajhena          Samserganj 
##                  62                  63                  60                  62                  63 
##                <NA> 
##                   3 
## [1] "Frequency table after encoding"
## VDC. VDC code
##  980  981  982  983  984  985  986  987  988  989  990  991  992  993  994  995  996  997  998  999 1000 1001 1002 
##   63   63   59   61   63   64   64   61   61   61   62   62   63   62   64   63   63   64   65   62   62   63   62 
## 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 
##   62   63   62   67   62   61   62   64   62   59   62   63   63   63   66   61   58   62   64   62   63   63   63 
## 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 
##   62   63   61   63   62   62   60   65   62   63   63   62   63   63   62   60   65   62   62   63   65   62   64 
## 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 <NA> 
##   62   63   61   63   62   65   62   63   60   62   63    3 
## [1] "Frequency table before encoding"
## ID_4. Ward Number
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14 <NA> 
##  650  638  394  576  404  650  528  423  569   25   28   38   38   38    3 
## [1] "Frequency table after encoding"
## ID_4. Ward Number
##  281  282  283  284  285  286  287  288  289  290  291  292  293  294 <NA> 
##  650  638  394  576  404  650  528  423  569   25   28   38   38   38    3

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# !!! No indirect PII - Ordinal

Indirect PII - Categorical

# !!! No indirect PII - Categorical

Matching and crosstabulations: Run automated PII check

# !!! No indirect or direct PII

Open-ends: review responses for any sensitive information, redact as necessary

# !!! No open-ends

GPS data: Displace

# !!! No GPS

Save processed data in Stata and SPSS format

Adds "_PU" (Public Use) to the end of the name

# Export the processed data set twice, in Stata (.dta) and SPSS (.sav) format.
# Both files are named after `filename` with the "_PU" suffix appended and are
# written to the current working directory.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)