# Start from a clean workspace. `all.names = TRUE` also removes hidden objects
# (names beginning with a dot). The earlier `all=t` passed base R's transpose
# function `t` rather than the logical TRUE, and relied on partial matching of
# the `all.names` argument.
rm(list = ls(all.names = TRUE))

Setup and create dictionary

# Base name of the dataset being processed; it is reused at the end of the
# script to build the "_PU" output file names.
filename <- "Nepal_HT_Study_Round1_062816" # !!!Update filename

# Load the project's helper script.
# NOTE(review): presumably this defines the helpers called below
# (encode_location, ordinal_recode, top_recode, ...) and loads `mydata` based
# on `filename` -- confirm against functions_1.5.R.
source("functions_1.5.R")

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location (>100,000)
# Weight: weightVar
# Household ID:  hhId, 
# Open-ends: Review responses for any sensitive information, redact as necessary 

#!!!Save flagged dictionary in .xlsx format, add "DatasetReview" to name and continue processing data with subset of flagged variables

Direct PII: variables to be removed

# !!!No Direct PII

Direct PII-team: Encode interviewer names, which may be useful for analysis of interviewer effects

# !!!No Direct PII - team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Replace the VDC names with numeric codes; 999999 is passed as the
# missing-value code.
mydata <- encode_location(variables = "VDC", missing = 999999)
## [1] "Frequency table before encoding"
## VDC. VDC code
##         Barahathawa        Dhungrekhola           Dhurkauli            Lalbandi      Malangawa N.P. 
##                  63                  66                  63                  63                  63 
##           Netraganj            Raniganj           Sankarpur      Bhimeswor N.P.               Bocha 
##                  64                  65                  62                  61                  61 
##         Dandakharka               Fasku            Katakuti            Lamidada              Melung 
##                  62                  62                  63                  62                  64 
##              Pawati             Badegau         Talramarang        BhoteNamlang               Irkhu 
##                  64                  64                  64                  66                  62 
##               Ichok            Kadambas           Langarche            Melamchi             Anaikot 
##                  63                  64                  62                  64                  63 
##    BaluwapatiDeupur   ChalalGaneshsthan    KalatiBhumidanda       MahankalChaur           Methinkot 
##                  62                  67                  62                  61                  62 
##          Patalekhet             RaviOpi              Balkot       Changunarayan            Chitapol 
##                  64                  62                  59                  62                  63 
##             Duwakot               Gundu  Madhyapur Thimi NP             Nankhel             Sirutar 
##                  63                  63                  66                  61                  58 
##             Baireni              Dhussa               Khari           Kiranchok             Naubise 
##                  62                  64                  62                  63                  64 
##           Salyantar        SunaulaBazar              Thakre            Chitlang          Churiyamai 
##                  63                  62                  64                  61                  63 
##              Fakhel           Kulekhani           Nibuwatar        Padampokhari   ShreepurChhatiwan 
##                  62                  62                  60                  65                  62 
## SisneriMahadevsthan       Birendranagar             Jutpani              Kathar          Khairahani 
##                  63                  63                  62                  63                  63 
##            Padampur          Parbatipur               Piple          Shaktikhor       Chhayachhetra 
##                  62                  62                  65                  64                  63 
##           Damachaur            Devsthal            Dhanwang           Phalawang           Sibaratha 
##                  63                  65                  64                  64                  62 
##          Siddheswar             Tribeni            Baijapur             Binauna           Chisapani 
##                  63                  63                  63                  62                  65 
##           Khaskusma            Kohalpur           Nepalgunj             Rajhena          Samserganj 
##                  62                  62                  60                  62                  63 
## [1] "Frequency table after encoding"
## VDC. VDC code
##  980  981  982  983  984  985  986  987  988  989  990  991  992  993  994  995  996  997  998  999 1000 1001 1002 
##   63   66   63   63   63   64   65   62   61   61   62   62   63   62   64   64   64   64   66   62   63   64   62 
## 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 
##   64   63   62   67   62   61   62   64   62   59   62   63   63   63   66   61   58   62   64   62   63   64   63 
## 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 
##   62   64   61   63   62   62   60   65   62   63   63   62   63   63   62   62   65   64   63   63   65   64   64 
## 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 
##   62   63   63   63   62   65   62   62   60   62   63

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" of 10 or less.

# Age (D_2): collapse single years of age into bands. Per the cross-tab printed
# by ordinal_recode, intervals are left-closed, e.g. [15,25) -> "15-24".
break_age <- c(0, 15, 25, 35, 45, 55, 100)
labels_age <- setNames(
  c(1, 2, 3, 4, 5, 6, 7),
  c(
    "Less than 15",
    "15-24",
    "25-34",
    "35-44",
    "45-54",
    "55 and older",
    "NA"
  )
)
mydata <- ordinal_recode(
  variable = "D_2",
  break_points = break_age,
  missing = 999999,
  value_labels = labels_age
)

##     recoded
##      [0,15) [15,25) [25,35) [35,45) [45,55) [55,100) [100,1e+06)
##   13    170       0       0       0       0        0           0
##   14    220       0       0       0       0        0           0
##   15      0     261       0       0       0        0           0
##   16      0     293       0       0       0        0           0
##   17      0     268       0       0       0        0           0
##   18      0     249       0       0       0        0           0
##   19      0     211       0       0       0        0           0
##   20      0     238       0       0       0        0           0
##   21      0     190       0       0       0        0           0
##   22      0     170       0       0       0        0           0
##   23      0     139       0       0       0        0           0
##   24      0     101       0       0       0        0           0
##   25      0       0     145       0       0        0           0
##   26      0       0     130       0       0        0           0
##   27      0       0     119       0       0        0           0
##   28      0       0     110       0       0        0           0
##   29      0       0     105       0       0        0           0
##   30      0       0     151       0       0        0           0
##   31      0       0      86       0       0        0           0
##   32      0       0      95       0       0        0           0
##   33      0       0      75       0       0        0           0
##   34      0       0      77       0       0        0           0
##   35      0       0       0     101       0        0           0
##   36      0       0       0      62       0        0           0
##   37      0       0       0      75       0        0           0
##   38      0       0       0      57       0        0           0
##   39      0       0       0      64       0        0           0
##   40      0       0       0      95       0        0           0
##   41      0       0       0      42       0        0           0
##   42      0       0       0      69       0        0           0
##   43      0       0       0      56       0        0           0
##   44      0       0       0      57       0        0           0
##   45      0       0       0       0      69        0           0
##   46      0       0       0       0      59        0           0
##   47      0       0       0       0      43        0           0
##   48      0       0       0       0      61        0           0
##   49      0       0       0       0      47        0           0
##   50      0       0       0       0      47        0           0
##   51      0       0       0       0      41        0           0
##   52      0       0       0       0      37        0           0
##   53      0       0       0       0      37        0           0
##   54      0       0       0       0      37        0           0
##   55      0       0       0       0       0       52           0
##   56      0       0       0       0       0       27           0
##   57      0       0       0       0       0       25           0
##   58      0       0       0       0       0       31           0
##   59      0       0       0       0       0       27           0
##   60      0       0       0       0       0       41           0
##   61      0       0       0       0       0       15           0
##   62      0       0       0       0       0       26           0
##   63      0       0       0       0       0       11           0
##   64      0       0       0       0       0       10           0
##   65      0       0       0       0       0        2           0
##   66      0       0       0       0       0        1           0
## D_2. How old are you?      [Use the timeline in the manual if the respondent has a ha
## Less than 15        15-24        25-34        35-44        45-54 55 and older 
##          390         2120         1093          678          478          268 
## [1] "Inspect value labels and relabel as necessary"
## Less than 15        15-24        25-34        35-44        45-54 55 and older           NA 
##            1            2            3            4            5            6            7
# Education (D_4): recode into standard categories.
# Per the cross-tab printed by ordinal_recode, raw codes 17 and 18 land in the
# [17,18) and [18,777) intervals, i.e. the two "never attended school" labels.
# NOTE(review): 777/888/999 are presumably the raw Refused / Does not apply /
# Don't know codes, matching labels 9-11 -- confirm against the questionnaire.
break_edu <- c(0, 6, 9, 11, 12, 13, 17, 18, 777, 888, 999)
labels_edu <- setNames(
  c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
  c(
    "Primary or less (0-5)",
    "Lower secondary (6-8)",
    "Secondary (9-10)",
    "SLC (11)",
    "CLASS 12/Intermediate level (12)",
    "Bachelor/Postgraduate level",
    "Literate, but never attended school",
    "Illiterate, and never attended school",
    "Refused",
    "Does not apply",
    "Don't Know"
  )
)
mydata <- ordinal_recode(
  variable = "D_4",
  break_points = break_edu,
  missing = 999,
  value_labels = labels_edu
)

##     recoded
##      [0,6) [6,9) [9,11) [11,12) [12,13) [13,17) [17,18) [18,777) [777,888) [888,999) [999,1e+03)
##   0      6     0      0       0       0       0       0        0         0         0           0
##   1     91     0      0       0       0       0       0        0         0         0           0
##   2    167     0      0       0       0       0       0        0         0         0           0
##   3    184     0      0       0       0       0       0        0         0         0           0
##   4    231     0      0       0       0       0       0        0         0         0           0
##   5    347     0      0       0       0       0       0        0         0         0           0
##   6      0   277      0       0       0       0       0        0         0         0           0
##   7      0   380      0       0       0       0       0        0         0         0           0
##   8      0   423      0       0       0       0       0        0         0         0           0
##   9      0     0    312       0       0       0       0        0         0         0           0
##   10     0     0    322       0       0       0       0        0         0         0           0
##   11     0     0      0     767       0       0       0        0         0         0           0
##   12     0     0      0       0     443       0       0        0         0         0           0
##   13     0     0      0       0       0      81       0        0         0         0           0
##   14     0     0      0       0       0      24       0        0         0         0           0
##   17     0     0      0       0       0       0     357        0         0         0           0
##   18     0     0      0       0       0       0       0      601         0         0           0
## D_4. What is your highest completed education level?     [You do not need to read the
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                  1026                                  1080                                   634 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                   767                                   443                                   105 
##   Literate, but never attended school Illiterate, and never attended school                                  <NA> 
##                                   357                                   601                                    14 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                               Refused 
##                                     7                                     8                                     9 
##                        Does not apply                            Don't Know 
##                                    10                                    11
# Top code household composition variables with large and unusual numbers.
# D_20 (number of children): 5 or more children are collapsed into one top
# category; 888 and 999999 are passed as missing-value codes.
mydata <- top_recode(
  variable = "D_20",
  break_point = 5,
  missing = c(888, 999999)
)
## [1] "Frequency table after encoding"
## D_20. How many children do you have?
##    0    1    2    3    4    5    6    7    8    9   10   11  888 
## 2213  591  840  655  342  189  103   37   15    4    2    2   34

## [1] "Frequency table after encoding"
## D_20. How many children do you have?
##         0         1         2         3         4 5 or more       888 
##      2213       591       840       655       342       352        34
# Top code high income (Inc_17) at the 99.5th percentile.
# The percentile is computed on all values other than the 999999 missing code.
# `%in%` is used instead of `!=` so that NA entries do not turn into NA
# subscripts, and `na.rm = TRUE` keeps quantile() from erroring if the column
# contains NA. With no NA present the result is identical to the previous code.
# NOTE(review): the frequency table shows 777, 888 and 999 among the income
# values; if these are Refused / Not applicable / Don't know codes they are
# currently counted as incomes in the percentile -- confirm and exclude if so.
percentile_99.5 <- floor(quantile(
  mydata$Inc_17[!mydata$Inc_17 %in% 999999],
  probs = 0.995,
  na.rm = TRUE
))
mydata <- top_recode(
  variable = "Inc_17",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table after encoding"
## Inc_17. Approximately what was your household's cash income in the last month? (in NRS)
##       0      10      15      30      35      36     300     400     500     600     777     888     999    1000 
##     169       1       1       1       1       1       1       3       8       1      59       1     310      31 
##    1200    1250    1300    1400    1500    1600    2000    2083    2100    2200    2300    2500    2600    3000 
##      11       1       1       1      34       2     110       1       1       1       1      22       1     165 
##    3500    3800    4000    4100    4500    5000    5500    5600    6000    6500    7000    7500    8000    8500 
##       7       1     156       1      10     434       2       1     171       2     154       8     149       1 
##    9000    9500   10000   10400   10500   11000   12000   12200   12500   12800   13000   14000   15000   16000 
##      75       1     505       1       2      12     194       1       7       1      52      29     484      41 
##   17000   17200   18000   19000   20000   21000   22000   23000   24000   24400   24500   25000   26000   27000 
##      25       1      52       4     419       8      22       5      10       1       1     238       8       4 
##   28000   29000   30000   31000   32000   33000   34500   35000   36000   38000   39000   40000   41000   41600 
##       8       3     225       3       7       5       1      72       5       1       1      98       1       1 
##   42000   43000   44000   45000   50000   52000   53000   55000   58000   59500   60000   65000   66000   70000 
##       2       3       1      24     124       1       1       8       2       1      50       1       1      14 
##   75000   80000   85000   90000   1e+05  108000  109000  110000  120000  125000  130000  150000  180000  190000 
##       5      17       4       7      33       1       1       1       4       2       1      14       1       1 
##   2e+05   3e+05  320000  350000   4e+05   5e+05  630000   7e+05 1500000   2e+06 
##      22       1       1       1       1       1       1       1       1       1

## [1] "Frequency table after encoding"
## Inc_17. Approximately what was your household's cash income in the last month? (in NRS)
##             0            10            15            30            35            36           300           400 
##           169             1             1             1             1             1             1             3 
##           500           600           777           888           999          1000          1200          1250 
##             8             1            59             1           310            31            11             1 
##          1300          1400          1500          1600          2000          2083          2100          2200 
##             1             1            34             2           110             1             1             1 
##          2300          2500          2600          3000          3500          3800          4000          4100 
##             1            22             1           165             7             1           156             1 
##          4500          5000          5500          5600          6000          6500          7000          7500 
##            10           434             2             1           171             2           154             8 
##          8000          8500          9000          9500         10000         10400         10500         11000 
##           149             1            75             1           505             1             2            12 
##         12000         12200         12500         12800         13000         14000         15000         16000 
##           194             1             7             1            52            29           484            41 
##         17000         17200         18000         19000         20000         21000         22000         23000 
##            25             1            52             4           419             8            22             5 
##         24000         24400         24500         25000         26000         27000         28000         29000 
##            10             1             1           238             8             4             8             3 
##         30000         31000         32000         33000         34500         35000         36000         38000 
##           225             3             7             5             1            72             5             1 
##         39000         40000         41000         41600         42000         43000         44000         45000 
##             1            98             1             1             2             3             1            24 
##         50000         52000         53000         55000         58000         59500         60000         65000 
##           124             1             1             8             2             1            50             1 
##         66000         70000         75000         80000         85000         90000         1e+05        108000 
##             1            14             5            17             4             7            33             1 
##        109000        110000        120000        125000        130000        150000        180000        190000 
##             1             1             4             2             1            14             1             1 
## 2e+05 or more 
##            31
# Top code household expenditure (Inc_23) at the 99.5th percentile.
# The percentile is computed on all values other than the 999999 missing code.
# `%in%` is used instead of `!=` so that NA entries do not turn into NA
# subscripts, and `na.rm = TRUE` keeps quantile() from erroring if the column
# contains NA. With no NA present the result is identical to the previous code.
# NOTE(review): the frequency table shows 777 and 999 among the expenditure
# values; if these are Refused / Don't know codes they are currently counted
# as amounts in the percentile -- confirm and exclude if so.
percentile_99.5 <- floor(quantile(
  mydata$Inc_23[!mydata$Inc_23 %in% 999999],
  probs = 0.995,
  na.rm = TRUE
))
mydata <- top_recode(
  variable = "Inc_23",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table after encoding"
## Inc_23. In a typical month, what is your total household expenditure? (in NRS)
##     15    200    300    400    500    600    700    777    800    900    999   1000   1070   1100   1200   1300 
##      1      1      3      1     18      1      1      4      1      1    179     39      1      1     14      2 
##   1400   1500   1600   1800   2000   2200   2400   2500   2600   2800   3000   3500   4000   4009   4500   5000 
##      1     38      1      1    167      3      1     61      1      1    310     29    330      1     17    721 
##   5500   6000   6500   7000   7500   8000   9000  10000  11000  12000  13000  13500  14000  15000  16000  17000 
##      2    352      1    286      3    272    103    773      7    181     47      1     16    446     14      8 
##  18000  19000  20000  21000  22000  23000  24000  25000  27000  28000  30000  32000  32500  35000  40000  42000 
##     24      1    251      4     13      4      4     95      3      1     78      2      1     17     23      1 
##  45000  50000  60000  70000  80000  90000  1e+05 120000 140000  2e+05  9e+05 
##      4     17      9      3      2      1      2      1      1      1      1

## [1] "Frequency table after encoding"
## Inc_23. In a typical month, what is your total household expenditure? (in NRS)
##            15           200           300           400           500           600           700           777 
##             1             1             3             1            18             1             1             4 
##           800           900           999          1000          1070          1100          1200          1300 
##             1             1           179            39             1             1            14             2 
##          1400          1500          1600          1800          2000          2200          2400          2500 
##             1            38             1             1           167             3             1            61 
##          2600          2800          3000          3500          4000          4009          4500          5000 
##             1             1           310            29           330             1            17           721 
##          5500          6000          6500          7000          7500          8000          9000         10000 
##             2           352             1           286             3           272           103           773 
##         11000         12000         13000         13500         14000         15000         16000         17000 
##             7           181            47             1            16           446            14             8 
##         18000         19000         20000         21000         22000         23000         24000         25000 
##            24             1           251             4            13             4             4            95 
##         27000         28000         30000         32000         32500         35000         40000         42000 
##             3             1            78             2             1            17            23             1 
##         45000 50000 or more 
##             4            38

Indirect PII - Categorical:

# !!!Include relevant variables in list below

# Names of the categorical indirect-PII variables to tabulate, grouped by
# questionnaire prefix.
indirect_PII <- c(
  "D_3", "D_4", "D_6", "D_9",
  paste0("EM_16_", c(1, 2, 3, 6)),
  "CM_1C", "CM_2",
  paste0("ME_", c(1, 3, 5, 7, 13, 14, 16))
)

# Pass the variable list to the project's table-capturing helper.
capture_tables(indirect_PII)

# Encode caste / ethnic background (D_3) by reusing the location encoder;
# 999999 is passed as the missing-value code.
# NOTE(review): the captured output shows the codes assigned consecutively in
# the original category order with unchanged frequencies, so very small groups
# (several have a count of 1) remain distinguishable after encoding -- confirm
# this is acceptable for release.
mydata <- encode_location(variables = "D_3", missing = 999999)
## [1] "Frequency table before encoding"
## D_3. What is your ethnic background?    [You do not need to read the response choices
##            chhetri     BRAHMAN (HILL)              magar              tharu             tamang              newar 
##               1296                704                302                206               1078                391 
##             muslim               kami              yadav                rai             gurung       DAMAIN/DHOLI 
##                  7                195                 21                 12                 51                 93 
##              limbu            thakuri              sarki               teli CHAMAR/HARIJAN/RAM              koiri 
##                  2                 80                 68                  6                  2                 74 
##              kurmi DUSADH/PASWAN/PASI              sonar    BRAHMAN (TARAI)      GHARTI/BHUJEL              malla 
##                  1                  9                 11                  9                 49                  1 
##             kalwar              kumal       HAJAM/THAKUR            sunuwar              sudhi              lohar 
##                 11                 35                  3                  3                  1                  5 
##              tatma             khatwe              majhi             nuniya             kumhar            danuwar 
##                  3                  3                  6                  2                  3                  4 
##      CHEPANG/PRAJA            haluwai             rajput           kayastha             badhae            marwadi 
##                 92                  1                  5                  8                  1                  4 
##              thami              darai             pahari                dom               bote   ADIBASI/JANAJATI 
##                 18                 14                 10                  1                  1                  1 
##               badi        OTHER CASTE               <NA> 
##                  4                118                  2 
## [1] "Frequency table after encoding"
## D_3. What is your ethnic background?    [You do not need to read the response choices
##  980  981  982  983  984  985  986  987  988  989  990  991  992  993  994  995  996  997  998  999 1000 1001 1002 
## 1296  704  302  206 1078  391    7  195   21   12   51   93    2   80   68    6    2   74    1    9   11    9   49 
## 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 
##    1   11   35    3    3    1    5    3    3    6    2    3    4   92    1    5    8    1    4   18   14   10    1 
## 1026 1027 1028 1029 <NA> 
##    1    1    4  118    2
# Religion (D_6): keep Hindu and Buddhist, group everything else.
# Per the cross-tab printed by ordinal_recode, raw codes 3, 4, 6 and 9 all fall
# in the [3,777) interval and are therefore labelled "Other".
break_rel <- c(1, 2, 3, 777, 888, 999)
labels_rel <- setNames(
  c(1, 2, 3, 4, 5, 6),
  c(
    "Hindu",
    "Buddhist",
    "Other",
    "Refused",
    "Not applicable",
    "Don't know"
  )
)
mydata <- ordinal_recode(
  variable = "D_6",
  break_points = break_rel,
  missing = 999,
  value_labels = labels_rel
)

##    recoded
##     [1,2) [2,3) [3,777) [777,888) [888,999) [999,1e+03)
##   1  3941     0       0         0         0           0
##   2     0   944       0         0         0           0
##   3     0     0      12         0         0           0
##   4     0     0       2         0         0           0
##   6     0     0     116         0         0           0
##   9     0     0       3         0         0           0
## D_6. What is your religious background?    [You do not need to read the response choi
##    Hindu Buddhist    Other     <NA> 
##     3941      944      133        9 
## [1] "Inspect value labels and relabel as necessary"
##          Hindu       Buddhist          Other        Refused Not applicable     Don't know 
##              1              2              3              4              5              6
# Reason for moving (CM_2): keep the three main reasons, group everything else.
# Per the cross-tab printed by ordinal_recode, raw codes 4-7 all fall in the
# [4,777) interval and are therefore labelled "Other".
break_mov <- c(1, 2, 3, 4, 777, 888, 999)
labels_mov <- c(
  "Family reasons (e.g., marriage)" = 1,
  "For education / training" = 2,
  "For work" = 3,
  "Other" = 4,
  "Refused to answer" = 5, # spelling fixed (was "asnwer"); label ships in the public file
  "Not applicable" = 6,
  "Don't know" = 7
)
mydata <- ordinal_recode(
  variable = "CM_2",
  break_points = break_mov,
  missing = 999,
  value_labels = labels_mov
)

##    recoded
##     [1,2) [2,3) [3,4) [4,777) [777,888) [888,999) [999,1e+03)
##   1   491     0     0       0         0         0           0
##   2     0    99     0       0         0         0           0
##   3     0     0   966       0         0         0           0
##   4     0     0     0       7         0         0           0
##   5     0     0     0       1         0         0           0
##   6     0     0     0     126         0         0           0
##   7     0     0     0      25         0         0           0
## CM_2. Primary Reason for Migrating
## Family reasons (e.g., marriage)        For education / training                        For work 
##                             491                              99                             966 
##                           Other                            <NA> 
##                             159                            3312 
## [1] "Inspect value labels and relabel as necessary"
## Family reasons (e.g., marriage)        For education / training                        For work 
##                               1                               2                               3 
##                           Other               Refused to asnwer                  Not applicable 
##                               4                               5                               6 
##                      Don't know 
##                               7

Matching and crosstabulations: Run automated PII check

# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# All variable names should correspond to the names in the data file
# Key variables used here: D_1, D_2 (recoded age bands) and D_4 (recoded
# education). D_1 prints with a "Female" label further down, so it is
# presumably gender.

selectedKeyVars <- c("D_1", "D_2", "D_4") ##!!! Replace with candidate categorical demo vars
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial # auto-prints the sdcMicro summary, including k-anonymity violation counts
## The input dataset consists of 5027 rows and 171 variables.
##   --> Categorical key variables: D_1, D_2, D_4
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size            Size of smallest (>0)       
##           D_1                    2 (2)  2513.500 (2513.500)                  2510 (2510)
##           D_2                    6 (6)   837.833  (837.833)                   268  (268)
##           D_4                    9 (9)   626.625  (626.625)                   105  (105)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 3 (0.060%)
##   - 3-anonymity: 6 (0.119%)
##   - 5-anonymity: 12 (0.239%)
## 
## ----------------------------------------------------------------------

Show values of key variable of records that violate k-anonymity

#mydata <- labelDataset(mydata)
# Flag records whose value in column 2 of the individual-risk matrix is below
# 2, i.e. records violating 2-anonymity.
# NOTE(review): column 2 is presumably the sample frequency count (fk) of the
# key-variable combination -- confirm against the sdcMicro documentation.
notAnon <- sdcInitial@risk$individual[, 2] < 2
# Print the key-variable values of the flagged records.
mydata[notAnon, selectedKeyVars]
## # A tibble: 3 x 3
##          D_1              D_2                                  D_4
##    <dbl+lbl>        <dbl+lbl>                            <dbl+lbl>
## 1 2 [Female] 1 [Less than 15] 4 [SLC (11)]                        
## 2 2 [Female] 5 [45-54]        6 [Bachelor/Postgraduate level]     
## 3 2 [Female] 5 [45-54]        5 [CLASS 12/Intermediate level (12)]
# Run sdcMicro local suppression on the key variables, using its defaults
# (no extra arguments are passed).
sdcFinal <- localSuppression(sdcInitial)

# Recombining anonymized variables

# Print the key variables of the flagged records after suppression, so the
# suppressed cells can be inspected.
extractManipData(sdcFinal)[notAnon, selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used
##      D_1 D_2 D_4
## 253    2   1  NA
## 2170   2   5  NA
## 4874   2   5  NA
# Overwrite D_4 with code 9 for the records flagged as violating 2-anonymity.
# NOTE(review): code 9 is labelled "Refused" in labels_edu, and this assumes
# D_4 is the only key variable that localSuppression blanked (true for the
# three rows printed above) -- confirm both are intended.
mydata[notAnon, "D_4"] <- 9

Open-ends: review responses for any sensitive information, redact as necessary

# Export the open-ended responses of HTV_1_10_TEXT for manual review. The
# captured warning shows the helper creating a "verbatims" directory under the
# working directory.
report_open(list_open_ends = "HTV_1_10_TEXT")
## Warning in dir.create(file.path(getwd(), "verbatims"), recursive = TRUE): 'C:\Users\C_Pablo_Diego-
## Rosell\Desktop\Other Projects\Dwight\ILAB PII\Data\FINAL\UC Berkeley_Nepal_Awareness-General
## Public\PublicData_R3\Data\Nepal_HT_Study_Round1_062816\verbatims' already exists
# HTV_1_10_TEXT holds raw verbatim responses in Nepali, so the whole column is
# dropped rather than redacted.

mydata <- mydata[!(names(mydata) %in% "HTV_1_10_TEXT")] # Drop as actually verbatim data in Nepali

GPS data: Displace

# !!!No GPS

Save processed data in Stata and SPSS format

# Write the processed data twice, as Stata (.dta) and SPSS (.sav), appending
# "_PU" (Public Use) to the base file name.

haven::write_dta(
  mydata,
  paste0(filename, "_PU.dta")
)
haven::write_sav(
  mydata,
  paste0(filename, "_PU.sav")
)