# Start from a clean workspace before processing.
# `TRUE` is spelled out: a bare `t` is base R's transpose function, not the
# logical flag this call needs, so hidden (dot-prefixed) objects were not
# being requested. `all.names` is written in full rather than relying on
# partial argument matching of `all`.
rm(list = ls(all.names = TRUE))

Setup and create dictionary

# !!!Update filename
# Base name of the raw data file for this round.
# NOTE(review): presumably read by the sourced helper script -- confirm.
filename <- "Nepal Round 3_FinalRaw"

# Load the project helper functions used in the rest of this script.
source("functions_1.7.R")

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Location: Small Location (<100,000) Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

#!!!Save flagged dictionary in .xlsx format and continue processing data with subset of flagged variables

Direct PII: variables to be removed

# !!!Include any Direct PII variables
# Columns flagged as direct PII; they are removed from the data outright.
dropvars <- c(
  "SbjNam", "FrScName", "RvwName",
  "IDR3_3", "IDR3_18", "IDR3_19", "IDR3_23"
)

# Keep every column whose name is not in the drop list.
is_direct_pii <- names(mydata) %in% dropvars
mydata <- mydata[!is_direct_pii]

Direct PII-team: Encode interviewer names, which may be useful for analysis of interviewer effects

!!!Replace vector in "variables" field below with relevant variable names

# Encode Direct PII-team
# Interviewer names in `Srvyr` are replaced by codes (see the before/after
# frequency tables printed by the helper).
mydata <- encode_direct_PII_team(variables = "Srvyr")
## [1] "Frequency table before encoding"
## Srvyr. Srvyr
##       alka.adhikari    ambir.raj.kulung         amrita.roka anjana.kumari.dulal     ashish.shrestha bhanu.bhakta.dhakal 
##                  79                  96                  94                  98                 132                  79 
##       dev.raj.nepal dhan.kumari.darlami         dilip.joshi       gita.maharjan  gyanendra.parajuli    kajiman.mahatara 
##                 240                  86                 217                 103                 217                 236 
##       kamala.sharma      mani.ram.dahal        manjula.giri min.kumari.shrestha       nabina.khadka      niraj.shrestha 
##                  80                 242                  99                  86                  80                  89 
##     prahlad.mainali    pramila.shrestha    pratika.shrestha rabischandra.bhatta   ram.kumar.acharya     sajina.shrestha 
##                 239                  77                  88                  92                  91                  78 
##     sandip.shrestha     sanjay.pokharel       sapana.gautam     sarita.shrestha      sunil.shrestha     tirtha.maya.rai 
##                 273                  69                  84                 101                 272                 106 
##             upeksha        yamuna.karki 
##                   1                  87 
## [1] "Frequency table after encoding"
## Srvyr. Srvyr
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32 
##  79  96  94  98 132  79 240  86 217 103 217 236  80 242  99  86  80  89 239  77  88  92  91  78 273  69  84 101 272 106   1  87

Small locations: Encode locations with pop <100,000 using random large numbers

!!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variables to encode: the VDC/municipality-within-district fields
# and the ward number.
locvars <- c(
  "IDR3_6_19", "IDR3_6_22", "IDR3_6_23", "IDR3_6_24", "IDR3_6_26",
  "IDR3_6_30", "IDR3_6_31", "IDR3_6_35", "IDR3_7"
)

# 999999 is passed as the missing-value code for these variables.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## IDR3_6_19. VDC or Municaplity of District Sarlahi
##  Barahathawa Dhungrekhola    Dhurkauli     Lalbandi Malangawa NP    Netraganj     Raniganj    Sankarpur       999999 
##           65           66           64           62           64           64           64           62         3500 
## [1] "Frequency table after encoding"
## IDR3_6_19. VDC or Municaplity of District Sarlahi
##    280    281    282    283    284    285    286    287 999999 
##     62     65     66     64     64     64     62     64   3500 
## [1] "Frequency table before encoding"
## IDR3_6_22. VDC or Municaplity of District Dolakha
## Bhimeswor NP        Bocha  Dandakharka        Fasku     Katakuti    Lamidanda       Melung       Pawati       999999 
##           61           39           62           62           63           62           64           64         3534 
## [1] "Frequency table after encoding"
## IDR3_6_22. VDC or Municaplity of District Dolakha
##    904    905    907    908    909    910    911    912 999999 
##     64     62     63     61     64     62     62     39   3534 
## [1] "Frequency table before encoding"
## IDR3_6_23. VDC or Municaplity of District Sindhupalchok
##      Badegau        Irkhu BhoteNamlang   Talamarang        Ichok     Kadambas    Langarche     Melamchi       999999 
##           64           66           66           65           63           60           62           64         3501 
## [1] "Frequency table after encoding"
## IDR3_6_23. VDC or Municaplity of District Sindhupalchok
##    876    877    879    880    881    882    883    884 999999 
##     66     65     62     60     64     64     63     66   3501 
## [1] "Frequency table before encoding"
## IDR3_6_24. VDC or Municaplity of District Kavrepalanchok
##           Anaikot Baluwapati Deupur Chalal Ganeshthan Kalati Bhumidanda    Mahankal Chaur         Methinkot        Patalekhet 
##                63                62                67                62                61                62                64 
##           Raviopi            999999 
##                62              3508 
## [1] "Frequency table after encoding"
## IDR3_6_24. VDC or Municaplity of District Kavrepalanchok
##    513    514    515    516    518    519    520    521 999999 
##     62     62     67     61     62     63     62     64   3508 
## [1] "Frequency table before encoding"
## IDR3_6_26. VDC or Municaplity of District Bhaktapur
##             Balkot      Changunarayan           Chitapol            Duwakot              Gundu Madhyapur Thimi NP            Nankhel 
##                 59                 63                 64                 63                 62                 70                 62 
##            Sirutar             999999 
##                 58               3510 
## [1] "Frequency table after encoding"
## IDR3_6_26. VDC or Municaplity of District Bhaktapur
##    689    690    691    692    693    694    695    697 999999 
##     62     62     63     63     59     58     70     64   3510 
## [1] "Frequency table before encoding"
## IDR3_6_30. VDC or Municaplity of District Dhading
##       Baireni        Dhussa         Khari     Kiranchok       Naubise     Salyantar Sunaula Bazar        Thakre        999999 
##            62            64            62            63            64            63            62            64          3507 
## [1] "Frequency table after encoding"
## IDR3_6_30. VDC or Municaplity of District Dhading
##    406    407    408    409    410    411    412    413 999999 
##     64     63     62     64     64     62     63     62   3507 
## [1] "Frequency table before encoding"
## IDR3_6_31. VDC or Municaplity of District Makwanpur
##             Chitlang           Churiyamai               Fakhel         Padampokhari            Kulekhani            Nibuwatar 
##                   61                   63                   63                   65                   62                   61 
##   Shreepur Chhatiwan Sisneri Mahadevsthan               999999 
##                   62                   63                 3511 
## [1] "Frequency table after encoding"
## IDR3_6_31. VDC or Municaplity of District Makwanpur
##    635    636    637    638    639    640    641    642 999999 
##     61     61     63     62     63     62     65     63   3511 
## [1] "Frequency table before encoding"
## IDR3_6_35. VDC or Municaplity of District Chitwan
## Birendranagar       Jutpani        Kathar    Khairahani      Padampur    Parbatipur         Piple    Shaktikhor        999999 
##            63            64            63            62            61            62            65            66          3505 
## [1] "Frequency table after encoding"
## IDR3_6_35. VDC or Municaplity of District Chitwan
##    797    798    799    800    801    802    804    805 999999 
##     63     62     63     66     64     62     61     65   3505 
## [1] "Frequency table before encoding"
## IDR3_7. Ward Number
##   1   2   3   4   5   6   7   8   9  10  11  12  14 
## 504 542 349 494 322 545 393 300 429  25  32  38  38 
## [1] "Frequency table after encoding"
## IDR3_7. Ward Number
## 505 506 507 508 509 510 511 512 513 514 515 516 517 
## 393  25  38 494 349 504 322  38 545 300 542 429  32

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" of 10 or less. 

# Lower bounds of the age bands; the last break (100) closes the
# "65 and older" band.
break_age <- c(15, 25, 35, 45, 55, 65, 100)

# Numeric code for each band, named by its display label. Code 7 ("NA") is
# the band above the last break.
labels_age <- c(1, 2, 3, 4, 5, 6, 7)
names(labels_age) <- c(
  "15-24", "25-34", "35-44", "45-54", "55-64", "65 and older", "NA"
)
# Collapse reported age (IDR3_20) into the bands defined above; 999999 is
# passed as the missing-value code.
mydata <- ordinal_recode(
  variable = "IDR3_20",
  break_points = break_age,
  missing = 999999,
  value_labels = labels_age
)

## [1] "Frequency table before encoding"
## IDR3_20. How old are you?
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 
## 17 43 62 62 64 72 68 73 78 61 50 42 35 53 60 50 55 56 53 52 36 35 38 51 35 43 28 41 44 31 36 34 37 35 32 29 31 31 28 24 26 18 27 33 15 18 
## 62 63 64 65 66 67 68 69 
## 17 18 14 13  9 12  2  1 
##         recoded
##          [15,25) [25,35) [35,45) [45,55) [55,65) [65,100) [100,1e+06)
##   16          17       0       0       0       0        0           0
##   17          43       0       0       0       0        0           0
##   18          62       0       0       0       0        0           0
##   19          62       0       0       0       0        0           0
##   20          64       0       0       0       0        0           0
##   21          72       0       0       0       0        0           0
##   22          68       0       0       0       0        0           0
##   23          73       0       0       0       0        0           0
##   24          78       0       0       0       0        0           0
##   25           0      61       0       0       0        0           0
##   26           0      50       0       0       0        0           0
##   27           0      42       0       0       0        0           0
##   28           0      35       0       0       0        0           0
##   29           0      53       0       0       0        0           0
##   30           0      60       0       0       0        0           0
##   31           0      50       0       0       0        0           0
##   32           0      55       0       0       0        0           0
##   33           0      56       0       0       0        0           0
##   34           0      53       0       0       0        0           0
##   35           0       0      52       0       0        0           0
##   36           0       0      36       0       0        0           0
##   37           0       0      35       0       0        0           0
##   38           0       0      38       0       0        0           0
##   39           0       0      51       0       0        0           0
##   40           0       0      35       0       0        0           0
##   41           0       0      43       0       0        0           0
##   42           0       0      28       0       0        0           0
##   43           0       0      41       0       0        0           0
##   44           0       0      44       0       0        0           0
##   45           0       0       0      31       0        0           0
##   46           0       0       0      36       0        0           0
##   47           0       0       0      34       0        0           0
##   48           0       0       0      37       0        0           0
##   49           0       0       0      35       0        0           0
##   50           0       0       0      32       0        0           0
##   51           0       0       0      29       0        0           0
##   52           0       0       0      31       0        0           0
##   53           0       0       0      31       0        0           0
##   54           0       0       0      28       0        0           0
##   55           0       0       0       0      24        0           0
##   56           0       0       0       0      26        0           0
##   57           0       0       0       0      18        0           0
##   58           0       0       0       0      27        0           0
##   59           0       0       0       0      33        0           0
##   60           0       0       0       0      15        0           0
##   61           0       0       0       0      18        0           0
##   62           0       0       0       0      17        0           0
##   63           0       0       0       0      18        0           0
##   64           0       0       0       0      14        0           0
##   65           0       0       0       0       0       13           0
##   66           0       0       0       0       0        9           0
##   67           0       0       0       0       0       12           0
##   68           0       0       0       0       0        2           0
##   69           0       0       0       0       0        1           0
##   999999       0       0       0       0       0        0        1983
## [1] "Frequency table after encoding"
## IDR3_20. How old are you?
##        15-24        25-34        35-44        45-54        55-64 65 and older           NA 
##          539          515          403          324          210           37         1983 
## [1] "Inspect value labels and relabel as necessary"
##        15-24        25-34        35-44        45-54        55-64 65 and older           NA 
##            1            2            3            4            5            6            7
# !!!Include relevant variables in list below
# Variables flagged as indirect PII during the dictionary review. Order is
# preserved exactly; entries are grouped by name pattern for readability only.
indirect_PII <- c(
  # Respondent- and household-level items
  "IDR3_20", "D_9",
  "HC2_O1", "HC2_O2", "HC2_O3", "HC2_O4", "HC2_O5", "HC2_O6",
  "HC3", "HC4_1", "HC4_2", "HC4_3", "HC4_4",
  "D_4", "Inc_17",
  "MC_39x3_1b", "MC_39x3_1d", "Stigma1_2",
  "HT_13x3_1x3", "HT_13x3_4x3", "HT_13x3_7x3", "HT_13x3_13x3",
  "LE20_1_r3", "I_3_conjoint2_3_r3", "G1_04",
  "P3", "P3A", "P4", "P4A",
  "P8_O1", "P8_O2", "P8_O3",
  "P12A", "P13A_O1", "P13A_O2",
  "P9B", "P10B", "P12B", "P13B_O1", "P13B_O2",
  # P9-P13 items, suffix C (indexed I_1 to I_8)
  "I_1_P9C", "I_1_P10C", "I_1_P11C", "I_1_P11_A3", "I_1_P12C", "I_1_P13C_O1",
  "I_2_P9C", "I_2_P10C", "I_2_P11_A3", "I_2_P12C",
  "I_3_P9C", "I_3_P10C", "I_3_P11_A3", "I_3_P12C", "I_3_P13C_O1",
  "I_4_P9C", "I_4_P10C", "I_4_P11_A3", "I_4_P12C", "I_4_P13C_O1",
  "I_5_P9C", "I_5_P10C", "I_5_P11_A3", "I_5_P12C", "I_5_P13C_O1",
  "I_6_P9C", "I_6_P10C", "I_6_P11C", "I_6_P11_A3", "I_6_P12C", "I_6_P13C_O1",
  "I_7_P9C", "I_7_P10C", "I_7_P11_A3", "I_7_P12C", "I_7_P13C_O1",
  "I_8_P9C", "I_8_P10C", "I_8_P11C", "I_8_P11_A3", "I_8_P12C", "I_8_P13C_O1",
  # P9-P13 items, suffix D (indexed I_1 to I_2)
  "I_1_P9D", "I_1_P10D", "I_1_P11D", "I_1_P11_A4", "I_1_P12D", "I_1_P13D_O1",
  "I_2_P9D", "I_2_P10D", "I_2_P11D", "I_2_P11_A4", "I_2_P12D", "I_2_P13D_O1",
  # P9-P13 items, suffix E (indexed I_1 to I_3)
  "I_1_P9E", "I_1_P10E", "I_1_P11E", "I_1_P11_A5", "I_1_P12E", "I_1_P13E_O1",
  "I_2_P9E", "I_2_P10E", "I_2_P11E", "I_2_P11_A5", "I_2_P12E", "I_2_P13E_O1",
  "I_3_P9E", "I_3_P10E", "I_3_P11E", "I_3_P11_A5", "I_3_P12E", "I_3_P13E_O1",
  # P18-P20 items
  "P20A", "P18B", "P19B",
  "I_1_P18C", "I_1_P19C", "I_1_P20C", "I_2_P18C",
  "I_1_P18D",
  "I_2_P18D", "I_2_P19D", "I_2_P20D",
  "I_3_P18D", "I_3_P19D", "I_3_P20D",
  "I_4_P18D", "I_4_P19D", "I_4_P20D",
  "I_5_P18D", "I_5_P19D", "I_5_P20D",
  "I_6_P18D", "I_6_P19D", "I_6_P20D",
  "I_7_P18D", "I_7_P19D", "I_7_P20D",
  "I_8_P18D", "I_8_P19D", "I_8_P20D",
  "I_1_P18E", "I_1_P19E", "I_1_P20E",
  "I_2_P18E", "I_2_P19E", "I_2_P20E",
  "I_3_P18E", "I_3_P19E", "I_3_P20E",
  "I_4_P18E", "I_4_P19E", "I_4_P20E",
  "I_5_P18E", "I_5_P19E", "I_5_P20E",
  "I_6_P18E", "I_6_P19E", "I_6_P20E",
  "I_7_P18E", "I_7_P19E", "I_7_P20E",
  "I_8_P18E", "I_8_P19E", "I_8_P20E",
  # Items with the _cl suffix (indexed I_1 to I_6)
  "Child1",
  "I_1_NEW_2_cl", "I_1_P19_cl", "I_1_D_9_cl", "I_1_D_4_cl",
  "I_2_NEW_2_cl", "I_2_P19_cl", "I_2_D_9_cl", "I_2_D_4_cl",
  "I_3_NEW_2_cl", "I_3_P19_cl", "I_3_D_9_cl", "I_3_D_4_cl",
  "I_4_NEW_2_cl", "I_4_P19_cl", "I_4_D_4_cl",
  "I_5_NEW_2_cl", "I_5_P19_cl", "I_5_D_9_cl", "I_5_D_4_cl",
  "I_6_NEW_2_cl", "I_6_P19_cl", "I_6_D_4_cl",
  # Remaining items
  "update6_1", "update6_2", "E2_2"
)

# Tabulate the flagged indirect-PII variables for review.
capture_tables(indirect_PII)

# Recode those with very specific values where more than half of the sample have actual data.

# E2_2 (other language used): encode because the individual languages have
# very low frequencies.
mydata <- encode_direct_PII_team(variables = "E2_2")
## [1] "Frequency table before encoding"
## E2_2. What language did you use other than Nepali?
## MAITHILI    NEWAR   999999 
##        1        6     4004 
## [1] "Frequency table after encoding"
## E2_2. What language did you use other than Nepali?
##    1    2    3 
##    1    6 4004
# Topcode cases with 5 or more adult household members.
# 888 and 999999 are passed as missing-value codes.
mydata <- top_recode("HC3", break_point = 5, missing = c(888, 999999))
## [1] "Frequency table before encoding"
## HC3. How many people living in your household are at least 15 years old (have complet
##      0      1      2      3      4      5      6      7      8      9     13    888 999999 
##    758    544    474    170     52     16      4      1      1      1      3      3   1984

## [1] "Frequency table after encoding"
## HC3. How many people living in your household are at least 15 years old (have complet
##         0         1         2         3         4 5 or more       888    999999 
##       758       544       474       170        52        26         3      1984

# Top code high income to the 99.5 percentile

# Compute the cut-off on observed incomes only: drop the 999999 missing code
# and any NA, since quantile() stops on NA input unless they are removed.
# NOTE(review): 777, 888 and 999 also appear in the income frequency table
# and may be non-response codes -- confirm whether they should be excluded.
income_reported <- mydata$Inc_17
income_reported <- income_reported[
  !is.na(income_reported) & income_reported != 999999
]
percentile_99.5 <- floor(quantile(income_reported, probs = 0.995))

# Assign the result back to `mydata`, as every other recode step does.
# Writing it only to `mydata2` left `mydata` (the object used by the later
# steps) without the income top-coding.
mydata <- top_recode(
  variable = "Inc_17",
  break_point = percentile_99.5,
  missing = 999999
)

# Kept so any later code that still refers to `mydata2` sees the same data.
mydata2 <- mydata
## [1] "Frequency table before encoding"
## Inc_17. Approximately what was your household's cash income in the last month? (in NRS).
##      0      5     50     60    400    500    600    700    777    888    999   1000   1200   1500   1600   2000   2400   2500   2600 
##    133      1      1      1      1      5      2      1      4      1     22      3      1      4      5     27      2      8      1 
##   3000   3500   4000   4500   4800   5000   6000   6500   7000   8000   8500   9000  10000  11000  11500  12000  12846  13000  14000 
##     33      2     42      3      1    101     35      1     38     20      1     24    193      3      1     54      1     11     10 
##  14500  15000  15500  16000  17000  18000  19000  19135  20000  21000  22000  22500  23000  24000  25000  26000  27000  27500  28000 
##      1    174      1     24      8     16      6      1    222      2     12      1      5      5    105      1      5      1      2 
##  30000  32000  33000  34000  35000  36000  37000  40000  41000  45000  48000  50000  54000  55000  57000  60000  62000  65000  66000 
##    177      2      1      1     54      4      1     90      3     17      1    115      2      4      2     48      1      7      1 
##  67000  68000  70000  75000  79000  79500  80000  85000  95000  1e+05 103000 104000 110000 115000 117000 125000 130000 135000 150000 
##      1      1     15      4      1      1     13      1      1     27      1      1      1      2      1      1      1      1     12 
## 160000 170000  2e+05 240000 250000  3e+05 320000  5e+05  6e+05  7e+05 999999  1e+06 
##      1      1      9      1      1      3      1      1      1      1   1984      1

## [1] "Frequency table after encoding"
## Inc_17. Approximately what was your household's cash income in the last month? (in NRS).
##             0             5            50            60           400           500           600           700           777 
##           133             1             1             1             1             5             2             1             4 
##           888           999          1000          1200          1500          1600          2000          2400          2500 
##             1            22             3             1             4             5            27             2             8 
##          2600          3000          3500          4000          4500          4800          5000          6000          6500 
##             1            33             2            42             3             1           101            35             1 
##          7000          8000          8500          9000         10000         11000         11500         12000         12846 
##            38            20             1            24           193             3             1            54             1 
##         13000         14000         14500         15000         15500         16000         17000         18000         19000 
##            11            10             1           174             1            24             8            16             6 
##         19135         20000         21000         22000         22500         23000         24000         25000         26000 
##             1           222             2            12             1             5             5           105             1 
##         27000         27500         28000         30000         32000         33000         34000         35000         36000 
##             5             1             2           177             2             1             1            54             4 
##         37000         40000         41000         45000         48000         50000         54000         55000         57000 
##             1            90             3            17             1           115             2             4             2 
##         60000         62000         65000         66000         67000         68000         70000         75000         79000 
##            48             1             7             1             1             1            15             4             1 
##         79500         80000         85000         95000         1e+05        103000        104000        110000        115000 
##             1            13             1             1            27             1             1             1             2 
##        117000        125000        130000        135000        150000        160000        170000 2e+05 or more        999999 
##             1             1             1             1            12             1             1            19          1984

Matching and crosstabulations: Run automated PII check

# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# Every name listed here must match a column name in the data file.
# Categorical key variables used here: education (D_4) and age (IDR3_20)
selectedKeyVars <- c("D_4", "IDR3_20") ##!!! Replace with candidate categorical demo vars

# weight variable
# selectedWeightVar = c('projwt') ##!!! Replace with weight var

# household id variable (cluster)
# selectedHouseholdID = c('wpid') ##!!! Replace with household id

# Build the sdcMicro object on the key variables, then display its summary
# (category sizes and k-anonymity violations).
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial
## The input dataset consists of 4011 rows and 1162 variables.
##   --> Categorical key variables: D_4, IDR3_20
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories      Mean size           Size of smallest (>0)     
##           D_4                   18 (18)   222.833 (222.833)                     1  (1)
##       IDR3_20                    7  (7)   573.000 (573.000)                    37 (37)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 10 (0.249%)
##   - 3-anonymity: 22 (0.548%)
##   - 5-anonymity: 51 (1.272%)
## 
## ----------------------------------------------------------------------
# Recode of education to reduce risk of re-identification

# Lower bounds of the education bands, applied to the numeric codes of D_4.
break_edu <- c(0, 6, 9, 11, 12, 13, 15, 16, 777, 888, 999)

# Numeric code for each band, named by its display label (one per interval,
# including the band above the last break, labelled "NA").
labels_edu <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
names(labels_edu) <- c(
  "Primary or less (0-5)",
  "Lower secondary (6-8)",
  "Secondary (9-10)",
  "SLC (11)",
  "CLASS 12/Intermediate level (12)",
  "Bachelor/Postgraduate level",
  "Literate, but never attended school",
  "Illiterate, and never attended school",
  "Does not apply",
  "Don't Know",
  "NA"
)
# Collapse highest completed education (D_4) into the bands defined above;
# 999999 is passed as the missing-value code.
mydata <- ordinal_recode(
  variable = "D_4",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## D_4. What is your highest completed education level?  [You do not need to read the re
##               Pre-school/Kindergarten                               CLASS 1                               CLASS 2 
##                                     1                                    31                                    54 
##                               CLASS 3                               CLASS 4                               CLASS 5 
##                                    71                                    75                                   151 
##                               CLASS 6                               CLASS 7                               CLASS 8 
##                                    69                                    85                                   120 
##                               CLASS 9                              CLASS 10                                   SLC 
##                                    84                                   104                                   298 
##           CLASS 12/Intermediate level                        Bachelor level  Post-Secondary Level (e.g., MA, PhD) 
##                                   266                                    62                                     9 
##   Literate, but never attended school Illiterate, and never attended school 
##                                   305                                   242 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   0          1     0      0       0       0       0       0        0         0         0           0
##   1         31     0      0       0       0       0       0        0         0         0           0
##   2         54     0      0       0       0       0       0        0         0         0           0
##   3         71     0      0       0       0       0       0        0         0         0           0
##   4         75     0      0       0       0       0       0        0         0         0           0
##   5        151     0      0       0       0       0       0        0         0         0           0
##   6          0    69      0       0       0       0       0        0         0         0           0
##   7          0    85      0       0       0       0       0        0         0         0           0
##   8          0   120      0       0       0       0       0        0         0         0           0
##   9          0     0     84       0       0       0       0        0         0         0           0
##   10         0     0    104       0       0       0       0        0         0         0           0
##   11         0     0      0     298       0       0       0        0         0         0           0
##   12         0     0      0       0     266       0       0        0         0         0           0
##   13         0     0      0       0       0      62       0        0         0         0           0
##   14         0     0      0       0       0       9       0        0         0         0           0
##   15         0     0      0       0       0       0     305        0         0         0           0
##   16         0     0      0       0       0       0       0      242         0         0           0
##   999999     0     0      0       0       0       0       0        0         0         0        1984
## [1] "Frequency table after encoding"
## D_4. What is your highest completed education level?  [You do not need to read the re
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                   383                                   274                                   188 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                   298                                   266                                    71 
##   Literate, but never attended school Illiterate, and never attended school                                    NA 
##                                   305                                   242                                  1984 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Band HC4_1 (spouse's highest completed education level) using the same
# break points and value labels applied to the other education items.
# NOTE(review): in the rendered output for this call the 4 "Does not apply"
# (888) answers come out labelled "Don't Know", and the 3 "Don't know" (999)
# answers are merged into NA. labels_edu looks shifted by one against the
# 777/888/999 bins of break_edu -- confirm where those two objects are defined.
mydata <- ordinal_recode(
  variable = "HC4_1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_1. What is the highest completed education level of your spouse?  [You do not need 
##                               CLASS 1                               CLASS 2                               CLASS 3 
##                                    15                                    48                                    48 
##                               CLASS 4                               CLASS 5                               CLASS 6 
##                                    68                                   115                                    57 
##                               CLASS 7                               CLASS 8                               CLASS 9 
##                                    77                                   114                                    69 
##                              CLASS 10                                   SLC           CLASS 12/Intermediate level 
##                                    87                                   188                                   127 
##                        Bachelor level  Post-Secondary Level (e.g., MA, PhD)   Literate, but never attended school 
##                                    35                                    11                                   238 
## Illiterate, and never attended school                        Does not apply                            Don't know 
##                                   281                                     4                                     3 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   1         15     0      0       0       0       0       0        0         0         0           0
##   2         48     0      0       0       0       0       0        0         0         0           0
##   3         48     0      0       0       0       0       0        0         0         0           0
##   4         68     0      0       0       0       0       0        0         0         0           0
##   5        115     0      0       0       0       0       0        0         0         0           0
##   6          0    57      0       0       0       0       0        0         0         0           0
##   7          0    77      0       0       0       0       0        0         0         0           0
##   8          0   114      0       0       0       0       0        0         0         0           0
##   9          0     0     69       0       0       0       0        0         0         0           0
##   10         0     0     87       0       0       0       0        0         0         0           0
##   11         0     0      0     188       0       0       0        0         0         0           0
##   12         0     0      0       0     127       0       0        0         0         0           0
##   13         0     0      0       0       0      35       0        0         0         0           0
##   14         0     0      0       0       0      11       0        0         0         0           0
##   15         0     0      0       0       0       0     238        0         0         0           0
##   16         0     0      0       0       0       0       0      281         0         0           0
##   888        0     0      0       0       0       0       0        0         0         4           0
##   999        0     0      0       0       0       0       0        0         0         0           3
##   999999     0     0      0       0       0       0       0        0         0         0        2426
## [1] "Frequency table after encoding"
## HC4_1. What is the highest completed education level of your spouse?  [You do not need 
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                   294                                   248                                   156 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                   188                                   127                                    46 
##   Literate, but never attended school Illiterate, and never attended school                            Don't Know 
##                                   238                                   281                                     4 
##                                    NA 
##                                  2429 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Band HC4_2 (father's highest completed education level) using the same
# break points and value labels applied to the other education items.
# NOTE(review): in the rendered output for this call "Refused to answer" (777,
# n = 1) comes out labelled "Does not apply", "Does not apply" (888, n = 51)
# comes out labelled "Don't Know", and "Don't know" (999, n = 11) is merged
# into NA. labels_edu looks shifted by one against the 777/888/999 bins of
# break_edu -- confirm where those two objects are defined.
mydata <- ordinal_recode(
  variable = "HC4_2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_2. What is the highest completed education level of your father?  [You do not need 
##               Pre-school/Kindergarten                               CLASS 1                               CLASS 2 
##                                     1                                    12                                    27 
##                               CLASS 3                               CLASS 4                               CLASS 5 
##                                    33                                    23                                    61 
##                               CLASS 6                               CLASS 7                               CLASS 8 
##                                    15                                    21                                    44 
##                               CLASS 9                              CLASS 10                                   SLC 
##                                    22                                    26                                    48 
##           CLASS 12/Intermediate level                        Bachelor level  Post-Secondary Level (e.g., MA, PhD) 
##                                    29                                     4                                     2 
##   Literate, but never attended school Illiterate, and never attended school                     Refused to answer 
##                                   121                                   111                                     1 
##                        Does not apply                            Don't know 
##                                    51                                    11 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   0          1     0      0       0       0       0       0        0         0         0           0
##   1         12     0      0       0       0       0       0        0         0         0           0
##   2         27     0      0       0       0       0       0        0         0         0           0
##   3         33     0      0       0       0       0       0        0         0         0           0
##   4         23     0      0       0       0       0       0        0         0         0           0
##   5         61     0      0       0       0       0       0        0         0         0           0
##   6          0    15      0       0       0       0       0        0         0         0           0
##   7          0    21      0       0       0       0       0        0         0         0           0
##   8          0    44      0       0       0       0       0        0         0         0           0
##   9          0     0     22       0       0       0       0        0         0         0           0
##   10         0     0     26       0       0       0       0        0         0         0           0
##   11         0     0      0      48       0       0       0        0         0         0           0
##   12         0     0      0       0      29       0       0        0         0         0           0
##   13         0     0      0       0       0       4       0        0         0         0           0
##   14         0     0      0       0       0       2       0        0         0         0           0
##   15         0     0      0       0       0       0     121        0         0         0           0
##   16         0     0      0       0       0       0       0      111         0         0           0
##   777        0     0      0       0       0       0       0        0         1         0           0
##   888        0     0      0       0       0       0       0        0         0        51           0
##   999        0     0      0       0       0       0       0        0         0         0          11
##   999999     0     0      0       0       0       0       0        0         0         0        3348
## [1] "Frequency table after encoding"
## HC4_2. What is the highest completed education level of your father?  [You do not need 
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                   157                                    80                                    48 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                    48                                    29                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                   121                                   111                                     1 
##                            Don't Know                                    NA 
##                                    51                                  3359 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Band HC4_3 (mother's highest completed education level) using the same
# break points and value labels applied to the other education items.
# NOTE(review): in the rendered output for this call the 20 "Does not apply"
# (888) answers come out labelled "Don't Know", and the 3 "Don't know" (999)
# answers are merged into NA. labels_edu looks shifted by one against the
# 777/888/999 bins of break_edu -- confirm where those two objects are defined.
mydata <- ordinal_recode(
  variable = "HC4_3",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_3. What is the highest completed education level of your mother? If you have more t
##                               CLASS 1                               CLASS 2                               CLASS 3 
##                                     3                                    13                                    10 
##                               CLASS 4                               CLASS 5                               CLASS 6 
##                                    21                                    24                                    13 
##                               CLASS 7                               CLASS 8                               CLASS 9 
##                                     6                                    20                                     5 
##                              CLASS 10                                   SLC           CLASS 12/Intermediate level 
##                                     9                                    23                                     5 
##                        Bachelor level   Literate, but never attended school Illiterate, and never attended school 
##                                     1                                   227                                   260 
##                        Does not apply                            Don't know 
##                                    20                                     3 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   1          3     0      0       0       0       0       0        0         0         0           0
##   2         13     0      0       0       0       0       0        0         0         0           0
##   3         10     0      0       0       0       0       0        0         0         0           0
##   4         21     0      0       0       0       0       0        0         0         0           0
##   5         24     0      0       0       0       0       0        0         0         0           0
##   6          0    13      0       0       0       0       0        0         0         0           0
##   7          0     6      0       0       0       0       0        0         0         0           0
##   8          0    20      0       0       0       0       0        0         0         0           0
##   9          0     0      5       0       0       0       0        0         0         0           0
##   10         0     0      9       0       0       0       0        0         0         0           0
##   11         0     0      0      23       0       0       0        0         0         0           0
##   12         0     0      0       0       5       0       0        0         0         0           0
##   13         0     0      0       0       0       1       0        0         0         0           0
##   15         0     0      0       0       0       0     227        0         0         0           0
##   16         0     0      0       0       0       0       0      260         0         0           0
##   888        0     0      0       0       0       0       0        0         0        20           0
##   999        0     0      0       0       0       0       0        0         0         0           3
##   999999     0     0      0       0       0       0       0        0         0         0        3348
## [1] "Frequency table after encoding"
## HC4_3. What is the highest completed education level of your mother? If you have more t
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                    71                                    39                                    14 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                    23                                     5                                     1 
##   Literate, but never attended school Illiterate, and never attended school                            Don't Know 
##                                   227                                   260                                    20 
##                                    NA 
##                                  3351 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Band HC4_4 (education level of the most-educated grandparent) using the same
# break points and value labels applied to the other education items.
# NOTE(review): in the rendered output for this call the 3 "Don't know" (999)
# answers are merged into NA (3912 + 3 = 3915) instead of keeping their own
# label -- confirm break_edu / labels_edu where they are defined.
mydata <- ordinal_recode(
  variable = "HC4_4",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_4. Think about your grandparents, and the grandparent with the most education. What
##                               CLASS 2                               CLASS 3                               CLASS 4 
##                                     2                                     2                                     1 
##                               CLASS 5                               CLASS 9                                   SLC 
##                                     2                                     1                                     1 
##           CLASS 12/Intermediate level   Literate, but never attended school Illiterate, and never attended school 
##                                     2                                    29                                    56 
##                            Don't know 
##                                     3 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   2          2     0      0       0       0       0       0        0         0         0           0
##   3          2     0      0       0       0       0       0        0         0         0           0
##   4          1     0      0       0       0       0       0        0         0         0           0
##   5          2     0      0       0       0       0       0        0         0         0           0
##   9          0     0      1       0       0       0       0        0         0         0           0
##   11         0     0      0       1       0       0       0        0         0         0           0
##   12         0     0      0       0       2       0       0        0         0         0           0
##   15         0     0      0       0       0       0      29        0         0         0           0
##   16         0     0      0       0       0       0       0       56         0         0           0
##   999        0     0      0       0       0       0       0        0         0         0           3
##   999999     0     0      0       0       0       0       0        0         0         0        3912
## [1] "Frequency table after encoding"
## HC4_4. Think about your grandparents, and the grandparent with the most education. What
##                 Primary or less (0-5)                      Secondary (9-10)                              SLC (11) 
##                                     7                                     1                                     1 
##      CLASS 12/Intermediate level (12)   Literate, but never attended school Illiterate, and never attended school 
##                                     2                                    29                                    56 
##                                    NA 
##                                  3915 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Re-run to check 2-anonymity

# Key variables whose combinations are checked for k-anonymity.
selectedKeyVars <- c("D_4", "IDR3_20")

# Build the sdcMicro object on the current data and print its summary,
# which reports the number of records violating 2/3/5-anonymity.
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial
## The input dataset consists of 4011 rows and 1162 variables.
##   --> Categorical key variables: D_4, IDR3_20
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size           Size of smallest (>0)     
##           D_4                    9 (9)   445.667 (445.667)                    71 (71)
##       IDR3_20                    7 (7)   573.000 (573.000)                    37 (37)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 2 (0.050%)
##   - 3-anonymity: 6 (0.150%)
##   - 5-anonymity: 19 (0.474%)
## 
## ----------------------------------------------------------------------

Show the key-variable values of the records that violate k-anonymity

# Logical row mask of records that fail 2-anonymity: the second column of the
# individual-risk matrix is below 2.
# NOTE(review): column 2 is presumably the sample frequency count (fk) of the
# record's key-variable combination -- confirm against the sdcMicro docs.
notAnon <- sdcInitial@risk$individual[, 2] < 2

# Print the key-variable values of the flagged records.
mydata[notAnon, selectedKeyVars]
## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom
## # A tibble: 2 x 2
##                                D_4   IDR3_20
##                          <dbl+lbl> <dbl+lbl>
## 1  6 [Bachelor/Postgraduate level] 5 [55-64]
## 2 11 [NA]                          4 [45-54]
# Apply local suppression to the key variables. k = 2 is the function's
# default threshold, written out here to match the 2-anonymity check.
sdcFinal <- localSuppression(sdcInitial, k = 2)

# Recombining anonymized variables

# Print the manipulated key variables for the previously flagged records to
# see which cells were suppressed.
extractManipData(sdcFinal)[notAnon, selectedKeyVars]
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used
##      D_4 IDR3_20
## 829   NA       5
## 1078  NA       4
# Carry the suppression back into the working data: overwrite D_4 with code 9
# for the records that violated 2-anonymity.
# NOTE(review): the value-label listing printed earlier shows 9 as
# "Does not apply" and 11 as "NA" -- confirm 9 is the intended code for
# suppressed cells.
mydata[notAnon, "D_4"] <- 9

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here:
# Names of the free-text (open-end) variables to review for sensitive content.
# Each variable is listed exactly once. "I_1_P14D_12_TEXT" used to appear twice
# (once at the head of the vector and once in its I_1_*D group), so it was
# passed to the review step twice; only the grouped entry is kept.
open_ends <- c("RvwComment",
               "SrvyrComment",
               "H2_12_TEXT",
               "HTNx3_2_14_TEXT",
               "HTV_1_10_TEXTx3",
               "HTV_3_11_TEXTx3",
               "CPR5i_TEXT",
               "G1_00_08_TEXT",
               "P13A_10_TEXT",
               "P14A_12_TEXT",
               "P13B_10_TEXT",
               "P14B_12_TEXT",
               "SIMPOC7B_10_TEXT",
               "I_1_P13C_10_TEXT",
               "I_1_P14C_12_TEXT",
               "I_1_SIMPOC7C_10_TEXT",
               "I_2_P14C_12_TEXT",
               "I_1_P13D_10_TEXT",
               "I_1_P14D_12_TEXT",
               "I_2_P14D_12_TEXT",
               "I_1_P13E_10_TEXT",
               "I_1_P14E_12_TEXT",
               "I_1_SIMPOC7E_10_TEXT",
               "I_2_P14E_12_TEXT",
               "I_3_P14E_12_TEXT",
               "NEW_3_12_TEXT",
               "NEW_9",
               "I_1_Q_559_S",
               "I_1_SIMPOC7_cl_10_TEXT",
               "I_2_NEW_9_cl",
               "I_2_SIMPOC7_cl_10_TEXT",
               "I_3_NEW_9_cl",
               "I_3_Q_559_S",
               "I_3_SIMPOC7_cl_10_TEXT",
               "e3e_TEXT",
               "E2_11_8_TEXT",
               "E_14_7_TEXT",
               "L1_other_text",
               "L2_other_text")

# Produce the open-end report for the variables listed in open_ends.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv" (presumably written by the call above -- confirm).
# Identify variables to be deleted or redacted and their row number.

# Remove the SrvyrComment column from the data.
mydata <- mydata[!(names(mydata) %in% "SrvyrComment")]

GPS data: Displace

# Setup map

# World-map polygons restricted to the survey country.
countrymap <- filter(map_data("world"), region == "Nepal")  #!!! Select correct country
#admin <- raster::getData("GADM", country="NP", level=0) #!!! Select correct country map using standard 2-letter country codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
# Boundary object read from a local .rds file instead of the download call
# commented out above.
admin <- readRDS("gadm36_NPL_0_sp.rds")

# Displace every (Longitude, Latitude) pair of GPS variables, one pair per call.
# Compare the summary statistics and maps printed before and after displacement.

# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("Longitude", "Latitude")
# May take a few minutes to process.
mydata <- displace(gps.vars, admin = admin, samp_num = 1, other_num = 100000)
## Warning: Removed 53 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##    Longitude        Latitude    
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:85.07   1st Qu.:27.56  
##  Median :85.46   Median :27.65  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :53      NA's   :53
## Warning: Removed 53 rows containing missing values (geom_point).

## Warning: Removed 53 rows containing missing values (geom_point).

## Warning: Removed 53 rows containing missing values (geom_point).

## Warning: Removed 53 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##    Longitude        Latitude    
##  Min.   :84.28   Min.   :26.84  
##  1st Qu.:85.06   1st Qu.:27.55  
##  Median :85.46   Median :27.64  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.19   Max.   :28.04  
##  NA's   :53      NA's   :53     
## [1] "Processing time = 9.0734845995903"
# Displace the GPSinitial coordinate pair.
# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("GPSinitial_LO", "GPSinitial_LA")
# May take a few minutes to process.
mydata <- displace(gps.vars, admin = admin, samp_num = 1, other_num = 100000)
## Warning: Removed 170 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##  GPSinitial_LO   GPSinitial_LA  
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:85.07   1st Qu.:27.56  
##  Median :85.47   Median :27.65  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :170     NA's   :170
## Warning: Removed 170 rows containing missing values (geom_point).
## Warning: Removed 170 rows containing missing values (geom_point).

## Warning: Removed 170 rows containing missing values (geom_point).

## Warning: Removed 170 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##  GPSinitial_LO   GPSinitial_LA  
##  Min.   :84.28   Min.   :26.82  
##  1st Qu.:85.07   1st Qu.:27.55  
##  Median :85.46   Median :27.64  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.18   Max.   :28.03  
##  NA's   :170     NA's   :170    
## [1] "Processing time = 10.2460972825686"
# Displace the gps_CEa coordinate pair.
# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("gps_CEa_LO", "gps_CEa_LA")
# May take a few minutes to process.
mydata <- displace(gps.vars, admin = admin, samp_num = 1, other_num = 100000)
## Warning: Removed 2272 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##    gps_CEa_LO      gps_CEa_LA   
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:84.96   1st Qu.:27.56  
##  Median :85.45   Median :27.64  
##  Mean   :85.33   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :2272    NA's   :2272
## Warning: Removed 2272 rows containing missing values (geom_point).
## Warning: Removed 2272 rows containing missing values (geom_point).

## Warning: Removed 2272 rows containing missing values (geom_point).

## Warning: Removed 2272 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##    gps_CEa_LO      gps_CEa_LA   
##  Min.   :84.30   Min.   :26.85  
##  1st Qu.:84.98   1st Qu.:27.55  
##  Median :85.45   Median :27.63  
##  Mean   :85.33   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.18   Max.   :28.03  
##  NA's   :2272    NA's   :2272   
## [1] "Processing time = 4.04510399897893"
# Displace the gpsenumimp coordinate pair.
# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("gpsenumimp_LO", "gpsenumimp_LA")
# May take a few minutes to process.
mydata <- displace(gps.vars, admin = admin, samp_num = 1, other_num = 100000)
## Warning: Removed 2249 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##  gpsenumimp_LO   gpsenumimp_LA  
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:84.96   1st Qu.:27.56  
##  Median :85.45   Median :27.64  
##  Mean   :85.32   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :2249    NA's   :2249
## Warning: Removed 2249 rows containing missing values (geom_point).
## Warning: Removed 2249 rows containing missing values (geom_point).

## Warning: Removed 2249 rows containing missing values (geom_point).

## Warning: Removed 2249 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##  gpsenumimp_LO   gpsenumimp_LA  
##  Min.   :84.28   Min.   :26.83  
##  1st Qu.:84.97   1st Qu.:27.55  
##  Median :85.44   Median :27.64  
##  Mean   :85.32   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.19   Max.   :28.04  
##  NA's   :2249    NA's   :2249   
## [1] "Processing time = 3.81436225175858"

Save processed data in Stata and SPSS formats

Adds "_PU" (Public Use) to the end of the name

# Write the processed data next to the input file name with a "_PU" suffix,
# once as Stata (.dta) and once as SPSS (.sav).
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Print the labelled frequency table of one variable as a final spot check.
haven_table("update6_1")
## update6_1. Regardless of if you have been a victim of physical and/or sexual violence from 
##  Extremely familiar       Very familiar   Somewhat familiar   A little familiar Not at all familiar   Refused to answer 
##                 121                 383                 383                 107                  55                   1 
##          Don't know              999999 
##                   1                2960