# Start from a clean workspace before processing.
# all.names = TRUE also removes objects whose names begin with a dot.
# (`t` is base R's transpose function, not the logical TRUE, and `all` only
# reached `all.names` through partial argument matching.)
rm(list = ls(all.names = TRUE))

Setup and create dictionary

Dataset to be processed (e.g. "Nepal Round 3_FinalClean.dta")

# !!!Update filename
# Name of the dataset to process (written here without a file extension).
filename <- "Nepal Round 3_FinalClean"

# Load the project helper functions called throughout this script.
# NOTE(review): `mydata` is used below but never created in this script;
# presumably functions_1.7.R reads it using `filename` -- confirm.
source("functions_1.7.R")

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Location: Small Location (<100,000), Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary

#!!!Save flagged dictionary in .xlsx format and continue processing data with subset of flagged variables

Direct PII: variables to be removed

# !!!Include any Direct PII variables
# Names of the Direct PII columns to remove from the data; list each name once.
dropvars <- c(
  "IDR3_18",
  "IDR3_19",
  "RvwName",
  "LE_reportedby",
  "flag_reportedby"
)
# Keep only the columns whose names are not listed in dropvars.
mydata <- mydata[!(names(mydata) %in% dropvars)]

Direct PII-team: Encode interviewer names, which may be useful for analysis of interviewer effects

!!!Replace vector in "variables" field below with relevant variable names

# Encode Direct PII-team: interviewer names in Srvyr and surveyor are replaced
# by numeric codes (frequency tables before and after are printed below).
# NOTE(review): the blank `surveyor` value (1983 rows) receives its own code
# (1) in the printed table -- confirm that is intended.
mydata <- encode_direct_PII_team(variables = c("Srvyr", "surveyor"))
## [1] "Frequency table before encoding"
## Srvyr. Srvyr
##       alka.adhikari    ambir.raj.kulung         amrita.roka anjana.kumari.dulal     ashish.shrestha bhanu.bhakta.dhakal 
##                  79                  96                  94                  98                 132                  79 
##       dev.raj.nepal dhan.kumari.darlami         dilip.joshi       gita.maharjan  gyanendra.parajuli    kajiman.mahatara 
##                 240                  86                 216                 103                 216                 234 
##       kamala.sharma      mani.ram.dahal        manjula.giri min.kumari.shrestha       nabina.khadka      niraj.shrestha 
##                  80                 242                  99                  86                  80                  89 
##     prahlad.mainali    pramila.shrestha    pratika.shrestha rabischandra.bhatta   ram.kumar.acharya     sajina.shrestha 
##                 239                  77                  88                  92                  91                  78 
##     sandip.shrestha     sanjay.pokharel       sapana.gautam     sarita.shrestha      sunil.shrestha     tirtha.maya.rai 
##                 269                  68                  84                 100                 270                 105 
##        yamuna.karki 
##                  87 
## [1] "Frequency table after encoding"
## Srvyr. Srvyr
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30 
##  79  96  94  98 132  79 240  86 216 103 216 234  80 242  99  86  80  89 239  77  88  92  91  78 269  68  84 100 270 105 
##  31 
##  87 
## [1] "Frequency table before encoding"
## surveyor. Surveyor
##                           alka.adhikari    ambir.raj.kulung         amrita.roka anjana.kumari.dulal     ashish.shrestha 
##                1983                  79                  96                  90                  98                  82 
## bhanu.bhakta.dhakal       dev.raj.nepal dhan.kumari.darlami       gita.maharjan       kamala.sharma        manjula.giri 
##                  77                   2                  85                  99                  79                  99 
## min.kumari.shrestha       nabina.khadka      niraj.shrestha    pramila.shrestha    pratika.shrestha rabischandra.bhatta 
##                  86                  80                  85                  77                  85                  87 
##   ram.kumar.acharya     sajina.shrestha     sandip.shrestha       sapana.gautam     sarita.shrestha     tirtha.maya.rai 
##                  88                  73                  97                  80                  99                 105 
##        yamuna.karki 
##                  86 
## [1] "Frequency table after encoding"
## surveyor. Surveyor
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22   23   24 
## 1983   79   96   90   98   82   77    2   85   99   79   99   86   80   85   77   85   87   88   73   97   80   99  105 
##   25 
##   86

Small locations: Encode locations with pop <100,000 using random large numbers

!!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location variables to encode (check population sizes before listing them).
locvars <- c(
  "vdc",
  "IDR3_6_19", "IDR3_6_22", "IDR3_6_23", "IDR3_6_24",
  "IDR3_6_26", "IDR3_6_30", "IDR3_6_31", "IDR3_6_35"
)
# Replace the location names in locvars with numeric codes. 999999 is passed
# as the missing-value code; it is unchanged in the printed tables below.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## vdc. VDC code
##          Barahathawa         Dhungrekhola             Lalbandi         Malangawa NP            Netraganj 
##                   64                   66                  126                   63                   64 
##             Raniganj            Sankarpur         Bhimeswor NP                Bocha          Dandakharka 
##                   64                   62                   61                   39                   62 
##                Fasku             Katakuti            Lamidanda               Melung               Pawati 
##                   62                   63                   62                   64                   64 
##              Badegau                Irkhu         BhoteNamlang           Talamarang                Ichok 
##                   64                   62                   66                   63                   63 
##             Kadambas            Langarche             Melamchi              Anaikot    Baluwapati Deupur 
##                   64                   62                   64                   63                   62 
##    Chalal Ganeshthan    Kalati Bhumidanda       Mahankal Chaur            Methinkot           Patalekhet 
##                   67                   62                   61                   62                   64 
##              Raviopi               Balkot        Changunarayan             Chitapol              Duwakot 
##                   62                   59                   62                   63                   63 
##                Gundu   Madhyapur Thimi NP              Nankhel              Sirutar              Baireni 
##                   62                   66                   62                   58                   62 
##               Dhussa                Khari            Kiranchok              Naubise            Salyantar 
##                   64                   62                   63                   64                   63 
##        Sunaula Bazar               Thakre             Chitlang           Churiyamai               Fakhel 
##                   62                   64                   61                   62                   62 
##         Padampokhari            Kulekhani            Nibuwatar   Shreepur Chhatiwan Sisneri Mahadevsthan 
##                   65                   62                   60                   62                   63 
##        Birendranagar              Jutpani               Kathar           Khairahani             Padampur 
##                   63                   63                   63                   62                   61 
##           Parbatipur                Piple           Shaktikhor 
##                   62                   65                   65 
## [1] "Frequency table after encoding"
## vdc. VDC code
## 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 
##  62  62  64  62  65  61  62  63  64  62  64  63  62  62  59  62  62  61  63  64  63  62  66  67  62  64  63  64  63  64 
## 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
##  63  63  60  63  63  62  64  62  64  62  65  62  66  64  62  63  62  61  63  62  66  58  63  61  62  63  62  64  65  39 
## 527 528 529 530 
##  62  64  62  63 
## [1] "Frequency table before encoding"
## IDR3_6_19. VDC or Municaplity of District Sarlahi
##  Barahathawa Dhungrekhola    Dhurkauli     Lalbandi Malangawa NP    Netraganj     Raniganj    Sankarpur       999999 
##           64           66           63           63           63           64           64           62         3488 
## [1] "Frequency table after encoding"
## IDR3_6_19. VDC or Municaplity of District Sarlahi
##    904    905    906    907    908    909    910    912 999999 
##     64     63     64     62     63     64     66     63   3488 
## [1] "Frequency table before encoding"
## IDR3_6_22. VDC or Municaplity of District Dolakha
## Bhimeswor NP        Bocha  Dandakharka        Fasku     Katakuti    Lamidanda       Melung       Pawati       999999 
##           61           39           62           62           63           62           64           64         3520 
## [1] "Frequency table after encoding"
## IDR3_6_22. VDC or Municaplity of District Dolakha
##    876    877    878    879    880    881    883    884 999999 
##     64     62     62     64     62     61     39     63   3520 
## [1] "Frequency table before encoding"
## IDR3_6_23. VDC or Municaplity of District Sindhupalchok
##      Badegau        Irkhu BhoteNamlang   Talamarang        Ichok     Kadambas    Langarche     Melamchi       999999 
##           64           62           66           64           63           64           62           64         3488 
## [1] "Frequency table after encoding"
## IDR3_6_23. VDC or Municaplity of District Sindhupalchok
##    513    514    515    516    517    518    520    521 999999 
##     62     64     66     62     64     63     64     64   3488 
## [1] "Frequency table before encoding"
## IDR3_6_24. VDC or Municaplity of District Kavrepalanchok
##           Anaikot Baluwapati Deupur Chalal Ganeshthan Kalati Bhumidanda    Mahankal Chaur         Methinkot 
##                63                62                67                62                61                62 
##        Patalekhet           Raviopi            999999 
##                64                62              3494 
## [1] "Frequency table after encoding"
## IDR3_6_24. VDC or Municaplity of District Kavrepalanchok
##    689    690    691    692    693    694    695    697 999999 
##     62     62     64     67     63     62     61     62   3494 
## [1] "Frequency table before encoding"
## IDR3_6_26. VDC or Municaplity of District Bhaktapur
##             Balkot      Changunarayan           Chitapol            Duwakot              Gundu Madhyapur Thimi NP 
##                 59                 62                 63                 63                 62                 66 
##            Nankhel            Sirutar             999999 
##                 62                 58               3502 
## [1] "Frequency table after encoding"
## IDR3_6_26. VDC or Municaplity of District Bhaktapur
##    405    406    407    408    410    411    412    413 999999 
##     63     62     62     59     62     63     58     66   3502 
## [1] "Frequency table before encoding"
## IDR3_6_30. VDC or Municaplity of District Dhading
##       Baireni        Dhussa         Khari     Kiranchok       Naubise     Salyantar Sunaula Bazar        Thakre 
##            62            64            62            63            64            63            62            64 
##        999999 
##          3493 
## [1] "Frequency table after encoding"
## IDR3_6_30. VDC or Municaplity of District Dhading
##    634    635    636    637    639    640    641    642 999999 
##     62     63     62     64     62     63     64     64   3493 
## [1] "Frequency table before encoding"
## IDR3_6_31. VDC or Municaplity of District Makwanpur
##             Chitlang           Churiyamai               Fakhel         Padampokhari            Kulekhani 
##                   61                   62                   62                   65                   62 
##            Nibuwatar   Shreepur Chhatiwan Sisneri Mahadevsthan               999999 
##                   60                   62                   63                 3500 
## [1] "Frequency table after encoding"
## IDR3_6_31. VDC or Municaplity of District Makwanpur
##    798    799    800    801    802    803    804    805 999999 
##     63     62     62     62     62     65     60     61   3500 
## [1] "Frequency table before encoding"
## IDR3_6_35. VDC or Municaplity of District Chitwan
## Birendranagar       Jutpani        Kathar    Khairahani      Padampur    Parbatipur         Piple    Shaktikhor 
##            63            63            63            62            61            62            65            65 
##        999999 
##          3493 
## [1] "Frequency table after encoding"
## IDR3_6_35. VDC or Municaplity of District Chitwan
##    875    876    877    878    879    880    881    882 999999 
##     63     65     63     62     63     61     62     65   3493

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" of 10 or less. 

# Lower bounds of the age bands, plus the upper bound (100) of the last band.
break_age <- c(15, 25, 35, 45, 55, 65, 100)

# Value labels for the recoded age bands: label text -> numeric code.
labels_age <- setNames(
  c(1, 2, 3, 4, 5, 6, 7),
  c("15-24", "25-34", "35-44", "45-54", "55-64", "65 and older", "NA")
)
# Recode age (IDR3_20) into the bands given by break_age and label them with
# labels_age. 999999 is passed as the missing-value code; in the printed
# table below it ends up in the last band, labelled "NA".
mydata <- ordinal_recode(
  variable = "IDR3_20",
  break_points = break_age,
  missing = 999999,
  value_labels = labels_age
)

## [1] "Frequency table before encoding"
## IDR3_20. How old are you?
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 
## 18 44 60 62 64 72 68 73 78 61 50 42 35 53 60 50 56 55 53 52 36 35 38 51 35 43 28 41 44 31 36 34 38 34 32 29 31 31 28 23 
## 56 57 58 59 60 61 62 63 64 65 66 67 68 69 
## 27 19 26 32 15 18 17 18 15 13  9 12  2  1 
##         recoded
##          [15,25) [25,35) [35,45) [45,55) [55,65) [65,100) [100,1e+06)
##   16          18       0       0       0       0        0           0
##   17          44       0       0       0       0        0           0
##   18          60       0       0       0       0        0           0
##   19          62       0       0       0       0        0           0
##   20          64       0       0       0       0        0           0
##   21          72       0       0       0       0        0           0
##   22          68       0       0       0       0        0           0
##   23          73       0       0       0       0        0           0
##   24          78       0       0       0       0        0           0
##   25           0      61       0       0       0        0           0
##   26           0      50       0       0       0        0           0
##   27           0      42       0       0       0        0           0
##   28           0      35       0       0       0        0           0
##   29           0      53       0       0       0        0           0
##   30           0      60       0       0       0        0           0
##   31           0      50       0       0       0        0           0
##   32           0      56       0       0       0        0           0
##   33           0      55       0       0       0        0           0
##   34           0      53       0       0       0        0           0
##   35           0       0      52       0       0        0           0
##   36           0       0      36       0       0        0           0
##   37           0       0      35       0       0        0           0
##   38           0       0      38       0       0        0           0
##   39           0       0      51       0       0        0           0
##   40           0       0      35       0       0        0           0
##   41           0       0      43       0       0        0           0
##   42           0       0      28       0       0        0           0
##   43           0       0      41       0       0        0           0
##   44           0       0      44       0       0        0           0
##   45           0       0       0      31       0        0           0
##   46           0       0       0      36       0        0           0
##   47           0       0       0      34       0        0           0
##   48           0       0       0      38       0        0           0
##   49           0       0       0      34       0        0           0
##   50           0       0       0      32       0        0           0
##   51           0       0       0      29       0        0           0
##   52           0       0       0      31       0        0           0
##   53           0       0       0      31       0        0           0
##   54           0       0       0      28       0        0           0
##   55           0       0       0       0      23        0           0
##   56           0       0       0       0      27        0           0
##   57           0       0       0       0      19        0           0
##   58           0       0       0       0      26        0           0
##   59           0       0       0       0      32        0           0
##   60           0       0       0       0      15        0           0
##   61           0       0       0       0      18        0           0
##   62           0       0       0       0      17        0           0
##   63           0       0       0       0      18        0           0
##   64           0       0       0       0      15        0           0
##   65           0       0       0       0       0       13           0
##   66           0       0       0       0       0        9           0
##   67           0       0       0       0       0       12           0
##   68           0       0       0       0       0        2           0
##   69           0       0       0       0       0        1           0
##   999999       0       0       0       0       0        0        1969
## [1] "Frequency table after encoding"
## IDR3_20. How old are you?
##        15-24        25-34        35-44        45-54        55-64 65 and older           NA 
##          539          515          403          324          210           37         1969 
## [1] "Inspect value labels and relabel as necessary"
##        15-24        25-34        35-44        45-54        55-64 65 and older           NA 
##            1            2            3            4            5            6            7
# !!!Include relevant variables in list below
# Candidate indirect-PII variables whose frequency tables are to be inspected.
# The order of the names is significant only in that it is kept as written.
indirect_PII <- c(
  "HC2_O1", "HC2_O2", "HC2_O3", "HC2_O4", "HC2_O5", "HC2_O6",
  "H2_12_TEXT", "HC3", "HC4_1", "HC4_2", "HC4_3", "HC4_4",
  "D_4", "Inc_17",
  "P1", "P1A", "P2", "P3", "P3A", "P4", "P4A",
  "P8_O1", "P8_O2", "P8_O3", "P8_3_number", "P8_4_number", "P8_5_number",
  "P12A", "P12A_TEXT", "P13A_O1", "P13A_O2", "P13A_10_TEXT",
  "P9B", "P10B", "P12B", "P13B_O1", "P13B_O2", "P13B_10_TEXT",
  "P9C_I1", "P10C_I1", "P11C_I1", "P11_A3_I1", "P12C_I1", "P12C_TEXT_I1",
  "P13C_O1_I1", "P13C_10_TEXT_I1",
  "P9C_I2", "P10C_I2", "P11C_I2", "P11_A3_I2", "P12C_I2",
  "P9D_I1", "P10D_I1", "P11D_I1", "P11_A4_I1", "P12D_I1", "P13D_O1_I1",
  "P13D_10_TEXT_I1",
  "P9D_I2", "P10D_I2", "P11D_I2", "P11_A4_I2", "P12D_I2", "P13D_O1_I2",
  "P13D_O2_I2",
  "P9E_I1", "P10E_I1", "P11E_I1", "P11_A5_I1", "P12E_I1", "P13E_O1_I1",
  "P13E_O2_I1",
  "P9E_I2", "P10E_I2", "P11E_I2", "P11_A5_I2", "P12E_I2", "P13E_O1_I2",
  "P14E_O1_I2",
  "P9E_I3", "P10E_I3", "P11E_I3", "P11_A5_I3", "P12E_I3", "P13E_O1_I3",
  "P14E_O1_I3",
  "P20A", "P19B",
  "P18C_I1", "P19C_I1", "P20C_I1", "P18C_I2", "P19C_I2", "P20C_I2",
  "P18D_I1", "P19D_I1", "P20D_I1", "P18D_I2", "P19D_I2", "P20D_I2",
  "P18E_I1", "P19E_I1", "P20E_I1",
  "NEW_2_cl_I1", "P19_cl_I1", "D_9_cl_I1", "D_4_cl_I1",
  "NEW_2_cl_I2", "P19_cl_I2", "D_9_cl_I2", "D_4_cl_I2",
  "NEW_2_cl_I3", "P19_cl_I3", "D_9_cl_I3", "D_4_cl_I3",
  "NEW_2_cl_I4", "P19_cl_I4", "D_4_cl_I4",
  "NEW_2_cl_I5", "P19_cl_I5", "D_9_cl_I5", "D_4_cl_I5",
  "NEW_2_cl_I6", "P19_cl_I6", "D_4_cl_I6", "D_8_cl_I6",
  "E2_2"
)

# Pass the candidate variable names to the project helper capture_tables().
# NOTE(review): presumably this tabulates each listed variable for review;
# its output is not shown here -- confirm against functions_1.7.R.
capture_tables(indirect_PII)

# Recode those with very specific values where more than half of the sample have actual data. 

# Drop H2_12_TEXT, as it actually holds verbatim data in Nepali.
mydata <- mydata[is.na(match(names(mydata), "H2_12_TEXT"))]

# Top code household composition variables with large and unusual numbers 

# Encode E2_2 because its language categories have very low frequencies.
# NOTE(review): the 999999 missing code is also replaced by a category code
# (3 in the printed table below) -- confirm that is intended.
mydata <- encode_direct_PII_team(variables = "E2_2")
## [1] "Frequency table before encoding"
## E2_2. What language did you use other than Nepali?
## MAITHILI    NEWAR   999999 
##        1        6     3990 
## [1] "Frequency table after encoding"
## E2_2. What language did you use other than Nepali?
##    1    2    3 
##    1    6 3990
# Top code HC3 at 5: cases with 5 or more adult household members are pooled.
# 888 and 999999 are passed as missing codes; both are unchanged in the
# printed table below.
mydata <- top_recode("HC3", break_point = 5, missing = c(888, 999999))
## [1] "Frequency table before encoding"
## HC3. How many people living in your household are at least 15 years old (have complet
##      0      1      2      3      4      5      6      7      8      9     13    888 999999 
##    757    545    475    170     52     16      4      1      1      1      2      3   1970

## [1] "Frequency table after encoding"
## HC3. How many people living in your household are at least 15 years old (have complet
##         0         1         2         3         4 5 or more       888    999999 
##       757       545       475       170        52        25         3      1970

# Top code high income to the 99.5 percentile.
# The percentile is taken over reported incomes only: rows equal to 999999
# (missing code) are excluded, and which() also discards NA comparisons so
# quantile() is never handed NA values.
# NOTE(review): 777, 888 and 999 appear as income values in the printed table
# below; if they are non-response codes they should be excluded here as well
# -- confirm.
percentile_99.5 <- floor(quantile(
  mydata$Inc_17[which(mydata$Inc_17 != 999999)],
  probs = 0.995
))

# Pool every income at or above the break point into a single top category.
mydata <- top_recode(
  variable = "Inc_17",
  break_point = percentile_99.5,
  missing = 999999
)
## [1] "Frequency table before encoding"
## Inc_17. Approximately what was your household's cash income in the last month? (in NRS).
##      0      5     50     60    400    500    600    700    777    888    999   1000   1200   1500   1600   2000   2400 
##    133      1      1      1      1      5      2      1      4      1     22      3      1      4      5     27      2 
##   2500   2600   3000   3500   4000   4500   4800   5000   6000   6500   7000   8000   8500   9000  10000  11000  11500 
##      8      1     33      2     42      3      1    101     35      1     38     20      1     24    193      3      1 
##  12000  12846  13000  14000  14500  15000  15500  16000  17000  18000  19000  19135  20000  21000  22000  22500  23000 
##     54      1     11     10      1    175      1     24      8     16      6      1    222      2     12      1      5 
##  24000  25000  26000  27000  27500  28000  30000  32000  33000  34000  35000  36000  37000  40000  41000  45000  48000 
##      5    105      1      5      1      2    177      2      1      1     54      4      1     90      3     17      1 
##  50000  54000  55000  57000  60000  62000  65000  66000  67000  68000  70000  75000  79000  79500  80000  85000  95000 
##    115      2      4      2     48      1      7      1      1      1     15      4      1      1     13      1      1 
##  1e+05 103000 104000 110000 115000 117000 125000 130000 135000 150000 160000 170000  2e+05 240000 250000  3e+05 320000 
##     27      1      1      1      2      1      1      1      1     11      1      1      9      1      1      3      1 
##  5e+05  6e+05  7e+05 999999  1e+06 
##      1      1      1   1970      1

## [1] "Frequency table after encoding"
## Inc_17. Approximately what was your household's cash income in the last month? (in NRS).
##             0             5            50            60           400           500           600           700 
##           133             1             1             1             1             5             2             1 
##           777           888           999          1000          1200          1500          1600          2000 
##             4             1            22             3             1             4             5            27 
##          2400          2500          2600          3000          3500          4000          4500          4800 
##             2             8             1            33             2            42             3             1 
##          5000          6000          6500          7000          8000          8500          9000         10000 
##           101            35             1            38            20             1            24           193 
##         11000         11500         12000         12846         13000         14000         14500         15000 
##             3             1            54             1            11            10             1           175 
##         15500         16000         17000         18000         19000         19135         20000         21000 
##             1            24             8            16             6             1           222             2 
##         22000         22500         23000         24000         25000         26000         27000         27500 
##            12             1             5             5           105             1             5             1 
##         28000         30000         32000         33000         34000         35000         36000         37000 
##             2           177             2             1             1            54             4             1 
##         40000         41000         45000         48000         50000         54000         55000         57000 
##            90             3            17             1           115             2             4             2 
##         60000         62000         65000         66000         67000         68000         70000         75000 
##            48             1             7             1             1             1            15             4 
##         79000         79500         80000         85000         95000         1e+05        103000        104000 
##             1             1            13             1             1            27             1             1 
##        110000        115000        117000        125000        130000        135000        150000        160000 
##             1             2             1             1             1             1            11             1 
##        170000 2e+05 or more        999999 
##             1            19          1970

Matching and crosstabulations: Run automated PII check

# Build the sdcMicro object used for the automated PII check.
# Variables are selected from the dictionary inspection; see
# https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# Every name must match a column name in the data file.

# Selected categorical key variables: gender, occupation/education and age
##!!! Replace with candidate categorical demo vars
selectedKeyVars <- c("D_4", "IDR3_20")

# Weight variable (left commented out)
# selectedWeightVar = c('projwt') ##!!! Replace with weight var

# Household id variable (cluster) (left commented out)
# selectedHouseholdID = c('wpid') ##!!! Replace with household id

# Create the sdcMicro object from the key variables, then print its summary
# (key-variable category counts and k-anonymity violations, shown below).
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial
## The input dataset consists of 3997 rows and 1115 variables.
##   --> Categorical key variables: D_4, IDR3_20
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories      Mean size           Size of smallest (>0)     
##           D_4                   18 (18)   222.056 (222.056)                     1  (1)
##       IDR3_20                    7  (7)   571.000 (571.000)                    37 (37)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 10 (0.250%)
##   - 3-anonymity: 22 (0.550%)
##   - 5-anonymity: 51 (1.276%)
## 
## ----------------------------------------------------------------------
# Recode education attainment of adults to reduce risk of re-identification

# Lower bounds of the education bands, plus the upper bound (17) of the last.
break_edu <- c(0, 6, 9, 11, 12, 13, 15, 16, 17)

# Value labels for the recoded education bands: label text -> numeric code.
labels_edu <- setNames(
  c(1, 2, 3, 4, 5, 6, 7, 8, 9),
  c(
    "Primary or less (0-5)",
    "Lower secondary (6-8)",
    "Secondary (9-10)",
    "SLC (11)",
    "CLASS 12/Intermediate level (12)",
    "Bachelor/Postgraduate level",
    "Literate, but never attended school",
    "Illiterate, and never attended school",
    "NA"
  )
)
# Recode education (D_4) into the bands given by break_edu and label them with
# labels_edu. 999999 is passed as the missing-value code; in the printed
# table below it ends up in the last band, labelled "NA".
mydata <- ordinal_recode(
  variable = "D_4",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## D_4. What is your highest completed education level?  [You do not need to read the re
##               Pre-school/Kindergarten                               CLASS 1                               CLASS 2 
##                                     1                                    31                                    54 
##                               CLASS 3                               CLASS 4                               CLASS 5 
##                                    71                                    75                                   151 
##                               CLASS 6                               CLASS 7                               CLASS 8 
##                                    69                                    85                                   120 
##                               CLASS 9                              CLASS 10                                   SLC 
##                                    84                                   104                                   298 
##           CLASS 12/Intermediate level                        Bachelor level  Post-Secondary Level (e.g., MA, PhD) 
##                                   266                                    62                                     9 
##   Literate, but never attended school Illiterate, and never attended school 
##                                   305                                   242 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,17) [17,1e+06)
##   0          1     0      0       0       0       0       0       0          0
##   1         31     0      0       0       0       0       0       0          0
##   2         54     0      0       0       0       0       0       0          0
##   3         71     0      0       0       0       0       0       0          0
##   4         75     0      0       0       0       0       0       0          0
##   5        151     0      0       0       0       0       0       0          0
##   6          0    69      0       0       0       0       0       0          0
##   7          0    85      0       0       0       0       0       0          0
##   8          0   120      0       0       0       0       0       0          0
##   9          0     0     84       0       0       0       0       0          0
##   10         0     0    104       0       0       0       0       0          0
##   11         0     0      0     298       0       0       0       0          0
##   12         0     0      0       0     266       0       0       0          0
##   13         0     0      0       0       0      62       0       0          0
##   14         0     0      0       0       0       9       0       0          0
##   15         0     0      0       0       0       0     305       0          0
##   16         0     0      0       0       0       0       0     242          0
##   999999     0     0      0       0       0       0       0       0       1970
## [1] "Frequency table after encoding"
## D_4. What is your highest completed education level?  [You do not need to read the re
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                   383                                   274                                   188 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                   298                                   266                                    71 
##   Literate, but never attended school Illiterate, and never attended school                                    NA 
##                                   305                                   242                                  1970 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                                    NA 
##                                     7                                     8                                     9
# Break points and value labels shared by the HC4_* education recodes.
#
# Judging by the interval headers printed by ordinal_recode ("[0,6)", ...,
# "[999,1e+06)"), values are binned into left-closed intervals between
# consecutive break points, with a final upper bound of 1e+06 appended, so
# there must be exactly one label per break point.
#
# Raw codes seen in the frequency tables: 0-14 are school levels, 15 is
# "Literate, but never attended school", 16 is "Illiterate, and never attended
# school", 777 is "Refused to answer", 888 is "Does not apply", 999 is
# "Don't know" and 999999 is the missing-value code.
#
# The extra break at 1000 gives 999 ("Don't know") its own bin instead of
# sharing the last bin with the 999999 missing code, and the
# "Refused to answer" label keeps every later label aligned with its bin
# (previously 777 was labelled "Does not apply", 888 was labelled
# "Don't Know", and 999 was folded into "NA").
# NOTE(review): re-run the HC4_* recodes and re-inspect their frequency tables
# after changing these vectors.
break_edu <- c(0, 6, 9, 11, 12, 13, 15, 16, 777, 888, 999, 1000)
labels_edu <- c(
  "Primary or less (0-5)" = 1,
  "Lower secondary (6-8)" = 2,
  "Secondary (9-10)" = 3,
  "SLC (11)" = 4,
  "CLASS 12/Intermediate level (12)" = 5,
  "Bachelor/Postgraduate level" = 6,
  "Literate, but never attended school" = 7,
  "Illiterate, and never attended school" = 8,
  "Refused to answer" = 9,
  "Does not apply" = 10,
  "Don't Know" = 11,
  "NA" = 12
)
# Recode HC4_1 (spouse's highest completed education level) with the shared
# education break points and value labels; 999999 is passed as the missing code.
mydata <- ordinal_recode(
  variable = "HC4_1",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_1. What is the highest completed education level of your spouse?  [You do not need 
##                               CLASS 1                               CLASS 2                               CLASS 3 
##                                    15                                    48                                    48 
##                               CLASS 4                               CLASS 5                               CLASS 6 
##                                    68                                   115                                    57 
##                               CLASS 7                               CLASS 8                               CLASS 9 
##                                    77                                   114                                    69 
##                              CLASS 10                                   SLC           CLASS 12/Intermediate level 
##                                    87                                   188                                   127 
##                        Bachelor level  Post-Secondary Level (e.g., MA, PhD)   Literate, but never attended school 
##                                    35                                    11                                   238 
## Illiterate, and never attended school                        Does not apply                            Don't know 
##                                   281                                     4                                     3 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   1         15     0      0       0       0       0       0        0         0         0           0
##   2         48     0      0       0       0       0       0        0         0         0           0
##   3         48     0      0       0       0       0       0        0         0         0           0
##   4         68     0      0       0       0       0       0        0         0         0           0
##   5        115     0      0       0       0       0       0        0         0         0           0
##   6          0    57      0       0       0       0       0        0         0         0           0
##   7          0    77      0       0       0       0       0        0         0         0           0
##   8          0   114      0       0       0       0       0        0         0         0           0
##   9          0     0     69       0       0       0       0        0         0         0           0
##   10         0     0     87       0       0       0       0        0         0         0           0
##   11         0     0      0     188       0       0       0        0         0         0           0
##   12         0     0      0       0     127       0       0        0         0         0           0
##   13         0     0      0       0       0      35       0        0         0         0           0
##   14         0     0      0       0       0      11       0        0         0         0           0
##   15         0     0      0       0       0       0     238        0         0         0           0
##   16         0     0      0       0       0       0       0      281         0         0           0
##   888        0     0      0       0       0       0       0        0         0         4           0
##   999        0     0      0       0       0       0       0        0         0         0           3
##   999999     0     0      0       0       0       0       0        0         0         0        2412
## [1] "Frequency table after encoding"
## HC4_1. What is the highest completed education level of your spouse?  [You do not need 
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                   294                                   248                                   156 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                   188                                   127                                    46 
##   Literate, but never attended school Illiterate, and never attended school                            Don't Know 
##                                   238                                   281                                     4 
##                                    NA 
##                                  2415 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Recode HC4_2 (father's highest completed education level) with the shared
# education break points and value labels; 999999 is passed as the missing code.
mydata <- ordinal_recode(
  variable = "HC4_2",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_2. What is the highest completed education level of your father?  [You do not need 
##               Pre-school/Kindergarten                               CLASS 1                               CLASS 2 
##                                     1                                    12                                    27 
##                               CLASS 3                               CLASS 4                               CLASS 5 
##                                    33                                    23                                    61 
##                               CLASS 6                               CLASS 7                               CLASS 8 
##                                    15                                    21                                    44 
##                               CLASS 9                              CLASS 10                                   SLC 
##                                    22                                    26                                    48 
##           CLASS 12/Intermediate level                        Bachelor level  Post-Secondary Level (e.g., MA, PhD) 
##                                    29                                     4                                     2 
##   Literate, but never attended school Illiterate, and never attended school                     Refused to answer 
##                                   121                                   111                                     1 
##                        Does not apply                            Don't know 
##                                    51                                    11 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   0          1     0      0       0       0       0       0        0         0         0           0
##   1         12     0      0       0       0       0       0        0         0         0           0
##   2         27     0      0       0       0       0       0        0         0         0           0
##   3         33     0      0       0       0       0       0        0         0         0           0
##   4         23     0      0       0       0       0       0        0         0         0           0
##   5         61     0      0       0       0       0       0        0         0         0           0
##   6          0    15      0       0       0       0       0        0         0         0           0
##   7          0    21      0       0       0       0       0        0         0         0           0
##   8          0    44      0       0       0       0       0        0         0         0           0
##   9          0     0     22       0       0       0       0        0         0         0           0
##   10         0     0     26       0       0       0       0        0         0         0           0
##   11         0     0      0      48       0       0       0        0         0         0           0
##   12         0     0      0       0      29       0       0        0         0         0           0
##   13         0     0      0       0       0       4       0        0         0         0           0
##   14         0     0      0       0       0       2       0        0         0         0           0
##   15         0     0      0       0       0       0     121        0         0         0           0
##   16         0     0      0       0       0       0       0      111         0         0           0
##   777        0     0      0       0       0       0       0        0         1         0           0
##   888        0     0      0       0       0       0       0        0         0        51           0
##   999        0     0      0       0       0       0       0        0         0         0          11
##   999999     0     0      0       0       0       0       0        0         0         0        3334
## [1] "Frequency table after encoding"
## HC4_2. What is the highest completed education level of your father?  [You do not need 
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                   157                                    80                                    48 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                    48                                    29                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                   121                                   111                                     1 
##                            Don't Know                                    NA 
##                                    51                                  3345 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Recode HC4_3 (mother's highest completed education level) with the shared
# education break points and value labels; 999999 is passed as the missing code.
mydata <- ordinal_recode(
  variable = "HC4_3",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_3. What is the highest completed education level of your mother? If you have more t
##                               CLASS 1                               CLASS 2                               CLASS 3 
##                                     3                                    13                                    10 
##                               CLASS 4                               CLASS 5                               CLASS 6 
##                                    21                                    24                                    13 
##                               CLASS 7                               CLASS 8                               CLASS 9 
##                                     6                                    20                                     5 
##                              CLASS 10                                   SLC           CLASS 12/Intermediate level 
##                                     9                                    23                                     5 
##                        Bachelor level   Literate, but never attended school Illiterate, and never attended school 
##                                     1                                   227                                   260 
##                        Does not apply                            Don't know 
##                                    20                                     3 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   1          3     0      0       0       0       0       0        0         0         0           0
##   2         13     0      0       0       0       0       0        0         0         0           0
##   3         10     0      0       0       0       0       0        0         0         0           0
##   4         21     0      0       0       0       0       0        0         0         0           0
##   5         24     0      0       0       0       0       0        0         0         0           0
##   6          0    13      0       0       0       0       0        0         0         0           0
##   7          0     6      0       0       0       0       0        0         0         0           0
##   8          0    20      0       0       0       0       0        0         0         0           0
##   9          0     0      5       0       0       0       0        0         0         0           0
##   10         0     0      9       0       0       0       0        0         0         0           0
##   11         0     0      0      23       0       0       0        0         0         0           0
##   12         0     0      0       0       5       0       0        0         0         0           0
##   13         0     0      0       0       0       1       0        0         0         0           0
##   15         0     0      0       0       0       0     227        0         0         0           0
##   16         0     0      0       0       0       0       0      260         0         0           0
##   888        0     0      0       0       0       0       0        0         0        20           0
##   999        0     0      0       0       0       0       0        0         0         0           3
##   999999     0     0      0       0       0       0       0        0         0         0        3334
## [1] "Frequency table after encoding"
## HC4_3. What is the highest completed education level of your mother? If you have more t
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                    71                                    39                                    14 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                    23                                     5                                     1 
##   Literate, but never attended school Illiterate, and never attended school                            Don't Know 
##                                   227                                   260                                    20 
##                                    NA 
##                                  3337 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Recode HC4_4 (education level of the most-educated grandparent) with the
# shared education break points and value labels; 999999 is passed as the
# missing code.
mydata <- ordinal_recode(
  variable = "HC4_4",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## HC4_4. Think about your grandparents, and the grandparent with the most education. What
##                               CLASS 2                               CLASS 3                               CLASS 4 
##                                     2                                     2                                     1 
##                               CLASS 5                               CLASS 9                                   SLC 
##                                     2                                     1                                     1 
##           CLASS 12/Intermediate level   Literate, but never attended school Illiterate, and never attended school 
##                                     2                                    29                                    56 
##                            Don't know 
##                                     3 
##         recoded
##          [0,6) [6,9) [9,11) [11,12) [12,13) [13,15) [15,16) [16,777) [777,888) [888,999) [999,1e+06)
##   2          2     0      0       0       0       0       0        0         0         0           0
##   3          2     0      0       0       0       0       0        0         0         0           0
##   4          1     0      0       0       0       0       0        0         0         0           0
##   5          2     0      0       0       0       0       0        0         0         0           0
##   9          0     0      1       0       0       0       0        0         0         0           0
##   11         0     0      0       1       0       0       0        0         0         0           0
##   12         0     0      0       0       2       0       0        0         0         0           0
##   15         0     0      0       0       0       0      29        0         0         0           0
##   16         0     0      0       0       0       0       0       56         0         0           0
##   999        0     0      0       0       0       0       0        0         0         0           3
##   999999     0     0      0       0       0       0       0        0         0         0        3898
## [1] "Frequency table after encoding"
## HC4_4. Think about your grandparents, and the grandparent with the most education. What
##                 Primary or less (0-5)                      Secondary (9-10)                              SLC (11) 
##                                     7                                     1                                     1 
##      CLASS 12/Intermediate level (12)   Literate, but never attended school Illiterate, and never attended school 
##                                     2                                    29                                    56 
##                                    NA 
##                                  3901 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8)                      Secondary (9-10) 
##                                     1                                     2                                     3 
##                              SLC (11)      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     4                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school                        Does not apply 
##                                     7                                     8                                     9 
##                            Don't Know                                    NA 
##                                    10                                    11
# Re-run the k-anonymity check (2-anonymity) using D_4 and IDR3_20 as the
# categorical key variables.

selectedKeyVars <- c("D_4", "IDR3_20")
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
# Print the summary of the SDC object.
sdcInitial
## The input dataset consists of 3997 rows and 1115 variables.
##   --> Categorical key variables: D_4, IDR3_20
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size           Size of smallest (>0)     
##           D_4                    9 (9)   444.111 (444.111)                    71 (71)
##       IDR3_20                    7 (7)   571.000 (571.000)                    37 (37)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 2 (0.050%)
##   - 3-anonymity: 6 (0.150%)
##   - 5-anonymity: 19 (0.475%)
## 
## ----------------------------------------------------------------------

Show the values of the key variables for records that violate k-anonymity

# mydata <- labelDataset(mydata)
# Logical mask of records whose value in column 2 of the individual risk
# matrix is below 2.
# NOTE(review): column 2 is presumably the sample frequency count of the key
# combination - confirm against the sdcMicro version in use.
notAnon <- sdcInitial@risk$individual[, 2] < 2 # for 2-anonymity
# Print the key variables of the flagged records.
mydata[notAnon, selectedKeyVars]
## # A tibble: 2 x 2
##                               D_4   IDR3_20
##                         <dbl+lbl> <dbl+lbl>
## 1 6 [Bachelor/Postgraduate level] 5 [55-64]
## 2 9 [NA]                          4 [45-54]
# Run local suppression on the SDC object.
sdcFinal <- localSuppression(sdcInitial)

# Recombining anonymized variables

# Print the key variables of the previously flagged records as they stand in
# the manipulated data.
extractManipData(sdcFinal)[notAnon, selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first element will be used
##      D_4 IDR3_20
## 826   NA       5
## 1075  NA       4
# Write code 9 into D_4 for the flagged records; 9 is the value printed with
# the label "NA" in the D_4 value-label listing.
# NOTE(review): one of the two flagged records already has D_4 == 9 in the
# tibble printed before suppression, so this assignment leaves it unchanged -
# confirm that is the intended outcome.
mydata[notAnon, "D_4"] <- 9

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here:
# Names of the free-text columns to be reviewed for sensitive content.
open_ends <- c(
  "SrvyrComment",
  "H2_12_TEXT_Translation",
  "HTNx3_2_14_TEXT_Translation",
  "HTN_5x3_TEXT_Translation",
  "HTV_1_10_TEXTx3_Translation",
  "HTV_3_11_TEXTx3_Translation",
  "CPR5i_TEXT_Translation",
  "G1_00_08_TEXT_Translation",
  "P13A_10_TEXT_Translation",
  "P14A_12_TEXT_Translation",
  "SIMPOC7A_10_TEXT_Translation",
  "P13B_10_TEXT_Translation",
  "P14B_12_TEXT_Translation",
  "SIMPOC7B_10_TEXT_Translation",
  "P13C_10_TEXT_I1_Translation",
  "P14C_12_TEXT_I1_Translation",
  "SIMPOC7C_10_TEXT_I1_Translation",
  "P14C_12_TEXT_I2_Translation",
  "P13D_10_TEXT_I1_Translation",
  "P14D_12_TEXT_I1_Translation",
  "P14D_12_TEXT_I2_Translation",
  "P13E_10_TEXT_I1_Translation",
  "P14E_12_TEXT_I1_Translation",
  "SIMPOC7E_10_TEXT_I1_Translation",
  "P14E_12_TEXT_I2_Translation",
  "P14E_12_TEXT_I3_Translation",
  "NEW_3_12_TEXT_Translation",
  "NEW_9_TEXT_Translation",
  "SIMPOC7_cl_10_TEXT_I1_Translate",
  "SIMPOC7_cl_10_TEXT_I2_Translate",
  "NEW_10_TEXT_Translation",
  "P13_cl_O3_TEXT_I1_Translation",
  "NEW_9_cl_TEXT_I1_Translation",
  "NEW_9_cl_TEXT_I2_Translation",
  "NEW_9_cl_TEXT_I3_Translation",
  "P14_cl_O2_I1_TEXT_Translation",
  "P13_cl_O2_TEXT_I2_Translation",
  "SIMPOC7_cl_10_TEXT_I3_Translate",
  "P14_cl_O1_I3_TEXT_Translation",
  "P14_cl_O1_I2_TEXT_Translation",
  "IDR3_13_TEXT_Translation",
  "IDR3_15_TEXT_Translation",
  "e3e_TEXT_Translation",
  "E2_11_8_TEXT_Translation",
  "E_14_7_TEXT_Translation"
)

# Hand the open-end variable names to report_open().
# NOTE(review): report_open() is defined outside this chunk; presumably it
# writes the responses out for manual review - confirm.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Replace individual open-end responses with manually redacted text.
# Rows are addressed by position, so these assignments are only valid for the
# row order of the dataset this script was written against.
# NOTE(review): "bother" in the first replacement text looks like a typo for
# "brother" - confirm before changing, since this string is written into the
# dataset.
mydata$E_14_7_TEXT_Translation[2380] <- "Respondent's bother was tricked in bad activities and later threatened to help [activity redacted]"
mydata$E_14_7_TEXT_Translation[3099] <- "In Q64, respondent said there was no income and later in Q307 respondent said [amount redacted] so entered the option more than 12,000 in Q307"
mydata$E_14_7_TEXT_Translation[3680] <- "GPS did not capture for about 20 minutes and started the interview without GPS. In Q64 respondent did not have any income but her/his son sent [amount redacted] the other day"
mydata$IDR3_13_TEXT_Translation[87] <- "[respondent name redacted] is dead"
mydata$NEW_10_TEXT_Translation[2792] <- "Shop [type redacted]"
# Drop the SrvyrComment column from the dataset entirely.
mydata <- mydata[!names(mydata) %in% "SrvyrComment"]

GPS data: Displace

# Setup map

# Keep only the rows of the "world" map data for the survey country.
# !!! Select correct country
countrymap <- map_data("world") %>%
  filter(region == "Nepal")
#admin <- raster::getData("GADM", country="NP", level=0) #!!! Select correct country map using standard 2-letter country codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
# Load the administrative boundary object from a local .rds file instead of
# downloading it with the commented-out call.
admin <- readRDS(file = "gadm36_NPL_0_sp.rds")

# Every (Longitude, Latitude) pair is displaced in turn. Compare the summary
# statistics and maps printed before and after each displacement.

# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("Longitude", "Latitude")
# May take a few minutes to process.
mydata <- displace(
  gps.vars,
  admin = admin,
  samp_num = 1,
  other_num = 100000
)
## Warning: Removed 52 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##    Longitude        Latitude    
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:85.07   1st Qu.:27.56  
##  Median :85.47   Median :27.65  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :52      NA's   :52
## Warning: Removed 52 rows containing missing values (geom_point).

## Warning: Removed 52 rows containing missing values (geom_point).

## Warning: Removed 52 rows containing missing values (geom_point).

## Warning: Removed 52 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##    Longitude        Latitude    
##  Min.   :84.28   Min.   :26.83  
##  1st Qu.:85.06   1st Qu.:27.55  
##  Median :85.46   Median :27.64  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.18   Max.   :28.03  
##  NA's   :52      NA's   :52     
## [1] "Processing time = 7.08120536406835"
# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("GPSinitial_LO", "GPSinitial_LA")
# May take a few minutes to process.
mydata <- displace(
  gps.vars,
  admin = admin,
  samp_num = 1,
  other_num = 100000
)
## Warning: Removed 167 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##  GPSinitial_LO   GPSinitial_LA  
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:85.07   1st Qu.:27.56  
##  Median :85.47   Median :27.65  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :167     NA's   :167
## Warning: Removed 167 rows containing missing values (geom_point).
## Warning: Removed 167 rows containing missing values (geom_point).

## Warning: Removed 167 rows containing missing values (geom_point).

## Warning: Removed 167 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##  GPSinitial_LO   GPSinitial_LA  
##  Min.   :84.28   Min.   :26.82  
##  1st Qu.:85.07   1st Qu.:27.55  
##  Median :85.46   Median :27.64  
##  Mean   :85.35   Mean   :27.59  
##  3rd Qu.:85.61   3rd Qu.:27.73  
##  Max.   :86.17   Max.   :28.04  
##  NA's   :167     NA's   :167    
## [1] "Processing time = 8.29655353625615"
# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("gps_CEa_LO", "gps_CEa_LA")
# May take a few minutes to process.
mydata <- displace(
  gps.vars,
  admin = admin,
  samp_num = 1,
  other_num = 100000
)
## Warning: Removed 2258 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##    gps_CEa_LO      gps_CEa_LA   
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:84.96   1st Qu.:27.56  
##  Median :85.45   Median :27.64  
##  Mean   :85.33   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :2258    NA's   :2258
## Warning: Removed 2258 rows containing missing values (geom_point).
## Warning: Removed 2258 rows containing missing values (geom_point).

## Warning: Removed 2258 rows containing missing values (geom_point).

## Warning: Removed 2258 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##    gps_CEa_LO      gps_CEa_LA   
##  Min.   :84.28   Min.   :26.83  
##  1st Qu.:84.98   1st Qu.:27.55  
##  Median :85.45   Median :27.64  
##  Mean   :85.33   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.17   Max.   :28.04  
##  NA's   :2258    NA's   :2258   
## [1] "Processing time = 4.31941741704941"
# !!!Include relevant variables, always longitude first, latitude second.
gps.vars <- c("gpsenumimp_LO", "gpsenumimp_LA")
# May take a few minutes to process.
mydata <- displace(
  gps.vars,
  admin = admin,
  samp_num = 1,
  other_num = 100000
)
## Warning: Removed 2235 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##  gpsenumimp_LO   gpsenumimp_LA  
##  Min.   :84.31   Min.   :26.85  
##  1st Qu.:84.96   1st Qu.:27.56  
##  Median :85.45   Median :27.64  
##  Mean   :85.32   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.15   Max.   :28.00  
##  NA's   :2235    NA's   :2235
## Warning: Removed 2235 rows containing missing values (geom_point).
## Warning: Removed 2235 rows containing missing values (geom_point).

## Warning: Removed 2235 rows containing missing values (geom_point).

## Warning: Removed 2235 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##  gpsenumimp_LO   gpsenumimp_LA  
##  Min.   :84.29   Min.   :26.83  
##  1st Qu.:84.97   1st Qu.:27.54  
##  Median :85.44   Median :27.64  
##  Mean   :85.32   Mean   :27.59  
##  3rd Qu.:85.60   3rd Qu.:27.72  
##  Max.   :86.17   Max.   :28.04  
##  NA's   :2235    NA's   :2235   
## [1] "Processing time = 4.47818704843521"

Save processed data in Stata and SPSS formats

The suffix "_PU" (Public Use) is appended to the end of the output file name

# Stata (.dta) copy of the processed data, named "<filename>_PU.dta".
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
# SPSS (.sav) copy of the processed data, named "<filename>_PU.sav".
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)