rm(list=ls(all=t))

Setup and crate dictionary

filename <- "UC Berkeley_Nepal_Awareness-Raising for Police_Public Data_Final" # !!!Update filename
source ("functions_1.7.R")
## --------
## This is sdcMicro v5.6.0.
## For references, please have a look at citation('sdcMicro')
## Note: since version 5.0.0, the graphical user-interface is a shiny-app that can be started with sdcApp().
## Please submit suggestions and bugs at: https://github.com/sdcTools/sdcMicro/issues
## --------
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: sp
## Checking rgeos availability: TRUE
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:sdcMicro':
## 
##     freq
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/C_Pablo_Diego-Rosell/Documents/R/R-3.6.3/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/C_Pablo_Diego-Rosell/Documents/R/R-3.6.3/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/C_Pablo_Diego-Rosell/Documents/R/R-3.6.3/library/rgdal/proj
## Loading required package: spatstat.data
## Loading required package: spatstat.geom
## spatstat.geom 2.0-1
## 
## Attaching package: 'spatstat.geom'
## The following objects are masked from 'package:raster':
## 
##     area, rotate, shift
## Loading required package: spatstat.core
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:raster':
## 
##     getData
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: rpart
## spatstat.core 2.0-0
## Loading required package: spatstat.linnet
## spatstat.linnet 2.1-1
## 
## spatstat 2.0-1       (nickname: 'Caution: contains small parts') 
## For an introduction to spatstat, type 'beginner'
## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-4 
##  Polygon checking: TRUE
## 
## Spatial Point Pattern Analysis Code in S-Plus
##  
##  Version 2 - Spatial and Space-Time analysis
## 
## Attaching package: 'splancs'
## The following object is masked from 'package:raster':
## 
##     zoom
## The following object is masked from 'package:dplyr':
## 
##     tribble
## Loading required package: spam
## Loading required package: dotCall64
## Loading required package: grid
## Spam version 2.6-0 (2020-12-14) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## See https://github.com/NCAR/Fields for
##  an extensive vignette, other supplements and source code
## 
## Attaching package: 'geosphere'
## The following object is masked from 'package:spatstat.geom':
## 
##     perimeter
## 
## Attaching package: 'tibble'
## The following object is masked from 'package:splancs':
## 
##     tribble

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location (>100,000)
# Weight: weightVar
# Household ID:  hhId, 
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!!No Direct PII variables

Direct PII-team: Encode interviewer names, which may be useful for analysis of interviewer effects

!!!Replace vector in "variables" field below with relevant variable names

mydata <- encode_direct_PII_team (variables=c("Srvyr"))
## [1] "Frequency table before encoding"
## Srvyr. Srvyr
##   HTVBadri  HTVdeepak    HTVKaji HTVPranita    HTVTara 
##        237        188        184        201        255 
## [1] "Frequency table after encoding"
## Srvyr. Srvyr
##   1   2   3   4   5 
## 237 188 184 201 255

Small locations: Encode locations with pop <100,000 using random large numbers

!!!Include relevant variables, but check their population size first to confirm they are <100,000

locvars <- c("vdc_id", "police_id") 
mydata <- encode_location (variables= locvars, missing=999999)
## [1] "Frequency table before encoding"
## vdc_id. VDC or Municipality
##     Bhaktapur municipality Changunarayan municipality     Madhyapur municipality 
##                        123                         39                         70 
##  Suryabinayak municipality     Bharatpur municipality            Ichyakamana VDC 
##                        109                        128                          9 
##        Kalika municipality    Khairahani municipality          Madi municipality 
##                          3                         17                         13 
##         Rapti municipality    Ratnanagar municipality        Benighat Rorang VDC 
##                         14                         28                          9 
##     Dhunibesi municipality                 Gajuri VDC                Galchhi VDC 
##                         15                         18                          9 
##           Ganga Jamuna VDC             Jwalamukhi VDC             Khaniyabas VDC 
##                          5                          4                          6 
##              Netrawati VDC     Nilkantha municipality              Siddhalek VDC 
##                          6                         58                          3 
##                 Thakre VDC         Tripurasundari VDC        Banepa municipality 
##                          3                          6                         22 
##                 Bethan VDC                 Bhumlu VDC         Chauri deurali VDC 
##                          2                          7                          4 
##     Dhulikhel municipality             Khanikhola VDC             Mahabharat VDC 
##                         61                          3                          7 
## Mandan deupur municipality    Namobuddha municipality       Panauti municipality 
##                          4                          8                          8 
##     Panchkhal Municipality                  Roshi VDC                  Timal VDC 
##                         14                          8                          2 
##                Bagmati VDC                Bakaiya VDC               Bhimfedi VDC 
##                         16                          8                         14 
##       Hetauda municipality           Indrasarobar VDC                Kailash VDC 
##                        133                          8                          6 
##               Manahari VDC         Thaha municipality 
##                         18                         17 
## [1] "Frequency table after encoding"
## vdc_id. VDC or Municipality
## 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 
##  17   8   4   9  13  14   7   5  14   6   4   8  16   3  58 123  22   8   9  15   3 
## 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 
##   6   2  18   2   7   3   6   8  39   3  18  28   8 109  17   9  70   6  61 133 128 
## 426 427 
##  14   4 
## [1] "Frequency table before encoding"
## police_id. Police Station
##                        Byasi police station 
##                                          14 
##           District police office, Bhaktapur 
##                                          91 
##                  Dubarsquare police station 
##                                           9 
##                     Dudhpati police station 
##                                           9 
##                Changunarayan police station 
##                                           6 
##                      Duwakot police station 
##                                           7 
##                    Kharipati police station 
##                                          10 
##                     Nagarkot police station 
##                                           8 
##                        Sudal police station 
##                                           8 
##                    Sanothimi police station 
##                                          14 
##                          Thimi police range 
##                                          56 
##                       Balkot police station 
##                                          14 
##                     Dadhikot police station 
##                                           6 
##                         Jagati police range 
##                                          70 
##                      Katunje police station 
##                                          16 
##                      Sirutar police station 
##                                           5 
##                     Chanauli police station 
##                                           4 
##                   Dibyanagar police station 
##                                           4 
##             Distict Police Office, Chitawan 
##                                          75 
##                    Gitanagar police station 
##                                           8 
##                     Gondrang police station 
##                                           2 
##                       Jugedi police station 
##                                           4 
##                  Narayangadh police station 
##                                          19 
##                Paras Buspark police station 
##                                           4 
##                     Ramnagar police station 
##                                           4 
##                Sitaram Chowk police station 
##                                           4 
##                      Mugling police station 
##                                           9 
##                      Jutpani police station 
##                                           3 
##                   Khairahani police station 
##                                          13 
##            Samjhaut Pipariya police station 
##                                           2 
##                  Baruwa Madi police station 
##                                           2 
##                    Kalyanpur police station 
##                                           2 
##                         Madi police station 
##                                           9 
##                     Bhandara police station 
##                                          12 
##                       Lothar police station 
##                                           2 
##                    Bachhauli police station 
##                                           3 
##                      Mainaha police station 
##                                           2 
##                  Panchakanya police station 
##                                           3 
##                        Tandi police station 
##                                          20 
##                     Benighat police station 
##                                           3 
##                    Chundunga police station 
##                                           2 
##                     Jogimara police station 
##                                           4 
##                     Jiwanpur police station 
##                                           2 
##                  Khanikhola  police station 
##                                          13 
##                       Gajuri police station 
##                                          16 
##                      Malekhu police station 
##                                           2 
##                      Baireni police station 
##                                           6 
##                       Galchi police station 
##                                           3 
##                    Budhathum police station 
##                                           2 
##                   Phulkharka police station 
##                                           3 
##                        Maidi police station 
##                                           4 
##                      Dharkha police station 
##                                           6 
##                       Marpak police station 
##                                           2 
##                       Semjom police station 
##                                           2 
##                        Todke police station 
##                                           2 
##                     Dhuwakot police station 
##                                           2 
##             District Police Office, Dhading 
##                                          49 
##                  Pucharbajar police station 
##                                           2 
##                 Sunaulabajar police station 
##                                           5 
##                       Nalang police station 
##                                           3 
##                  Mahadevbesi police station 
##                                           3 
##                    Salyankot police station 
##                                           3 
##                    Salyantar police station 
##                                           3 
##                       Banepa police station 
##                                          17 
##                         Nala police station 
##                                           2 
##                        Sanga police station 
##                                           3 
## Chyamrangbesi police station (Dhungakharka) 
##                                           2 
##                      Chaubas police station 
##                                           2 
##                    Dolalghat police station 
##                                           3 
##                      Phalate police station 
##                                           2 
##               Kattikedeurali police station 
##                                           4 
##               District Police Office, Kavre 
##                                          58 
##               Kavrebhanjyang police station 
##                                           3 
##                   Taaldhunga police station 
##                                           3 
##                       Gokule police station 
##                                           4 
##                  Phoksingtar police station 
##                                           3 
##                       Deupur police station 
##                                           2 
##                   Higuwapati police station 
##                                           2 
##      Machchepauwa police station (Bhakunde) 
##                                           4 
##          Shikharpur police station (Kanpur) 
##                                           2 
##                    Shyampati police station 
##                                           2 
##                      Khopasi police station 
##                                           2 
##                    Kushadevi police station 
##                                           2 
##                      Panauti police station 
##                                           4 
##                       Baluwa police station 
##                                           2 
##               Bhagawatisthan police station 
##                                           4 
##                        Khawa police station 
##                                           2 
##                    Panchkhal police station 
##                                           4 
##                    Timalbesi police station 
##                                           2 
##                    Mangaltar police station 
##                                           6 
##                 Pachuwarghat police station 
##                                           2 
##                     Bhorleni police station 
##                                           2 
##                    Jhurjhure police station 
##                                           3 
##                   Phaparbari police station 
##                                           7 
##                 Shiwaraigaun police station 
##                                           4 
##                    Hattisude police station 
##                                           3 
##                      Thingan police station 
##                                           5 
##                       Bhaise police station 
##                                           3 
##                     Bhimfedi police station 
##                                          11 
##                     Basamadi police station 
##                                           3 
##                   Bhutandevi police station 
##                                           4 
##              Buspark police station hetauda 
##                                           3 
##                    Chaughada police station 
##                                           4 
##                   Chaukitole police station 
##                                           4 
##                      Hetauda police station 
##                                          25 
##                    Newarpani police station 
##                                           6 
##                 Padampokhari police station 
##                                           4 
##               Pashupatinagar police station 
##                                          13 
##                   Thulodamar police station 
##                                           4 
##          District police office, Makawanpur 
##                                          65 
##                       Fakhel police station 
##                                           4 
##                      Sisneri police station 
##                                           4 
##                       Bharta police station 
##                                           3 
##                  Dandakharka police station 
##                                           3 
##                     Manahari police station 
##                                          16 
##                    Sunachuri police station 
##                                           2 
##                     Chitlang police station 
##                                           4 
##                       Palung police station 
##                                          11 
##                 Simbhanjyang police station 
##                                           2 
## [1] "Frequency table after encoding"
## police_id. Police Station
## 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 
##   6   2   2   2  13   2   4   4  16   4   4   2   2  11   6   2   3   2   8   2  20 
## 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 
##  16   4   2   8  13   7   2   4   3   2   2   4   6  56   3   5  75   9  10   7   6 
## 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 
##   4   2   8  70   2   3  25   3   2  13   2   5   3   3   2   4   2  14   2   3   2 
## 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 
##   9   3   4   6   2   9   4   3   2   4   4   3   3   4   5   3   2   4   2  49  11 
## 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 
##   2   3   2  91   2   3   2  14   4  14   4   3   3   4   3   3   9   4   2  12  17 
## 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 
##   6   3   4   2   3   4   2   3  19  14   4  58   4   2  65

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Recode birth year into 5 year groups.

break_year <- c(999,2020,2025,2030,2035,2040,2045,2050, 2055)
labels_year <- c("Missing"  =1,
                "2020-2024" =2, 
                "2025-2029" =3, 
                "2030-2034" =4, 
                "2035-2039" =5,
                "2040-2044" =6,
                "2045-2049" =7,
                "2050-2054" =8)
mydata <- ordinal_recode (variable="dem4", break_points=break_year, missing=999999, value_labels=labels_year)

## [1] "Frequency table before encoding"
## dem4. What year were you born? [Answer in Nepali Years]  [Use the timeline in the manu
##  999 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 
##    2    2    3    3    6   11   10    4   12   13   14   14   18   13   28   20   39 
## 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 
##   40   51   66   69   73   78   63   94   43   40   41   54   43   57   15   15   11 
##       recoded
##        [999,2020) [2020,2025) [2025,2030) [2030,2035) [2035,2040) [2040,2045)
##   999           2           0           0           0           0           0
##   2021          0           2           0           0           0           0
##   2022          0           3           0           0           0           0
##   2023          0           3           0           0           0           0
##   2024          0           6           0           0           0           0
##   2025          0           0          11           0           0           0
##   2026          0           0          10           0           0           0
##   2027          0           0           4           0           0           0
##   2028          0           0          12           0           0           0
##   2029          0           0          13           0           0           0
##   2030          0           0           0          14           0           0
##   2031          0           0           0          14           0           0
##   2032          0           0           0          18           0           0
##   2033          0           0           0          13           0           0
##   2034          0           0           0          28           0           0
##   2035          0           0           0           0          20           0
##   2036          0           0           0           0          39           0
##   2037          0           0           0           0          40           0
##   2038          0           0           0           0          51           0
##   2039          0           0           0           0          66           0
##   2040          0           0           0           0           0          69
##   2041          0           0           0           0           0          73
##   2042          0           0           0           0           0          78
##   2043          0           0           0           0           0          63
##   2044          0           0           0           0           0          94
##   2045          0           0           0           0           0           0
##   2046          0           0           0           0           0           0
##   2047          0           0           0           0           0           0
##   2048          0           0           0           0           0           0
##   2049          0           0           0           0           0           0
##   2050          0           0           0           0           0           0
##   2051          0           0           0           0           0           0
##   2052          0           0           0           0           0           0
##   2053          0           0           0           0           0           0
##       recoded
##        [2045,2050) [2050,2055) [2055,1e+06)
##   999            0           0            0
##   2021           0           0            0
##   2022           0           0            0
##   2023           0           0            0
##   2024           0           0            0
##   2025           0           0            0
##   2026           0           0            0
##   2027           0           0            0
##   2028           0           0            0
##   2029           0           0            0
##   2030           0           0            0
##   2031           0           0            0
##   2032           0           0            0
##   2033           0           0            0
##   2034           0           0            0
##   2035           0           0            0
##   2036           0           0            0
##   2037           0           0            0
##   2038           0           0            0
##   2039           0           0            0
##   2040           0           0            0
##   2041           0           0            0
##   2042           0           0            0
##   2043           0           0            0
##   2044           0           0            0
##   2045          43           0            0
##   2046          40           0            0
##   2047          41           0            0
##   2048          54           0            0
##   2049          43           0            0
##   2050           0          57            0
##   2051           0          15            0
##   2052           0          15            0
##   2053           0          11            0
## [1] "Frequency table after encoding"
## dem4. What year were you born? [Answer in Nepali Years]  [Use the timeline in the manu
##   Missing 2020-2024 2025-2029 2030-2034 2035-2039 2040-2044 2045-2049 2050-2054 
##         2        14        50        87       216       377       221        98 
## [1] "Inspect value labels and relabel as necessary"
##   Missing 2020-2024 2025-2029 2030-2034 2035-2039 2040-2044 2045-2049 2050-2054 
##         1         2         3         4         5         6         7         8
# Recode education attainment into standard Nepal categories

break_edu <- c(0,7,10,12,13,14,16,17,18, 777, 888, 999)
labels_edu <- c("Primary or less (0-5)" = 1,
                "Lower secondary (6-8)" = 2,
                "Secondary (9-10)" = 3,
                "SLC (11)" = 4,
                "CLASS 12/Intermediate level (12)" = 5,
                "Bachelor/Postgraduate level" = 6,
                "Literate, but never attended school" = 7,
                "Illiterate, and never attended school"= 8,
                "Refused"= 9, 
                "Does not apply" = 10, 
                "Don't Know" = 11)
mydata <- ordinal_recode (variable="dem6", break_points=break_edu, missing=999, value_labels=labels_edu)

## [1] "Frequency table before encoding"
## dem6. What is your highest completed education level?   [You do not need to read the r
##                               Class 1                               Class 2 
##                                     1                                     1 
##                               Class 3                               Class 4 
##                                     2                                    11 
##                               Class 5                               Class 6 
##                                    56                                    17 
##                               Class 7                               Class 8 
##                                    75                                    94 
##                               Class 9                              Class 10 
##                                    35                                    95 
##                                   slc           Class 12/Intermediate Level 
##                                   300                                   264 
##                        Bachelor Level                          Master Level 
##                                    82                                    31 
## Illiterate, and never attended school 
##                                     1 
##     recoded
##      [0,7) [7,10) [10,12) [12,13) [13,14) [14,16) [16,17) [17,18) [18,777) [777,888)
##   2      1      0       0       0       0       0       0       0        0         0
##   3      1      0       0       0       0       0       0       0        0         0
##   4      2      0       0       0       0       0       0       0        0         0
##   5     11      0       0       0       0       0       0       0        0         0
##   6     56      0       0       0       0       0       0       0        0         0
##   7      0     17       0       0       0       0       0       0        0         0
##   8      0     75       0       0       0       0       0       0        0         0
##   9      0     94       0       0       0       0       0       0        0         0
##   10     0      0      35       0       0       0       0       0        0         0
##   11     0      0      95       0       0       0       0       0        0         0
##   12     0      0       0     300       0       0       0       0        0         0
##   13     0      0       0       0     264       0       0       0        0         0
##   14     0      0       0       0       0      82       0       0        0         0
##   15     0      0       0       0       0      31       0       0        0         0
##   17     0      0       0       0       0       0       0       1        0         0
##     recoded
##      [888,999) [999,1e+03)
##   2          0           0
##   3          0           0
##   4          0           0
##   5          0           0
##   6          0           0
##   7          0           0
##   8          0           0
##   9          0           0
##   10         0           0
##   11         0           0
##   12         0           0
##   13         0           0
##   14         0           0
##   15         0           0
##   17         0           0
## [1] "Frequency table after encoding"
## dem6. What is your highest completed education level?   [You do not need to read the r
##                 Primary or less (0-5)                 Lower secondary (6-8) 
##                                    71                                   186 
##                      Secondary (9-10)                              SLC (11) 
##                                   130                                   300 
##      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                   264                                   113 
## Illiterate, and never attended school 
##                                     1 
## [1] "Inspect value labels and relabel as necessary"
##                 Primary or less (0-5)                 Lower secondary (6-8) 
##                                     1                                     2 
##                      Secondary (9-10)                              SLC (11) 
##                                     3                                     4 
##      CLASS 12/Intermediate level (12)           Bachelor/Postgraduate level 
##                                     5                                     6 
##   Literate, but never attended school Illiterate, and never attended school 
##                                     7                                     8 
##                               Refused                        Does not apply 
##                                     9                                    10 
##                            Don't Know 
##                                    11
mydata <- top_recode ("dem9", break_point=2, missing=999999) # Topcode cases with 2 or more children, as there are only 16 cases with 3 or more. 
## [1] "Frequency table before encoding"
## dem9. How many sons do you have?  [Open-ended]
##   0   1   2   3   4 
## 396 483 170  14   2

## [1] "Frequency table after encoding"
## dem9. How many sons do you have?  [Open-ended]
##         0         1 2 or more 
##       396       483       186

mydata <- top_recode ("dem10", break_point=2, missing=999999) # Topcode cases with 2 or more children, as there are only 36 cases with 3 or more. 
## [1] "Frequency table before encoding"
## dem10. How many daughters do you have?  [Open-ended]  
##   0   1   2   3   4   5 
## 499 401 129  29   2   5

## [1] "Frequency table after encoding"
## dem10. How many daughters do you have?  [Open-ended]  
##         0         1 2 or more 
##       499       401       165

# Top code high income to the 99.5 percentile

percentile_99.5 <- floor(quantile(mydata$dem11[mydata$dem11>999], probs = c(0.995)))
mydata <- top_recode (variable="dem11", break_point=percentile_99.5, missing=999)
## [1] "Frequency table before encoding"
## dem11. In a typical month, approximately what is your household's cash income? (in NRS)
##    777    888    999  10000  12500  15000  16000  16500  17000  17200  17210  17230 
##      1      1      5      4      1     11      8      1    113      3      1      1 
##  17330  17500  17550  18000  18235  18300  18340  18500  18600  18650  19000  20000 
##      1      8      1     43      1      1      2      1      1      1      5     70 
##  21000  22000  22200  22500  22600  22630  23000  23500  23800  24000  25000  26000 
##      8     74      1      4      1      1     51      3      1     14     69      8 
##  27000  27500  28000  28500  29000  30000  32000  33000  34000  35000  35500  36000 
##     10      2     10      1      4     82     11      3     10     55      1      3 
##  37000  38000  39000  40000  42000  43000  44000  45000  47000  48000  50000  52000 
##      3      1      2     90      7      1      5     37      2      1     74      1 
##  55000  56000  58000  60000  65000  68000  70000  75000  80000  85000  90000  1e+05 
##      8      2      1     34      2      1     14      4     10      3      4     21 
## 120000 125000 130000 150000 155000 160000 175000  2e+05 250000  4e+05 
##      2      1      2      8      1      1      1      7      1      1

## [1] "Frequency table after encoding"
## dem11. In a typical month, approximately what is your household's cash income? (in NRS)
##           777           888           999         10000         12500         15000 
##             1             1             5             4             1            11 
##         16000         16500         17000         17200         17210         17230 
##             8             1           113             3             1             1 
##         17330         17500         17550         18000         18235         18300 
##             1             8             1            43             1             1 
##         18340         18500         18600         18650         19000         20000 
##             2             1             1             1             5            70 
##         21000         22000         22200         22500         22600         22630 
##             8            74             1             4             1             1 
##         23000         23500         23800         24000         25000         26000 
##            51             3             1            14            69             8 
##         27000         27500         28000         28500         29000         30000 
##            10             2            10             1             4            82 
##         32000         33000         34000         35000         35500         36000 
##            11             3            10            55             1             3 
##         37000         38000         39000         40000         42000         43000 
##             3             1             2            90             7             1 
##         44000         45000         47000         48000         50000         52000 
##             5            37             2             1            74             1 
##         55000         56000         58000         60000         65000         68000 
##             8             2             1            34             2             1 
##         70000         75000         80000         85000         90000         1e+05 
##            14             4            10             3             4            21 
##        120000        125000        130000        150000        155000        160000 
##             2             1             2             8             1             1 
##        175000 2e+05 or more 
##             1             9

# Group year of employment into 5-year groups. 

break_year <- c(0, seq(2050, 2075, 5))
labels_year <- c("Before 2050"= 1,
                 "2050-2054" = 2, 
                 "2055-2059" = 3, 
                 "2060-2064" = 4, 
                 "2065-2069" = 5, 
                 "2070-2074" = 6)
mydata <- ordinal_recode (variable="emp1y", break_points=break_year, missing=999999, value_labels=labels_year)

## [1] "Frequency table before encoding"
## emp1y. Year
## 2029 2030 2036 2041 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 
##    1    1    1    1    1    2   10    9   10   18    8   12   18   12   17   27   17 
## 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2069 2070 2071 2072 2073 
##   33   76   63   51   78   67  134   91    2   20   53   93   78    3   54    4 
##       recoded
##        [0,2050) [2050,2055) [2055,2060) [2060,2065) [2065,2070) [2070,2075)
##   2029        1           0           0           0           0           0
##   2030        1           0           0           0           0           0
##   2036        1           0           0           0           0           0
##   2041        1           0           0           0           0           0
##   2044        1           0           0           0           0           0
##   2045        2           0           0           0           0           0
##   2046       10           0           0           0           0           0
##   2047        9           0           0           0           0           0
##   2048       10           0           0           0           0           0
##   2049       18           0           0           0           0           0
##   2050        0           8           0           0           0           0
##   2051        0          12           0           0           0           0
##   2052        0          18           0           0           0           0
##   2053        0          12           0           0           0           0
##   2054        0          17           0           0           0           0
##   2055        0           0          27           0           0           0
##   2056        0           0          17           0           0           0
##   2057        0           0          33           0           0           0
##   2058        0           0          76           0           0           0
##   2059        0           0          63           0           0           0
##   2060        0           0           0          51           0           0
##   2061        0           0           0          78           0           0
##   2062        0           0           0          67           0           0
##   2063        0           0           0         134           0           0
##   2064        0           0           0          91           0           0
##   2065        0           0           0           0           2           0
##   2066        0           0           0           0          20           0
##   2067        0           0           0           0          53           0
##   2069        0           0           0           0          93           0
##   2070        0           0           0           0           0          78
##   2071        0           0           0           0           0           3
##   2072        0           0           0           0           0          54
##   2073        0           0           0           0           0           4
##       recoded
##        [2075,1e+06)
##   2029            0
##   2030            0
##   2036            0
##   2041            0
##   2044            0
##   2045            0
##   2046            0
##   2047            0
##   2048            0
##   2049            0
##   2050            0
##   2051            0
##   2052            0
##   2053            0
##   2054            0
##   2055            0
##   2056            0
##   2057            0
##   2058            0
##   2059            0
##   2060            0
##   2061            0
##   2062            0
##   2063            0
##   2064            0
##   2065            0
##   2066            0
##   2067            0
##   2069            0
##   2070            0
##   2071            0
##   2072            0
##   2073            0
## [1] "Frequency table after encoding"
## emp1y. Year
## Before 2050   2050-2054   2055-2059   2060-2064   2065-2069   2070-2074 
##          54          67         216         421         168         139 
## [1] "Inspect value labels and relabel as necessary"
## Before 2050   2050-2054   2055-2059   2060-2064   2065-2069   2070-2074 
##           1           2           3           4           5           6
# !!!Include relevant variables in list below

indirect_PII <- c("list_id", 
                  "police_rank", 
                  "dem3", 
                  "dem5", 
                  "dem7", 
                  "dem8", 
                  "emp20_1", 
                  "emp20_2", 
                  "emp20_5", 
                  "govlaw0", 
                  "enumimp1_a", 
                  "enumimp1_b", 
                  "enumimp1_c")

capture_tables (indirect_PII)

# Recode those with very specific values where more than half of the sample have actual data. 

# Top code high police ranks, as only a few "Superintendent of Police (SP)", "Deputy Superintendent of Police (DSP)", or "Inspector\t"

break_rank <- c(0, 4, 5, 6, 7,8)
labels_rank <- c("Superintendent, Deputy Superintendent, or Inspector"= 1,
                 "Sub Inspector" = 2, 
                 "Assistant Sub Inspector" = 3, 
                 "Head Constable" = 4, 
                 "Constable" = 5)
mydata <- ordinal_recode (variable="police_rank", break_points=break_rank, missing=999999, value_labels=labels_rank)

## [1] "Frequency table before encoding"
## police_rank. What is your police rank?
##         Superintendent of Police (SP) Deputy Superintendent of Police (DSP) 
##                                     5                                     8 
##                            Inspector\t                       Sub Inspector\t\t 
##                                    18                                    65 
##             Assistant Sub Inspector\t\t                      Head Constable\t\t 
##                                   132                                   237 
##                            Constable\t 
##                                   600 
##    recoded
##     [0,4) [4,5) [5,6) [6,7) [7,8) [8,1e+06)
##   1     5     0     0     0     0         0
##   2     8     0     0     0     0         0
##   3    18     0     0     0     0         0
##   4     0    65     0     0     0         0
##   5     0     0   132     0     0         0
##   6     0     0     0   237     0         0
##   7     0     0     0     0   600         0
## [1] "Frequency table after encoding"
## police_rank. What is your police rank?
## Superintendent, Deputy Superintendent, or Inspector 
##                                                  31 
##                                       Sub Inspector 
##                                                  65 
##                             Assistant Sub Inspector 
##                                                 132 
##                                      Head Constable 
##                                                 237 
##                                           Constable 
##                                                 600 
## [1] "Inspect value labels and relabel as necessary"
## Superintendent, Deputy Superintendent, or Inspector 
##                                                   1 
##                                       Sub Inspector 
##                                                   2 
##                             Assistant Sub Inspector 
##                                                   3 
##                                      Head Constable 
##                                                   4 
##                                           Constable 
##                                                   5
mydata <- ordinal_recode (variable="enumimp1_c", break_points=break_rank, missing=999999, value_labels=labels_rank)

## [1] "Frequency table before encoding"
## enumimp1_c. What is the police rank of the respondent?
##         Superintendent of Police (SP) Deputy Superintendent of Police (DSP) 
##                                     5                                     8 
##                            Inspector\t                       Sub Inspector\t\t 
##                                    18                                    65 
##             Assistant Sub Inspector\t\t                      Head Constable\t\t 
##                                   132                                   237 
##                            Constable\t 
##                                   600 
##    recoded
##     [0,4) [4,5) [5,6) [6,7) [7,8) [8,1e+06)
##   1     5     0     0     0     0         0
##   2     8     0     0     0     0         0
##   3    18     0     0     0     0         0
##   4     0    65     0     0     0         0
##   5     0     0   132     0     0         0
##   6     0     0     0   237     0         0
##   7     0     0     0     0   600         0
## [1] "Frequency table after encoding"
## enumimp1_c. What is the police rank of the respondent?
## Superintendent, Deputy Superintendent, or Inspector 
##                                                  31 
##                                       Sub Inspector 
##                                                  65 
##                             Assistant Sub Inspector 
##                                                 132 
##                                      Head Constable 
##                                                 237 
##                                           Constable 
##                                                 600 
## [1] "Inspect value labels and relabel as necessary"
## Superintendent, Deputy Superintendent, or Inspector 
##                                                   1 
##                                       Sub Inspector 
##                                                   2 
##                             Assistant Sub Inspector 
##                                                   3 
##                                      Head Constable 
##                                                   4 
##                                           Constable 
##                                                   5
# Encode caste and political party, as many small groups

mydata <- encode_location (variables= "dem5", missing=999999)
## [1] "Frequency table before encoding"
## dem5. What is your ethnic background?  [You do not need to read the response choices.]
##            Chhetri     Brahmin (Hill)              Magar              Tharu 
##                279                137                 69                 66 
##             Tamang              Newar             Muslim               Kami 
##                 67                 64                  5                 14 
##              Yadav                Rai             Gurung       Damain/Dholi 
##                 67                 15                 21                 18 
##              Limbu            Thakuri              Sarki               Teli 
##                  2                 15                  6                 22 
## Chamar/Harijan/Ram              Koiri              Kurmi Dusadh/Paswan/Pasi 
##                  5                  7                 20                  7 
##              Sonar    Brahman (Tarai)      Gharti/Bhujel             Kalwar 
##                  4                 22                  7                  1 
##              Kumal       Hajam/Thakur              Sudhi              Lohar 
##                 15                  6                  1                  1 
##              Tatma              Majhi             Nuniya             Kumhar 
##                  5                  9                  2                  2 
##            Danuwar      Chepang/Praja            Haluwai             Rajput 
##                  3                  7                  2                  5 
##              Darai             Pahari               Bote   Adibasi/Janajati 
##                  3                  1                  1                  3 
##               Badi        Other Caste         Don't know 
##                  1                 57                  1 
## [1] "Frequency table after encoding"
## dem5. What is your ethnic background?  [You do not need to read the response choices.]
## 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 
##   5  14   2  69   5   7   2  15  64  67   7   2   1  66   2  22   6  67   9   1  18 
## 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 
##  57   3  21  20   1   7   1   5   1   6   4  15   3   7   3 279   5  15  22 137   1 
## 426 
##   1
mydata <- encode_location (variables= "govlaw0", missing=999999)
## [1] "Frequency table before encoding"
## govlaw0. If you had to say, which Nepali party do you feel closest to?    [You do not nee
##                 Madheshi Jana Adhikari Forum, Nepal 
##                                                   1 
##                     Nepal Communist Party (CPN UML) 
##                                                  99 
## Nepal Communist Party (Marxist <U+0096> Leninist)  \t 
##                                                   7 
##                      Nepali Congress Party (NCP)  \t 
##                                                  74 
##                          Rastriya Prajatantra Party 
##                                                   4 
##                    Rastriya Prajatantra Party Nepal 
##                                                   1 
##                                    Sadbhawana Party 
##                                                   1 
##           Terai <U+0096> Madhesh Loktantrik Party \t 
##                                                   2 
##              Unified Nepal Communist Party (Maoist) 
##                                                  11 
##                       Other (please specify): _____ 
##                                                   2 
##                    I do not feel close to any party 
##                                                 822 
##                                   Refused to answer 
##                                                  28 
##                                                <NA> 
##                                                  13 
## [1] "Frequency table after encoding"
## govlaw0. If you had to say, which Nepali party do you feel closest to?    [You do not nee
##  661  662  663  664  665  666  667  668  669  670  671  672 <NA> 
##   99  822    7   11    1    2   28   74    1    2    4    1   13
# Recode religion

break_rel <- c(1,2,3, 777, 888, 999)
labels_rel <- c("Hindu" = 1,
                "Buddhist" = 2,
                "Other" = 3,
                "Refused" = 4,
                "Not applicable" = 5,
                "Don't know" = 6)
mydata <- ordinal_recode (variable="dem7", break_points=break_rel, missing=999, value_labels=labels_rel)

## [1] "Frequency table before encoding"
## dem7. What is your religious background?  [You do not need to read the response choice
##     Hindu  Buddhist     Islam    Kirant Christian 
##       986        64         6         1         8 
##    recoded
##     [1,2) [2,3) [3,777) [777,888) [888,999) [999,1e+03)
##   1   986     0       0         0         0           0
##   2     0    64       0         0         0           0
##   3     0     0       6         0         0           0
##   4     0     0       1         0         0           0
##   5     0     0       8         0         0           0
## [1] "Frequency table after encoding"
## dem7. What is your religious background?  [You do not need to read the response choice
##    Hindu Buddhist    Other 
##      986       64       15 
## [1] "Inspect value labels and relabel as necessary"
##          Hindu       Buddhist          Other        Refused Not applicable 
##              1              2              3              4              5 
##     Don't know 
##              6
# Recode marital status

break_mar <- c(1,2,777, 888, 999)
labels_mar <- c("Married" = 1,
                "Other" = 2,
                "Refused" = 3,
                "Not applicable" = 4,
                "Don't know" = 5)
mydata <- ordinal_recode (variable="dem8", break_points=break_mar, missing=999, value_labels=labels_mar)

## [1] "Frequency table before encoding"
## dem8. What is your marital status?
##            Married Separated/Divorced            Widowed      Never Married 
##                930                  4                  2                129 
##    recoded
##     [1,2) [2,777) [777,888) [888,999) [999,1e+03)
##   1   930       0         0         0           0
##   2     0       4         0         0           0
##   3     0       2         0         0           0
##   4     0     129         0         0           0
## [1] "Frequency table after encoding"
## dem8. What is your marital status?
## Married   Other 
##     930     135 
## [1] "Inspect value labels and relabel as necessary"
##        Married          Other        Refused Not applicable     Don't know 
##              1              2              3              4              5

Matching and crosstabulations: Run automated PII check

# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# All variable names should correspond to the names in the data file
# selected categorical key variables: gender, occupation/education and age
selectedKeyVars = c('dem3', 'police_rank', "dem4") ##!!! Replace with candidate categorical demo vars
# weight variable
selectedWeightVar = c('weight') ##!!! Replace with weight var
# creating the sdcMicro object with the assigned variables
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars, weightVar=selectedWeightVar)
sdcInitial
## The input dataset consists of 1065 rows and 314 variables.
##   --> Categorical key variables: dem3, police_rank, dem4
##   --> Weight variable: weight
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size           Size of smallest (>0)
##          dem3                    2 (2)   532.500 (532.500)                   168
##   police_rank                    5 (5)   213.000 (213.000)                    31
##          dem4                    8 (8)   133.125 (133.125)                     2
##       
##  (168)
##   (31)
##    (2)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 8 (0.751%)
##   - 3-anonymity: 18 (1.690%)
##   - 5-anonymity: 39 (3.662%)
## 
## ----------------------------------------------------------------------

Show values of key variable of records that violate k-anonymity

#mydata <- labelDataset(mydata)
notAnon <- sdcInitial@risk$individual[,2] < 2 # for 2-anonymity
mydata[notAnon,selectedKeyVars]
## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom
## # A tibble: 8 x 3
##         dem3                                             police_rank          dem4
##    <dbl+lbl>                                               <dbl+lbl>     <dbl+lbl>
## 1 2 [Female] 1 [Superintendent, Deputy Superintendent, or Inspector] 7 [2045-2049]
## 2 2 [Female] 1 [Superintendent, Deputy Superintendent, or Inspector] 3 [2025-2029]
## 3 2 [Female] 3 [Assistant Sub Inspector]                             7 [2045-2049]
## 4 1 [Male]   4 [Head Constable]                                      7 [2045-2049]
## 5 1 [Male]   5 [Constable]                                           2 [2020-2024]
## 6 2 [Female] 5 [Constable]                                           5 [2035-2039]
## 7 2 [Female] 2 [Sub Inspector]                                       3 [2025-2029]
## 8 2 [Female] 3 [Assistant Sub Inspector]                             8 [2050-2054]
sdcFinal <- localSuppression(sdcInitial)

# Recombining anonymized variables

extractManipData(sdcFinal)[notAnon,selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used
##     dem3 police_rank dem4
## 1      2           1   NA
## 192    2          NA    3
## 389    2           3   NA
## 585    1           4   NA
## 690    1           5   NA
## 720    2           5   NA
## 864    2           2   NA
## 871    2           3   NA
mydata [192,"police_rank"] <- NA
mydata [192,"enumimp1_c"] <- NA
mydata [notAnon,"dem4"] <- NA
mydata [192,"dem4"] <- 3

# Check that 2-anonimity has been achieved. 

sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars, weightVar=selectedWeightVar)
sdcInitial
## The input dataset consists of 1065 rows and 314 variables.
##   --> Categorical key variables: dem3, police_rank, dem4
##   --> Weight variable: weight
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size           Size of smallest (>0)
##          dem3                    2 (2)   532.500 (532.500)                   168
##   police_rank                    6 (6)   212.800 (212.800)                    30
##          dem4                    9 (9)   132.250 (132.250)                     2
##       
##  (168)
##   (30)
##    (2)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 5 (0.469%)
##   - 5-anonymity: 21 (1.972%)
## 
## ----------------------------------------------------------------------

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here: 

open_ends <- c("start_failure_reason_other",
               "start_failure_reason_other_eng",
               "mc1_other",
               "mc1_other_eng",
               "ht25o",
               "ht25o_eng",
               "ht26o",
               "ht26o_eng",
               "govlaw0_other",
               "govlaw0_other_eng",
               "enumimp9_other",
               "enumimp7o",
               "enumimp7o_eng",
               "enumimp10",
               "enumimp10_eng",
               "poster_read")

report_open (list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

mydata <- mydata[!names(mydata) %in% c("govlaw0_other","govlaw0_other_eng")] # Specific political party mentioned

GPS data: Displace

# Setup map

countrymap <- map_data("world") %>% filter(region=="Nepal")  #!!! Select correct country
#admin <- raster::getData("GADM", country="NP", level=0) #!!! Select correct country map using standard 2-letter country codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
admin <- readRDS(file="gadm36_NPL_0_sp.rds")

# Displace all pairs of GPS variables (Longitude, Latitude). Check summary statistics and maps before and after displacement. 

gps.vars <- c("Longitude", "Latitude") # !!!Include relevant variables, always longitude first, latitude second.
mydata <- displace(gps.vars, admin=admin, samp_num=1, other_num=100000) # May take a few minutes to process.
## Warning: Removed 34 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##    Longitude        Latitude    
##  Min.   :84.25   Min.   :27.21  
##  1st Qu.:84.89   1st Qu.:27.59  
##  Median :85.16   Median :27.67  
##  Mean   :85.11   Mean   :27.64  
##  3rd Qu.:85.43   3rd Qu.:27.68  
##  Max.   :85.80   Max.   :28.08  
##  NA's   :34      NA's   :34
## Warning: Removed 34 rows containing missing values (geom_point).

## Warning: Removed 34 rows containing missing values (geom_point).

## Warning: Removed 34 rows containing missing values (geom_point).

## Warning: Removed 34 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##    Longitude        Latitude    
##  Min.   :84.25   Min.   :27.20  
##  1st Qu.:84.87   1st Qu.:27.59  
##  Median :85.17   Median :27.66  
##  Mean   :85.12   Mean   :27.64  
##  3rd Qu.:85.44   3rd Qu.:27.69  
##  Max.   :85.80   Max.   :28.07  
##  NA's   :34      NA's   :34     
## [1] "Processing time = 2.92298678557078"
gps.vars <- c("GPSinitial_LO", "GPSinitial_LA") # !!!Include relevant variables, always longitude first, latitude second.
mydata <- displace(gps.vars, admin=admin, samp_num=1, other_num=100000) # May take a few minutes to process.
## Warning: Removed 30 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##  GPSinitial_LO   GPSinitial_LA  
##  Min.   :84.25   Min.   :27.21  
##  1st Qu.:84.88   1st Qu.:27.60  
##  Median :85.16   Median :27.67  
##  Mean   :85.11   Mean   :27.64  
##  3rd Qu.:85.43   3rd Qu.:27.68  
##  Max.   :85.80   Max.   :28.08  
##  NA's   :30      NA's   :30
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##  GPSinitial_LO   GPSinitial_LA  
##  Min.   :84.24   Min.   :27.20  
##  1st Qu.:84.87   1st Qu.:27.59  
##  Median :85.15   Median :27.66  
##  Mean   :85.11   Mean   :27.64  
##  3rd Qu.:85.43   3rd Qu.:27.69  
##  Max.   :85.83   Max.   :28.10  
##  NA's   :30      NA's   :30     
## [1] "Processing time = 2.82884380022685"
gps.vars <- c("gpsht13a_LO", "gpsht13a_LA") # !!!Include relevant variables, always longitude first, latitude second.
mydata <- displace(gps.vars, admin=admin, samp_num=1, other_num=100000) # May take a few minutes to process.
## Warning: Removed 119 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##   gpsht13a_LO     gpsht13a_LA   
##  Min.   :84.25   Min.   :27.21  
##  1st Qu.:84.90   1st Qu.:27.58  
##  Median :85.25   Median :27.67  
##  Mean   :85.13   Mean   :27.64  
##  3rd Qu.:85.43   3rd Qu.:27.68  
##  Max.   :85.80   Max.   :28.08  
##  NA's   :119     NA's   :119
## Warning: Removed 119 rows containing missing values (geom_point).
## Warning: Removed 119 rows containing missing values (geom_point).

## Warning: Removed 119 rows containing missing values (geom_point).

## Warning: Removed 119 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##   gpsht13a_LO     gpsht13a_LA   
##  Min.   :84.21   Min.   :27.18  
##  1st Qu.:84.89   1st Qu.:27.58  
##  Median :85.22   Median :27.65  
##  Mean   :85.13   Mean   :27.64  
##  3rd Qu.:85.44   3rd Qu.:27.69  
##  Max.   :85.81   Max.   :28.09  
##  NA's   :119     NA's   :119    
## [1] "Processing time = 2.57622871398926"
gps.vars <- c("gpsenumip_LO", "gpsenumip_LA") # !!!Include relevant variables, always longitude first, latitude second.
mydata <- displace(gps.vars, admin=admin, samp_num=1, other_num=100000) # May take a few minutes to process.
## Warning: Removed 136 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics before displacement"
##   gpsenumip_LO    gpsenumip_LA  
##  Min.   :84.25   Min.   :27.21  
##  1st Qu.:84.90   1st Qu.:27.58  
##  Median :85.25   Median :27.67  
##  Mean   :85.14   Mean   :27.64  
##  3rd Qu.:85.43   3rd Qu.:27.68  
##  Max.   :85.80   Max.   :28.08  
##  NA's   :136     NA's   :136
## Warning: Removed 136 rows containing missing values (geom_point).
## Warning: Removed 136 rows containing missing values (geom_point).

## Warning: Removed 136 rows containing missing values (geom_point).

## Warning: Removed 136 rows containing missing values (geom_point).

## [1] "Summary Long/Lat statistics after displacement"
##   gpsenumip_LO    gpsenumip_LA  
##  Min.   :84.23   Min.   :27.17  
##  1st Qu.:84.90   1st Qu.:27.58  
##  Median :85.25   Median :27.65  
##  Mean   :85.14   Mean   :27.64  
##  3rd Qu.:85.44   3rd Qu.:27.69  
##  Max.   :85.83   Max.   :28.10  
##  NA's   :136     NA's   :136    
## [1] "Processing time = 2.35587553183238"

Save processed data in Stata and SPSS format

Adds "_PU" (Public Use) to the end of the name

haven::write_dta(mydata, paste0(filename, "_PU.dta"))
haven::write_sav(mydata, paste0(filename, "_PU.sav"))