# Start from an empty workspace.
# `all.names = TRUE` is written out in full: the earlier `all = t` passed
# base::t (the transpose function) instead of a logical TRUE and relied on
# partial argument matching, so objects whose names begin with "." were not
# reliably cleared.
rm(list = ls(all.names = TRUE))

Set up filenames

# Dataset base name; reused further down to build the output file names and
# the report title.
# !!!Update filename
filename <- "App_rural_parents_Raw_noPII"

# Name of the helper-functions script that gets sourced next.
# !!!Update helper functions file
functions_vers <- "functions_1.7.R"

Set up data and functions, and create the dictionary for dataset review

# Run the helper script named in `functions_vers`.
# NOTE(review): presumably this is what loads `mydata` and defines helpers
# such as encode_location() and report_open() -- confirm against that file.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent names, addresses, identification numbers, phone numbers
# Direct PII-team: Interviewer names, other field team names
# Indirect PII-ordinal: Date of birth, age, income, education, household composition
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, latitude
# Small Location: Location with population <100,000
# Large Location: Location with population >100,000
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary

Direct PII: variables to be removed

# !!!Include any Direct PII variables

# Columns flagged as direct PII; they are removed from the data outright.
dropvars <- c(
  "nombres",
  "item_1_2_1",
  "item_1_2_4",
  "item_3_4_1",
  "item_3_4_2",
  "item_5_6_1",
  "item_5_6_2"
)
# Keep only the columns whose names do not appear in `dropvars`.
mydata <- mydata[!(names(mydata) %in% dropvars)]

Direct PII-team: Encode field team names

# !!! No Direct PII-team

Small locations: Encode locations with pop <100,000 using random large numbers

#  !!!Include relevant variables, but check their population size first to confirm they are <100,000

# Location identifier columns handed to encode_location().
locvars <- c("codlocal", "cod_mod")
# 999999 is passed as the missing-value code; in the frequency tables printed
# below it keeps the same value and count before and after encoding.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## codlocal. codlocal
##  55610  57968  58185  59665  60433  65421  66736  68599  68603  68655  68679  68735  68900  68924  68938 
##      1      1      1      1      1      2      1      3      2      1      2      1      1      1      3 
##  68943  68957  68962  68976  68981  69103  69179  69235  69551  69669  69985  70007  70031  70074  70088 
##      2      2      2      2      5      2      3      2      2      1      3      2      1      1      1 
##  70111  70149  71115  71498  71592  71629  71733  71752  71766  71790  71926  71931  73119  73162  73181 
##      1      3      1      1      1      1      5      2      1      1      2      1      6      4      3 
##  73195  73280  73303  73322  73341  73398  73435  73459  73529  73534  73548  73567  73572  73591  73609 
##      2      2      2      7      2      7      2      1      2      5      2      4      3      2      3 
##  73789 130308 142655 146154 147484 147686 147709 148520 148600 150122 150136 150202 150221 150259 150513 
##      5      3      2      2      2      4      1      3      2      2      2      3      3      5      4 
## 150532 150565 150570 150607 150612 150631 150645 150650 150754 150768 150773 150792 150834 150848 150966 
##      9      2      2      4      2      3      4      3      6      4      4      6      4      5      7 
## 150971 150985 151027 151070 151107 151188 151193 151206 151254 151598 151640 151664 151678 152060 152215 
##      6      2      3      2      2      3      2      1      2      1      4      2      1      1      2 
## 152239 152263 152282 152574 152588 152593 152606 152625 152668 152673 152734 152753 152786 153540 153818 
##      3      1      1      1      1      3      3      1      3      4      4      2      3      1      7 
## 153823 153837 153842 153861 153875 153880 153899 153903 153941 153955 154021 154035 154064 154078 154083 
##      2      1      1      2      8      2      1      1      2      6      5      1      1      1      1 
## 154097 154120 154200 154238 154262 154549 157010 157053 157072 157190 157213 157227 157345 157350 157374 
##      2     12      2      1      6      1      3      3      2      1      1      2      2      1      8 
## 157393 157406 157487 157492 157500 157543 157595 157604 157618 157623 157656 157661 157680 157703 157717 
##      2      3      4      2      3      4      1      7      2      3      2      3      3      2      4 
## 157722 157736 157760 157779 157798 157802 157821 157835 157840 157864 157878 157915 157977 157982 158024 
##      3      3      1      4      1      3      2      4      5      2      1      2      3      1      2 
## 158057 158095 158104 158123 158161 158175 158180 158203 158217 158236 158241 158255 158335 158340 158359 
##      2      2      5      2      4      2      2      3      2      3      3      3      1      3      5 
## 158364 158378 158383 158401 158415 158444 158458 158477 158482 158496 158509 158547 158590 158608 158627 
##      1      2      4      7      2      1      3      4      5      4      5      1      7      3      2 
## 158651 158665 158670 158712 158745 158750 158788 158934 159207 159453 159491 159556 159702 159815 164968 
##      1      2      2      1      2      1     10      1      1      2      4      4      1      7      2 
## 165029 165072 165086 165091 165185 165190 165246 165326 165331 165345 165473 165543 165604 165637 165680 
##      6      3      1      5      2      1      3      2      3      2      3      1      2      7      2 
## 165699 165703 165717 165736 165741 165784 165798 165802 165840 165864 165915 165920 166038 166076 166104 
##      3      1      1      1      6      3      2      2      1      8      2      1      1      3      2 
## 166118 166316 166533 166590 166627 166632 166651 166774 166788 166830 166905 166948 167014 167170 167189 
##      3      2      1      3      3      1      3      2      1      1      3      1     10      6      1 
## 167194 167207 167212 167226 167231 167269 167311 167349 167354 167368 167410 167537 167561 167575 167580 
##      2      3      5      8      7      2      2      2      2      1      1      6      2      5      1 
## 167599 167603 167617 167636 167641 167679 167684 169126 169150 170196 170200 170219 170304 170318 170375 
##      2      7      2      2      2      1      8      1      5      1      1      1      1      4      1 
## 170479 170484 170506 170709 170832 170865 170907 170931 171134 340231 340293 343357 462430 462543 505991 
##      5      2      6      1      1      2      2      1      4      1      1      4      1      1      4 
## 508447 515508 517084 517102 520915 526465 526470 531928 534658 538208 538227 538779 555306 560162 562439 
##      1      2      1      2      1      5      6      2      3      2      1      3      3      2      1 
## 563151 571844 582376 585308 590263 601493 602242 603468 603581 603699 603755 605066 605132 605146 609248 
##      3      3      2      5      3      7      1      2      4      4      2      3      2      3      8 
## 611760 748169 748739 999999 
##      1      1      1     99 
## [1] "Frequency table after encoding"
## codlocal. codlocal
##    855    856    857    858    859    860    861    862    863    864    865    866    867    868    869 
##      1      1      1      4      1      2      3      2      3      5      8      3      3      1      1 
##    870    871    872    873    874    875    876    877    878    879    880    881    882    883    884 
##      6      1      5      2      1      2      1      1      4      2      3      2      2      7      3 
##    885    886    887    888    889    890    891    892    893    894    895    896    897    898    899 
##      2      1      1      4      3      4      2      2      6      3      2      3      2      4      2 
##    900    901    902    903    904    905    906    907    908    909    910    911    912    913    914 
##      3      1      1      3      1      6      5      4      2      5      3      2      4      1      1 
##    915    916    917    918    919    920    921    922    923    924    925    926    927    928    929 
##      2      1      1      2      8      1      7      1      2      6      1      4      2      2      2 
##    930    931    932    933    934    935    936    937    938    939    940    941    942    943    944 
##      8      3      2      3      1      1      3      1      8      3      6      2      1      4      3 
##    945    946    947    948    949    950    951    952    953    954    955    956    957    958    959 
##      4      2      6      2      5      3      2      3      1      2      5      3      1      3      6 
##    960    961    962    963    964    966    967    968    969    970    971    972    973    974    975 
##      1      2      1      7      2      3      2      2      3      3      3      4      7      1      2 
##    976    977    978    979    980    981    982    983    984    985    986    987    988    989    990 
##      2      1      1      7      1      4      2      2      1      1      1      5      2      1      2 
##    991    992    993    994    995    996    997    998    999   1000   1001   1002   1003   1004   1005 
##      1      2      1      7      2      1      2      3      5      3      3      1      1      1      1 
##   1006   1007   1008   1009   1010   1011   1012   1013   1014   1015   1016   1017   1018   1019   1020 
##      1      2      4      4      1      3      2      2      3      2      3      1      1      2      7 
##   1021   1022   1023   1024   1025   1026   1027   1028   1029   1030   1031   1032   1033   1034   1035 
##      5      1      6      3      5      1      5      5      2      2      1      1      3      2      3 
##   1036   1037   1038   1039   1040   1041   1042   1043   1044   1045   1046   1047   1048   1049   1050 
##      1      1      4      4      3      1      2     12      4      1      3      4      1      2      2 
##   1051   1052   1053   1054   1055   1056   1057   1058   1059   1060   1061   1062   1063   1064   1065 
##      1      1      3      3      1      1      3      1      2      4      3      2      3      3      1 
##   1066   1067   1068   1069   1070   1071   1072   1073   1074   1075   1076   1077   1078   1079   1080 
##      1      1      4      1      1      3      1      1      3      2      1      3      2      3      1 
##   1081   1082   1083   1084   1085   1086   1087   1088   1089   1090   1091   1092   1093   1094   1095 
##      2      2      1      2      2      7      2      2      2      5      5      4      2      1      5 
##   1096   1097   1098   1099   1100   1101   1102   1103   1104   1105   1106   1107   1108   1109   1110 
##      1      5      2      4      6      2      1      2      7      5      2      2      3      2      1 
##   1111   1112   1113   1114   1115   1116   1117   1118   1119   1120   1121   1122   1123   1124   1125 
##      2      9      2      3      1      1      3      4      2      2      1      8      4      1      4 
##   1126   1127   1128   1129   1130   1131   1132   1133   1134   1135   1136   1137   1138   1139   1140 
##      7      1      1      2      1      1      2      2      2      2      1     10      1      3      2 
##   1141   1142   1143   1144   1145   1146   1147   1148   1149   1150   1151   1152   1153   1154   1155 
##      1      2      1      6      3      2      2      2      2      1      1      1      5      2      1 
##   1156   1157   1158   1159   1160   1161   1162   1163   1164   1165   1166   1167   1168   1169   1170 
##      1      2      2      1      2      3      4      2      2      1      6      1      2      3      4 
##   1171   1172   1173   1174   1175   1176   1177   1178   1179   1180   1181   1182   1183   1184   1185 
##      2      8      1      3      2      2      6      2      5      2      3      3      2      4      1 
##   1186   1187   1188   1189   1190   1191   1192   1193   1194   1195   1196   1197   1198   1199   1200 
##      2      3      2     10      1      2      4      2      7      3      3      1      3      2      2 
##   1201   1202   1203 999999 
##      1      7      1     99 
## [1] "Frequency table before encoding"
## cod_mod. cod_mod
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815 
##       6       1       1       4       2       4       3       2       2       1       2       1       1 
##  207407  216341  220285  226704  232207  232223  232231  232249  232264  232504  232512  232538  232546 
##       1       1       2       1       2       1       2       1       1       1       1       2       3 
##  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777  233130  233296  233361 
##       2       1       1       1       2       1       2       1       1       2       4       4       1 
##  233676  233718  233734  233882  233890  233908  233916  233924  233932  233940  233957  233965  233973 
##       3       1       2       1       1       4       3       2       2       3       1       2       2 
##  233981  233999  234021  234062  234096  234104  234112  234120  234138  234153  234161  234187  234211 
##       2       3       2       2       2       3       1       2       3       3       2       2       1 
##  234229  234237  234351  234369  234377  234385  234401  234419  234427  234443  234450  234500  234583 
##       2       2       1       2       1       3       2       3       3       2       1       2       2 
##  234674  234682  234781  234831  234856  236158  236349  236422  236448  236463  236471  236489  236653 
##       3       3       2       3       2       1       2       4       1       6       1       3       1 
##  236661  236927  287409  287425  287466  309286  309294  309377  309419  309435  309682  310433  312090 
##       8       4       2       3       1       1       6       1       1       1       1       3       2 
##  312215  312306  312421  312744  312868  313080  313239  313395  313460  313890  313908  313965  313981 
##       1       2       1       1       2       1       2       2       1       3       2       2       3 
##  314070  314187  314211  314237  314245  314260  314278  314294  405258  405498  405704  405738  405746 
##       2       4       2       2       2       4       3       2       3       3       3       2       2 
##  405837  405852  405894  405902  405928  405936  406009  406066  406082  406116  406124  406140  406215 
##       1       2       2       2       1       2       2       4       3       4       2       1       2 
##  406223  406264  406413  406595  406629  406645  406975  406983  407007  407049  408245  408278  408286 
##       2       2       2       1       3       1       2       3       1       2       2       2       3 
##  408294  408328  408336  408393  408468  408476  408484  408492  408559  408567  408609  408666  408732 
##       2       1       1       3       2       3       1       1       1       3       2       3       1 
##  408773  408823  408856  408922  408955  408971  409003  409011  409029  409193  409227  409235  409243 
##       1       2       1       2       3       2       2       3       3       2       3       1       2 
##  409284  409292  409300  409318  409326  409359  409441  409565  409896  410464  410480  410514  410613 
##       2       3       3       2       1       2       2       2       1       1       1       2       1 
##  410670  410746  410779  410787  410803  473249  481283  486688  486928  489120  495069  495325  502922 
##       1       2       5       2       2       1       3       3       1       2       5       6       2 
##  504142  517581  517888  518084  518472  519496  519595  519678  525923  550392  551309  557587  579268 
##       1      12       2      10       4       2       2       6       3       2       3       3       2 
##  579276  579284  579292  579300  585885  587055  587147  587204  589804  591255  591602  592147  612051 
##       1       1       3       6       2       6       1       3       3       8       5       3       2 
##  612119  612291  612507  612689  612747  612770  612804  615013  616110  617787  617829  621391  623017 
##       2       1       3       1       1       1       2       2       3       7       4       5       2 
##  623041  637272  639542  639617  647388  647412  647446  647628  671628  672105  679829  680058  680082 
##       1       5       1       1       2       3       6       1       1       4       1       2       1 
##  680124  699603  712562  712711  712778  723031  730655  731273  735498  736116  776039  779041  783423 
##       4       8       1       1       2       1       1       2       3       1       1       1       3 
##  783621  783696  783704  783720  783787  783795  791319  791574  794438  796888  818674  844159  844183 
##       1       3       1       3       2       1      10       1       1       1       2       2       2 
##  891408  891812  895482  927871  930958  931055  931063  932236  932491  932608  932848  933226  933283 
##       5       1       1       3       1       7       6       3       2       3       3       2       3 
##  933291  933317  933531  933846  999999 1031574 1117704 1201649 1201870 1260942 1266428 1273655 1314376 
##       1       3       1       3      99       2       2       4       3       2       3       1       2 
## 1320647 1321322 1321330 1321355 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269 
##       3       3       4       5       4       1       1       1       1       3       4       2       1 
## 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248 1372507 1377209 1377233 
##       2       1       1       2       4       2       7       1       3       2       3       2       1 
## 1377415 1380021 1380120 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390665 
##       3       6       1       2       1       1       3       4       1       1       2       4       5 
## 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 1392224 1392240 1392257 1396191 1396209 
##       4       1       4       2       1       5       5       5       2       2       3       4       2 
## 1396225 1396852 1396878 1396886 1398783 1398932 1401934 1401942 1401959 1402536 1408426 1412634 1412873 
##       8       6       1       2       1       1       2       2       5       3       2       3       2 
## 1415983 1418615 1423003 1442185 1452705 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192 
##       3       1       2       2       5       3       4       2       1       1       2       3       4 
## 1625532 1625557 1625573 1630631 1637263 1659101 1666130 
##       3       2       1       2       1       2       1 
## [1] "Frequency table after encoding"
## cod_mod. cod_mod
##    713    714    715    716    717    718    719    720    721    722    723    724    725    726    727 
##      1      1      2      3      1      1      3      2      2      2      1      2      1      1      1 
##    728    729    730    731    732    733    734    735    736    737    738    739    740    741    742 
##      2      2      2      6      5      1      1      2      1      3      3      5     10      1      2 
##    743    744    745    746    747    748    749    750    751    752    753    754    755    756    757 
##      3      3      1      1      1      1      3      3      3      3      2      2      2      1      6 
##    758    759    760    761    762    763    764    765    766    767    768    769    770    771    772 
##      3      2      1      3      3      7      1      2      2      2      1      7      2      1      1 
##    773    774    775    776    777    778    779    780    781    782    783    784    785    786    787 
##      2      6      1      1      2      5      2      3      1      2      1      2      2      2      1 
##    788    789    790    791    792    793    794    795    796    797    798    799    800    801    802 
##      1      2      2      1      2      3      2      6      1      3      3      1      1      1      2 
##    803    804    805    806    807    808    809    810    811    812    813    814    815    816    817 
##      8      5      3      3      5      2      4      1      2      1      1      1      5      4      3 
##    818    819    820    821    822    823    824    825    826    827    828    829    830    831    832 
##     10      1      1      1      2      1      2      2      4      4      3      1      1      3      2 
##    833    834    835    836    837    838    839    840    841    842    843    844    845    846    847 
##      2      5      2      2      6      3      2      3      1      1      2      4      1      2      4 
##    848    849    850    851    852    853    854    855    856    857    858    859    860    861    862 
##      1      2      1      2      1      1      2      2      4      1      2      3      8      3      2 
##    863    864    865    866    867    868    869    870    871    872    873    874    875    876    877 
##      3      2      2      2      1      2      3      2      3      3      3      2      2      1      2 
##    878    879    880    881    882    883    884    885    886    887    888    889    890    891    892 
##      3      3      4      4      2      1      1      2      3      2      1      1      1      2      1 
##    893    894    895    896    897    898    899    900    901    902    903    904    905    906    907 
##      1      4      2      1      2      4      1     12      5      1      3      2      2      1      2 
##    908    909    910    911    912    913    914    915    916    917    918    919    920    921    922 
##      3      2      1      1      1      3      2      2      2      1      3      1      2      2      2 
##    923    924    925    926    927    928    929    930    931    932    933    934    935    936    937 
##      2      6      3      1      1      4      2      1      2      5      2      2      2      1      4 
##    938    939    940    941    942    943    944    945    946    947    948    949    950    951    952 
##      6      3      1      3      4      1      3      3      2      1      3      3      2      1      3 
##    953    954    955    956    957    958    959    960    961    962    963    964    965    966    967 
##      2      2      8      3      1      5      2      1      2      2      3      4      4      2      1 
##    968    969    970    971    972    973    974    975    976    977    978    979    980    981    982 
##      1      5      5      1      2      1      2      1      2      3      2      2      5      1      1 
##    983    984    985    986    987    988    989    990    991    992    993    994    995    996    997 
##      2      4      1      8      2      4      4      1      2      3      1      2      1      1      2 
##    998    999   1000   1001   1002   1003   1004   1005   1006   1007   1008   1009   1010   1011   1012 
##      3      2      2      3      1      3      1      2      1      1      2      4      2      1      1 
##   1013   1014   1015   1016   1017   1018   1019   1020   1021   1022   1023   1024   1025   1026   1027 
##      1      2      1      2      1      3      1      1      3      1      2      2      3      2      2 
##   1028   1029   1030   1031   1032   1033   1034   1035   1036   1037   1038   1039   1040   1041   1042 
##      2      1      2      1      2      3      3      1      2      2      2      1      1      1      2 
##   1043   1044   1045   1046   1047   1048   1049   1050   1051   1052   1053   1054   1055   1056   1057 
##      3      3      2      3      3      3      1      2      1      7      2      2      4      1      4 
##   1058   1059   1060   1061   1062   1063   1064   1065   1066   1067   1068   1069   1070   1071   1072 
##      1      2      2      1      4      2      3      2      3      3      3      1      2      4      2 
##   1073   1074   1075   1076   1078   1079   1080   1081   1082   1083   1084   1085   1086   1087   1088 
##      3      1      1      2      4      3      1      1      3      3      3      3      3      3      1 
##   1089   1090   1091   1092   1093   1094   1095   1096   1097   1098   1099   1100   1101   1102   1103 
##      6      3      2      6      2      2      6      6      2      1      1      2      1      1      1 
##   1104   1105   1106   1107   1108   1109 999999 
##      4      2      2      3      1      4     99

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# !!! No Indirect PII- Ordinal

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Indirect-identifier variables passed to capture_tables().
indirect_PII <- "rp_finance_6a"

capture_tables(indirect_PII)

# Recode those with very specific values.
# First attach value labels (each label is identical to its value) ...
mydata$rp_finance_6a <- labelled(
  mydata$rp_finance_6a,
  c(
    "Amigos" = "Amigos",
    "Familia" = "Familia",
    "Internet (otro sitio)" = "Internet (otro sitio)",
    "Internet desde casa" = "Internet desde casa",
    "Internet desde la escuela" = "Internet desde la escuela",
    "Profesores" = "Profesores"
  )
)
# ... then fold the two place-specific internet answers into the generic
# "Internet (otro sitio)" category.
mydata$rp_finance_6a[mydata$rp_finance_6a == "Internet desde casa"] <-
  "Internet (otro sitio)"
mydata$rp_finance_6a[mydata$rp_finance_6a == "Internet desde la escuela"] <-
  "Internet (otro sitio)"

Matching and crosstabulations: Run automated PII check

# Categorical key variables used for the automated check (here: genero, grado).
# !!! Replace with candidate categorical demo vars
selectedKeyVars <- c("genero", "grado")

# Build the sdcMicro object on those key variables, then print it.
sdcInitial <- createSdcObj(dat = mydata, keyVars = selectedKeyVars)
sdcInitial
## The input dataset consists of 1044 rows and 92 variables.
##   --> Categorical key variables: genero, grado
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size           Size of smallest (>0)      
##        genero                    2 (2)   522.000 (522.000)                   510 (510)
##         grado                    4 (4)   261.000 (261.000)                    10  (10)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 0 (0.000%)
##   - 5-anonymity: 3 (0.287%)
## 
## ----------------------------------------------------------------------

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Identify open-end variables here: 
# Open-ended variables handed to report_open() for review.
open_ends <- "rp_finance_2a"

report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# rp_finance_2a is removed from the data entirely.
mydata <- mydata[!(names(mydata) %in% "rp_finance_2a")]

GPS data: Displace

# !!! No GPS data

Save processed data in Stata and SPSS format

# Write the processed data twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS).
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Report title, built from the dataset name.
title_var <- paste0("DOL-ILAB SDC - ", filename)