# Clear the workspace before the run, including hidden (dot-prefixed) objects.
# The flag must be the logical TRUE: a bare lowercase `t` is base R's
# transpose function, not a logical, so it never requests hidden objects.
# The argument name is spelled out in full instead of relying on partial
# matching of `all` to `all.names`.
rm(list = ls(all.names = TRUE))

Set up filenames

# !!! Update: base name of the dataset; the output file names written at the
# end of this script are built from it.
filename <- "App_rural_students_Raw_noPII"
# !!! Update: helper-functions script that is sourced further down.
functions_vers <- "functions_1.7.R"

Set up data and functions, and create a dictionary for dataset review

# Run the helper script named in functions_vers.
# NOTE(review): `mydata` and the helpers used below (encode_location,
# capture_tables, report_open) are not defined in this section, so they
# presumably come from this script -- confirm.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000) 
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!! List every Direct PII column here; each listed column is removed from
# mydata by the statement that follows.
dropvars <- c(
  "nombres",
  "item_1_2_1", "item_1_2_4",
  "item_3_4_1", "item_3_4_2",
  "item_5_6_1", "item_5_6_2"
)
# Keep only the columns whose names have no match in dropvars.
mydata <- mydata[is.na(match(names(mydata), dropvars))]

Direct PII-team: Encode field team names

# !!! No Direct PII-team

Small locations: Encode locations with pop <100,000 using random large numbers

# !!! List the location variables to encode, after confirming that each one
# refers to places with a population below 100,000.
locvars <- c("codlocal", "cod_mod")

# encode_location() is defined outside this section. The recorded output below
# shows a frequency table before and after encoding for each variable, with
# the 999999 code keeping its value and count.
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## codlocal. codlocal
##  55610  58185  59420  59509  59665  60433  62922  63530  64068  65421  66736  67161  68599  68603  68655  68679 
##      1      1      1      1      1      1      1      1      1      2      1      1     12      3      3      2 
##  68735  68900  68924  68938  68943  68957  68962  68976  68981  69103  69179  69235  69551  69669  69706  69810 
##      1      3      1      4      2      8      2      3      8      2      3      2      2      1      4      1 
##  69985  70007  70031  70074  70088  70111  70149  71115  71498  71592  71629  71733  71752  71766  71790  71926 
##     16      2      1      2      2      1      5      2      1      3      1     15      4      2      1      2 
##  71931  73119  73162  73181  73195  73280  73303  73322  73341  73398  73435  73459  73529  73534  73548  73553 
##      1     12     10      2      8      2      6      8      4     19      4      2      3     14      3      6 
##  73567  73572  73591  73609  73789 130308 142655 146154 147484 147686 147709 147714 148520 148600 148997 150122 
##      6      7      4      6     11     14      7      5      4      4      3      2      5     10      2      4 
## 150136 150202 150221 150259 150513 150532 150565 150570 150607 150612 150631 150645 150650 150754 150768 150773 
##      4      7      5     15      6     21      6      6      6      5      3     16      6      9      7     12 
## 150792 150834 150848 150966 150971 150985 151027 151070 151107 151188 151193 151206 151254 151598 151640 151664 
##     17      4     14     15     16      4      7      3      9      9      7      1      8      1     10      3 
## 151678 152060 152215 152239 152263 152282 152574 152588 152593 152606 152625 152668 152673 152734 152753 152786 
##      1      1     14     16      4      4      4      5      6     10      3     12      9     15      6     17 
## 153540 153818 153823 153837 153842 153861 153875 153880 153899 153903 153922 153941 153955 154021 154035 154064 
##      1     24      6      1      5      2     22      5      4      6      1      4     15     14      7      3 
## 154078 154083 154097 154120 154200 154238 154262 154549 155054 157010 157053 157072 157190 157213 157227 157345 
##      3      6      3     25      2      1      6      2      1      3     15      5      6      1      8      6 
## 157350 157374 157393 157406 157487 157492 157500 157538 157543 157595 157604 157618 157623 157656 157661 157680 
##      1     31     10      6     14      3      6      1      6      8     22      3      7      3      9      7 
## 157703 157717 157722 157736 157760 157779 157798 157802 157821 157835 157840 157864 157878 157915 157977 157982 
##      5     17     14     10      1      5      4      8      7     15     18      6      3      6     13     10 
## 158024 158057 158095 158104 158123 158161 158175 158180 158203 158217 158236 158241 158255 158335 158340 158359 
##      3      5      4     16      6     13     11      8      9      9     21     11      6      1     15     19 
## 158364 158378 158383 158401 158415 158444 158458 158477 158482 158496 158509 158547 158590 158608 158627 158632 
##      4      4     13     22      6      1      8      6     17     16     15      2     16      4      5      1 
## 158646 158665 158670 158707 158712 158745 158750 158788 158934 159207 159453 159491 159556 159702 159797 159815 
##      1      4      4      2      3      4      2     24      2      1      6      5     14      2      5     18 
## 164930 164968 165029 165072 165086 165091 165185 165190 165246 165326 165331 165345 165473 165543 165604 165637 
##      1      7      8      4      3     16      6      8      7      4      4      2      8      2      2     17 
## 165680 165699 165703 165717 165736 165741 165784 165798 165802 165840 165864 165915 165920 166038 166076 166104 
##      8      7      4      5      8     26      7      4      5      4     12      3      2      1     16      7 
## 166118 166316 166533 166590 166627 166632 166651 166774 166788 166830 166905 166948 167014 167170 167189 167194 
##     11      2      1      4      7      6      9      8      2      4      4      2     20     20      4      5 
## 167207 167212 167226 167231 167269 167311 167349 167354 167368 167410 167537 167561 167575 167580 167599 167603 
##      8     19     22     21      7      6      5      4      4      2     24      5     16      8      5     15 
## 167617 167636 167641 167679 167684 169126 169150 170196 170200 170219 170304 170318 170479 170484 170506 170610 
##      9      6      6      1     23      1     13      1      6      3      5      8      8      5     14      2 
## 170709 170832 170865 170907 170931 171134 340293 343357 462430 462543 505991 508447 515508 517084 517102 520915 
##      1      4      5      5      2      6      1     11      9      2     14      3      7      1      7      1 
## 526465 526470 531928 534658 535506 538208 538227 538779 555306 556042 560162 562439 563151 571844 582376 585308 
##     14     16     14      7      1      9      1     10      7      1      2      1      5      7      4      7 
## 590263 601493 602242 603468 603581 603699 603717 603755 605066 605132 605146 609248 611760 748169 748739 999999 
##     10     18      5     11      6     16      1      3      3      3      8     27      1      2      1    204 
## [1] "Frequency table after encoding"
## codlocal. codlocal
##    823    824    825    826    827    828    829    830    831    832    833    834    835    836    837    838 
##      1      6      8      1      2     16     12      9      6      6      9     17      1     17      2      6 
##    839    840    841    842    843    844    845    846    847    848    849    850    851    852    853    854 
##     11      1     16      4      3     16      9     14      1      1      1      8      7      2      4      6 
##    855    856    857    858    859    860    861    862    863    864    865    866    867    868    869    870 
##      8     15     14     21      5      8      4      8      1      1      4      4      5      6     16     14 
##    871    872    873    874    875    876    877    878    879    880    881    882    883    884    885    886 
##      6      1      1     16      7      4     12      8      2      9      1      1     18      3      2      3 
##    887    888    889    890    891    892    893    894    895    896    897    898    899    900    901    902 
##     10      5      2      6      2     10     16     21      1      1      5      4      7     11      5      6 
##    903    904    905    906    907    908    909    910    911    912    913    914    915    916    917    918 
##      2      3      5      5      1      4      7      7      1     22      4     10     22      4      4      6 
##    919    920    921    922    923    924    925    926    927    928    929    930    931    932    933    934 
##     10     11      8      6      4      5     31     19      1     14      7      1     26     12     14      3 
##    935    936    937    938    939    940    941    942    943    944    945    946    947    948    949    950 
##      5      5      2      7      4     16     22      4     16      6      6      1      2      6     23      6 
##    951    952    953    954    955    956    957    958    959    960    961    962    963    964    965    966 
##      3      8      6      2      8      2      3      2      5      6      6      2     14      2      5      2 
##    967    968    969    970    971    972    973    974    975    976    977    978    979    980    981    982 
##      6      8      2      6      1      1     25      7      5      5      2      4      1      4      4      4 
##    983    984    985    986    987    988    989    990    991    992    993    994    995    996    997    998 
##      7     12      1      1      5     16      5     15      2      6      3      5      1     15     15     10 
##    999   1000   1001   1002   1003   1004   1005   1006   1007   1008   1009   1010   1011   1012   1013   1014 
##      1     27      2      3      7      8     14      4     15      4     24      4     15      8     10      1 
##   1015   1016   1017   1018   1019   1020   1021   1022   1023   1024   1025   1026   1027   1028   1029   1030 
##      7     10     19      1      1      4      1      8      8      3      1      4      5     17      5     16 
##   1031   1032   1033   1034   1035   1036   1037   1038   1039   1040   1041   1042   1043   1044   1045   1046 
##      4      9      5      2      1      1      3     18      2      8      2      1      7      3      2      1 
##   1047   1048   1049   1050   1051   1052   1053   1054   1055   1056   1057   1058   1059   1060   1061   1062 
##     24      5      7      7      3      8      4      4      1     19      3      3      2      2      5      4 
##   1063   1064   1065   1066   1067   1068   1069   1070   1071   1072   1073   1074   1075   1076   1077   1078 
##      4     15      6      1      4      9      1      6      1      7      2      9      4      1      6      8 
##   1079   1080   1081   1082   1083   1084   1085   1086   1087   1088   1089   1090   1091   1092   1093   1094 
##      4     11      2      7     22      5     14      2      2     16      2      5     17      6      3      1 
##   1095   1096   1097   1098   1099   1100   1101   1102   1103   1104   1105   1106   1107   1108   1109   1110 
##      1      1      8      1     14      3      3     15      6      4      9      1      7      6      7      3 
##   1111   1112   1113   1114   1115   1116   1117   1118   1119   1120   1121   1122   1123   1124   1125   1126 
##      7      2      8     13      6      4      1      1      4     10      3     24      3     13     11     14 
##   1127   1128   1129   1130   1131   1132   1133   1134   1135   1136   1137   1138   1139   1140   1141   1142 
##      5     16      5      9      1      3      3      2      3      3      3      2     12     14     20      6 
##   1144   1145   1146   1147   1148   1149   1150   1151   1152   1153   1154   1155   1156   1157   1158   1159 
##      9     15      4      8      6      3      4      9      8     15     17      1      6      4      6      3 
##   1160   1161   1162   1163   1164   1165   1166   1167   1168   1169   1170   1171   1172   1173   1174   1175 
##     20      1      2      6      5      7      2     13      1      3      6      7     21     14      1      1 
##   1176   1177   1178   1179   1180   1181   1182   1183   1184   1185   1186   1187   1188   1189   1190 999999 
##      1      2      7      2      4      6      4      2     10      1      7      3     11     13     18    204 
## [1] "Frequency table before encoding"
## cod_mod. cod_mod
##  204800  204875  204909  205005  205047  205112  205120  205153  205682  205690  205773  205781  205815  206334 
##       9       3       2       6       4       7       7       7       6       6       7       2       4       2 
##  207373  207407  216341  220285  226704  232207  232223  232231  232249  232264  232504  232512  232538  232546 
##       1       1       6       3       6       7       6       6       1       7       3       5       2       5 
##  232553  232561  232579  232587  232595  232603  232611  232645  232728  232777  233130  233296  233361  233676 
##       5       4       3       6       3       6       2       1       2       6       4       5       4       3 
##  233718  233734  233882  233890  233908  233916  233924  233932  233940  233957  233965  233973  233981  233999 
##       7       5       1       3       6       8       9       3       4       4       3       5       3       7 
##  234021  234062  234096  234104  234112  234120  234138  234153  234161  234187  234195  234203  234229  234237 
##       5       4       6       6       4       4       5       5       8       5       1       1       4       4 
##  234351  234369  234377  234385  234401  234419  234427  234443  234450  234500  234583  234674  234682  234781 
##       1       8       4       8       7       8       7       6       3       6       3       9       6       3 
##  234831  234856  236158  236349  236422  236448  236463  236471  236489  236653  236661  236927  287409  287425 
##       7       6       5       5      16       4       8       1       7       1      31       8       8       6 
##  287466  309286  309294  309377  309419  309435  309567  310433  310441  312090  312215  312306  312421  312744 
##       1       1      12       1       3       1       4       3       1       2       3       8       3       2 
##  312868  313080  313239  313395  313460  313890  313908  313965  313981  314070  314187  314211  314237  314245 
##       2       1       2       8       1       2       8       6       6       2       4       4       4       3 
##  314252  314260  314278  314294  405258  405498  405704  405738  405746  405852  405894  405902  405928  405936 
##       6       6       7       4       5       7       5       4       4       7       6       6       3       7 
##  406009  406066  406082  406116  406124  406140  406215  406223  406264  406413  406595  406629  406645  406975 
##       6      10       6       6       5       1       5       4       3       7       5       6       4       5 
##  406983  407007  407049  408211  408245  408278  408286  408294  408328  408336  408393  408468  408476  408484 
##      10       3       6       1       7       4       4       2       5       1       4       8       7       4 
##  408492  408559  408567  408609  408666  408732  408773  408823  408856  408922  408955  408971  409003  409011 
##       5       3       8       6       8       2       4       3       8       5       4       5       5       8 
##  409029  409193  409227  409235  409243  409284  409292  409300  409318  409326  409359  409441  409565  409896 
##       8       2       8       8       5       9       7       9       4       4       7       8       3       1 
##  410464  410480  410514  410613  410670  410746  410779  410787  410803  473249  481283  486688  486928  489120 
##       1       6       5       1       3       5       8       5       4       4       7       7       2       8 
##  495069  495325  498782  499863  502922  504142  517581  517888  518084  518472  519496  519595  519678  525923 
##      14      16       1       2       2       4      25       5      22      10       6       5       6      21 
##  550392  551309  557587  579268  579276  579284  579292  579300  585885  587055  587147  587204  589200  589747 
##       7       3       9       9       1       1      17      20       6      15       3      13       1       1 
##  589804  591255  591602  592147  612051  612119  612291  612507  612689  612747  612770  612804  615013  616110 
##       5      23      14       4       2       2       5       9       1       1       1       3       4       6 
##  617787  617829  621391  623017  623041  637272  639542  639617  647388  647412  647446  647628  655746  671628 
##      18       6      15       4       2       8       2       1       4       7      16       4       1       4 
##  672105  679829  680058  680082  680124  699603  712562  712711  712778  723031  730655  731273  735498  736116 
##       4       2       9       1      17      12       1       2       2       1       2       2       4       2 
##  775700  776039  783423  783597  783621  783696  783704  783720  783787  783795  791319  791574  794438  796888 
##       5       2      11       1       1      11       1      14      10      10      20       4       1       2 
##  818674  818708  844159  844183  891408  891812  895482  927871  930958  931055  931063  932236  932434  932491 
##       5       2       2       3      14       1       1      10       1      14      18      12       1       4 
##  932608  932848  933226  933283  933291  933317  933531  933598  933846  999999 1031574 1117704 1120005 1201649 
##       8       7       6      10       1       6       1       2       6     204       4      10       1      14 
## 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 1321330 1321355 1321421 1327279 1327287 1336072 
##       6       2       3       1       2       2      10       6      11       9      11       2       9       3 
## 1343573 1343581 1344639 1345024 1347269 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 
##       9      11      13       7       1      13       8       1       7      19      11      15       1       7 
## 1369248 1372507 1374438 1377209 1377233 1377415 1379361 1380021 1380120 1386226 1388610 1388644 1388651 1389261 
##      14       7       1       9       1      10       1      18       4      10       2       5      11       9 
## 1389279 1390095 1390467 1390517 1390582 1390665 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 
##       5       2      11       6       1      17      16       7       5      10       7      13      16      14 
## 1392216 1392224 1392240 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1401934 1401942 1401959 
##       2       6       3       8      19      11      27      17       4       8       1      11       8      17 
## 1402536 1408426 1412634 1412873 1415983 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810 
##       7       2       5       3       4       4      12       4       7       9      10      12       9       8 
## 1523828 1540988 1540996 1541192 1625532 1625557 1625573 1630631 1637263 1659101 1666130 
##       6       7      10      10       7       8       8       3       4       9       1 
## [1] "Frequency table after encoding"
## cod_mod. cod_mod
##    269    270    271    272    273    274    275    276    277    278    279    280    281    282    283    284 
##      4      1      1     10      1     23      7      9      4      2      6     10      2     11      4      3 
##    285    286    287    288    289    290    291    292    293    294    295    296    297    298    299    300 
##      6      1      1      9      1      4     15     12      3     16      4      5      7      3      8      2 
##    301    302    303    304    305    306    307    308    309    310    312    313    314    315    316    317 
##     14      1     11      4      2      8      5      7      5      6      7      8      1      3      9      4 
##    318    319    320    321    322    323    324    325    326    327    328    329    330    331    332    333 
##      7      5      7      1      8      3      4      6      2      6      6      4      1      7      4      5 
##    334    335    336    337    338    339    340    341    342    343    344    345    346    347    348    349 
##      6      7      1      1      3      7      1      3      8      1      2      6      3      5      8      4 
##    350    351    352    353    354    355    356    357    358    359    360    361    362    363    364    365 
##      1      7      2      7      6      1      5      3      6      8     14      9     19      5      4      6 
##    366    367    368    369    370    371    372    373    374    375    376    377    378    379    380    381 
##      2      6      2      7      1      9     13      6      1      5      9      1      1     12      6      9 
##    382    383    384    385    386    387    388    389    390    391    392    393    394    395    396    397 
##      3     17      5      1      4      4     18      6      1      4      4      7      4      5      4      1 
##    398    399    400    401    402    403    404    405    406    407    408    409    410    411    412    413 
##      9      1      2      8      9     27      6      4      6      6      8      1     18     14      6      6 
##    414    415    416    417    418    419    420    421    422    423    424    425    426    427    428    429 
##      6     12      6      5      7     31      5      4      3      2      1      2      8      7      2      7 
##    430    431    432    433    434    435    436    437    438    439    440    441    442    443    444    445 
##      4      6      6     10      3      6     16      1      1      4      4      4      2      4      7     10 
##    446    447    448    449    450    451    452    453    454    455    456    457    458    459    460    461 
##      8     11      2      2     10     11     17      6      7      8      1      4      2      6      8      1 
##    462    463    464    465    466    467    468    469    470    471    472    473    474    475    476    477 
##      4      7      6     11      5      1      6      3      7      7      5     10      5      7     14      4 
##    478    479    480    481    482    483    484    485    486    487    488    489    490    491    492    493 
##      2     17      7      3     21      3      7      1      6      6     18      3      1      8      6      4 
##    494    495    496    497    498    499    500    501    502    503    504    505    506    507    508    509 
##      7     14      2      8      1     19     10      5      5      4      3     12      7     14      4      2 
##    510    511    512    513    514    515    516    517    518    519    520    521    522    523    524    525 
##      3      4      8      1      8      7     10      3      7      3      4      5     11      1      5      8 
##    526    527    528    529    530    531    532    533    534    535    536    537    538    539    540    541 
##      1      2      5      4      2      8      9     11      2      8      5      1      8      5      6      1 
##    542    543    544    545    546    547    548    549    550    551    552    553    554    555    556    557 
##      5      8      9      8      4      4      9     11      9     17     16      1      6      4      2      3 
##    558    559    560    561    562    563    564    565    566    567    568    569    570    571    572    573 
##      2      3      8     15      1     13      7      7      4      9     20     13      1      7      2      2 
##    574    575    576    577    578    579    580    581    582    583    584    585    586    587    588    589 
##      8      6      1      3     15      5      2      6      5     11     10      5      1      2      2      2 
##    590    591    592    593    594    595    596    597    598    599    600    601    602    603    604    605 
##      6      3      1      1     10      2      3      5      3      1      6      5      2      5     20     12 
##    606    607    608    609    610    611    612    613    614    615    616    617    618    619    620    621 
##     16      6      8      3      7      4      8      5      2      5      1      5      4      4      1      4 
##    622    623    624    625    626    627    628    629    630    631    632    633    634    635    636    637 
##      1      1      2      9      3      4      5     13     10      2      4      4     25      8      1      7 
##    638    639    640    641    642    643    644    645    646    647    648    649    650    651    652    653 
##      3      3      1      2      1      7      2      1      6      1      4     14     10      2      8      5 
##    654    655    656    657    658    659    660    661    662    663    664    665    666    667    668    669 
##      8      7      4      6      2      7      6      3      9     11      3      8     17     22      4     10 
##    670    671    672    673    674    675    676    677    678    679    680    681    682    683    684    685 
##      1     10      3      5      4      4      3      5      2     16      6      9     10      6     14      4 
## 999999 
##    204

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# !!! No Indirect PII- Ordinal

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!! List the Indirect PII variables to tabulate (categorical ones, plus any
# ordinal ones that have not been processed yet).
indirect_PII <- c("genero")

# capture_tables() is defined outside this section.
# NOTE(review): presumably it tabulates each listed variable -- confirm.
capture_tables(indirect_PII)

# Recode those with very specific values. 
# !!! No Indirect PII- Categorical variables with very specific values.

Matching and crosstabulations: Run automated PII check

# !!! Replace with the candidate categorical demographic variables
# (e.g. gender, occupation/education, age) to use as key variables.
selectedKeyVars <- c("genero", "grado")

# Build the sdcMicro object from mydata with those key variables, then print
# its summary (key-variable category sizes and k-anonymity violation counts,
# as recorded in the output below).
sdcInitial <- createSdcObj(keyVars = selectedKeyVars, dat = mydata)
sdcInitial
## The input dataset consists of 2726 rows and 92 variables.
##   --> Categorical key variables: genero, grado
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##  Key Variable Number of categories     Mean size            Size of smallest (>0)       
##        genero                    2 (2)  1363.000 (1363.000)                  1327 (1327)
##         grado                    4 (4)   681.500  (681.500)                    32   (32)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 0 (0.000%)
##   - 5-anonymity: 0 (0.000%)
## 
## ----------------------------------------------------------------------

Open-ends: review responses for any sensitive information, redact as necessary

# !!! Name the open-ended (free-text) variables to review.
open_ends <- c("rp_finance_2a")

# report_open() is defined outside this section.
# NOTE(review): presumably it produces the "verbatims.csv" file mentioned
# below -- confirm.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv" and identify the variables to delete or redact, along
# with their row numbers. Here the whole "rp_finance_2a" column is removed.
mydata <- mydata[is.na(match(names(mydata), "rp_finance_2a"))]

GPS data: Displace

# !!! No GPS data

Save processed data in Stata and SPSS format

# Write the processed data set twice, as "<filename>_PU.dta" (Stata) and
# "<filename>_PU.sav" (SPSS); relative paths resolve against the working
# directory.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Report title built from the data set name. It is not used in this section.
# NOTE(review): presumably consumed by the report template -- confirm.
title_var <- paste0("DOL-ILAB SDC - ", filename)