# Start from a clean global environment, including dot-prefixed (hidden)
# objects. The earlier `all=t` passed base R's transpose function `t`, not the
# logical TRUE, so the "include hidden objects" flag was never actually set.
rm(list = ls(all.names = TRUE))

# Base name of the dataset; also used as the stem of the "_PU" output files
# and in the report title.
filename <- "App_rural_students_Raw_noPII" # !!!Update filename

# Helper script sourced below.
# NOTE(review): presumably this defines encode_location(), capture_tables()
# and report_open(), and loads `mydata` from `filename` -- confirm.
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId,
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!Include any Direct PII variables
# Every column named here is removed from `mydata`; names that do not occur
# in the data are simply ignored.
dropvars <- c(
  "nombres",
  "item_1_2_1", "item_1_2_4",
  "item_3_4_1", "item_3_4_2",
  "item_5_6_1", "item_5_6_2"
)
# Keep only the columns whose name has no match in `dropvars`.
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!! No Direct PII-team
# !!!Include relevant variables, but check their population size first to confirm they are <100,000
# Location identifier columns handed to encode_location(); 999999 is passed
# as its `missing` argument. The result replaces `mydata`.
locvars <- c("codlocal", "cod_mod")
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## codlocal. codlocal
## 55610 58185 59420 59509 59665 60433 62922 63530 64068 65421 66736 67161 68599 68603 68655 68679
## 1 1 1 1 1 1 1 1 1 2 1 1 12 3 3 2
## 68735 68900 68924 68938 68943 68957 68962 68976 68981 69103 69179 69235 69551 69669 69706 69810
## 1 3 1 4 2 8 2 3 8 2 3 2 2 1 4 1
## 69985 70007 70031 70074 70088 70111 70149 71115 71498 71592 71629 71733 71752 71766 71790 71926
## 16 2 1 2 2 1 5 2 1 3 1 15 4 2 1 2
## 71931 73119 73162 73181 73195 73280 73303 73322 73341 73398 73435 73459 73529 73534 73548 73553
## 1 12 10 2 8 2 6 8 4 19 4 2 3 14 3 6
## 73567 73572 73591 73609 73789 130308 142655 146154 147484 147686 147709 147714 148520 148600 148997 150122
## 6 7 4 6 11 14 7 5 4 4 3 2 5 10 2 4
## 150136 150202 150221 150259 150513 150532 150565 150570 150607 150612 150631 150645 150650 150754 150768 150773
## 4 7 5 15 6 21 6 6 6 5 3 16 6 9 7 12
## 150792 150834 150848 150966 150971 150985 151027 151070 151107 151188 151193 151206 151254 151598 151640 151664
## 17 4 14 15 16 4 7 3 9 9 7 1 8 1 10 3
## 151678 152060 152215 152239 152263 152282 152574 152588 152593 152606 152625 152668 152673 152734 152753 152786
## 1 1 14 16 4 4 4 5 6 10 3 12 9 15 6 17
## 153540 153818 153823 153837 153842 153861 153875 153880 153899 153903 153922 153941 153955 154021 154035 154064
## 1 24 6 1 5 2 22 5 4 6 1 4 15 14 7 3
## 154078 154083 154097 154120 154200 154238 154262 154549 155054 157010 157053 157072 157190 157213 157227 157345
## 3 6 3 25 2 1 6 2 1 3 15 5 6 1 8 6
## 157350 157374 157393 157406 157487 157492 157500 157538 157543 157595 157604 157618 157623 157656 157661 157680
## 1 31 10 6 14 3 6 1 6 8 22 3 7 3 9 7
## 157703 157717 157722 157736 157760 157779 157798 157802 157821 157835 157840 157864 157878 157915 157977 157982
## 5 17 14 10 1 5 4 8 7 15 18 6 3 6 13 10
## 158024 158057 158095 158104 158123 158161 158175 158180 158203 158217 158236 158241 158255 158335 158340 158359
## 3 5 4 16 6 13 11 8 9 9 21 11 6 1 15 19
## 158364 158378 158383 158401 158415 158444 158458 158477 158482 158496 158509 158547 158590 158608 158627 158632
## 4 4 13 22 6 1 8 6 17 16 15 2 16 4 5 1
## 158646 158665 158670 158707 158712 158745 158750 158788 158934 159207 159453 159491 159556 159702 159797 159815
## 1 4 4 2 3 4 2 24 2 1 6 5 14 2 5 18
## 164930 164968 165029 165072 165086 165091 165185 165190 165246 165326 165331 165345 165473 165543 165604 165637
## 1 7 8 4 3 16 6 8 7 4 4 2 8 2 2 17
## 165680 165699 165703 165717 165736 165741 165784 165798 165802 165840 165864 165915 165920 166038 166076 166104
## 8 7 4 5 8 26 7 4 5 4 12 3 2 1 16 7
## 166118 166316 166533 166590 166627 166632 166651 166774 166788 166830 166905 166948 167014 167170 167189 167194
## 11 2 1 4 7 6 9 8 2 4 4 2 20 20 4 5
## 167207 167212 167226 167231 167269 167311 167349 167354 167368 167410 167537 167561 167575 167580 167599 167603
## 8 19 22 21 7 6 5 4 4 2 24 5 16 8 5 15
## 167617 167636 167641 167679 167684 169126 169150 170196 170200 170219 170304 170318 170479 170484 170506 170610
## 9 6 6 1 23 1 13 1 6 3 5 8 8 5 14 2
## 170709 170832 170865 170907 170931 171134 340293 343357 462430 462543 505991 508447 515508 517084 517102 520915
## 1 4 5 5 2 6 1 11 9 2 14 3 7 1 7 1
## 526465 526470 531928 534658 535506 538208 538227 538779 555306 556042 560162 562439 563151 571844 582376 585308
## 14 16 14 7 1 9 1 10 7 1 2 1 5 7 4 7
## 590263 601493 602242 603468 603581 603699 603717 603755 605066 605132 605146 609248 611760 748169 748739 999999
## 10 18 5 11 6 16 1 3 3 3 8 27 1 2 1 204
## [1] "Frequency table after encoding"
## codlocal. codlocal
## 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
## 1 6 8 1 2 16 12 9 6 6 9 17 1 17 2 6
## 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
## 11 1 16 4 3 16 9 14 1 1 1 8 7 2 4 6
## 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870
## 8 15 14 21 5 8 4 8 1 1 4 4 5 6 16 14
## 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886
## 6 1 1 16 7 4 12 8 2 9 1 1 18 3 2 3
## 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
## 10 5 2 6 2 10 16 21 1 1 5 4 7 11 5 6
## 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
## 2 3 5 5 1 4 7 7 1 22 4 10 22 4 4 6
## 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934
## 10 11 8 6 4 5 31 19 1 14 7 1 26 12 14 3
## 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
## 5 5 2 7 4 16 22 4 16 6 6 1 2 6 23 6
## 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966
## 3 8 6 2 8 2 3 2 5 6 6 2 14 2 5 2
## 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
## 6 8 2 6 1 1 25 7 5 5 2 4 1 4 4 4
## 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998
## 7 12 1 1 5 16 5 15 2 6 3 5 1 15 15 10
## 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
## 1 27 2 3 7 8 14 4 15 4 24 4 15 8 10 1
## 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
## 7 10 19 1 1 4 1 8 8 3 1 4 5 17 5 16
## 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
## 4 9 5 2 1 1 3 18 2 8 2 1 7 3 2 1
## 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062
## 24 5 7 7 3 8 4 4 1 19 3 3 2 2 5 4
## 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
## 4 15 6 1 4 9 1 6 1 7 2 9 4 1 6 8
## 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
## 4 11 2 7 22 5 14 2 2 16 2 5 17 6 3 1
## 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
## 1 1 8 1 14 3 3 15 6 4 9 1 7 6 7 3
## 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126
## 7 2 8 13 6 4 1 1 4 10 3 24 3 13 11 14
## 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142
## 5 16 5 9 1 3 3 2 3 3 3 2 12 14 20 6
## 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159
## 9 15 4 8 6 3 4 9 8 15 17 1 6 4 6 3
## 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175
## 20 1 2 6 5 7 2 13 1 3 6 7 21 14 1 1
## 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 999999
## 1 2 7 2 4 6 4 2 10 1 7 3 11 13 18 204
## [1] "Frequency table before encoding"
## cod_mod. cod_mod
## 204800 204875 204909 205005 205047 205112 205120 205153 205682 205690 205773 205781 205815 206334
## 9 3 2 6 4 7 7 7 6 6 7 2 4 2
## 207373 207407 216341 220285 226704 232207 232223 232231 232249 232264 232504 232512 232538 232546
## 1 1 6 3 6 7 6 6 1 7 3 5 2 5
## 232553 232561 232579 232587 232595 232603 232611 232645 232728 232777 233130 233296 233361 233676
## 5 4 3 6 3 6 2 1 2 6 4 5 4 3
## 233718 233734 233882 233890 233908 233916 233924 233932 233940 233957 233965 233973 233981 233999
## 7 5 1 3 6 8 9 3 4 4 3 5 3 7
## 234021 234062 234096 234104 234112 234120 234138 234153 234161 234187 234195 234203 234229 234237
## 5 4 6 6 4 4 5 5 8 5 1 1 4 4
## 234351 234369 234377 234385 234401 234419 234427 234443 234450 234500 234583 234674 234682 234781
## 1 8 4 8 7 8 7 6 3 6 3 9 6 3
## 234831 234856 236158 236349 236422 236448 236463 236471 236489 236653 236661 236927 287409 287425
## 7 6 5 5 16 4 8 1 7 1 31 8 8 6
## 287466 309286 309294 309377 309419 309435 309567 310433 310441 312090 312215 312306 312421 312744
## 1 1 12 1 3 1 4 3 1 2 3 8 3 2
## 312868 313080 313239 313395 313460 313890 313908 313965 313981 314070 314187 314211 314237 314245
## 2 1 2 8 1 2 8 6 6 2 4 4 4 3
## 314252 314260 314278 314294 405258 405498 405704 405738 405746 405852 405894 405902 405928 405936
## 6 6 7 4 5 7 5 4 4 7 6 6 3 7
## 406009 406066 406082 406116 406124 406140 406215 406223 406264 406413 406595 406629 406645 406975
## 6 10 6 6 5 1 5 4 3 7 5 6 4 5
## 406983 407007 407049 408211 408245 408278 408286 408294 408328 408336 408393 408468 408476 408484
## 10 3 6 1 7 4 4 2 5 1 4 8 7 4
## 408492 408559 408567 408609 408666 408732 408773 408823 408856 408922 408955 408971 409003 409011
## 5 3 8 6 8 2 4 3 8 5 4 5 5 8
## 409029 409193 409227 409235 409243 409284 409292 409300 409318 409326 409359 409441 409565 409896
## 8 2 8 8 5 9 7 9 4 4 7 8 3 1
## 410464 410480 410514 410613 410670 410746 410779 410787 410803 473249 481283 486688 486928 489120
## 1 6 5 1 3 5 8 5 4 4 7 7 2 8
## 495069 495325 498782 499863 502922 504142 517581 517888 518084 518472 519496 519595 519678 525923
## 14 16 1 2 2 4 25 5 22 10 6 5 6 21
## 550392 551309 557587 579268 579276 579284 579292 579300 585885 587055 587147 587204 589200 589747
## 7 3 9 9 1 1 17 20 6 15 3 13 1 1
## 589804 591255 591602 592147 612051 612119 612291 612507 612689 612747 612770 612804 615013 616110
## 5 23 14 4 2 2 5 9 1 1 1 3 4 6
## 617787 617829 621391 623017 623041 637272 639542 639617 647388 647412 647446 647628 655746 671628
## 18 6 15 4 2 8 2 1 4 7 16 4 1 4
## 672105 679829 680058 680082 680124 699603 712562 712711 712778 723031 730655 731273 735498 736116
## 4 2 9 1 17 12 1 2 2 1 2 2 4 2
## 775700 776039 783423 783597 783621 783696 783704 783720 783787 783795 791319 791574 794438 796888
## 5 2 11 1 1 11 1 14 10 10 20 4 1 2
## 818674 818708 844159 844183 891408 891812 895482 927871 930958 931055 931063 932236 932434 932491
## 5 2 2 3 14 1 1 10 1 14 18 12 1 4
## 932608 932848 933226 933283 933291 933317 933531 933598 933846 999999 1031574 1117704 1120005 1201649
## 8 7 6 10 1 6 1 2 6 204 4 10 1 14
## 1201870 1260942 1266428 1271840 1273655 1314376 1320647 1321322 1321330 1321355 1321421 1327279 1327287 1336072
## 6 2 3 1 2 2 10 6 11 9 11 2 9 3
## 1343573 1343581 1344639 1345024 1347269 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868
## 9 11 13 7 1 13 8 1 7 19 11 15 1 7
## 1369248 1372507 1374438 1377209 1377233 1377415 1379361 1380021 1380120 1386226 1388610 1388644 1388651 1389261
## 14 7 1 9 1 10 1 18 4 10 2 5 11 9
## 1389279 1390095 1390467 1390517 1390582 1390665 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174
## 5 2 11 6 1 17 16 7 5 10 7 13 16 14
## 1392216 1392224 1392240 1392257 1396191 1396209 1396225 1396852 1396878 1396886 1398783 1401934 1401942 1401959
## 2 6 3 8 19 11 27 17 4 8 1 11 8 17
## 1402536 1408426 1412634 1412873 1415983 1418615 1423003 1442185 1452705 1458348 1459791 1459809 1523802 1523810
## 7 2 5 3 4 4 12 4 7 9 10 12 9 8
## 1523828 1540988 1540996 1541192 1625532 1625557 1625573 1630631 1637263 1659101 1666130
## 6 7 10 10 7 8 8 3 4 9 1
## [1] "Frequency table after encoding"
## cod_mod. cod_mod
## 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
## 4 1 1 10 1 23 7 9 4 2 6 10 2 11 4 3
## 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 6 1 1 9 1 4 15 12 3 16 4 5 7 3 8 2
## 301 302 303 304 305 306 307 308 309 310 312 313 314 315 316 317
## 14 1 11 4 2 8 5 7 5 6 7 8 1 3 9 4
## 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
## 7 5 7 1 8 3 4 6 2 6 6 4 1 7 4 5
## 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
## 6 7 1 1 3 7 1 3 8 1 2 6 3 5 8 4
## 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365
## 1 7 2 7 6 1 5 3 6 8 14 9 19 5 4 6
## 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381
## 2 6 2 7 1 9 13 6 1 5 9 1 1 12 6 9
## 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
## 3 17 5 1 4 4 18 6 1 4 4 7 4 5 4 1
## 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
## 9 1 2 8 9 27 6 4 6 6 8 1 18 14 6 6
## 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
## 6 12 6 5 7 31 5 4 3 2 1 2 8 7 2 7
## 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
## 4 6 6 10 3 6 16 1 1 4 4 4 2 4 7 10
## 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
## 8 11 2 2 10 11 17 6 7 8 1 4 2 6 8 1
## 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477
## 4 7 6 11 5 1 6 3 7 7 5 10 5 7 14 4
## 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
## 2 17 7 3 21 3 7 1 6 6 18 3 1 8 6 4
## 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
## 7 14 2 8 1 19 10 5 5 4 3 12 7 14 4 2
## 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
## 3 4 8 1 8 7 10 3 7 3 4 5 11 1 5 8
## 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
## 1 2 5 4 2 8 9 11 2 8 5 1 8 5 6 1
## 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
## 5 8 9 8 4 4 9 11 9 17 16 1 6 4 2 3
## 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
## 2 3 8 15 1 13 7 7 4 9 20 13 1 7 2 2
## 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
## 8 6 1 3 15 5 2 6 5 11 10 5 1 2 2 2
## 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
## 6 3 1 1 10 2 3 5 3 1 6 5 2 5 20 12
## 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
## 16 6 8 3 7 4 8 5 2 5 1 5 4 4 1 4
## 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
## 1 1 2 9 3 4 5 13 10 2 4 4 25 8 1 7
## 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653
## 3 3 1 2 1 7 2 1 6 1 4 14 10 2 8 5
## 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
## 8 7 4 6 2 7 6 3 9 11 3 8 17 22 4 10
## 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685
## 1 10 3 5 4 4 3 5 2 16 6 9 10 6 14 4
## 999999
## 204
# !!! No Indirect PII- Ordinal
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Variables passed to capture_tables() for review.
indirect_PII <- "genero"
capture_tables(indirect_PII)
# Recode those with very specific values.
# !!! No Indirect PII- Categorical variables with very specific values.
# Categorical key variables used to build the sdcMicro object.
selectedKeyVars <- c("genero", "grado") ##!!! Replace with candidate categorical demo vars
# Build the sdcMicro object from `mydata` and the key variables above.
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars
)
# Bare top-level expression: displays the object's summary when auto-printed.
sdcInitial
## The input dataset consists of 2726 rows and 92 variables.
## --> Categorical key variables: genero, grado
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## genero 2 (2) 1363.000 (1363.000) 1327 (1327)
## grado 4 (4) 681.500 (681.500) 32 (32)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 0 (0.000%)
## - 3-anonymity: 0 (0.000%)
## - 5-anonymity: 0 (0.000%)
##
## ----------------------------------------------------------------------
# !!! Identify open-end variables here:
# Open-ended response variables handed to report_open() for review.
open_ends <- "rp_finance_2a"
report_open(list_open_ends = open_ends)
# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number
# Drop the reviewed open-end column; every other column is kept.
mydata <- mydata[match(names(mydata), "rp_finance_2a", nomatch = 0L) == 0L]
# !!! No GPS data
# Write the processed data next to the input name with a "_PU" suffix,
# once as a Stata file and once as an SPSS file.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)
# Add report title dynamically
title_var <- paste0("DOL-ILAB SDC - ", filename)