# Start from a clean workspace. `all.names = TRUE` also removes dot-prefixed
# (hidden) objects; the argument must be the logical TRUE, not `t`, which is
# base R's transpose function.
rm(list = ls(all.names = TRUE))
filename <- "App_rural_parents_Raw_noPII" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
# Run the project helper file. NOTE(review): presumably this defines helpers
# such as encode_location() and loads `mydata` from `filename` -- confirm.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!Include any Direct PII variables
# Columns listed here are removed from the dataset outright.
dropvars <- c(
  "nombres",
  "item_1_2_1", "item_1_2_4",
  "item_3_4_1", "item_3_4_2",
  "item_5_6_1", "item_5_6_2"
)
# Keep only the columns whose name has no match in `dropvars`.
# NOTE(review): `mydata` is not created in the visible part of this script;
# presumably the sourced helper file loads it -- confirm.
mydata <- mydata[is.na(match(names(mydata), dropvars))]
# !!! No Direct PII-team
# !!!Include relevant variables, but check their population size first to
# confirm they are <100,000
# Location identifier columns handed to the project helper encode_location()
# (not defined in this script); 999999 is passed as the missing-value code.
locvars <- c(
  "codlocal",
  "cod_mod"
)
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## codlocal. codlocal
## 55610 57968 58185 59665 60433 65421 66736 68599 68603 68655 68679 68735 68900 68924 68938
## 1 1 1 1 1 2 1 3 2 1 2 1 1 1 3
## 68943 68957 68962 68976 68981 69103 69179 69235 69551 69669 69985 70007 70031 70074 70088
## 2 2 2 2 5 2 3 2 2 1 3 2 1 1 1
## 70111 70149 71115 71498 71592 71629 71733 71752 71766 71790 71926 71931 73119 73162 73181
## 1 3 1 1 1 1 5 2 1 1 2 1 6 4 3
## 73195 73280 73303 73322 73341 73398 73435 73459 73529 73534 73548 73567 73572 73591 73609
## 2 2 2 7 2 7 2 1 2 5 2 4 3 2 3
## 73789 130308 142655 146154 147484 147686 147709 148520 148600 150122 150136 150202 150221 150259 150513
## 5 3 2 2 2 4 1 3 2 2 2 3 3 5 4
## 150532 150565 150570 150607 150612 150631 150645 150650 150754 150768 150773 150792 150834 150848 150966
## 9 2 2 4 2 3 4 3 6 4 4 6 4 5 7
## 150971 150985 151027 151070 151107 151188 151193 151206 151254 151598 151640 151664 151678 152060 152215
## 6 2 3 2 2 3 2 1 2 1 4 2 1 1 2
## 152239 152263 152282 152574 152588 152593 152606 152625 152668 152673 152734 152753 152786 153540 153818
## 3 1 1 1 1 3 3 1 3 4 4 2 3 1 7
## 153823 153837 153842 153861 153875 153880 153899 153903 153941 153955 154021 154035 154064 154078 154083
## 2 1 1 2 8 2 1 1 2 6 5 1 1 1 1
## 154097 154120 154200 154238 154262 154549 157010 157053 157072 157190 157213 157227 157345 157350 157374
## 2 12 2 1 6 1 3 3 2 1 1 2 2 1 8
## 157393 157406 157487 157492 157500 157543 157595 157604 157618 157623 157656 157661 157680 157703 157717
## 2 3 4 2 3 4 1 7 2 3 2 3 3 2 4
## 157722 157736 157760 157779 157798 157802 157821 157835 157840 157864 157878 157915 157977 157982 158024
## 3 3 1 4 1 3 2 4 5 2 1 2 3 1 2
## 158057 158095 158104 158123 158161 158175 158180 158203 158217 158236 158241 158255 158335 158340 158359
## 2 2 5 2 4 2 2 3 2 3 3 3 1 3 5
## 158364 158378 158383 158401 158415 158444 158458 158477 158482 158496 158509 158547 158590 158608 158627
## 1 2 4 7 2 1 3 4 5 4 5 1 7 3 2
## 158651 158665 158670 158712 158745 158750 158788 158934 159207 159453 159491 159556 159702 159815 164968
## 1 2 2 1 2 1 10 1 1 2 4 4 1 7 2
## 165029 165072 165086 165091 165185 165190 165246 165326 165331 165345 165473 165543 165604 165637 165680
## 6 3 1 5 2 1 3 2 3 2 3 1 2 7 2
## 165699 165703 165717 165736 165741 165784 165798 165802 165840 165864 165915 165920 166038 166076 166104
## 3 1 1 1 6 3 2 2 1 8 2 1 1 3 2
## 166118 166316 166533 166590 166627 166632 166651 166774 166788 166830 166905 166948 167014 167170 167189
## 3 2 1 3 3 1 3 2 1 1 3 1 10 6 1
## 167194 167207 167212 167226 167231 167269 167311 167349 167354 167368 167410 167537 167561 167575 167580
## 2 3 5 8 7 2 2 2 2 1 1 6 2 5 1
## 167599 167603 167617 167636 167641 167679 167684 169126 169150 170196 170200 170219 170304 170318 170375
## 2 7 2 2 2 1 8 1 5 1 1 1 1 4 1
## 170479 170484 170506 170709 170832 170865 170907 170931 171134 340231 340293 343357 462430 462543 505991
## 5 2 6 1 1 2 2 1 4 1 1 4 1 1 4
## 508447 515508 517084 517102 520915 526465 526470 531928 534658 538208 538227 538779 555306 560162 562439
## 1 2 1 2 1 5 6 2 3 2 1 3 3 2 1
## 563151 571844 582376 585308 590263 601493 602242 603468 603581 603699 603755 605066 605132 605146 609248
## 3 3 2 5 3 7 1 2 4 4 2 3 2 3 8
## 611760 748169 748739 999999
## 1 1 1 99
## [1] "Frequency table after encoding"
## codlocal. codlocal
## 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869
## 1 1 1 4 1 2 3 2 3 5 8 3 3 1 1
## 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884
## 6 1 5 2 1 2 1 1 4 2 3 2 2 7 3
## 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
## 2 1 1 4 3 4 2 2 6 3 2 3 2 4 2
## 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914
## 3 1 1 3 1 6 5 4 2 5 3 2 4 1 1
## 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929
## 2 1 1 2 8 1 7 1 2 6 1 4 2 2 2
## 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944
## 8 3 2 3 1 1 3 1 8 3 6 2 1 4 3
## 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
## 4 2 6 2 5 3 2 3 1 2 5 3 1 3 6
## 960 961 962 963 964 966 967 968 969 970 971 972 973 974 975
## 1 2 1 7 2 3 2 2 3 3 3 4 7 1 2
## 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990
## 2 1 1 7 1 4 2 2 1 1 1 5 2 1 2
## 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005
## 1 2 1 7 2 1 2 3 5 3 3 1 1 1 1
## 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
## 1 2 4 4 1 3 2 2 3 2 3 1 1 2 7
## 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
## 5 1 6 3 5 1 5 5 2 2 1 1 3 2 3
## 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
## 1 1 4 4 3 1 2 12 4 1 3 4 1 2 2
## 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
## 1 1 3 3 1 1 3 1 2 4 3 2 3 3 1
## 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
## 1 1 4 1 1 3 1 1 3 2 1 3 2 3 1
## 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095
## 2 2 1 2 2 7 2 2 2 5 5 4 2 1 5
## 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
## 1 5 2 4 6 2 1 2 7 5 2 2 3 2 1
## 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125
## 2 9 2 3 1 1 3 4 2 2 1 8 4 1 4
## 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140
## 7 1 1 2 1 1 2 2 2 2 1 10 1 3 2
## 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155
## 1 2 1 6 3 2 2 2 2 1 1 1 5 2 1
## 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
## 1 2 2 1 2 3 4 2 2 1 6 1 2 3 4
## 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185
## 2 8 1 3 2 2 6 2 5 2 3 3 2 4 1
## 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200
## 2 3 2 10 1 2 4 2 7 3 3 1 3 2 2
## 1201 1202 1203 999999
## 1 7 1 99
## [1] "Frequency table before encoding"
## cod_mod. cod_mod
## 204800 204875 204909 205005 205047 205112 205120 205153 205682 205690 205773 205781 205815
## 6 1 1 4 2 4 3 2 2 1 2 1 1
## 207407 216341 220285 226704 232207 232223 232231 232249 232264 232504 232512 232538 232546
## 1 1 2 1 2 1 2 1 1 1 1 2 3
## 232553 232561 232579 232587 232595 232603 232611 232645 232728 232777 233130 233296 233361
## 2 1 1 1 2 1 2 1 1 2 4 4 1
## 233676 233718 233734 233882 233890 233908 233916 233924 233932 233940 233957 233965 233973
## 3 1 2 1 1 4 3 2 2 3 1 2 2
## 233981 233999 234021 234062 234096 234104 234112 234120 234138 234153 234161 234187 234211
## 2 3 2 2 2 3 1 2 3 3 2 2 1
## 234229 234237 234351 234369 234377 234385 234401 234419 234427 234443 234450 234500 234583
## 2 2 1 2 1 3 2 3 3 2 1 2 2
## 234674 234682 234781 234831 234856 236158 236349 236422 236448 236463 236471 236489 236653
## 3 3 2 3 2 1 2 4 1 6 1 3 1
## 236661 236927 287409 287425 287466 309286 309294 309377 309419 309435 309682 310433 312090
## 8 4 2 3 1 1 6 1 1 1 1 3 2
## 312215 312306 312421 312744 312868 313080 313239 313395 313460 313890 313908 313965 313981
## 1 2 1 1 2 1 2 2 1 3 2 2 3
## 314070 314187 314211 314237 314245 314260 314278 314294 405258 405498 405704 405738 405746
## 2 4 2 2 2 4 3 2 3 3 3 2 2
## 405837 405852 405894 405902 405928 405936 406009 406066 406082 406116 406124 406140 406215
## 1 2 2 2 1 2 2 4 3 4 2 1 2
## 406223 406264 406413 406595 406629 406645 406975 406983 407007 407049 408245 408278 408286
## 2 2 2 1 3 1 2 3 1 2 2 2 3
## 408294 408328 408336 408393 408468 408476 408484 408492 408559 408567 408609 408666 408732
## 2 1 1 3 2 3 1 1 1 3 2 3 1
## 408773 408823 408856 408922 408955 408971 409003 409011 409029 409193 409227 409235 409243
## 1 2 1 2 3 2 2 3 3 2 3 1 2
## 409284 409292 409300 409318 409326 409359 409441 409565 409896 410464 410480 410514 410613
## 2 3 3 2 1 2 2 2 1 1 1 2 1
## 410670 410746 410779 410787 410803 473249 481283 486688 486928 489120 495069 495325 502922
## 1 2 5 2 2 1 3 3 1 2 5 6 2
## 504142 517581 517888 518084 518472 519496 519595 519678 525923 550392 551309 557587 579268
## 1 12 2 10 4 2 2 6 3 2 3 3 2
## 579276 579284 579292 579300 585885 587055 587147 587204 589804 591255 591602 592147 612051
## 1 1 3 6 2 6 1 3 3 8 5 3 2
## 612119 612291 612507 612689 612747 612770 612804 615013 616110 617787 617829 621391 623017
## 2 1 3 1 1 1 2 2 3 7 4 5 2
## 623041 637272 639542 639617 647388 647412 647446 647628 671628 672105 679829 680058 680082
## 1 5 1 1 2 3 6 1 1 4 1 2 1
## 680124 699603 712562 712711 712778 723031 730655 731273 735498 736116 776039 779041 783423
## 4 8 1 1 2 1 1 2 3 1 1 1 3
## 783621 783696 783704 783720 783787 783795 791319 791574 794438 796888 818674 844159 844183
## 1 3 1 3 2 1 10 1 1 1 2 2 2
## 891408 891812 895482 927871 930958 931055 931063 932236 932491 932608 932848 933226 933283
## 5 1 1 3 1 7 6 3 2 3 3 2 3
## 933291 933317 933531 933846 999999 1031574 1117704 1201649 1201870 1260942 1266428 1273655 1314376
## 1 3 1 3 99 2 2 4 3 2 3 1 2
## 1320647 1321322 1321330 1321355 1321421 1327279 1327287 1336072 1343573 1343581 1344639 1345024 1347269
## 3 3 4 5 4 1 1 1 1 3 4 2 1
## 1347293 1347301 1347434 1347459 1347921 1347939 1347970 1352269 1364868 1369248 1372507 1377209 1377233
## 2 1 1 2 4 2 7 1 3 2 3 2 1
## 1377415 1380021 1380120 1386226 1388610 1388644 1388651 1389261 1389279 1390095 1390467 1390517 1390665
## 3 6 1 2 1 1 3 4 1 1 2 4 5
## 1390673 1392083 1392091 1392109 1392117 1392125 1392141 1392174 1392224 1392240 1392257 1396191 1396209
## 4 1 4 2 1 5 5 5 2 2 3 4 2
## 1396225 1396852 1396878 1396886 1398783 1398932 1401934 1401942 1401959 1402536 1408426 1412634 1412873
## 8 6 1 2 1 1 2 2 5 3 2 3 2
## 1415983 1418615 1423003 1442185 1452705 1459791 1459809 1523802 1523810 1523828 1540988 1540996 1541192
## 3 1 2 2 5 3 4 2 1 1 2 3 4
## 1625532 1625557 1625573 1630631 1637263 1659101 1666130
## 3 2 1 2 1 2 1
## [1] "Frequency table after encoding"
## cod_mod. cod_mod
## 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
## 1 1 2 3 1 1 3 2 2 2 1 2 1 1 1
## 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
## 2 2 2 6 5 1 1 2 1 3 3 5 10 1 2
## 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757
## 3 3 1 1 1 1 3 3 3 3 2 2 2 1 6
## 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772
## 3 2 1 3 3 7 1 2 2 2 1 7 2 1 1
## 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787
## 2 6 1 1 2 5 2 3 1 2 1 2 2 2 1
## 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802
## 1 2 2 1 2 3 2 6 1 3 3 1 1 1 2
## 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
## 8 5 3 3 5 2 4 1 2 1 1 1 5 4 3
## 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832
## 10 1 1 1 2 1 2 2 4 4 3 1 1 3 2
## 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847
## 2 5 2 2 6 3 2 3 1 1 2 4 1 2 4
## 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
## 1 2 1 2 1 1 2 2 4 1 2 3 8 3 2
## 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877
## 3 2 2 2 1 2 3 2 3 3 3 2 2 1 2
## 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
## 3 3 4 4 2 1 1 2 3 2 1 1 1 2 1
## 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
## 1 4 2 1 2 4 1 12 5 1 3 2 2 1 2
## 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922
## 3 2 1 1 1 3 2 2 2 1 3 1 2 2 2
## 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937
## 2 6 3 1 1 4 2 1 2 5 2 2 2 1 4
## 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952
## 6 3 1 3 4 1 3 3 2 1 3 3 2 1 3
## 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967
## 2 2 8 3 1 5 2 1 2 2 3 4 4 2 1
## 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
## 1 5 5 1 2 1 2 1 2 3 2 2 5 1 1
## 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997
## 2 4 1 8 2 4 4 1 2 3 1 2 1 1 2
## 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012
## 3 2 2 3 1 3 1 2 1 1 2 4 2 1 1
## 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027
## 1 2 1 2 1 3 1 1 3 1 2 2 3 2 2
## 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042
## 2 1 2 1 2 3 3 1 2 2 2 1 1 1 2
## 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057
## 3 3 2 3 3 3 1 2 1 7 2 2 4 1 4
## 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
## 1 2 2 1 4 2 3 2 3 3 3 1 2 4 2
## 1073 1074 1075 1076 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088
## 3 1 1 2 4 3 1 1 3 3 3 3 3 3 1
## 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103
## 6 3 2 6 2 2 6 6 2 1 1 2 1 1 1
## 1104 1105 1106 1107 1108 1109 999999
## 4 2 2 3 1 4 99
# !!! No Indirect PII- Ordinal
# !!!Include relevant variables in list below (Indirect PII - Categorical,
# and Ordinal if not processed yet)
indirect_PII <- c("rp_finance_6a")
capture_tables(indirect_PII)

# Attach value labels to rp_finance_6a. Each label is identical to the value
# it describes, so the named vector is built from the values themselves.
mydata$rp_finance_6a <- labelled(
  mydata$rp_finance_6a,
  setNames(nm = c(
    "Amigos",
    "Familia",
    "Internet (otro sitio)",
    "Internet desde casa",
    "Internet desde la escuela",
    "Profesores"
  ))
)

# Recode those with very specific values: both "home" and "school" internet
# answers are folded into the broader "Internet (otro sitio)" category.
mydata$rp_finance_6a[
  mydata$rp_finance_6a == "Internet desde casa" |
    mydata$rp_finance_6a == "Internet desde la escuela"
] <- "Internet (otro sitio)"
# Categorical key variables used for the disclosure-risk check.
# ##!!! Replace with candidate categorical demo vars
selectedKeyVars <- c("genero", "grado")

# Build the sdcMicro object from the current data and the key variables above,
# then display it.
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars
)
sdcInitial
## The input dataset consists of 1044 rows and 92 variables.
## --> Categorical key variables: genero, grado
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## genero 2 (2) 522.000 (522.000) 510 (510)
## grado 4 (4) 261.000 (261.000) 10 (10)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 0 (0.000%)
## - 3-anonymity: 0 (0.000%)
## - 5-anonymity: 3 (0.287%)
##
## ----------------------------------------------------------------------
# !!! Identify open-end variables here:
open_ends <- c("rp_finance_2a")
# Pass the open-ended variables to the project helper report_open() (not
# defined in this script).
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and
# their row number.
# Drop the open-ended column: keep every column whose name does not match it.
mydata <- mydata[match(names(mydata), "rp_finance_2a", nomatch = 0L) == 0L]
# !!! No GPS data
# Write the processed dataset next to the script as "<filename>_PU" in both
# Stata (.dta) and SPSS (.sav) formats.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Report title, derived from the input file name.
title_var <- paste0("DOL-ILAB SDC - ", filename)