# Start from an empty workspace, including dot-prefixed (hidden) objects.
# The flag must be spelled TRUE: a bare `t` is base R's transpose function,
# not a logical, so hidden objects would not be listed for removal.
rm(list = ls(all.names = TRUE))

Set up filenames

# !!! Update for each run: name of the dataset under review.
filename <- "ehsection2_relabelled"
# !!! Update for each run: helper-functions script that gets sourced.
functions_vers <- "functions_1.7.R"

Set up data and functions, and create a dictionary for dataset review

# Run the helper-functions script named in `functions_vers`.
source(file = functions_vers)

Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:

# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

Direct PII: variables to be removed

# !!! No Direct PII

Direct PII-team: Encode field team names

# !!! No Direct PII-team

Small locations: Encode locations with pop <100,000 using random large numbers

!!!Include relevant variables, but check their population size first to confirm they are <100,000

# Columns to remove from the dataset; every column not named here is kept.
dropvars <- "dise"
mydata <- mydata[!is.element(names(mydata), dropvars)]

# Location identifiers to encode: block ID and village ID.
locvars <- c(
  "a006_a_block_id",
  "a007_a_vill_id"
)
# The helper prints a frequency table before and after encoding each variable;
# 999999 is passed as its `missing` argument.
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## a006_a_block_id. 006 Block ID
##    1    2    3    4    5    6    7    8    9 
## 1310 1079 1288 2714  561 1284 1072 2835 3631 
## [1] "Frequency table after encoding"
## a006_a_block_id. 006 Block ID
##  279  280  281  282  283  284  285  286  287 
## 1310 2835 1079 1284 1288 3631 1072  561 2714 
## [1] "Frequency table before encoding"
## a007_a_vill_id. 007 Village ID
##   1   2   3   4   5   6   7   8   9  10  11  12  13  15  16  17  18  19  20  21  22 
## 131 111 101 114 141 204 182  90  84  93 103 162 141  79 101 151  96 108 109 205 139 
##  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43 
## 117 125 237 192 176 138 103  97 174 178 132 112 245 138 111 131 176 105 107 103 126 
##  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64 
## 196 146 147 113 104 111 102 110 136 194 187 168 137 106 136  79 184 145  91 121 113 
##  65  66  67  68  69  70  71  72  73  74  75  76  77  78  80  81  82  83  84  85  87 
## 188 109 160 125 159  94  92 121 114 160 131 124 155 191 191  96 168  91 110  80 119 
##  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 
## 157  89 122 113 118 136  77 115 158 172 138 199 104 167 140  91 132 102 237  92 159 
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 
## 138 112 146 187  96 159 138  90 131 145 124  96  52  70 
## [1] "Frequency table after encoding"
## a007_a_vill_id. 007 Village ID
## 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 
## 109 131  79 103 191 104 126 113 191 101 145 136 136 176 187 102 122 138 102 131 237 
## 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 
## 112  52 162  96 237 196 103  94 192 107 176 103 111  96 204 138 199  96 158 155 119 
## 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 
## 131  93  90  97 105 136 124 113 111  91 188 110  90 108 125 194 111 146 182 178 104 
## 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 
## 138 139 106 157 172 132 145 118 138 159  92 160 245 184 113 159  70 121 141 101 187 
## 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 
## 125 124  91 174 160 115 159 109 205 168 121 146  96 132  77 137 112 140 151  89 114 
## 714 715 716 717 718 719 720 721 722 723 724 725 726 727 
## 114 167 110 168  91  92 138  79 117 131 147  84 141  80

Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Focus on variables with a "Lowest Freq" in dictionary of 30 or less. 

# Top-code age at 80. In this variable's frequency table the label
# "Don't know" sorts between 97 and 99, i.e. it is code 98 (5 cases). With
# missing=NA those non-responses were counted in "80 or more"
# (147 = 142 values of 80+ plus the 5 "Don't know"), so 98 is declared as the
# missing code to keep it out of the top category.
# NOTE(review): assumes top_recode() leaves `missing` codes un-recoded -
# confirm in the helper file, and check whether 99 is also a non-response
# code. Re-run to refresh the printed tables.
mydata <- top_recode(variable = "a203_age_", break_point = 80, missing = 98)
## [1] "Frequency table before encoding"
## a203_age_. 203 How old is [Name]?
##          1          2          3          4          5          6          7 
##        208        144        179        208        257        226        347 
##          8          9         10         11         12         13         14 
##        361        351        635        526       1005       1042        822 
##         15         16         17         18         19         20         21 
##        722        582        459        537        293        319        126 
##         22         23         24         25         26         27         28 
##        164        108         58        159         55         46         95 
##         29         30         31         32         33         34         35 
##         37        357         42        220        135        107        742 
##         36         37         38         39         40         41         42 
##        135        117        311         80        815         52        169 
##         43         44         45         46         47         48         49 
##         84         53        489         40         42         69         17 
##         50         51         52         53         54         55         56 
##        276          9         41         27         23        139         26 
##         57         58         59         60         61         62         63 
##         17         48         10        275         12         46         33 
##         64         65         66         67         68         69         70 
##         13        159         10          9         24          4        174 
##         71         72         73         74         75         76         77 
##          4         16         10          4         61          2          4 
##         78         79         80         81         82         83         85 
##          3          1         64          1          8          1         21 
##         86         87         88         89         90         92         93 
##          1          2          1          1         15          4          1 
##         95         96         97 Don't know         99        100 
##          6          1          1          5          2         12

## [1] "Frequency table after encoding"
## a203_age_. 203 How old is [Name]?
##          1          2          3          4          5          6          7 
##        208        144        179        208        257        226        347 
##          8          9         10         11         12         13         14 
##        361        351        635        526       1005       1042        822 
##         15         16         17         18         19         20         21 
##        722        582        459        537        293        319        126 
##         22         23         24         25         26         27         28 
##        164        108         58        159         55         46         95 
##         29         30         31         32         33         34         35 
##         37        357         42        220        135        107        742 
##         36         37         38         39         40         41         42 
##        135        117        311         80        815         52        169 
##         43         44         45         46         47         48         49 
##         84         53        489         40         42         69         17 
##         50         51         52         53         54         55         56 
##        276          9         41         27         23        139         26 
##         57         58         59         60         61         62         63 
##         17         48         10        275         12         46         33 
##         64         65         66         67         68         69         70 
##         13        159         10          9         24          4        174 
##         71         72         73         74         75         76         77 
##          4         16         10          4         61          2          4 
##         78         79 80 or more 
##          3          1        147

# Top-code employment income: values of 8000 and above are collapsed into the
# single category "8000 or more".
mydata <- top_recode(
  variable = "a210_income_",
  break_point = 8000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a210_income_. 210 How much income in cash or in kind did [Name] earn from employment in the la
##              0              1              2              5              6 
##           1930              1              1              1              1 
##              7              8             10             14             21 
##              3              1              3              1              1 
##             24             28             35             49             50 
##              1              1              3              2              2 
##             54             56             60             63             65 
##              1              1              3              2              1 
##             70             75             80             84     Don't know 
##             10              1              1              1            171 
##             99            100            105            120            122 
##             15             25              3              1              1 
##            123            125            130            140            142 
##              1              1              1              5              1 
##            150            160            170            171            175 
##             16              3              1              1              6 
##            180            182            188            196            200 
##              1              2              1              1             39 
##            201            210            215            220            225 
##              2              3              1              1              2 
##            227            231            238            250            264 
##              1              1              4             10              3 
##            265            270            280            300            320 
##              1              1              1             38              1 
##            325            340            350            357            360 
##              2              1             43              2              1 
##            375            400            420            450            462 
##              3             61              4              9              1 
##            480            486            490            500            525 
##              1              2              2             66              1 
##            540            550            560            571            588 
##              1              3             11              4              3 
##            600            625            630            640            650 
##             92              3              3              1              9 
##            668            696            700            714            720 
##              1              1            187              5              1 
##            735            750            770            780            798 
##              1             29              1              1              1 
##            800            804            830            835            840 
##             43              2              1              1              6 
##            850            857            875            900            918 
##              1              8              2             32              1 
##            920            931            945            950            960 
##              1              4              1              5              1 
##            980            996 Not applicable           1000           1042 
##              4              1          10622             99              1 
##           1050           1090           1100           1140           1142 
##             61              1             10              1              8 
##           1143           1150           1160           1162           1169 
##              1              1              1             13              2 
##           1190           1200           1225           1250           1260 
##              1            108              3             28              1 
##           1280           1285           1296           1300           1305 
##              1              5              1              3              1 
##           1379           1398           1400           1420           1428 
##              2              2            383              1              8 
##           1450           1460           1470           1480           1500 
##             12              1              2              1            118 
##           1512           1520           1560           1575           1600 
##              1              1              1              1             19 
##           1631           1632           1633           1650           1700 
##              5              1              1              7              8 
##           1750           1800           1820           1862           1869 
##             73             55              2              2              1 
##           1875           1890           1900           1950           2000 
##              1              1              1              1            103 
##           2007           2025           2050           2100           2140 
##              1              1              2            296              1 
##           2142           2150           2200           2245           2250 
##              4              5              7              1              4 
##           2285           2300           2310           2331           2333 
##              1              3              3              2              2 
##           2350           2400           2450           2500           2525 
##              2             28             24             63              1 
##           2547           2598           2600           2625           2630 
##              1              1              4              1              1 
##           2695           2700           2730           2750           2800 
##              1              5              1              5            102 
##           2857           3000           3150           3200           3262 
##              1             72              3              2              1 
##           3300           3360           3500           3550           3600 
##              1              1            151              1             10 
##           3750           3800           3850           4000           4200 
##              1              2              6             29             31 
##           4500           4662           4666           4700           4900 
##              6              3              1              2              5 
##           5000           5250           5350           5450           5600 
##             16              1              1              1              2 
##           5714           6000           6050           6600           6800 
##              1              8              1              1              1 
##           6996           7000           7142           8000           8162 
##              1             10              1              3              1 
##           8400           9500          10000          12000          12500 
##              2              1              4              3              1 
##          13600          14000          15000          17500          20000 
##              1              3              1              1              1 
##          30000          40000          47000          50000          70000 
##              1              1              3              1              1

## [1] "Frequency table after encoding"
## a210_income_. 210 How much income in cash or in kind did [Name] earn from employment in the la
##              0              1              2              5              6 
##           1930              1              1              1              1 
##              7              8             10             14             21 
##              3              1              3              1              1 
##             24             28             35             49             50 
##              1              1              3              2              2 
##             54             56             60             63             65 
##              1              1              3              2              1 
##             70             75             80             84     Don't know 
##             10              1              1              1            171 
##             99            100            105            120            122 
##             15             25              3              1              1 
##            123            125            130            140            142 
##              1              1              1              5              1 
##            150            160            170            171            175 
##             16              3              1              1              6 
##            180            182            188            196            200 
##              1              2              1              1             39 
##            201            210            215            220            225 
##              2              3              1              1              2 
##            227            231            238            250            264 
##              1              1              4             10              3 
##            265            270            280            300            320 
##              1              1              1             38              1 
##            325            340            350            357            360 
##              2              1             43              2              1 
##            375            400            420            450            462 
##              3             61              4              9              1 
##            480            486            490            500            525 
##              1              2              2             66              1 
##            540            550            560            571            588 
##              1              3             11              4              3 
##            600            625            630            640            650 
##             92              3              3              1              9 
##            668            696            700            714            720 
##              1              1            187              5              1 
##            735            750            770            780            798 
##              1             29              1              1              1 
##            800            804            830            835            840 
##             43              2              1              1              6 
##            850            857            875            900            918 
##              1              8              2             32              1 
##            920            931            945            950            960 
##              1              4              1              5              1 
##            980            996 Not applicable           1000           1042 
##              4              1          10622             99              1 
##           1050           1090           1100           1140           1142 
##             61              1             10              1              8 
##           1143           1150           1160           1162           1169 
##              1              1              1             13              2 
##           1190           1200           1225           1250           1260 
##              1            108              3             28              1 
##           1280           1285           1296           1300           1305 
##              1              5              1              3              1 
##           1379           1398           1400           1420           1428 
##              2              2            383              1              8 
##           1450           1460           1470           1480           1500 
##             12              1              2              1            118 
##           1512           1520           1560           1575           1600 
##              1              1              1              1             19 
##           1631           1632           1633           1650           1700 
##              5              1              1              7              8 
##           1750           1800           1820           1862           1869 
##             73             55              2              2              1 
##           1875           1890           1900           1950           2000 
##              1              1              1              1            103 
##           2007           2025           2050           2100           2140 
##              1              1              2            296              1 
##           2142           2150           2200           2245           2250 
##              4              5              7              1              4 
##           2285           2300           2310           2331           2333 
##              1              3              3              2              2 
##           2350           2400           2450           2500           2525 
##              2             28             24             63              1 
##           2547           2598           2600           2625           2630 
##              1              1              4              1              1 
##           2695           2700           2730           2750           2800 
##              1              5              1              5            102 
##           2857           3000           3150           3200           3262 
##              1             72              3              2              1 
##           3300           3360           3500           3550           3600 
##              1              1            151              1             10 
##           3750           3800           3850           4000           4200 
##              1              2              6             29             31 
##           4500           4662           4666           4700           4900 
##              6              3              1              2              5 
##           5000           5250           5350           5450           5600 
##             16              1              1              1              2 
##           5714           6000           6050           6600           6800 
##              1              8              1              1              1 
##           6996           7000           7142   8000 or more 
##              1             10              1             29

# Top-code school enrolment fees, using 15000 as the break point.
mydata <- top_recode(
  variable = "a213_amt_paid_",
  break_point = 15000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a213_amt_paid_. 213 How much did you have to pay in fees to [Name]’s school in order to enroll h
##              0             10             20             25             30 
##           3289              3              1              1              4 
##             40             50             52             60             70 
##              1             62              1             14              4 
##             75             80     Don't know            100            110 
##              1              2            151            307              2 
##            115            120            125            130            140 
##              1              4              4              9              2 
##            150            160            170            180            200 
##             73             12             15              8            282 
##            210            220            240            250            255 
##             23              6              3             79             11 
##            270            275            300            310            315 
##              1              2             81              1              2 
##            320            325            340            350            355 
##              6              2              1             42              2 
##            360            365            370            375            380 
##             28             13              5              5              4 
##            390            400            405            410            415 
##              3             65              1              1              2 
##            425            430            450            460            465 
##              2              1             27              9              2 
##            470            475            480            485            490 
##              3              8             10              2              1 
##            495            500            510            515            520 
##              1            174              2              2              6 
##            525            540            550            560            565 
##             11              7             19             11              3 
##            570            580            595            600            608 
##              8              5              6             81              1 
##            610            615            620            625            630 
##              1              1              3              6              3 
##            635            640            650            660            665 
##              2              2             21              5              4 
##            670            675            700            710            715 
##              1              2             55              1              1 
##            720            725            730            750            760 
##              5              1              1             17              2 
##            770            775            800            850            852 
##              3              3             17              3              1 
##            860            865            900            915            945 
##              1              1             11              1              1 
##            950            960            990 Not applicable           1000 
##              2              1              1           9802             49 
##           1070           1100           1150           1200           1230 
##              1             13              1             16              1 
##           1300           1350           1375           1400           1440 
##              2              2              1              3              3 
##           1450           1500           1600           1615           1700 
##              2             24              2              1              3 
##           1800           1900           2000           2200           2350 
##              6              1             29              2              2 
##           2400           2500           2800           2900           3000 
##              8             16              5              1             69 
##           3500           3600           4000           4100           4200 
##             13             14             35              1              1 
##           4500           4600           4800           5000           5400 
##             12              1              7             57              5 
##           5500           5800           6000           6300           6500 
##             12              1             59              1              6 
##           6600           6700           7000           7080           7200 
##              2              2             34              1             12 
##           7500           7600           8000           8400           8500 
##              3              2             32              2              1 
##           8600           9000           9500           9600           9800 
##              1             23              1              4              1 
##          10000          10500          10800          11000          12000 
##             46              1              1              5             34 
##          13000          13500          14000          14400          15000 
##              4              1              2              2             12 
##          16000          16800          17000          18000          20000 
##              5              3              3              3              3 
##          21000          25000          30000          35000          40000 
##              1              5              2              2              1 
##          50000          65000          90600 
##              1              1              1

## [1] "Frequency table after encoding"
## a213_amt_paid_. 213 How much did you have to pay in fees to [Name]’s school in order to enroll h
##              0             10             20             25             30 
##           3289              3              1              1              4 
##             40             50             52             60             70 
##              1             62              1             14              4 
##             75             80     Don't know            100            110 
##              1              2            151            307              2 
##            115            120            125            130            140 
##              1              4              4              9              2 
##            150            160            170            180            200 
##             73             12             15              8            282 
##            210            220            240            250            255 
##             23              6              3             79             11 
##            270            275            300            310            315 
##              1              2             81              1              2 
##            320            325            340            350            355 
##              6              2              1             42              2 
##            360            365            370            375            380 
##             28             13              5              5              4 
##            390            400            405            410            415 
##              3             65              1              1              2 
##            425            430            450            460            465 
##              2              1             27              9              2 
##            470            475            480            485            490 
##              3              8             10              2              1 
##            495            500            510            515            520 
##              1            174              2              2              6 
##            525            540            550            560            565 
##             11              7             19             11              3 
##            570            580            595            600            608 
##              8              5              6             81              1 
##            610            615            620            625            630 
##              1              1              3              6              3 
##            635            640            650            660            665 
##              2              2             21              5              4 
##            670            675            700            710            715 
##              1              2             55              1              1 
##            720            725            730            750            760 
##              5              1              1             17              2 
##            770            775            800            850            852 
##              3              3             17              3              1 
##            860            865            900            915            945 
##              1              1             11              1              1 
##            950            960            990 Not applicable           1000 
##              2              1              1           9802             49 
##           1070           1100           1150           1200           1230 
##              1             13              1             16              1 
##           1300           1350           1375           1400           1440 
##              2              2              1              3              3 
##           1450           1500           1600           1615           1700 
##              2             24              2              1              3 
##           1800           1900           2000           2200           2350 
##              6              1             29              2              2 
##           2400           2500           2800           2900           3000 
##              8             16              5              1             69 
##           3500           3600           4000           4100           4200 
##             13             14             35              1              1 
##           4500           4600           4800           5000           5400 
##             12              1              7             57              5 
##           5500           5800           6000           6300           6500 
##             12              1             59              1              6 
##           6600           6700           7000           7080           7200 
##              2              2             34              1             12 
##           7500           7600           8000           8400           8500 
##              3              2             32              2              1 
##           8600           9000           9500           9600           9800 
##              1             23              1              4              1 
##          10000          10500          10800          11000          12000 
##             46              1              1              5             34 
##          13000          13500          14000          14400  15000 or more 
##              4              1              2              2             43

# Top-code household out-of-pocket education spending: amounts at or above the
# break point are collapsed into a single "20000 or more" category.
mydata <- top_recode(
  variable = "a214_schl_edu_fess_",
  break_point = 20000,
  missing = NA
)
## [1] "Frequency table before encoding"
## a214_schl_edu_fess_. 214 In the last 12 months, how much has this household spent out of pocket for [
##     Don't know            100            200            250            300 
##            197              6             11              3             29 
##            375            400            450            490            500 
##              1             17              2              1            163 
##            520            570            600            650            700 
##              1              1             26              2             61 
##            750            800            850            900            920 
##              1             73              1             12              1 
##            970            975 Not applicable           1000           1050 
##              1              1           9802            737              2 
##           1100           1150           1200           1233           1240 
##              8              1            114              1              1 
##           1250           1300           1320           1360           1400 
##              3             17              1              1              9 
##           1433           1450           1500           1600           1610 
##              2              1            662             23              1 
##           1666           1700           1800           1870           2000 
##              2              5              8              1           1182 
##           2050           2100           2200           2300           2330 
##              1              6              5              4              2 
##           2500           2600           2700           3000           3100 
##            326              1              3            780              2 
##           3300           3500           3600           3800           4000 
##              3             49              4              3            372 
##           4500           4800           5000           5200           5500 
##             10              2            552              1              4 
##           5600           5800           6000           6080           6200 
##              2              1            114              1              1 
##           6450           6500           7000           7200           8000 
##              1              1             64              1             64 
##           9000           9200           9500           9800          10000 
##              7              2              1              1             96 
##          11000          11100          12000          12300          13000 
##              1              1             22              1              5 
##          14000          15000          16000          17000          20000 
##              3             20              1              2             14 
##          22000          23000          24000          25000          27000 
##              2              1              1              6              1 
##          30000          35000          50000          70000          80000 
##              5              1              2              1              1 
##         150000 
##              1

## [1] "Frequency table after encoding"
## a214_schl_edu_fess_. 214 In the last 12 months, how much has this household spent out of pocket for [
##     Don't know            100            200            250            300 
##            197              6             11              3             29 
##            375            400            450            490            500 
##              1             17              2              1            163 
##            520            570            600            650            700 
##              1              1             26              2             61 
##            750            800            850            900            920 
##              1             73              1             12              1 
##            970            975 Not applicable           1000           1050 
##              1              1           9802            737              2 
##           1100           1150           1200           1233           1240 
##              8              1            114              1              1 
##           1250           1300           1320           1360           1400 
##              3             17              1              1              9 
##           1433           1450           1500           1600           1610 
##              2              1            662             23              1 
##           1666           1700           1800           1870           2000 
##              2              5              8              1           1182 
##           2050           2100           2200           2300           2330 
##              1              6              5              4              2 
##           2500           2600           2700           3000           3100 
##            326              1              3            780              2 
##           3300           3500           3600           3800           4000 
##              3             49              4              3            372 
##           4500           4800           5000           5200           5500 
##             10              2            552              1              4 
##           5600           5800           6000           6080           6200 
##              2              1            114              1              1 
##           6450           6500           7000           7200           8000 
##              1              1             64              1             64 
##           9000           9200           9500           9800          10000 
##              7              2              1              1             96 
##          11000          11100          12000          12300          13000 
##              1              1             22              1              5 
##          14000          15000          16000          17000  20000 or more 
##              3             20              1              2             36

Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values

# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Indirect-PII candidates whose frequency tables are captured for review
# before deciding how each one should be recoded.
indirect_PII <- c(
  "a206_relation_",
  "a207_complete_edu_",
  "a208_employ_status_"
)
capture_tables(indirect_PII)

# Recode those with very specific values. 

# Collapse relationship status into broader groups.
# ordinal_recode() numbers the bins defined by the break points from 1 upwards
# (see the crosstab it prints), so six break points give codes 1-6:
#   1-4  original categories 1-4, unchanged
#   5    Divorced (5) merged with Widowed/Widower (6)
#   6    everything from 99 upwards, i.e. "Not applicable" (999)
break_rel <- c(1, 2, 3, 4, 5, 99)
labels_rel <- c(
  "Single/not committed" = 1,
  "Single, committed or engaged" = 2,
  "Currently Married and cohabitating" = 3,
  "Married but not cohabitating" = 4,
  "Divorced/Widowed/Widower" = 5,
  # Code 6 previously had no label and was released as a bare "6".
  "Not applicable" = 6
)
mydata <- ordinal_recode(
  variable = "a206_relation_",
  break_points = break_rel,
  missing = 999999,
  value_labels = labels_rel
)

## [1] "Frequency table before encoding"
## a206_relation_. 206 What is [Name]'s relationship status?
##               Single/not committed       Single, committed or engaged 
##                               6366                                624 
## Currently Married and cohabitating       Married but not cohabitating 
##                               5671                               1174 
##                           Divorced                    Widowed/Widower 
##                                 23                                720 
##                     Not applicable 
##                               1196 
##      recoded
##       [1,2) [2,3) [3,4) [4,5) [5,99) [99,1e+06)
##   1    6366     0     0     0      0          0
##   2       0   624     0     0      0          0
##   3       0     0  5671     0      0          0
##   4       0     0     0  1174      0          0
##   5       0     0     0     0     23          0
##   6       0     0     0     0    720          0
##   999     0     0     0     0      0       1196
## [1] "Frequency table after encoding"
## a206_relation_. 206 What is [Name]'s relationship status?
##               Single/not committed       Single, committed or engaged 
##                               6366                                624 
## Currently Married and cohabitating       Married but not cohabitating 
##                               5671                               1174 
##           Divorced/Widowed/Widower                                  6 
##                                743                               1196 
## [1] "Inspect value labels and relabel as necessary"
##               Single/not committed       Single, committed or engaged 
##                                  1                                  2 
## Currently Married and cohabitating       Married but not cohabitating 
##                                  3                                  4 
##           Divorced/Widowed/Widower 
##                                  5
# Print the current value labels of the education variable so the break points
# and new labels below can be chosen against the original codes.
val_labels(mydata[["a207_complete_edu_"]])
##             Never Attended School                           Grade 1 
##                                 0                                 1 
##                           Grade 2                           Grade 3 
##                                 2                                 3 
##                           Grade 4                           Grade 5 
##                                 4                                 5 
##                           Grade 6                           Grade 7 
##                                 6                                 7 
##                           Grade 8                           Grade 9 
##                                 8                                 9 
##                          Grade 10                          Grade 11 
##                                10                                11 
##                          Grade 12         University / Not Graduate 
##                                12                                13 
##             University / Graduate Post Bachelors Tertiary Education 
##                                14                                15 
##                Technical training              Professional studies 
##                                16                                17 
##                       Pre-primary                        Don't know 
##                                18                                98 
##                    Not applicable                        Don't know 
##                               999                                NA 
##                    Not applicable 
##                                NA
# Collapse completed education.
# ordinal_recode() numbers the bins defined by the break points from 1 upwards,
# so the 17 break points below give codes 1-17:
#   1-14  original codes 0-13, one bin each
#   15    original codes 14-17: University / Graduate, Post Bachelors Tertiary
#         Education, Technical training and Professional studies
#   16    original code 18 (Pre-primary) only
#   17    everything from 19 upwards: "Don't know" (98) and "Not applicable" (999)
break_edu <- c(0:14, 18, 19)
labels_edu <- c(
  # Pre-primary is NOT merged into this bin (it is code 16), so the label must
  # not claim to cover pre-school attendance.
  "Never Attended School" = 1,
  "Grade 1" = 2,
  "Grade 2" = 3,
  "Grade 3" = 4,
  "Grade 4" = 5,
  "Grade 5" = 6,
  "Grade 6" = 7,
  "Grade 7" = 8,
  "Grade 8" = 9,
  "Grade 9" = 10,
  "Grade 10" = 11,
  "Grade 11" = 12,
  "Grade 12" = 13,
  "University / Not Graduate" = 14,
  "University / Graduate or higher" = 15,
  "Pre-primary" = 16,
  # Code 17 previously had no label and was released as a bare "17".
  "Don't know / Not applicable" = 17
)
mydata <- ordinal_recode(
  variable = "a207_complete_edu_",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## a207_complete_edu_. 207 What is [Name]'s completed level of education?
##             Never Attended School                           Grade 1 
##                              3707                               336 
##                           Grade 2                           Grade 3 
##                               543                               684 
##                           Grade 4                           Grade 5 
##                               631                              1414 
##                           Grade 6                           Grade 7 
##                               718                              2804 
##                           Grade 8                           Grade 9 
##                              1434                               720 
##                          Grade 10                          Grade 11 
##                               715                               191 
##                          Grade 12         University / Not Graduate 
##                               381                               100 
##             University / Graduate Post Bachelors Tertiary Education 
##                               103                                31 
##                Technical training              Professional studies 
##                                15                                 6 
##                       Pre-primary                        Don't know 
##                                 4                                41 
##                    Not applicable 
##                              1196 
##      recoded
##       [0,1) [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,10) [10,11) [11,12)
##   0    3707     0     0     0     0     0     0     0     0      0       0       0
##   1       0   336     0     0     0     0     0     0     0      0       0       0
##   2       0     0   543     0     0     0     0     0     0      0       0       0
##   3       0     0     0   684     0     0     0     0     0      0       0       0
##   4       0     0     0     0   631     0     0     0     0      0       0       0
##   5       0     0     0     0     0  1414     0     0     0      0       0       0
##   6       0     0     0     0     0     0   718     0     0      0       0       0
##   7       0     0     0     0     0     0     0  2804     0      0       0       0
##   8       0     0     0     0     0     0     0     0  1434      0       0       0
##   9       0     0     0     0     0     0     0     0     0    720       0       0
##   10      0     0     0     0     0     0     0     0     0      0     715       0
##   11      0     0     0     0     0     0     0     0     0      0       0     191
##   12      0     0     0     0     0     0     0     0     0      0       0       0
##   13      0     0     0     0     0     0     0     0     0      0       0       0
##   14      0     0     0     0     0     0     0     0     0      0       0       0
##   15      0     0     0     0     0     0     0     0     0      0       0       0
##   16      0     0     0     0     0     0     0     0     0      0       0       0
##   17      0     0     0     0     0     0     0     0     0      0       0       0
##   18      0     0     0     0     0     0     0     0     0      0       0       0
##   98      0     0     0     0     0     0     0     0     0      0       0       0
##   999     0     0     0     0     0     0     0     0     0      0       0       0
##      recoded
##       [12,13) [13,14) [14,18) [18,19) [19,1e+06)
##   0         0       0       0       0          0
##   1         0       0       0       0          0
##   2         0       0       0       0          0
##   3         0       0       0       0          0
##   4         0       0       0       0          0
##   5         0       0       0       0          0
##   6         0       0       0       0          0
##   7         0       0       0       0          0
##   8         0       0       0       0          0
##   9         0       0       0       0          0
##   10        0       0       0       0          0
##   11        0       0       0       0          0
##   12      381       0       0       0          0
##   13        0     100       0       0          0
##   14        0       0     103       0          0
##   15        0       0      31       0          0
##   16        0       0      15       0          0
##   17        0       0       6       0          0
##   18        0       0       0       4          0
##   98        0       0       0       0         41
##   999       0       0       0       0       1196
## [1] "Frequency table after encoding"
## a207_complete_edu_. 207 What is [Name]'s completed level of education?
## Never Attended School or Only Attended Pre-School 
##                                              3707 
##                                           Grade 1 
##                                               336 
##                                           Grade 2 
##                                               543 
##                                           Grade 3 
##                                               684 
##                                           Grade 4 
##                                               631 
##                                           Grade 5 
##                                              1414 
##                                           Grade 6 
##                                               718 
##                                           Grade 7 
##                                              2804 
##                                           Grade 8 
##                                              1434 
##                                           Grade 9 
##                                               720 
##                                          Grade 10 
##                                               715 
##                                          Grade 11 
##                                               191 
##                                          Grade 12 
##                                               381 
##                         University / Not Graduate 
##                                               100 
##                   University / Graduate or higher 
##                                               155 
##                                     Below primary 
##                                                 4 
##                                                17 
##                                              1237 
## [1] "Inspect value labels and relabel as necessary"
## Never Attended School or Only Attended Pre-School 
##                                                 1 
##                                           Grade 1 
##                                                 2 
##                                           Grade 2 
##                                                 3 
##                                           Grade 3 
##                                                 4 
##                                           Grade 4 
##                                                 5 
##                                           Grade 5 
##                                                 6 
##                                           Grade 6 
##                                                 7 
##                                           Grade 7 
##                                                 8 
##                                           Grade 8 
##                                                 9 
##                                           Grade 9 
##                                                10 
##                                          Grade 10 
##                                                11 
##                                          Grade 11 
##                                                12 
##                                          Grade 12 
##                                                13 
##                         University / Not Graduate 
##                                                14 
##                   University / Graduate or higher 
##                                                15 
##                                     Below primary 
##                                                16
# Print the current value labels of the employment-status variable so the
# break points and new labels below can be chosen against the original codes.
val_labels(mydata[["a208_employ_status_"]])
##                            Not in labor force 
##                                             0 
##                  Unpaid Worker in Family Farm 
##                                             1 
##              Unpaid Worker in Family Business 
##                                             2 
##                  Self Employed in Agriculture 
##                                             3 
##               Self Employed in Outside of Agr 
##                                             4 
##          Regular Wage or Salary Worker in Agr 
##                                             5 
## Regular Wage or Salary Worker Outside of Agr. 
##                                             6 
##                                Skilled worker 
##                                             7 
##                                Daily labourer 
##                                             8 
##                         Paif household worker 
##                                             9 
##                                     Pensioner 
##                                            10 
##                                 Farm labourer 
##                                            11 
##                                          Chef 
##                                            12 
##                                        Priest 
##                                            13
# Collapse employment status.
# NOTE: break_edu / labels_edu are reused (overwritten) from the education step;
# the names are kept so that nothing else in the script changes.
#
# ordinal_recode() numbers the bins defined by the break points from 1 upwards,
# NOT from the original codes, so the labels must start at 1. Numbering them
# from 0 shifts every label by one category (e.g. the 9418 "Not in labor force"
# cases were released as "Unpaid Worker in Family Farm").
# The 12 break points below give codes 1-12:
#   1-9  original codes 0-8, one bin each
#   10   original codes 9-10: Paid household worker and Pensioner
#   11   original code 11: Farm labourer
#   12   everything from 12 upwards: Chef (12), Priest (13), code 98 and
#        "Not applicable" (999)
break_edu <- c(0:9, 11, 12)
labels_edu <- c(
  "Not in labor force" = 1,
  "Unpaid Worker in Family Farm" = 2,
  "Unpaid Worker in Family Business" = 3,
  "Self Employed in Agriculture" = 4,
  "Self Employed in Outside of Agr" = 5,
  "Regular Wage or Salary Worker in Agr" = 6,
  "Regular Wage or Salary Worker Outside of Agr" = 7,
  "Skilled worker" = 8,
  "Daily labourer" = 9,
  # Distinct names for codes 10 and 12: two codes sharing the name "Other" are
  # merged into one row of the frequency table and cannot be told apart.
  "Paid household worker or pensioner" = 10,
  "Farm Labourer" = 11,
  "Other, don't know or not applicable" = 12
)
mydata <- ordinal_recode(
  variable = "a208_employ_status_",
  break_points = break_edu,
  missing = 999999,
  value_labels = labels_edu
)

## [1] "Frequency table before encoding"
## a208_employ_status_. 208 What is [Name]'s employment status over the last 7 days?
##                            Not in labor force 
##                                          9418 
##                  Unpaid Worker in Family Farm 
##                                           579 
##              Unpaid Worker in Family Business 
##                                           142 
##                  Self Employed in Agriculture 
##                                          1307 
##               Self Employed in Outside of Agr 
##                                           455 
##          Regular Wage or Salary Worker in Agr 
##                                           184 
## Regular Wage or Salary Worker Outside of Agr. 
##                                          1646 
##                                Skilled worker 
##                                           479 
##                                Daily labourer 
##                                           277 
##                         Paif household worker 
##                                             4 
##                                     Pensioner 
##                                             6 
##                                 Farm labourer 
##                                            66 
##                                          Chef 
##                                             1 
##                                        Priest 
##                                             2 
##                                            98 
##                                            11 
##                                           999 
##                                          1196 
##                                          <NA> 
##                                             1 
##      recoded
##       [0,1) [1,2) [2,3) [3,4) [4,5) [5,6) [6,7) [7,8) [8,9) [9,11) [11,12)
##   0    9418     0     0     0     0     0     0     0     0      0       0
##   1       0   579     0     0     0     0     0     0     0      0       0
##   2       0     0   142     0     0     0     0     0     0      0       0
##   3       0     0     0  1307     0     0     0     0     0      0       0
##   4       0     0     0     0   455     0     0     0     0      0       0
##   5       0     0     0     0     0   184     0     0     0      0       0
##   6       0     0     0     0     0     0  1646     0     0      0       0
##   7       0     0     0     0     0     0     0   479     0      0       0
##   8       0     0     0     0     0     0     0     0   277      0       0
##   9       0     0     0     0     0     0     0     0     0      4       0
##   10      0     0     0     0     0     0     0     0     0      6       0
##   11      0     0     0     0     0     0     0     0     0      0      66
##   12      0     0     0     0     0     0     0     0     0      0       0
##   13      0     0     0     0     0     0     0     0     0      0       0
##   98      0     0     0     0     0     0     0     0     0      0       0
##   999     0     0     0     0     0     0     0     0     0      0       0
##      recoded
##       [12,1e+06)
##   0            0
##   1            0
##   2            0
##   3            0
##   4            0
##   5            0
##   6            0
##   7            0
##   8            0
##   9            0
##   10           0
##   11           0
##   12           1
##   13           2
##   98          11
##   999       1196
## [1] "Frequency table after encoding"
## a208_employ_status_. 208 What is [Name]'s employment status over the last 7 days?
##                 Unpaid Worker in Family Farm 
##                                         9418 
##             Unpaid Worker in Family Business 
##                                          579 
##                 Self Employed in Agriculture 
##                                          142 
##              Self Employed in Outside of Agr 
##                                         1307 
##         Regular Wage or Salary Worker in Agr 
##                                          455 
## Regular Wage or Salary Worker Outside of Agr 
##                                          184 
##                               Skilled worker 
##                                         1646 
##                               Daily labourer 
##                                          479 
##                                        Other 
##                                          343 
##                                Farm Labourer 
##                                           10 
##                                           12 
##                                         1210 
##                                         <NA> 
##                                            1 
## [1] "Inspect value labels and relabel as necessary"
##                           Not in labor force 
##                                            0 
##                 Unpaid Worker in Family Farm 
##                                            1 
##             Unpaid Worker in Family Business 
##                                            2 
##                 Self Employed in Agriculture 
##                                            3 
##              Self Employed in Outside of Agr 
##                                            4 
##         Regular Wage or Salary Worker in Agr 
##                                            5 
## Regular Wage or Salary Worker Outside of Agr 
##                                            6 
##                               Skilled worker 
##                                            7 
##                               Daily labourer 
##                                            8 
##                                        Other 
##                                            9 
##                                Farm Labourer 
##                                           10 
##                                        Other 
##                                           11

Matching and crosstabulations: Run automated PII check

# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# All variable names should correspond to the names in the data file
# selected categorical key variables: gender, occupation/education and age
# Categorical key variables (quasi-identifiers) used for the k-anonymity check
selectedKeyVars <- c("a204_gender_", "a203_age_", "a207_complete_edu_") ##!!! Replace with candidate categorical demo vars

# weight variable (add if available)
# selectedWeightVar = c('projwt') ##!!! Replace with weight var

# Household id variable, passed to sdcMicro as the cluster identifier
selectedHouseholdID <- c("hh_id") ##!!! Replace with household id

# Build the sdcMicro object from the variables declared above
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars,
  hhId = selectedHouseholdID
)

# Print the summary: category counts per key variable and the number of
# records violating 2-, 3- and 5-anonymity
sdcInitial
## The input dataset consists of 15774 rows and 34 variables.
##   --> Categorical key variables: a204_gender_, a203_age_, a207_complete_edu_
##   --> Cluster/Household-Id variable: hh_id
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##        Key Variable Number of categories      Mean size           
##        a204_gender_                    2  (2)  7887.000 (7887.000)
##           a203_age_                   80 (80)   197.175  (197.175)
##  a207_complete_edu_                   17 (17)   927.882  (927.882)
##  Size of smallest (>0)       
##                   6581 (6581)
##                      1    (1)
##                      4    (4)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 305 (1.934%)
##   - 3-anonymity: 617 (3.911%)
##   - 5-anonymity: 1222 (7.747%)
## 
## ----------------------------------------------------------------------

Show values of key variables for records that violate k-anonymity

# mydata <- labelDataset(mydata)

# Flag records that violate 2-anonymity: column 2 of the individual-risk
# matrix is below 2 (presumably the sample frequency count of the record's
# key-variable combination -- confirm against the sdcMicro documentation).
notAnon <- slot(sdcInitial, "risk")[["individual"]][, 2] < 2

# List the key-variable values of the flagged records
mydata[notAnon, selectedKeyVars]
## # A tibble: 305 x 3
##    a204_gender_ a203_age_                   a207_complete_edu_
##       <dbl+lbl> <dbl+lbl>                            <dbl+lbl>
##  1   0 [Female]        11  9 [Grade 8]                        
##  2   0 [Female]        17  2 [Grade 1]                        
##  3   0 [Female]        15 15 [University / Graduate or higher]
##  4   0 [Female]        20  2 [Grade 1]                        
##  5   0 [Female]        12 13 [Grade 12]                       
##  6   0 [Female]        15 17                                  
##  7   1 [Male]          53  8 [Grade 7]                        
##  8   1 [Male]          44  4 [Grade 3]                        
##  9   0 [Female]        75  9 [Grade 8]                        
## 10   0 [Female]        42  5 [Grade 4]                        
## # ... with 295 more rows
# Run local suppression on the key variables; k = 2 is the function default,
# spelled out here to match the 2-anonymity check used to build notAnon.
sdcFinal <- localSuppression(obj = sdcInitial, k = 2)

# Recombining anonymized variables

# Preview the manipulated key variables for the flagged records only
extractManipData(obj = sdcFinal)[notAnon, selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used
##       a204_gender_ a203_age_ a207_complete_edu_
## 137              0        NA                  9
## 251              0        NA                  2
## 921              0        NA                 15
## 1050             0        NA                  2
## 1813             0        NA                 13
## 2257             0        NA                 17
## 2435             1        NA                  8
## 2473             1        NA                  4
## 2479             0        NA                  9
## 2494             0        NA                  5
## 2543             0        NA                  5
## 2557             1        NA                  8
## 2588             0        NA                 14
## 2656             1        NA                  5
## 2703             0        NA                  8
## 2818             1        NA                  2
## 2855             0        NA                  8
## 2867             0        NA                 10
## 2887             0        NA                 11
## 2889             0        NA                 15
## 2923             1        NA                 11
## 2959             0        NA                 11
## 2966             1        NA                  4
## 3055             0        NA                  9
## 3116             1        NA                  9
## 3143             1        NA                  9
## 3159             1        NA                  5
## 3210             1        NA                 14
## 3235             1        NA                  7
## 3251             0        NA                  9
## 3253             0        NA                 11
## 3265             0        NA                  9
## 3348             0        NA                 17
## 3355             1        NA                 15
## 3357             0        NA                  9
## 3387             0        NA                  4
## 3452             0        NA                 15
## 3454             0        NA                 12
## 3623             1        NA                  3
## 3646             1        NA                  5
## 3658             0        NA                 13
## 3798             1        NA                  9
## 3858             1        NA                  4
## 3932             0        NA                  3
## 3956             1        NA                  9
## 3971             1        NA                 12
## 4074             1        NA                 15
## 4107             1        NA                  3
## 4207             1        NA                  8
## 4231             0        NA                 13
## 4259             0        NA                  2
## 4319             0         7                 NA
## 4326             1        NA                  9
## 4391             1        NA                  1
## 4412             1        NA                  2
## 4480             1        NA                  5
## 4505             1        NA                  8
## 4507             1        NA                  8
## 4564             0        NA                  2
## 4620             1        NA                 12
## 4657             1        NA                  2
## 4666             1        NA                  7
## 4679             0        NA                  3
## 4712             1        NA                  2
## 4713             0        NA                  2
## 4714             1        NA                  4
## 4722             0        NA                  5
## 4726             0        NA                  6
## 4736             1        NA                 12
## 4749             1        NA                  8
## 4867             1        NA                 15
## 4906             1        NA                  7
## 4931             1        NA                 14
## 4934             0        NA                  3
## 4936             0        NA                 15
## 4961             1        NA                 12
## 4970             0        NA                  4
## 5153             0        NA                  4
## 5168             1        NA                  3
## 5268             1        NA                  3
## 5279             1        NA                 12
## 5297             0        NA                  3
## 5312             1        NA                  2
## 5323             1        NA                  5
## 5385             1        NA                  8
## 5396             0        NA                  7
## 5455             1        NA                 13
## 5537             0        NA                  5
## 5588             1        NA                  7
## 5635             1        NA                  5
## 5742             1        NA                  6
## 5764             1        NA                 12
## 5783             1        NA                 13
## 5789             0        NA                 13
## 5837             1        NA                  3
## 5855             1        NA                  9
## 5888             1        NA                  7
## 5936             1        NA                 11
## 6008             0        NA                  4
## 6076             1        NA                  5
## 6228             0        NA                  9
## 6365             1        NA                 15
## 6421             0        NA                  6
## 6442             0        NA                  9
## 6443             0        NA                 15
## 6447             1        NA                  5
## 6474             0        NA                  3
## 6479             0        NA                  1
## 6481             0        NA                  2
## 6630             1        NA                 10
## 6669             0        NA                  1
## 6692             0        NA                 11
## 6754             1        NA                  3
## 6762             1        NA                  7
## 6771             1        NA                  3
## 6796             0        NA                 11
## 6805             0        NA                  8
## 6813             0        NA                  4
## 6832             1        NA                 15
## 6948             0        NA                  5
## 6962             1        NA                 11
## 7032             0        NA                  6
## 7133             1        NA                  3
## 7139             0        NA                  6
## 7141             0        NA                 12
## 7251             1        NA                  2
## 7292             1        NA                 11
## 7303             1        NA                  8
## 7349             0        NA                  8
## 7409             1        NA                 15
## 7424             1        NA                 15
## 7425             0        NA                  4
## 7543             1        NA                  1
## 7560             0        NA                 13
## 7583             0        NA                  5
## 7620             1        NA                  4
## 7668             0        NA                  8
## 7678             0        NA                  8
## 7751             1        NA                  5
## 7774             1        NA                  3
## 7780             1        NA                 13
## 7797             1        NA                 14
## 7827             0        NA                  8
## 7838             0        NA                 10
## 7881             1        NA                 15
## 7912             1        NA                  6
## 7939             0        NA                 15
## 7942             0        NA                 15
## 7944             0        NA                 12
## 7946             1        NA                 14
## 7994             1        NA                 10
## 8039             1        NA                 17
## 8044             1        NA                  5
## 8109             0        NA                  7
## 8126             1        NA                  2
## 8143             1        NA                  1
## 8240             1        NA                 10
## 8242             1        NA                  8
## 8290             0        NA                 13
## 8476             1        NA                  5
## 8616             1        NA                 14
## 8654             1        NA                  7
## 8681             1        NA                 17
## 8907             1        NA                  1
## 8950             0        NA                 13
## 9010             0        NA                  6
## 9021             0        NA                  7
## 9067             0        NA                 14
## 9095             0        NA                  6
## 9152             0        NA                  4
## 9154             0        NA                  6
## 9155             0        NA                  4
## 9175             0        NA                 11
## 9209             0        NA                  6
## 9241             1        NA                 12
## 9270             0        NA                 10
## 9307             1        NA                  4
## 9393             0        NA                  3
## 9446             0        NA                 13
## 9565             1        NA                 14
## 9635             0        NA                 17
## 9641             0        NA                  4
## 9814             1        NA                  4
## 9840             1        NA                 12
## 9888             0        NA                  5
## 9924             1        NA                  5
## 10010            1        NA                  6
## 10123            1        NA                  8
## 10140            1        NA                  6
## 10143            1        NA                 12
## 10223            1        NA                  9
## 10251            0        NA                 12
## 10254            1        NA                 14
## 10264            1        NA                  7
## 10291            1        NA                 13
## 10317            0        NA                  2
## 10336            1        NA                  7
## 10375            0        NA                  9
## 10402            0        NA                 13
## 10409            1        NA                  9
## 10468            1        NA                 10
## 10551            1        NA                 11
## 10561            1        NA                 12
## 10595            1        NA                  3
## 10606            1        NA                 13
## 10693            0        NA                  3
## 10726            1        NA                  2
## 10729            0        NA                  2
## 10823            1        NA                 12
## 10913            0        NA                 15
## 10935            1        NA                  3
## 11189            1        NA                 10
## 11219            1        NA                  3
## 11307            1        NA                  8
## 11335            1        NA                 15
## 11360            1        NA                  4
## 11426            1        NA                  5
## 11457            1        NA                 12
## 11464            1        NA                  6
## 11500            1        NA                  2
## 11528            1        NA                  6
## 11607            1        NA                  9
## 11610            0        NA                  4
## 11623            1        NA                  8
## 11632            1        NA                  6
## 11977            1        NA                  4
## 11990            1        NA                  3
## 12006            0        NA                  3
## 12035            1        NA                 13
## 12062            1        NA                 15
## 12138            1        NA                  7
## 12143            1        NA                 12
## 12172            1        NA                 10
## 12299            0        NA                  5
## 12313            0        NA                  2
## 12361            1        NA                 12
## 12362            0        NA                  3
## 12475            1        NA                  7
## 12625            0        NA                 13
## 12643            0        NA                  5
## 12697            0        NA                  6
## 12701            0        NA                 12
## 12863            1        NA                  2
## 12902            1        NA                 17
## 13003            1        NA                  6
## 13007            1        NA                 17
## 13070            1        NA                  2
## 13075            1        NA                  5
## 13126            1        NA                 17
## 13181            0        NA                  1
## 13345            1        NA                  7
## 13491            0        NA                 10
## 13531            1        NA                  4
## 13538            1        NA                  7
## 13643            1        NA                 13
## 13734            0        NA                  5
## 13778            0        NA                 10
## 13786            1        NA                  1
## 13812            1        NA                 13
## 13854            1        NA                  8
## 13906            1        NA                 11
## 13975            1        NA                  3
## 14030            1        NA                 17
## 14031            1        NA                 13
## 14077            0        NA                 12
## 14089            0        NA                  5
## 14094            0        NA                 13
## 14099            1        NA                  2
## 14122            1        NA                 17
## 14142            1        NA                 13
## 14174            0        NA                  2
## 14230            0        NA                 17
## 14256            0        NA                 17
## 14272            1        NA                 10
## 14301            0        NA                  6
## 14572            1        NA                 10
## 14595            0        NA                  9
## 14629            1        NA                 15
## 14707            0        NA                  5
## 14709            1        NA                 15
## 14715            0        NA                  9
## 14747            0        NA                 12
## 14765            1        NA                  7
## 14801            1        NA                  2
## 14802            0        NA                  6
## 14826            1        NA                  3
## 14846            1        NA                  4
## 14862            0        NA                  5
## 14874            0        NA                  5
## 14958            0        NA                  5
## 15210            1        NA                  4
## 15211            0        NA                  5
## 15305            1        NA                  8
## 15347            0        NA                  5
## 15397            1        NA                  4
## 15517            1        NA                  1
## 15525            0        NA                  3
## 15539            1        NA                 11
## 15540            1        NA                 14
## 15622            0        NA                  5
## 15624            0        NA                 17
## 15682            0        NA                  9
## 15722            1        NA                  6
## 15744            0        NA                 15
## 15766            0        NA                  3
# Suppress age for flagged (2-anonymity violating) records whose age is above 17.
# The row selection is built in a single step and passed through which(), so a
# missing age (NA > 17 is NA) is simply skipped instead of ending up as an NA
# inside the assignment index.
mydata[which(notAnon & mydata[["a203_age_"]] > 17), "a203_age_"] <- NA

# Rebuild the sdcMicro object from the updated data so the risk measures
# reflect the ages suppressed above
sdcInitial <- createSdcObj(
  dat = mydata,
  hhId = selectedHouseholdID,
  keyVars = selectedKeyVars
)

# mydata <- labelDataset(mydata)

# Re-flag records still violating 2-anonymity (column 2 of the individual-risk
# matrix below 2) and list their key-variable values
notAnon <- slot(sdcInitial, "risk")[["individual"]][, 2] < 2
mydata[notAnon, selectedKeyVars]
## # A tibble: 1 x 3
##   a204_gender_ a203_age_ a207_complete_edu_
##      <dbl+lbl> <dbl+lbl>          <dbl+lbl>
## 1   0 [Female]         7 16 [Below primary]
# Re-run local suppression on the rebuilt object (k = 2 is the function default)
sdcFinal <- localSuppression(obj = sdcInitial, k = 2)

# Preview the manipulated key variables for the records still flagged
extractManipData(obj = sdcFinal)[notAnon, selectedKeyVars] # manipulated variables HH
## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (cc != class(v_p)) {: the condition has length > 1 and only the first
## element will be used
##      a204_gender_ a203_age_ a207_complete_edu_
## 4319            0         7                 NA
# Blank out gender for the record(s) still flagged as violating 2-anonymity.
# NOTE(review): the local-suppression preview printed just before this step
# blanks a207_complete_edu_ for that record, whereas this line blanks gender --
# confirm that suppressing gender is the intended choice.
mydata[notAnon, "a204_gender_"] <- NA

# Rebuild the sdcMicro object once more and auto-print it to report the
# final k-anonymity counts
createSdcObj(
  dat = mydata,
  hhId = selectedHouseholdID,
  keyVars = selectedKeyVars
)
## The input dataset consists of 15774 rows and 34 variables.
##   --> Categorical key variables: a204_gender_, a203_age_, a207_complete_edu_
##   --> Cluster/Household-Id variable: hh_id
## ----------------------------------------------------------------------
## Information on categorical key variables:
## 
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
##        Key Variable Number of categories      Mean size           
##        a204_gender_                    3  (3)  7886.500 (7886.500)
##           a203_age_                   78 (78)   201.195  (201.195)
##  a207_complete_edu_                   17 (17)   927.882  (927.882)
##  Size of smallest (>0)       
##                   6581 (6581)
##                      2    (2)
##                      4    (4)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
## 
## Number of observations violating
##   - 2-anonymity: 0 (0.000%)
##   - 3-anonymity: 0 (0.000%)
##   - 5-anonymity: 9 (0.057%)
## 
## ----------------------------------------------------------------------

Open-ends: review responses for any sensitive information, redact as necessary

# !!! No open-ends

GPS data: Displace

# !!! No GPS data

Save processed data in Stata and SPSS format

Adds "_PU" (Public Use) to the end of the name

# Export the processed data as <filename>_PU in Stata (.dta) and SPSS (.sav) format
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Build the report title from the dataset file name
title_var <- paste0("DOL-ILAB SDC - ", filename)