# Start from a clean workspace.
# The original call passed `all = t`. `t` is not a logical value: unless a
# variable named `t` happens to exist, it resolves to base R's transpose
# function, so the flag never meant TRUE. The argument is now spelled out in
# full (`all.names`, no partial matching) with an explicit TRUE, which also
# clears objects whose names start with a dot.
rm(list = ls(all.names = TRUE))

# Base name of the dataset being processed; reused below to build the names
# of the public-use output files and the report title.
filename <- "bhsection0" # !!!Update filename

# Helper script to load. NOTE(review): encode_location() and capture_tables()
# used later are not defined in this file, presumably they come from here.
functions_vers <- "functions_1.7.R" # !!!Update helper functions file
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId,
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!! No Direct PII
# !!! No Direct PII-team
# !!!Include relevant variables, but check their population size first to confirm they are <100,000
# Columns removed outright from the released data.
dropvars <- c(
  "dise",
  "q006_block_name"
)
# Keep every column whose name has no match in `dropvars`.
mydata <- mydata[is.na(match(names(mydata), dropvars))]

# Location identifiers handed to the encode_location() helper, whose result
# replaces `mydata`. The frequency tables that follow show each variable
# before and after encoding.
# NOTE(review): `missing = 999999` is presumably the missing-value code the
# helper should recognise; confirm against the helper file.
locvars <- c(
  "q006_block_id",
  "q007_vlg_id"
)
mydata <- encode_location(variables = locvars, missing = 999999)
## [1] "Frequency table before encoding"
## q006_block_id. 6 Block Code
## 1 2 3 4 5 6 7 8 9 <NA>
## 194 155 195 407 98 190 143 422 516 33
## [1] "Frequency table after encoding"
## q006_block_id. 6 Block Code
## 279 280 281 282 283 284 285 286 287 <NA>
## 422 155 195 516 407 98 194 143 190 33
## [1] "Frequency table before encoding"
## q007_vlg_id. 7 Village Code
## 1 2 3 4 5 6 7 9 10 11 12 13 15 16 17 18 19 20
## 16 16 16 15 20 31 28 17 15 20 24 24 15 18 21 17 17 18
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
## 30 22 18 17 32 27 26 18 14 15 24 24 22 16 29 18 17 22
## 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
## 27 17 16 18 17 28 20 24 21 19 17 17 16 18 26 24 27 18
## 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
## 17 21 13 24 22 16 18 18 29 16 18 21 25 13 16 19 16 23
## 75 76 77 78 80 81 82 83 84 85 87 88 89 90 91 92 93 94
## 23 17 22 29 30 17 22 17 17 13 16 22 15 19 19 19 21 13
## 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
## 17 22 28 21 25 18 24 21 15 19 14 31 16 27 21 17 21 26
## 113 114 115 116 117 118 119 <NA>
## 14 24 19 16 21 22 16 33
## [1] "Frequency table after encoding"
## q007_vlg_id. 7 Village Code
## 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
## 18 25 25 17 22 18 16 20 17 21 16 16 18 20 24 28 17 15
## 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 17 22 22 18 24 17 31 16 19 16 18 17 21 15 20 16 23 19
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
## 22 15 14 16 17 19 17 21 27 27 14 22 17 17 24 27 29 18
## 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
## 18 16 28 24 22 17 29 13 15 17 30 21 15 16 26 22 24 18
## 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
## 18 19 18 17 16 26 21 13 30 22 28 17 19 21 15 14 13 13
## 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
## 18 21 16 32 24 21 29 16 24 17 18 19 26 27 17 16 31 16
## 373 374 375 376 377 378 379 <NA>
## 23 21 22 24 19 24 21 33
# !!!No Indirect PII - Ordinal
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)
# Indirect identifiers passed to the capture_tables() helper.
indirect_PII <- c(
  "q012_urban",
  "s1_relation"
)
capture_tables(indirect_PII)
# Recode those with very specific values.
# Not enough variables for matching to be possible
# !!! No open-ends
# !!! No GPS
# Adds "_PU" (Public Use) to the end of the output file name
# Write the processed data in Stata (.dta) and SPSS (.sav) formats; both file
# names are `filename` followed by the "_PU" suffix.
haven::write_dta(data = mydata, path = paste0(filename, "_PU.dta"))
haven::write_sav(data = mydata, path = paste0(filename, "_PU.sav"))

# Report title derived from the dataset name.
title_var <- paste0("DOL-ILAB SDC - ", filename)