# Start from a clean workspace. `all.names = TRUE` also clears hidden
# (dot-prefixed) objects. The previous call passed `all=t`, i.e. the base
# function t() rather than the logical TRUE, and relied on partial matching of
# the `all.names` argument.
rm(list = ls(all.names = TRUE))

filename <- "Rwanda_Public Use" # !!!Update filename
functions_vers <- "functions_1.7.R" # !!!Update helper functions file

# Load the helper functions file named above.
source(functions_vers)
# Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags:
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition.
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location (>100,000)
# Weight: weightVar
# Household ID: hhId,
# Open-ends: Review responses for any sensitive information, redact as necessary
# !!!No direct PII
# !!!Replace vector in "variables" field below with relevant variable names
# Encode Direct PII-team
# !!!No direct PII-team
# !!!Include relevant variables, but check their population size first to confirm they are <100,000
# Remove redundant small location information
# Drop the redundant small-location columns: "nsector" plus the dummies
# "sectordum1" .. "sectordum15".
# NOTE(review): the hand-written list this replaces named "sectordum15" twice;
# if a "sectordum16" column exists it is NOT dropped here -- confirm intent.
mydata <- mydata[!names(mydata) %in% c("nsector", paste0("sectordum", 1:15))]
# Relabel small locations
# Give each dummy b_sectordum1 .. b_sectordum15 the variable label
# "Bl_Sector==<k>", where <k> is the dummy's number.
for (sector_idx in seq_len(15)) {
  var_label(mydata[[paste0("b_sectordum", sector_idx)]]) <-
    paste0("Bl_Sector==", sector_idx)
}
# Small-location variables (the two "Mfs Site" variables) to be encoded:
# the frequency tables printed by encode_location() show the site names
# replaced by numeric codes.
locvars <- c("e_v1d", "e_v4d")
mydata <- encode_location(
  variables = locvars,
  missing = 999999
)
## [1] "Frequency table before encoding"
## e_v1d. T: Mfs Site
## BURUHUKIRO GATARE NKOMANE UWINKINGI
## 17 21 24 30
## RUHERU NYABIMATA MUGANZA NKUNGU
## 35 25 14 24
## GIHEKE KAMEMBE RUGABANO MUTUNTU
## 44 12 57 19
## GASHALI KAVUMU MUHANDA Other (please specify)
## 20 27 21 40
## <NA>
## 532
## [1] "Frequency table after encoding"
## e_v1d. T: Mfs Site
## 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 <NA>
## 14 12 40 19 24 21 57 27 24 30 44 21 17 35 20 25 532
## [1] "Frequency table before encoding"
## e_v4d. C: Mfs Site
## BURUHUKIRO GATARE NKUNGU RUGABANO
## 4 1 1 4
## MUTUNTU KAVUMU MUHANDA Other (please specify)
## 1 3 1 1
## <NA>
## 946
## [1] "Frequency table after encoding"
## e_v4d. C: Mfs Site
## 904 905 906 907 908 909 910 911 <NA>
## 4 3 1 1 1 1 1 4 946
# Focus on variables with a "Lowest Freq" of 10 or less.
# Education is collapsed into five bands. The break points are the lower
# bounds of the half-open intervals shown in the recode cross-table:
# [1,3), [3,9), [9,15), [15,88) and [88, <missing code>).
break_edu <- c(1, 3, 9, 15, 88)

# Value labels for the five recoded bands (label text -> recoded value).
labels_edu <- setNames(
  c(1, 2, 3, 4, 5),
  c(
    "1. No school, Kindergarten/Nursery",
    "2. Primary (P1-P6)",
    "3. Secondary (S1-S6)",
    "4. Vocational training or College/University",
    "5. Don't know"
  )
)

# Recode the woman's education variable.
mydata <- ordinal_recode(
  variable = "b_hh8",
  break_points = break_edu,
  missing = 8888,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## b_hh8. Hh8. Woman Education
## No school P1 P2 P3 P4
## 18 6 7 18 42
## P5 P6 S1 S2 S3
## 86 244 89 69 49
## S4 S5 S6 Vocational training College/University
## 14 19 50 3 4
## Don't know <NA>
## 45 199
## recoded
## [1,3) [3,9) [9,15) [15,88) [88,8.89e+03)
## 1 18 0 0 0 0
## 3 0 6 0 0 0
## 4 0 7 0 0 0
## 5 0 18 0 0 0
## 6 0 42 0 0 0
## 7 0 86 0 0 0
## 8 0 244 0 0 0
## 9 0 0 89 0 0
## 10 0 0 69 0 0
## 11 0 0 49 0 0
## 12 0 0 14 0 0
## 13 0 0 19 0 0
## 14 0 0 50 0 0
## 15 0 0 0 3 0
## 16 0 0 0 4 0
## 88 0 0 0 0 45
## [1] "Frequency table after encoding"
## b_hh8. Hh8. Woman Education
## 1. No school, Kindergarten/Nursery 2. Primary (P1-P6)
## 18 403
## 3. Secondary (S1-S6) 4. Vocational training or College/University
## 290 7
## 5. Don't know <NA>
## 45 199
## [1] "Inspect value labels and relabel as necessary"
## 1. No school, Kindergarten/Nursery 2. Primary (P1-P6)
## 1 2
## 3. Secondary (S1-S6) 4. Vocational training or College/University
## 3 4
## 5. Don't know
## 5
# Recode the man's education variable with the same break points and value
# labels (break_edu / labels_edu) used for b_hh8.
mydata <- ordinal_recode(
  variable = "b_hh9",
  break_points = break_edu,
  missing = 8888,
  value_labels = labels_edu
)
## [1] "Frequency table before encoding"
## b_hh9. Hh9. Man Education
## No school Kindergarten/Nursery P1 P2
## 28 1 5 16
## P3 P4 P5 P6
## 27 54 109 191
## S1 S2 S3 S4
## 56 72 48 16
## S5 S6 Vocational training College/University
## 17 58 3 6
## Don't know <NA>
## 56 199
## recoded
## [1,3) [3,9) [9,15) [15,88) [88,8.89e+03)
## 1 28 0 0 0 0
## 2 1 0 0 0 0
## 3 0 5 0 0 0
## 4 0 16 0 0 0
## 5 0 27 0 0 0
## 6 0 54 0 0 0
## 7 0 109 0 0 0
## 8 0 191 0 0 0
## 9 0 0 56 0 0
## 10 0 0 72 0 0
## 11 0 0 48 0 0
## 12 0 0 16 0 0
## 13 0 0 17 0 0
## 14 0 0 58 0 0
## 15 0 0 0 3 0
## 16 0 0 0 6 0
## 88 0 0 0 0 56
## [1] "Frequency table after encoding"
## b_hh9. Hh9. Man Education
## 1. No school, Kindergarten/Nursery 2. Primary (P1-P6)
## 29 402
## 3. Secondary (S1-S6) 4. Vocational training or College/University
## 267 9
## 5. Don't know <NA>
## 56 199
## [1] "Inspect value labels and relabel as necessary"
## 1. No school, Kindergarten/Nursery 2. Primary (P1-P6)
## 1 2
## 3. Secondary (S1-S6) 4. Vocational training or College/University
## 3 4
## 5. Don't know
## 5
# Top-code household composition variables with large and unusual numbers.
# Household size (b_hh_masked): 10 or more individuals become one category.
mydata <- top_recode(
  "b_hh_masked",
  break_point = 10,
  missing = 999999
)
## [1] "Frequency table before encoding"
## b_hh_masked. Hh. Household Size
## 2 3 4 5 6 7 8 9 10 11 12 13+ <NA>
## 16 55 57 113 115 148 104 76 48 21 5 5 199
## [1] "Frequency table after encoding"
## b_hh_masked. Hh. Household Size
## 2 3 4 5 6 7 8 9 10 or more
## 16 55 57 113 115 148 104 76 79
## <NA>
## 199
# Household size (hh): 10 or more individuals become one category.
mydata <- top_recode(
  "hh",
  break_point = 10,
  missing = 999999
)
## [1] "Frequency table before encoding"
## hh. Hh. Household Size
## 2 3 4 5 6 7 8 9 10 11 12 13 14
## 23 71 74 139 148 182 136 91 59 27 7 4 1
## [1] "Frequency table after encoding"
## hh. Hh. Household Size
## 2 3 4 5 6 7 8 9 10 or more
## 23 71 74 139 148 182 136 91 98
# Top-code the number of household adults and of household adults working.
# Adults (>18) in the household: 6 or more become one category.
mydata <- top_recode(
  "b_hh6_masked",
  break_point = 6,
  missing = 999999
)
## [1] "Frequency table before encoding"
## b_hh6_masked. Hh6. Adults>18
## 1 or less 2 3 4 5 6 7 8+ <NA>
## 97 266 187 100 68 26 9 10 199
## [1] "Frequency table after encoding"
## b_hh6_masked. Hh6. Adults>18
## 1 or less 2 3 4 5 6 or more <NA>
## 97 266 187 100 68 45 199
# Working adults (>18) in the household: 4 or more become one category.
mydata <- top_recode(
  "b_hh7_masked",
  break_point = 4,
  missing = 999999
)
## [1] "Frequency table before encoding"
## b_hh7_masked. Hh7. Adults>18 Working
## 0 1 2 3 4 5 6 7+ <NA>
## 278 184 183 66 25 15 6 6 199
## [1] "Frequency table after encoding"
## b_hh7_masked. Hh7. Adults>18 Working
## 0 1 2 3 4 or more <NA>
## 278 184 183 66 52 199
# Birth-month variables are dropped: dates of birth are strong identifiers and
# ages are already provided in separate variables.
mydata <- mydata[
  !(names(mydata) %in% c("b_d2_month", "e_d1a_month"))
]
# !!!Include relevant variables in list below
# Names of the variables flagged as indirect PII. The vector is passed, in
# this order, to capture_tables() at the end of this block.
# NOTE(review): capture_tables() is defined outside this section (presumably in
# the sourced helper file); it appears to produce the per-variable frequency
# summaries to be checked for low-frequency categories -- confirm.
indirect_PII <- c("b_age_imputed",
"b_conditions",
"b_filliterate",
"b_fjunsecondary",
"b_fprimary",
"b_fsensecondary",
"b_ftertiary",
"b_harassment",
"b_hclprevalence",
"b_hclprevalence_project",
"b_hcltothrs",
"b_healthissues",
"b_hh_below15_masked",
"b_hrsconstruction",
"b_hrsdomestic",
"b_hrsfarming",
"b_hrsfire",
"b_hrshandcraft",
"b_hrslivestock",
"b_hrsmore100",
"b_hrsother",
"b_hrswashing",
"b_hrswater",
"b_hrsworked_tot",
"b_hrsworked_tot_nodom_masked",
"b_hw1_e_dum",
"b_hw1_f_dum",
"b_hw1_g_dum",
"b_hw1_h_dum",
"b_hw1_i_dum",
"b_hw1_j_dum",
"b_hw1_k_dum",
"b_hw1_l_dum",
"b_hw1_m_dum",
"b_hw1_n_dum",
"b_hw1_o_dum",
"b_hw1_p_dum",
"b_hw1_q_dum",
"b_hw1_r_dum",
"b_hw1_s_dum",
"b_hw1_t_dum",
"b_hw1_u_dum",
"b_hw1_v_dum",
"b_hw1_w_dum",
"b_hw1_x_dum",
"b_hw1_y_dum",
"b_hw1_z_dum",
"b_hw1_st_dum",
"b_hw1_yz_dum",
"b_i1_a_dum",
"b_i1_b_dum",
"b_i1_c_dum",
"b_i1_d_dum",
"b_i1_e_dum",
"b_i1_f_dum",
"b_i1_g_dum",
"b_i1_h_dum",
"b_i1_i_dum",
"b_i1_j_dum",
"b_i1_k_dum",
"b_i1_l_dum",
"b_i1_nonpoultry",
"b_institutions",
"b_interview_month",
"b_interview_year",
"b_lastjunsecondary",
"b_lastnosch",
"b_lastprimary",
"b_lastsensecondary",
"b_lastvoctraining",
"b_locations",
"b_machineryuse",
"b_mfs_agrwithoutprotection",
"b_milliterate",
"b_minor",
"b_mjunsecondary",
"b_mprimary",
"b_msensecondary",
"b_mtertiary",
"b_repetition",
"b_single",
"b_useproduct",
"e_activities",
"e_HRS_WKD",
"e_HRS_WKD2",
"e_HZAG",
"e_NO_REST_DAY",
"e_conditions",
"e_conditions2",
"e_harassment",
"e_healthissues",
"e_hlprevalence",
"e_institutions",
"e_locations",
"e_machineryuse",
"e_mfs_agwoprotect",
"e_minor",
"e_protectivegear",
"e_tot_hrs_pastweek_masked",
"e_useproduct",
"female",
"age_imputed",
"single",
"hw1m_d",
"hh1",
"hh5",
"i1d",
"i2",
"b_q17_ownphone",
"b_d1_female",
"b_d2_year",
"b_d3",
"b_d4",
"b_d4a",
"b_d4b",
"b_d4b_days",
"b_d5a",
"b_d5b",
"b_d5c",
"b_w1a",
"b_w1a_work",
"b_w1a1",
"b_w1a2",
"b_w1a3",
"b_w1b",
"b_w1b1",
"b_w1b2",
"b_w1b3",
"b_w1c",
"b_w1c1",
"b_w1c2",
"b_w1c3",
"b_w1d",
"b_w1d1",
"b_w1d2",
"b_w1d3",
"b_w1e",
"b_w1e1",
"b_w1e2",
"b_w1e3",
"b_w1f",
"b_w1f1",
"b_w1f2",
"b_w1f3",
"b_w2a",
"b_w2b",
"b_w2c",
"b_w2d",
"b_w2e",
"b_w2f",
"b_w2g",
"b_w2h",
"b_w2i",
"b_w2_other",
"b_w3_1a",
"b_w3_1b",
"b_w3_1c",
"b_w3_1d",
"b_w3_1e_masked",
"b_w3_2a",
"b_w3_2b",
"b_w3_2c",
"b_w3_2d",
"b_w3_2e_masked",
"b_w3_3a",
"b_w3_3b",
"b_w3_3c",
"b_w3_3d",
"b_w3_3e_masked",
"b_w3_4a",
"b_w3_4b",
"b_w3_4c",
"b_w3_4d",
"b_w3_4e_masked",
"b_w3_5a",
"b_w3_5b",
"b_w3_5c",
"b_w3_5d",
"b_w3_5e_masked",
"b_w3_6a",
"b_w3_6b",
"b_w3_6c",
"b_w3_6d",
"b_w3_6e_masked",
"b_w3_7a",
"b_w3_7b",
"b_w3_7c",
"b_w3_7d",
"b_w3_7e_masked",
"b_w3_8a",
"b_w3_8b",
"b_w3_8c",
"b_w3_8d",
"b_w3_8e_masked",
"b_w3_9a",
"b_w3_9b",
"b_w3_9c",
"b_w3_9d",
"b_w3_9e_masked",
"b_w3_10a",
"b_w3_10b",
"b_w3_10c",
"b_w3_10d",
"b_w3_10e_masked",
"b_w3_11",
"b_w3_12",
"b_w3_13",
"b_w3_14",
"b_w3_15",
"b_w3_16",
"b_w3_17",
"b_w3_21",
"b_w3_22",
"b_w3_23",
"b_w3_24",
"b_w3_25",
"b_w3_26",
"b_w3_27",
"b_w3_31",
"b_w3_32",
"b_w3_33",
"b_w3_34",
"b_w3_35",
"b_w3_36",
"b_w3_37",
"b_w3_41",
"b_w3_42",
"b_w3_43",
"b_w3_44",
"b_w3_45",
"b_w3_46",
"b_w3_47",
"b_w3_51",
"b_w3_52",
"b_w3_53",
"b_w3_54",
"b_w3_55",
"b_w3_56",
"b_w3_57",
"b_w3_61",
"b_w3_62",
"b_w3_63",
"b_w3_64",
"b_w3_65",
"b_w3_66",
"b_w3_67",
"b_w3_71",
"b_w3_72",
"b_w3_73",
"b_w3_74",
"b_w3_75",
"b_w3_76",
"b_w3_77",
"b_w3_81",
"b_w3_82",
"b_w3_83",
"b_w3_84",
"b_w3_85",
"b_w3_86",
"b_w3_87",
"b_w3_91",
"b_w3_92",
"b_w3_93",
"b_w3_94",
"b_w3_95",
"b_w3_96",
"b_w3_97",
"b_w3_101",
"b_w3_102",
"b_w3_103",
"b_w3_104",
"b_w3_105",
"b_w3_106",
"b_w3_107",
"b_w4b_1",
"b_w4b_2",
"b_w4b_3",
"b_w4b_4",
"b_w4b_5",
"b_w4b_6",
"b_w4b_7",
"b_w4b_8",
"b_w4b_9",
"b_w4b_10",
"b_w4b_other",
"b_w5_1",
"b_w5_2",
"b_w5_3",
"b_w5_4",
"b_w5_5",
"b_w5_6",
"b_w6_1",
"b_w6_2",
"b_w6_3",
"b_w6_4",
"b_w6_5",
"b_w6_6",
"b_w6_7",
"b_w7_1",
"b_w7_2",
"b_hw1_a",
"b_hw1_b",
"b_hw1_c",
"b_hw1_d",
"b_hw1_e",
"b_hw1_f",
"b_hw1_farming",
"b_hw1_g",
"b_hw1_h",
"b_hw1_i",
"b_hw1_j",
"b_hw1_k",
"b_hw1_l",
"b_hw1_m",
"b_hw1_n",
"b_hw1_o",
"b_hw1_oth",
"b_hw1_p",
"b_hw1_q",
"b_hw1_r",
"b_hw1_s",
"b_hw1_t",
"b_hw1_u",
"b_hw1_v",
"b_hw1_w",
"b_hw1_x",
"b_hw1_y",
"b_hw1_z",
"b_hw2_a",
"b_hw2_b",
"b_hw2_c",
"b_hw2_d",
"b_hw2_e",
"b_hw2_f",
"b_hw2_g",
"b_hw2_h",
"b_hw2_i",
"b_hw2_j",
"b_hw2_k",
"b_hw2_l",
"b_hw2_m",
"b_hw2_n",
"b_hw2_o",
"b_hw3_a",
"b_hw3_b",
"b_hw3_c",
"b_hw3_d",
"b_hw3_e",
"b_hw3_f",
"b_hw3_g",
"b_hw3_h",
"b_hw3_i",
"b_hw3_j",
"b_hw3_k",
"b_hw3_l",
"b_hw3_m",
"b_hw4_a",
"b_hw4_b",
"b_hw4_c",
"b_hw4_d",
"b_hw4_e",
"b_hw4_f",
"b_hw4_g",
"b_hw4_h",
"b_hw4_i",
"b_hw4_j",
"b_hw4_k",
"b_hw4_l",
"b_hw4_m",
"b_hw4_n_other1",
"b_hw4_n_other2",
"b_hw5_a",
"b_hw5_b",
"b_hw5_c",
"b_hw5_d",
"b_hh1_masked",
"b_hh2_masked",
"b_hh3_masked",
"b_hh4_masked",
"b_hh5",
"b_i1_b_masked",
"b_i1_c_masked",
"b_i1_d_masked",
"b_i1_e_masked",
"b_i1_f_masked",
"b_i1_g_masked",
"b_i1_h_masked",
"b_i1_i_masked",
"b_i1_j_masked",
"b_i1_k_masked",
"b_i1_l_masked",
"b_i2",
"b_i2plots_masked",
"b_i3plots_masked",
"b_ci21",
"e_interview_month",
"e_interview_year",
"e_q14",
"e_d1a_year_masked",
"e_d1b_masked",
"e_d2",
"e_d3a",
"e_d3b",
"e_d3bsp_masked",
"e_w1a_tea",
"e_w1a_coff",
"e_w1a_rice",
"e_w1b_tea",
"e_w1b_coff",
"e_w1b_rice",
"e_w1c_a",
"e_w1c_b",
"e_w1c_c",
"e_w1c_d",
"e_w1c_e",
"e_w1c_f",
"e_w1c_g",
"e_w1c_h",
"e_w1c_i",
"e_w1c_j",
"e_w1c_k",
"e_w1c_l",
"e_w1d1a",
"e_w1d1b",
"e_w1d1c",
"e_w1d1d",
"e_w1d1e",
"e_w1d1f",
"e_w1d1g",
"e_w1d1h",
"e_w1d1i",
"e_w1d1j",
"e_w1d1k",
"e_w1d1l",
"e_w1d1m",
"e_w1d1n",
"e_w1d1o",
"e_w1d1p",
"e_w1d1q",
"e_w1d1r",
"e_w1d1s",
"e_w1d1t",
"e_w1d1u",
"e_w1d1v",
"e_w1d1z",
"e_w1d2a",
"e_w1d2b",
"e_w1d2c",
"e_w1d2d",
"e_w1d2e",
"e_w1d2f",
"e_w1d2g",
"e_w1d2h",
"e_w1d2i",
"e_w1d2j",
"e_w1d2k",
"e_w1d2l",
"e_w1d2m",
"e_w1d2n",
"e_w1d2o",
"e_w1d2p",
"e_w1d2q",
"e_w1d2r",
"e_w1d2s",
"e_w1d2t",
"e_w1d2u",
"e_w1d2v",
"e_w1d2z",
"e_w2a1",
"e_w2a2",
"e_w2a3",
"e_w2a4",
"e_w2a5",
"e_w2a6",
"e_w2a7",
"e_w2b_masked",
"e_w2c_masked",
"e_w2d",
"e_w3a",
"e_w3b",
"e_w3c",
"e_w3d",
"e_w3e",
"e_w3f",
"e_hw1a",
"e_hw1b",
"e_hw1c",
"e_hw1d",
"e_hw1e",
"e_hw1f",
"e_hw2a",
"e_hw2b",
"e_hw2c",
"e_hw2d",
"e_hw2e",
"e_hw2f",
"e_hw2g",
"e_hw2h",
"e_hw2i",
"e_hw2j",
"e_hw2k",
"e_hw2l",
"e_hw2m",
"e_hw2n",
"e_hw2o",
"e_hw3a",
"e_hw3b",
"e_hw3c",
"e_hw3d",
"e_hw3e",
"e_hw3f",
"e_hw3g",
"e_hw3h",
"e_hw3i",
"e_hw3j",
"e_hw3k",
"e_hw3l",
"e_hw3m",
"e_hw4a",
"e_hw4b",
"e_hw4c",
"e_hw4d",
"e_hw4e",
"e_hw4f",
"e_hw4g",
"e_hw4h",
"e_hw4i",
"e_hw4j",
"e_hw4k",
"e_hw4l",
"e_hw4m",
"e_hw5a",
"e_hw5b",
"e_hw5c",
"e_hw5d")
# Hand the full list to the helper for tabulation.
capture_tables (indirect_PII)
# Based on dictionary inspection, select variables for creating sdcMicro object
# See: https://sdcpractice.readthedocs.io/en/latest/anon_methods.html
# All variable names should correspond to the names in the data file
# selected categorical key variables: gender, occupation/education and age
# Build a single education key variable: start from b_d5a and, where it is
# missing, fall back to b_d4a.
# NOTE(review): `educ` is added to mydata itself, so it is still a column when
# mydata is exported later in this script -- confirm that is intended.
mydata$educ <- mydata$b_d5a
mydata$educ[is.na(mydata$educ)] <- mydata$b_d4a[is.na(mydata$educ)]

##!!! Replace with candidate categorical demo vars
selectedKeyVars <- c("female", "age_imputed", "educ")

# weight variable
# selectedWeightVar = c('projwt') ##!!! Replace with weight var
# household id variable (cluster)
# selectedHouseholdID = c('wpid') ##!!! Replace with household id

# Create the sdcMicro object with the categorical key variables only (no
# weight or household id is supplied), then display it.
sdcInitial <- createSdcObj(
  dat = mydata,
  keyVars = selectedKeyVars
)
sdcInitial
## The input dataset consists of 962 rows and 700 variables.
## --> Categorical key variables: female, age_imputed, educ
## ----------------------------------------------------------------------
## Information on categorical key variables:
##
## Reported is the number, mean size and size of the smallest category >0 for recoded variables.
## In parenthesis, the same statistics are shown for the unmodified data.
## Note: NA (missings) are counted as seperate categories!
## Key Variable Number of categories Mean size Size of smallest (>0)
## female 2 (2) 481.000 (481.000) 441 (441)
## age_imputed 4 (4) 320.000 (320.000) 68 (68)
## educ 12 (12) 69.364 (69.364) 1 (1)
## ----------------------------------------------------------------------
## Infos on 2/3-Anonymity:
##
## Number of observations violating
## - 2-anonymity: 0 (0.000%)
## - 3-anonymity: 0 (0.000%)
## - 5-anonymity: 0 (0.000%)
##
## ----------------------------------------------------------------------
# !!!No records violate 2-anonymity
# Open-ends: review responses for any sensitive information, redact as necessary
# !!! Identify open-end variables here:
# Open-ended variables to review.
open_ends <- c("b_d3_other", "e_v3a_other_transl", "e_v4a_other_transl")
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and
# their row number.

# Dropped entirely: actually verbatim data in Kinyarwanda.
mydata <- mydata[!(names(mydata) %in% "b_d3_other")]

# Redacted: rows holding verbatim data in Kinyarwanda.
mydata$e_v4a_other_transl[c(3, 15, 18)] <- "[Kinyarwanda]"
# Redacted: a small location appears in the response.
mydata$e_v4a_other_transl[4] <- "[Location]"
# Redacted: a school name appears in the response.
mydata$e_v4a_other_transl[16] <- "[School]"
# !!! No GPS
# Adds "_PU" (Public Use) to the end of the name
# Write the dataset in Stata (.dta) and SPSS (.sav) formats; both file names
# are `filename` with "_PU" appended.
haven::write_dta(
  data = mydata,
  path = paste0(filename, "_PU.dta")
)
haven::write_sav(
  data = mydata,
  path = paste0(filename, "_PU.sav")
)

# Add report title dynamically
title_var <- paste0("DOL-ILAB SDC - ", filename)