#' ---
#' author: "Pablo Diego-Rosell, PhD"
#' email: "pablo_diego_rosell@yahoo.com"
#' output:
#'    html_document:
#'      toc: true
#'      theme: united
#' ---

# Start from a clean workspace. `all.names` is spelled out in full and given a
# real logical: `t` is the base transpose function, not `TRUE`, so `all=t`
# did not request hidden (dot-prefixed) objects.
rm(list = ls(all.names = TRUE))

#'# Setup filenames

filename <- "Section_7" # !!!Update filename
functions_vers <- "functions_1.8.R" # !!!Update helper functions file

#'# Setup data, functions and create dictionary for dataset review
# NOTE(review): `mydata` and the helpers used further down (top_recode,
# capture_tables, report_open) are not defined in this script; presumably they
# are created by the sourced helper file -- confirm.
source(functions_vers)
#'
#' Visually inspect variables in "dictionary.csv" and flag for risk, using the following flags: 
# Direct PII: Respondent Names, Addresses, Identification Numbers, Phone Numbers
# Direct PII-team: Interviewer Names, other field team names 
# Indirect PII-ordinal: Date of birth, Age, income, education, household composition. 
# Indirect PII-categorical: Gender, education, ethnicity, nationality,
# occupation, employer, head of household, marital status
# GPS: Longitude, Latitude
# Small Location: Location (<100,000)
# Large Location: Location (>100,000)
# Weight: weightVar
# Household ID: hhId
# Open-ends: Review responses for any sensitive information, redact as necessary 

#'# Direct PII: variables to be removed

# Strip the value labels from the household identifier; the column itself is
# kept in the data set.
mydata[["household_id"]] <- zap_labels(mydata[["household_id"]])

#'# Direct PII-team: Encode field team names
# !!!No Direct PII - team


#'# Small locations: Encode locations  with pop <100,000 using random large numbers
# !!!Small locations

#'# Indirect PII - Ordinal: Global recode or Top/bottom coding for extreme values

# Top code high income/expenses to the 99.5 percentile

# m_farm_expenses is coerced to numeric before its percentile is computed.
mydata$m_farm_expenses <- as.numeric(mydata$m_farm_expenses)

# Variables to top-code, in processing order.
top_code_vars <- c("m_s6q3", "m_s6q7", "m_farm_expenses", "m_s6q12",
                   "m_s7q4", "m_s7q7", "m_s7q11", "m_s7q14",
                   "m_s7q18", "m_s7q21")

# 999999 is handed to top_recode() as the missing-value code and is left out
# of the percentile calculation.
missing_code <- 999999

for (top_code_var in top_code_vars) {
  # Observed values only: drop NA first, then the missing-value code.
  observed <- na.exclude(mydata[[top_code_var]])
  percentile_99.5 <- floor(quantile(observed[observed != missing_code],
                                    probs = c(0.995)))
  # top_recode() returns the updated data set, which replaces `mydata`.
  mydata <- top_recode(variable = top_code_var,
                       break_point = percentile_99.5,
                       missing = missing_code)
}

#'# Indirect PII - Categorical: Recode, encode, or Top/bottom coding for extreme values
# !!!Include relevant variables in list below (Indirect PII - Categorical, and Ordinal if not processed yet)

# Indirect-PII items follow the pattern m_s<section>q<item>; the names are
# built from the item numbers flagged in sections 6 and 7.
indirect_PII <- c(
  paste0("m_s6q", c(1, 2, 4, 5, 6, 8, 10, 11, 13)),
  paste0("m_s7q", c(1, 2, 3, 5, 6, 8, 9, 10, 12, 13, 15, 16, 17, 19, 20))
)

# Tabulate each flagged variable for review.
capture_tables(indirect_PII)

# Recode those with very specific values. 
# !!!No very specific values

#'# Matching and crosstabulations: Run automated PII check 
# !!!Insufficient demographic data


#'# Open-ends: review responses for any sensitive information, redact as necessary
#' 
# !!! Identify open-end variables here: 
open_ends <- c(
  "m_s6q4_other", "m_s6q6_other", "m_s6q8_warning", "m_s6q13_other",
  "m_endnote6",
  "m_s7q3_other", "m_s7q6_other", "m_s7q10_other", "m_s7q13_other",
  "m_s7q17_other", "m_s7q20_other",
  "m_endnote7"
)

# The same twelve open-end variables are also tabulated through
# capture_tables(), so `indirect_PII` is re-pointed at them.
indirect_PII <- open_ends

capture_tables(indirect_PII)

# Produce the verbatim report for the open-end variables.
report_open(list_open_ends = open_ends)

# Review "verbatims.csv". Identify variables to be deleted or redacted and their row number 

# Row-level redactions of the `*_other` open-end responses. Every statement
# overwrites one response addressed by its row position in `mydata`, so the
# row order of `mydata` must not change before this point.

mydata$m_s6q4_other[1003] <- "[Tagalog]"
mydata$m_s6q4_other[1294] <- "[Tagalog]"

mydata$m_s6q13_other[260] <- "[Tagalog]"
mydata$m_s6q13_other[346] <- "[Tagalog]"
# NOTE(review): "(4500)" is kept in the replacement text below; confirm it is
# not an amount that should be redacted as well.
mydata$m_s6q13_other[352] <- "4 kls. Of Certified Seeds from Local Government of [municipality],(4500)"
mydata$m_s6q13_other[1468] <- "Department of Agriculture and UP [municipality]"

mydata$m_s7q6_other[423] <- "[Tagalog]"
mydata$m_s7q6_other[643] <- "[Tagalog]"
mydata$m_s7q6_other[1046] <- "[municipality] municipal government"
mydata$m_s7q6_other[1469] <- "[Tagalog]"

mydata$m_s7q10_other[281] <- "[Tagalog]"
mydata$m_s7q10_other[536] <- "Fond of [Tagalog]"

mydata$m_s7q13_other[546] <- "[Tagalog]"
mydata$m_s7q13_other[1029] <- "[Tagalog]"
mydata$m_s7q13_other[1938] <- "[Tagalog]"

# Row 268 used to be assigned twice ("[Tagalog]" first, then the value below);
# only the last assignment ever took effect, so the dead one is dropped.
# NOTE(review): confirm row 268 is the intended target of this redaction.
mydata$m_s7q20_other[268] <- "[language], [name]"
mydata$m_s7q20_other[897] <- "[Tagalog]"
mydata$m_s7q20_other[1030] <- "[Tagalog]"

# Redacted replacements for m_endnote6, keyed by the row position in `mydata`
# (the element name is the row number, the value is the text written there).
endnote6_redactions <- c(
  "16" = "Respondent said that even in baseline survey, they already spending [amount redacted] in cultivating crops. He can't recall that he's been asked regarding the matter.",
  "27" = "They plant last [date] spend [amount redacted] but they have not plant yet becausr they're waiting for the rain before they can plant",
  "28" = "They plant last [date] and harvested [date], they havent plant yet so thry have no idea yet how much will cost them, so they cant compare if the expensed decrease or increase",
  "53" = "They plant once a year, every July, last [date] they spend [amount redacted] and they know yet if their expense is increased because they have' nt plant yet for this year.",
  "62" = "They did'nt cultivate crops last 2016 because 2 of the household member got sick, they cultivate crops this year but corn only and spend [amount redacted] for seeds and fertilizer, they are waiting for the rain so that they can cultivate crops (rice grain)",
  "70" = "Bought seed at [amount redacted] only for  their owned consumption",
  "92" = "He told that during baseline survey their expenses in their crops is only [amount redacted], he include their not on crops in the [amount redacted] expenses last baseline.",
  "110" = "On the baseline survey their expenses is [amount redacted] while for Last 12 months their expenses estimated [amount redacted]",
  "156" = "The farm they cultivitang is owned by 4 household (4siblings), so every year, different household is cultivating the farm. Their turn to cultivate was last [date].",
  "173" = "[name] planted vegetables at their backyard for their consumption, she is not using any fertilizer, no expenses, as well as the seeds were given by friends and neighbors",
  "190" = "Bought the seed for croping, spend [amount redacted]",
  "215" = "He recieved half sack of fertilizer from DA  He spent [amount redacted] for 2 corn farm, and he spent [amount redacted] for the rice field last [date] which is not included in the baseline.",
  "218" = "Last 12 months they spend [amount redacted] in land cultivation  , the expenses increased on labor from [amount redacted] to [amount redacted] plus free meal. The money they spend half of it came from loan to a friend and half of it from their savings",
  "219" = "Respondent only spend [amount redacted] pesos for the seeds since they were not yet started cultivating the farm. She said, that they will just be starting on Suday.",
  "223" = "Respondent planted eggplant which she spent [amount redacted].",
  "236" = "They planted 'camote' last April and spent [amount redacted] for it.",
  "240" = "Respondent said that fro, [amount redacted], they already spend [amount redacted] pesos for cultivating crops.",
  "257" = "Seed- [amount redacted], fertilizers- [amount redacted], pesticides-[amount redacted], insecticides- [amount redacted]. Total [amount redacted]",
  "259" = "[amount redacted] seed of eggplant and sili",
  "261" = "[Language]",
  "262" = " During te baseline survey she remembered that shemention to the enumerator that thay have spent [amount redacted] in cultivating their crops, and for the last 12 months they have spent the same amount",
  "280" = "They planted rice even before the baseline interview but was not stated the amount they spent even though he said it to the interviewer based from him. They bought the land last 2002. For tbe last 12 mos they have spent [amount redacted]",
  "306" = "Fertilizer-[amount redacted], Rutubator-[amount redacted], Pagpapatanim- [amount redacted], Tagapag bunot-[amount redacted], Tagasuyod- [amount redacted], Pamatay nang damu- [amount redacted], Snail-[amount redacted], Crude- [amount redacted]  Total of [amount redacted] pesos.",
  "317" = "FERTILIZER- [amount redacted], SEEDS- [amount redacted], PESTICIDES-[amount redacted], LABOR [amount redacted] INSECTICIDES-[amount redacted] total=[amount redacted]",
  "327" = "She said that they spwnt [amount redacted] during the last year before basekine interview for cultivating plants. Last October they have spent [amount redacted] for the cultivation of their corn crop.",
  "337" = "Fertilizers=[amount redacted], Seed=[amount redacted], Pesticides=[amount redacted], Nag Araro=[amount redacted], Nagtanim=[amount redacted]. Total of [amount redacted].",
  "343" = "Seed [amount redacted], abono [amount redacted], pamatay nang damo [amount redacted], =[amount redacted]",
  "377" = "Abono=[amount redacted], patubig=[amount redacted], Pesticides= [amount redacted], Seeds= [amount redacted] TOTAL OF [amount redacted]. the seeds must be gave to them by Department of Agriculture.",
  "378" = "Abono=[amount redacted], fertilizer - [amount redacted], pesticides= [amount redacted], laborer [amount redacted], expenses in the sides of the farm (seeds given by the neigborhood) [amount redacted]  Total =[amount redacted].",
  "403" = "The seeds or plant they gather in the other farm is the what they used in planting, the amount of [amount redacted] in the sFq13 was the amount of the fertilizer.",
  "420" = "Last year the respondent cultivated [amount redacted] sq.m for his cassava plants. And now his lot rented for 3 years with [amount redacted]. He owns only backyard gardening.",
  "468" = "[Language]",
  "571" = "The household loaned the [amount redacted] and the saving of [amount redacted] in using to plant",
  "599" = " The DOLE gave them amounted [amount redacted] pesos that they sell, and the [amount redacted] pesos which  comes from their savings, with the total of [amount redacted] pesos",
  "617" = "[Language]",
  "646" = "Last year the household cultivated 2 ha. and spent [amount redacted] and now spent [amount redacted] for 1 ha",
  "651" = "Respondent said that they received a pack of seeds from Brgy. [barangay name] for them to plant.",
  "678" = "[amount redacted] from DSWD AAthat they reinvested in planting im the farm and [amount redacted] from micro credit loan and the rest is [amount redacted] from teir savings",
  "679" = "They gather wild plant and transfered from the farm to their backyard. [language]",
  "715" = "July 2016 start to plant on rented land",
  "937" = "She only purchased land with the amount [amount redacted], installment  of [amount redacted] every month. She already done for installment in 10 months.",
  "949" = "[Language]",
  "1062" = "[amount redacted] ung fertilizer, [amount redacted] seeds",
  "1064" = "Since last survey the household already has a [amount redacted] pesos expenses for 12 months.",
  "1126" = "The farm they used were owned by [name] and his siblings. So they shared in expenses and same with  the crops they harvest.",
  "1140" = "Last year they spend [amount redacted] pesosi order to cultivate crops. They spend the same in this year",
  "1283" = "Contradicting the data collected last year, the respondent said that they were spending [amount redacted] every year for all the inputs needed to cultivate crops.",
  "1303" = "The respondant told me the they spent roughly [amount redacted] pesos every year to cultivate crops and they spend stayed same in the last 12 months but in sFq12 stated that the household spent [amount redacted] in the previous year",
  "1307" = "The respondent spent roughly [amount redacted] in the last 12 months in cultivating crops ([amount redacted] in fertilizer and [amount redacted] in labor)and almost a half of it [amount redacted] pesos they spent before the baseline survey but in SFQ12 in baseline survey they spent [amount redacted] pesos",
  "1379" = "[amount redacted] was just her share to the expenses, because her children who already have their own family or household were also sharing to the expenses,",
  "1468" = "Their crops is organic, by the help of UP [UP name] they are planting vegetables. They gets free 4  bags of seeds in the department of Agriculture.",
  "2067" = "The cost was decreased because he has only [amount redacted]tupongs of land from the [amount redacted]hectares [language]",
  "2073" = "[amount redacted] for the 1year expenses"
)

# Write every replacement to its row in one vectorised assignment, then drop
# the temporary lookup table so the workspace is left as before.
mydata$m_endnote6[as.integer(names(endnote6_redactions))] <-
  unname(endnote6_redactions)
rm(endnote6_redactions)

# Redacted replacements for m_endnote7, keyed by the row position in `mydata`
# (the element name is the row number, the value is the text written there).
endnote7_redactions <- c(
  "16" = "The [amount redacted] chicks were purchased by the twins, [name], using their salary from the hardware.",
  "18" = "The household's goat gave birth to [amount redacted] new goats. And they also had a new chicks but respondent don't have an idea of the amount of the chicks.",
  "36" = "Respondent said the rooster will cost [amount redacted] if they will bought it but when she asked her husband, she changed her response in favor of her husband's response. Even though, I still took the respondent's 1st response.",
  "80" = "They had purchased [amount redacted] pigs but it was last June 2016.",
  "86" = "They have [amount redacted] goats",
  "110" = "They have [amount redacted] chicken, but the goat they are care is not their ownd",
  "112" = "The household bought [amount redacted] piglets last November 2016 but they killed the [amount redacted] pigs last March 2017 and sold the meat.",
  "219" = "Respondent has no idea of the amount of the new [amount redacted] native chicks.",
  "240" = "Respondent can't tell the amount of the [amount redacted] native chicks because according to her, those were not been sold.",
  "287" = "Baseline respondent said that the sales from the in kind of KASAMA (DOLE) was the main source, they purchase the pig worth [amount redacted]",
  "306" = "[amount redacted] is for 12 person she's the one who've got those piggery. It is an association so [amount redacted] piggery for 12 person belongs the sow and the piglet.   For ducks-[amount redacted], chickens-[amount redacted].",
  "332" = "They dont acquired any new animals. The cow that they have was have an arrangement, the owner give them [amount redacted] cow after giving birth as paid for them.",
  "349" = "In livestock they have also additional [amount redacted] goat (kid) worth [amount redacted]",
  "364" = "[language]",
  # The peso value is masked here like every other monetary amount in these
  # redactions (the replacement text previously still contained it).
  "376" = "Respondent said that they bought  2 pig worth [amount redacted] pesos, from the sales of the in kind transfer of Dole Integrated Livelihood and Emergency Program in which they are now tending.",
  "454" = "3 is for [amount redacted], she buys [amount redacted] but the 2 of them died.",
  "475" = "The large livestock is the carabao and the amount is [amount redacted], the price is low because the household head or [name] bought  it to his sibling",
  "500" = "[language]",
  "872" = "The worth of 2 chikens is [amount redacted]",
  "1029" = "[language]",
  "1045" = "Government transfer came mayor [name]",
  "1050" = "As per the respondent ,on his computation its only costing [amount redacted] pesos only the value of the pigs that they received from DOLE's livelihood program",
  "1127" = "Recently he bought [amount redacted] pcs. Of 45 days chicks.",
  "1213" = "Even before the DOLE gives [amount redacted] piglets to them, the household already tending pigs.",
  "1245" = "[language]",
  "1253" = "Their relatives gave the pig, they made it as patener(mother pig), then during the interview they have [amount redacted] piglets.",
  "1285" = "[name] traded his quail to the mother Duck of his cousin, then the mother duck produces a (male) duck. But mostly of its eggs were stolen.",
  "1317" = "[language]",
  "1351" = "[amount redacted] inakay, ducklings",
  "1357" = "They bought [amount redacted]pcs. Of 45days chicken last March 5, then cooked it last May for the fiesta.",
  "1362" = "They have [amount redacted] ducks and [amount redacted] chicker. Her elder son takee care of them",
  "1472" = "The [amount redacted] goats were fro  another household, they only have an agreement that when it gave birth, it will be parted to each household.",
  "1486" = "4Ps gave them [amount redacted] 'baby goat'"
)

# Write every replacement to its row in one vectorised assignment, then drop
# the temporary lookup table so the workspace is left as before.
mydata$m_endnote7[as.integer(names(endnote7_redactions))] <-
  unname(endnote7_redactions)
rm(endnote7_redactions)


#'# GPS data: Displace
# !!!No GPS data

#'# Save processed data in Stata and SPSS format

# Output paths are relative to the current working directory and share the
# "<filename>_PU" stem.
dta_path <- paste0(filename, "_PU.dta")
sav_path <- paste0(filename, "_PU.sav")

haven::write_dta(mydata, dta_path)
haven::write_sav(mydata, sav_path)

# Report title, picked up by the inline `r title_var` in the YAML block below.
title_var <- paste0("DOL-ILAB SDC - ", filename)
#'---
#'  title: `r title_var`
#'---
