###################################################
# some scrap code for reconciling R results
#
#
#
# chris zhang 8/12/2020
###################################################

library(MASS)
library(dplyr)
library(glm.predict)
## Load the cleaned CPS extract
fp_cps <- '../data/cps/cps_clean_2014.csv'
cps <- read.csv(fp_cps)
# Mean-impute wage12: every missing value is replaced by the mean of the
# observed (non-missing) values of the column.
cps <- cps %>%
  mutate(wage12 = replace(wage12, is.na(wage12), mean(wage12, na.rm = TRUE)))
# Rounding the weights (kept disabled below) seems to silence this warning:
# 1: In eval(family$initialize) : non-integer #successes in a binomial glm!
# cps[, "marsupwt"] <- round(cps[, "marsupwt"], 0)

# No explicit constant is added to the predictors: polr() always fits the
# model with an intercept and expresses it through the estimated cutpoints
# (zeta), so an extra constant column would presumably be collinear with it
# (the rank problem seen when one was added).
## Run ordinal regression
# Single source of truth for the predictors: the same vector builds the model
# formula and selects the matching ACS columns for prediction, so the two
# lists cannot drift apart (the earlier hand-written formula listed occ_3
# twice).
xvars <- c(
  "female", "black", "asian", "native", "other", "age", "agesq", "BA",
  "GradSch", "married", "wage12", "wkswork", "wkhours", "emp_gov",
  paste0("occ_", 1:9),
  paste0("ind_", 1:12)
)
xvar_formula <- paste(xvars, collapse = " + ")

# Outcome is employer size, treated as an ordered categorical response.
formula <- paste("factor(empsize) ~", xvar_formula)
# Training-sample row filter, written as an R expression held in a string;
# "TRUE" keeps every row.
train_filt <- "TRUE"
# use , weight=marsupwt below will trigger starting value issue
# Error in polr(as.formula(formula), data = cps %>% filter_(train_filt),  : 
# attempt to find suitable starting values failed
# filter_() is deprecated in dplyr; parse the string into an expression and
# unquote it into filter() instead.
estimate <- polr(
  as.formula(formula),
  data = cps %>% filter(!!rlang::parse_expr(train_filt))
)

# predict
fp_acs <- '../data/acs/ACS_cleaned_forsimulation_2016_ri_Py.csv'
acs <- read.csv(fp_acs)
# Keep only the model predictors, in the same order as in the formula.
Xd <- acs[, xvars, drop = FALSE]
# Default prediction type for polr is the most likely class per row.
yhat <- predict(estimate, newdata = Xd)


###############
# check emp_gov in clean fmla

# Read the FMLA employee file once (the read and first recode were previously
# repeated verbatim).
d_fmla <- read.csv('../data/fmla/fmla_2012/fmla_2012_employee_revised_puf.csv')
# emp_gov = 1 when D2 is 1, 2 or 3, and 0 otherwise. %in% returns FALSE for a
# missing D2, so NA rows get 0 directly without a separate patch-up step.
d_fmla <- d_fmla %>% mutate(emp_gov = as.numeric(D2 %in% c(1, 2, 3)))


