###################################################
# some scrap code for reconciling R results
#
#
#
# chris zhang 8/12/2020
###################################################
library(MASS)
library(dplyr)
library(glm.predict)
## Load the cleaned 2014 CPS extract
fp_cps <- "../data/cps/cps_clean_2014.csv"
cps <- read.csv(fp_cps)
# Mean-impute wage12: missing entries take the average of the observed values
wage12_mean <- mean(cps[, "wage12"], na.rm = TRUE)
cps[is.na(cps[, "wage12"]), "wage12"] <- wage12_mean
# Rounding the weights appears to silence this warning:
#   1: In eval(family$initialize) : non-integer #successes in a binomial glm!
# cps[, "marsupwt"] <- round(cps[, "marsupwt"], 0)
## Run ordinal regression of employer size class on worker characteristics
# Regressors are kept in a vector so that every term appears exactly once
# (the hand-typed formula string listed occ_3 twice).
xvars <- c(
  "female", "black", "asian", "native", "other", "age", "agesq", "BA",
  "GradSch", "married", "wage12", "wkswork", "wkhours", "emp_gov",
  paste0("occ_", 1:9), paste0("ind_", 1:12)
)
xvar_formula <- paste(xvars, collapse = " + ")
formula <- paste("factor(empsize) ~ ", xvar_formula)

# Training-row filter, written as an R expression in a string;
# "TRUE" keeps every row.
train_filt <- "TRUE"
# dplyr::filter_() is deprecated, so the condition is evaluated against the
# columns of cps directly. Rows where it evaluates to NA are dropped, which
# matches how filter treats them.
train_rows <- eval(parse(text = train_filt), envir = cps)
cps_train <- cps[!is.na(train_rows) & train_rows, , drop = FALSE]

# Passing weights = marsupwt to polr() triggers a starting-value failure:
#   Error in polr(...) : attempt to find suitable starting values failed
estimate <- polr(as.formula(formula), data = cps_train)

# Inspect the fit: printed model, cutpoints, and coefficient names
estimate
estimate$zeta
model <- estimate$coefficients
names(model)

# Distribution checks on two of the regressors
table(cps$wage12)
table(cps$female)

# Response levels, linear predictor, and coefficients
estimate$lev
estimate$lp
estimate$coefficients
# Refit the ordinal regression with an explicit constant column among the
# regressors, to compare against the fit without it.
# NOTE(review): polr reports cutpoints (zeta) rather than an intercept, so
# const is presumably not separately identified -- check for a rank-deficiency
# warning when this runs.
cps$const <- 1

# Regressors are kept in a vector so that every term appears exactly once
# (the hand-typed formula string listed occ_3 twice).
xvars <- c(
  "const", "female", "black", "asian", "native", "other", "age", "agesq",
  "BA", "GradSch", "married", "wage12", "wkswork", "wkhours", "emp_gov",
  paste0("occ_", 1:9), paste0("ind_", 1:12)
)
xvar_formula <- paste(xvars, collapse = " + ")
formula <- paste("factor(empsize) ~ ", xvar_formula)

# Training-row filter, written as an R expression in a string;
# "TRUE" keeps every row.
train_filt <- "TRUE"
# dplyr::filter_() is deprecated, so the condition is evaluated against the
# columns of cps directly. Rows where it evaluates to NA are dropped, which
# matches how filter treats them. The subset is taken after const is added so
# the new column is present in the training data.
train_rows <- eval(parse(text = train_filt), envir = cps)
cps_train <- cps[!is.na(train_rows) & train_rows, , drop = FALSE]

# Passing weights = marsupwt to polr() triggers a starting-value failure:
#   Error in polr(...) : attempt to find suitable starting values failed
estimate <- polr(as.formula(formula), data = cps_train)

# Inspect the refit: coefficients, response levels, deviance, factor levels
estimate$coefficients
estimate$lev
estimate$deviance
estimate$xlevels
## FMLA 2012 employee file: derive the emp_gov indicator from D2
# The original scratch history re-read the CSV before every attempt and
# contained two half-typed mutate() calls that were syntax errors, which
# stopped the whole file from parsing. The file is read once here and the
# distinct codings that were tried are tabulated in a loop.
# (dplyr is already attached at the top of the file.)
d_fmla <- read.csv("../data/fmla/fmla_2012/fmla_2012_employee_revised_puf.csv")

# Candidate sets of D2 codes compared while reconciling emp_gov counts.
# As in the original attempts, rows with missing D2 stay NA and are therefore
# left out of these exploratory tables.
for (gov_codes in list(c(1, 2, 3), 1, c(1, 2), c(2, 3))) {
  emp_gov_trial <- ifelse(
    is.na(d_fmla$D2), NA, as.numeric(d_fmla$D2 %in% gov_codes)
  )
  print(table(emp_gov_trial))
}

# Final definition: emp_gov is 1 when D2 is 1, 2 or 3, and 0 otherwise.
# %in% never returns NA, so rows with missing D2 are coded 0 without needing
# a separate is.na() patch-up step.
d_fmla <- d_fmla %>%
  mutate(emp_gov = as.numeric(D2 %in% c(1, 2, 3)))
table(d_fmla$D2)
table(d_fmla$emp_gov)
