* Start with nothing in memory.
clear

* Load the March CPS extract.
* NOTE(review): this header used to say "March 2018", but the path below
* points at the 2017 file -- confirm which survey year is intended.
use "C:\Ryan\DOL\microsim\Data\cps\raw\2017\cpsmar2017.dta"

********** Sample restrictions
* Keep records with positive wsal_val or positive ern_val.
* missing() is used rather than "!= ." because extended missing values
* (.a through .z) also compare as greater than zero and are not equal to ".",
* so the old test would have let them through as positive amounts.
keep if (wsal_val > 0 & !missing(wsal_val)) | (ern_val > 0 & !missing(ern_val))

* Keep only a_wkstat codes 2-7 and a_clswkr codes 1-6.
keep if inlist(a_wkstat,2,3,4,5,6,7)
keep if inlist(a_clswkr,1,2,3,4,5,6)


***Gender
* female is 1 when a_sex == 2, 0 when a_sex == 1, and missing for any other value.
gen female = (a_sex == 2) if inlist(a_sex, 1, 2)

***Race
* 0/1 indicators keyed on prdtrace; these are never missing.
gen black  = (prdtrace == 2)
gen asian  = (prdtrace == 4)
gen native = (prdtrace == 3)

* Residual group: prdtrace is not 1 and not one of the three codes above.
* A missing prdtrace also ends up flagged here.
gen other = !inlist(prdtrace, 1, 2, 3, 4)


***Age
* Shorter name for the age variable, plus its square.
rename a_age age
gen agesq = age * age

***Education
* BA flags a_hga code 43; GradSch flags a_hga codes 44-46.
gen BA      = (a_hga == 43)
gen GradSch = inlist(a_hga, 44, 45, 46)

***Married
* married flags a_maritl codes 1-3.
gen married = inlist(a_maritl, 1, 2, 3)

***Earnings: leave for now (ern_val?)
* wage12 is a straight copy of wsal_val; the scaled variant below stays disabled.
generate wage12 = wsal_val
*gen wage20 = wsal_val * (258.165/238.08)

***wkhours
* Copy hrswk into a byte variable, then blank out non-positive values.
generate byte wkhours = hrswk
replace wkhours = . if wkhours <= 0

***wkswork - in dataset

***emp_gov (employed by the government)
* Flags peio1cow codes 1-3; everything else (including missing) is 0.
generate emp_gov = inlist(peio1cow, 1, 2, 3)

***a_mjind - in dataset

***industry types - confirmed from crosstab in 2016
* One 0/1 flag per group of a_mjind codes.
gen ind_top1  = (a_mjind == 10)
gen ind_top2  = (a_mjind == 5)
gen ind_top3  = (a_mjind == 9)
gen ind_other = inlist(a_mjind, 1, 2, 3, 4, 6, 7, 8, 11, 12, 13, 14)

* Codes outside 1-14. A missing a_mjind is also flagged, because
* missing compares as greater than 14.
gen ind_na    = (a_mjind < 1 | a_mjind > 14)

*a_mjocc - in dataset

***occupation types - confirmed from crosstab in 2016
* One 0/1 flag per group of a_mjocc codes.
gen occ_top1  = (a_mjocc == 2)
gen occ_top2  = (a_mjocc == 3)
gen occ_top3  = (a_mjocc == 1)
gen occ_other = inlist(a_mjocc, 4, 5, 6, 7, 8, 9, 10, 11)

* Codes outside 1-11. A missing a_mjocc is also flagged, because
* missing compares as greater than 11.
gen occ_na    = (a_mjocc < 1 | a_mjocc > 11)

***occupation dummies
* occ_1 ... occ_10: 1 when a_mjocc equals the suffix, otherwise 0.
forvalues code = 1/10 {
	gen occ_`code' = (a_mjocc == `code')
}

***industry dummies
* ind_1 ... ind_13: 1 when a_mjind equals the suffix, otherwise 0.
forvalues code = 1/13 {
	gen ind_`code' = (a_mjind == `code')
}

***hourly
* 1 when a_hrlywk == 1, 0 when a_hrlywk == 2, missing otherwise.
gen hourly = (a_hrlywk == 1) if inlist(a_hrlywk, 1, 2)

***empsize
* Copy of noemp with code 0 turned into missing.
gen empsize = noemp if noemp != 0

***oneemp
* 1 when phmemprs == 1, 0 when phmemprs is 2 or 3, missing otherwise.
gen oneemp = (phmemprs == 1) if inlist(phmemprs, 1, 2, 3)

***union
* 1 when a_unmem == 1, 0 when a_unmem == 2, missing otherwise.
gen union = (a_unmem == 1) if inlist(a_unmem, 1, 2)

***fmla_eligible
* fmla_eligible = 1 only when all of the following hold, else 0:
*   (a) a_clswkr is 2, 3 or 4, or a_clswkr is 1 with noemp in 3-6
*   (b) wkhours*wkswork is at least 1250
*   (c) oneemp == 1
* Stata orders missing above every number, so a missing product would
* satisfy ">= 1250". wkhours is set to missing above when it is <= 0, so
* the hours test must explicitly require both factors to be nonmissing.
gen fmla_eligible = 0
replace fmla_eligible = 1 if (inlist(a_clswkr,2,3,4) | (inlist(a_clswkr,1) & inlist(noemp,3,4,5,6))) ///
	& !missing(wkhours, wkswork) & wkhours*wkswork>=1250 & oneemp == 1

***prerelg a_wkstat a_clswkr a_hrlywk - in dataset

* Final column list, written once and reused for both keep and order.
* The local only exists while this do-file (or a selection containing this
* whole section) is running, so run these lines together.
local finalvars peridnum marsupwt ///
	female black asian native other age agesq BA GradSch married ///
	wage12 wkhours wkswork emp_gov ///
	a_mjind ind_top1 ind_top2 ind_top3 ind_other ind_na ///
	a_mjocc occ_top1 occ_top2 occ_top3 occ_other occ_na ///
	occ_1 occ_2 occ_3 occ_4 occ_5 occ_6 occ_7 occ_8 occ_9 occ_10 ///
	ind_1 ind_2 ind_3 ind_4 ind_5 ind_6 ind_7 ind_8 ind_9 ind_10 ind_11 ind_12 ind_13 ///
	hourly empsize oneemp union fmla_eligible ///
	prerelg a_wkstat a_clswkr a_hrlywk

* Drop every other variable, then put the columns in the listed order.
keep `finalvars'
order `finalvars'

* Write the cleaned file as CSV, overwriting any existing copy.
export delimited using "C:\Ryan\DOL\microsim\Data\cps\final\2017\cps_clean_2017.csv", replace