set more off

* PROGRAM: appendix_figure2.do
*
* NOTES: Predict standardized ACT composite scores from application
*        information, then plot the binned predictions against the centered
*        application score, separately for applicants with and without an
*        observed score, to view selection (figures for tiers 1 and 4).
*

* Close any log left open by an earlier (possibly aborted) run; without this,
* -log using- stops with "log file already open".  -capture- swallows the
* error raised when no log is open.
capture log close
log using "/home/projects/sehs/replication/logs/appendix_figure2.log", replace

*tier-by-school-by-cohort fe


*********************************************************
*********************************************************
*********************************************************
* Temporary-file handles (temp1/temp2 are not referenced in the visible part
* of this script).
tempfile temp1 temp2

use "/home/projects/sehs/replication/data/analytic_replication.dta", clear

* Grade-8 covariates used as controls in the prediction regression below.
global grade8 "black8 hisp8 male8 frpl8 assignedsch8"

* positive = 1 when any of center1-center6 is strictly greater than zero.
* The "!= ." guard is needed because Stata orders the system missing value
* above every number, so "center > 0" alone would be true for it.
gen positive = 0
forvalues k = 1/6 {
    replace positive = 1 if center`k' > 0 & center`k' != .
}

* selectruleany = 1 when offerrule is a non-empty string, 0 otherwise.
gen selectruleany = (offerrule != "")

*************************************************
***** Predict missing outcomes for bounding *****
*************************************************
*** application information for prediction ***
* pointstotal pointsexam pointstest7 pointsgrades
* (pointstotal is listed here but is not entered in the regression below)

* Recode a missing schid8 to 0 so those rows share one absorbed group
* (presumably so they are not lost from the regression -- confirm).
replace schid8 = 0 if schid8 == .

* Regress the standardized ACT composite on the positive flag, the application
* score components, the grade-8 covariates and tier/cohort9 indicators,
* absorbing schid8.
local appscores pointsexam pointstest7 pointsgrades
areg stdactcomp positive `appscores' $grade8 i.tier i.cohort9, absorb(schid8)

* No option is given, so -predict- stores its default statistic.
predict stdactcomp_hat
label var stdactcomp_hat "Predicted (standardized) ACT composite score for those missing test scores"

* Flag rows whose ACT composite equals the system missing value.
gen miss_stdactcomp = (stdactcomp == .)
* Make six copies of every row and number the copies 1..6 within sid, so that
* each copy can carry one of the school-choice slots schoice1-schoice6.
expand 6
bysort sid: gen numobs = _n

* Copy k receives the value of schoice<k>.
gen schoice = schoice1 if numobs == 1
forvalues k = 2/6 {
    replace schoice = schoice`k' if numobs == `k'
}

* Discard rows whose choice is "Skinner North" and rows with an empty choice.
drop if inlist(schoice, "Skinner North", "")

* Attach to each row the centered cutoff score of the school named in schoice.
* The two lists below are parallel: the i-th variable stub pairs with the i-th
* school name (names are quoted because "South Shore" contains a space).
local stubs   brooks jones king lane lindblom northside payton southshore westing young
local schools `""Brooks" "Jones" "King" "Lane" "Lindblom" "Northside" "Payton" "South Shore" "Westinghouse" "Young""'

gen centered = .
foreach stub of local stubs {
    * Peel the next school name off the front of the list (quotes are stripped).
    gettoken school schools : schools
    replace centered = `stub'_cut_centered if schoice == "`school'"
}

* Rows for any other school, or with a missing centered score, are removed.
drop if centered == .

* Within each sid keep the row(s) with the largest centered score; if several
* rows tie on that score, keep the one with the largest numobs.
bysort sid: egen maxcenter = max(centered)
keep if centered == maxcenter
bysort sid: egen maxrank = max(numobs)
keep if numobs == maxrank

* admit = 1 when the centered score is at or above zero, 0 otherwise.
gen admit = (centered >= 0)

* Bin the centered score in steps of 10 after shifting it down by 2.
* NOTE(review): with this shift the bin labelled 0 covers centered scores from
* roughly -3 to 7, i.e. it spans both sides of the zero cutoff -- confirm that
* this is intended.
gen centered_bin = round(centered - 2, 10)

***********************
* Sample restrictions
* Remove rows with dsped8 == 1, rows with completeapp == 0, and rows whose
* tier is 0 or missing.
drop if dsped8 == 1 | completeapp == 0 | inlist(tier, 0, .)
***********************

* Reduce to one row per (centered_bin, tier, miss_stdactcomp) cell holding the
* mean predicted score and the number of non-missing predictions in the cell.
collapse (mean) stdactcomp_hat (count) n_stdactcomp=stdactcomp_hat, ///
    by(centered_bin tier miss_stdactcomp)

* One figure per tier (1 and 4): cell-mean predicted score against the binned
* centered application score, drawn separately for cells where the score is
* missing (first plot) and where it is observed (second plot, hollow
* circles).  Each figure is saved as appendix_figure2_tier<t>.gph.
foreach t in 1 4 {
    graph twoway ///
        (scatter stdactcomp_hat centered_bin if tier == `t' & miss_stdactcomp == 1, ///
            legend(label(1 "Score missing")) msize(vsmall)) ///
        (scatter stdactcomp_hat centered_bin if tier == `t' & miss_stdactcomp == 0, ///
            msymbol(Oh) legend(label(2 "Score observed")) ///
            title("Predicted Standardized ACT Score") subtitle("Tier `t'") ///
            ytitle("") xline(0) xtitle("Centered Application Score") ///
            yscale(range(-3(1)3)) ylabel(-3(1)3)), ///
        saving("/home/projects/sehs/replication/figures/appendix_figure2_tier`t'.gph", replace)
}

* Close the log opened at the top of this do-file so it is not left open for
* whatever runs next in the same Stata session.  -capture- keeps this harmless
* if the log has already been closed.
capture log close


