do "D:\Data\workdata\704665\daycare\dofiles\first.do"


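*************************************************************
* Finds all women born 1935-1960 and their children
* Source: yearly GRUND data from DST
* Output (all in $work): pop3560_fertility.dta, pop3560_kidssample.dta,
*         pop3560_fertility_tomerge.dta,
*         analysis_daycare_fertility_everopen.dta,
*         analysis_daycare_onlymoms_everopen.dta
*************************************************************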

* Extract the persons born 1935-1960 from every yearly GRUND file (1980-2013)
* and store one temporary extract per year in $work.
* The 1980-2006 files are read with fodreg and a lower-case foed_dag; the
* 2007-2013 files are read without fodreg and FOED_DAG is renamed to foed_dag.
forvalues yr = 1980/2013 {
	if `yr' <= 2006 {
		use pnr fodreg foed_dag koen using "D:\Data\workdata\704665\rawdata\GRUND`yr'.dta", clear
	}
	else {
		use pnr FOED_DAG koen using "D:\Data\workdata\704665\rawdata\GRUND`yr'.dta", clear
		rename FOED_DAG foed_dag
	}
	* birth year 1935-1960; records with a missing birth date are dropped too
	keep if inrange(year(foed_dag), 1935, 1960)
	save "$work\grund`yr'.dta", replace
}



* population born 1935-1960 with their fodreg code and birthdate
* Stack the yearly extracts: load 1980 and append 1981-2013.
* (Appending from 1980 would add the 1980 records a second time.)
use "$work\grund1980.dta",clear
foreach num of numlist 1981/2013{
append using "$work\grund`num'.dta"
}

* sorted by pnr so that later by-pnr steps can run on the saved file
sort pnr 
compress
save "$work\pop3560brutto.dta", replace

* the yearly extracts are no longer needed
foreach num of numlist 1980/2013{
erase "$work\grund`num'.dta"
}


* Reduce the stacked yearly records to one row per person.
use "$work\pop3560brutto.dta", clear
* records without a birth-registration code are discarded
* (the 2007-2013 extracts are read without fodreg)
keep if fodreg!=.
* every person gets the largest code observed across the yearly records
by pnr: egen int fodreg_max=max(fodreg)
by pnr: keep if _n==1 /*keep one spell per person*/
drop fodreg
rename fodreg_max fodreg

* Classify the birth-registration code (fodreg) into groups and keep only
* persons registered in a Danish parish (group 13).
* Later assignments overwrite earlier ones where code ranges overlap.
capture drop fodgroup
gen fodgroup=.

*Documentation of fodreg in dst.dk under Population/Foedreg_kode
replace fodgroup=1 if fodreg>=5100 & fodreg<=5902 /*other countries*/

replace fodgroup=1 if fodreg==5999 /*unknown but abroad*/

replace fodgroup=2 if fodreg>=9501 & fodreg<=9599 /*Greenland*/
replace fodgroup=2 if fodreg==3999 /*Greenland*/

* variable name written out in full (was the abbreviation "fodre")
replace fodgroup=3 if fodreg>=2401 & fodreg<=2599 /*unknown place DK*/
replace fodgroup=3 if fodreg==4998
replace fodgroup=3 if fodreg==4999 /*unknown*/

replace fodgroup=4 if fodreg>=4301 & fodreg<=4499 /*weird...*/

replace fodgroup=5 if fodreg<1000 /*new post 1970 municipal codes*/

replace fodgroup=6 if fodreg>=4501 & fodreg<=4599 /*county without documentation for the codes used...*/

replace fodgroup=7 if fodreg>=4601 & fodreg<= 4799 /*other religious groups and old names (no documentation for old names) */

replace fodgroup=8 if fodreg>=4801 & fodreg<=4989 /*catholic groups*/

replace fodgroup=9 if fodreg==9993 | fodreg==9995 | fodreg==1455 /*Values 9993 & 9995 are not documented and have few observations.*/

* cannot match here: rows with fodreg==. were dropped before this step
replace fodgroup=10 if fodreg==. /* few Observations have missings */

replace fodgroup=11 if fodreg==9999 /* 9999 is unknown */

replace fodgroup=12 if fodreg==1308 /* Fyns Amt */

replace fodgroup=13 if fodreg>=7001 & fodreg<=9348 /* Parish in Denmark */

* keep only persons born in a Danish parish
keep if fodgroup==13
drop fodgroup

* Birth year and labels for the (potential) parents
gen fodyear=year(foed_dag)

label variable fodreg "Parish of birth, parent"
label variable fodyear "Year of birth, parent"
label variable foed_dag "Date of birth, parent"

* parent identifiers by sex: koen==2 fills pnrm, koen==1 fills pnrf
gen pnrm=pnr if koen==2
gen pnrf=pnr if koen==1
*******************************
compress

* write the rows with a non-empty pnrm as the mothers file;
* preserve/restore leaves the data in memory unchanged
preserve
drop if pnrm==""
save "$work\pop3560moms.dta", replace
restore



*******************************
* find their kids
*******************************
* Attach the mothers' birth information to the FTDB2012 records via pnrm
* and keep only records whose mother is in the mothers file.
use "$raw\FTDB2012.dta",clear // rawdata from DST
merge m:1 pnrm using "$work\pop3560moms.dta"

* _merge==1: pnrm not in the mothers file
* _merge==2: women in the mothers file without a record here
*            (they are merged back in as childless women further down)
drop if _merge!=3

drop _merge ftdb_lbnr koen antal 

* duplicates with the same pnr (kid) - deaths very early, reuse personal identifier
* NOTE(review): this drops every record with mdoed=="1", whether or not its
* pnr is duplicated - confirm that this is intended. (An unused per-pnr
* record count that was generated and dropped here has been removed.)
drop if mdoed=="1" /*mdoed is valid from 1973*/

label var fodreg "Parish of birth, mom"
label var fodyear "Year of birth, mom"
label var foed_dag "Date of birth, mom"

compress

* how many kids do we have per woman?
sort pnrm
by pnrm: gen numkids=_N
label var numkids "Number of kids per woman"

* now we only have moms, no women without kids
* (saved once, after numkids exists; an earlier save of the same file was
*  overwritten here before being used)
save "$work\pop3560moms_kids.dta",replace

* list of child identifiers used to pull the children from GRUND
keep pnr
save "$work\pnr_kids.dta",replace

******************************************************************************
* merge to GRUND to get children's gender, birthdate, place of birth
******************************************************************************
* For every yearly GRUND file keep one record per pnr and retain only the
* persons listed in pnr_kids.dta. The 1980-2006 files are read with fodreg;
* the 2007-2013 files are read without it and FOED_DAG is renamed to foed_dag.
forvalues yr = 1980/2013 {
	if `yr' <= 2006 {
		use pnr fodreg foed_dag koen using "D:\Data\workdata\704665\rawdata\GRUND`yr'.dta", clear
	}
	else {
		use pnr FOED_DAG koen using "D:\Data\workdata\704665\rawdata\GRUND`yr'.dta", clear
		rename FOED_DAG foed_dag
	}
	* drop duplicates
	bysort pnr: keep if _n==1
	* keep the children only
	merge 1:1 pnr using "$work\pnr_kids.dta"
	keep if _merge==3
	drop _merge
	save "$work\grund`yr'kids.dta", replace
}

* Stack the yearly child extracts: load 1980 and append 1981-2013.
* (Appending from 1980 would add the 1980 records a second time.)
use "$work\grund1980kids.dta",clear
foreach num of numlist 1981/2013{
append using "$work\grund`num'kids.dta"
}

* sorted by pnr so that later by-pnr steps can run on the saved file
sort pnr 
compress
save "$work\popkids_brutto.dta", replace

* the yearly extracts are no longer needed
foreach num of numlist 1980/2013{
erase "$work\grund`num'kids.dta"
}


* Reduce the stacked yearly child records to one row per child.
use "$work\popkids_brutto.dta", clear

* records without a birth-registration code are discarded
* (the 2007-2013 extracts are read without fodreg)
keep if fodreg!=.
* every child gets the largest code observed across the yearly records
by pnr: egen int fodreg_max=max(fodreg)
by pnr: keep if _n==1 /*keep one spell per person*/
drop fodreg
rename fodreg_max fodreg

* Classify the children's birth-registration code (fodreg) into groups.
* Unlike for the parents, all groups are kept; the grouping is tabulated.
* Later assignments overwrite earlier ones where code ranges overlap.
capture drop fodgroup
gen fodgroup=.

*Documentation of fodreg in dst.dk under Population/Foedreg_kode

replace fodgroup=1 if fodreg>=5100 & fodreg<=5902 /*other countries*/

replace fodgroup=1 if fodreg==5999 /*unknown but abroad*/

replace fodgroup=2 if fodreg>=9501 & fodreg<=9599 /*Greenland*/
replace fodgroup=2 if fodreg==3999 /*Greenland*/

* variable name written out in full (was the abbreviation "fodre")
replace fodgroup=3 if fodreg>=2401 & fodreg<=2599 /*unknown place DK*/
replace fodgroup=3 if fodreg==4998
replace fodgroup=3 if fodreg==4999 /*unknown*/

replace fodgroup=4 if fodreg>=4301 & fodreg<=4499 /*weird...*/

replace fodgroup=5 if fodreg<1000 /*new post 1970 municipal codes*/

replace fodgroup=6 if fodreg>=4501 & fodreg<=4599 /*county without documentation for the codes used...*/

replace fodgroup=7 if fodreg>=4601 & fodreg<= 4799 /*other religious groups and old names (no documentation for old names) */

replace fodgroup=8 if fodreg>=4801 & fodreg<=4989 /*catholic groups*/

replace fodgroup=9 if fodreg==9993 | fodreg==9995 | fodreg==1455 /*Values 9993 & 9995 are not documented and have few observations.*/

* cannot match here: rows with fodreg==. were dropped before this step
replace fodgroup=10 if fodreg==. /* few Observations have missings */

replace fodgroup=11 if fodreg==9999 /* 9999 is unknown */

replace fodgroup=12 if fodreg==1308 /* Fyns Amt */

replace fodgroup=13 if fodreg>=7001 & fodreg<=9348 /* Parish in Denmark */

* codes that fall in none of the ranges stay missing and show up here
tab fodgroup,miss

*tab fodreg if fodgroup==.

* Birth year of the child, then give all child variables a kid-specific name
* so they do not collide with the mothers' variables in later merges.
gen fodyear=year(foed_dag)

tab fodyear

rename fodyear fodyear_kid
rename fodreg fodreg_kid
rename foed_dag foed_dagkid
rename koen sex_kid
rename fodgroup fodgroup_kid

* labels use the new names; the old names no longer exist after the renames
* and would only resolve through variable abbreviation
label var fodreg_kid "Parish of birth, child"
label var fodyear_kid "Year of birth, child"
label var foed_dagkid "Date of birth, child"
label var sex_kid "Sex of the child"
label var fodgroup_kid "Birth registration type"

*******************************
compress
save "$work\pop3560_kids.dta", replace

* Combine the children's GRUND information with the mother-child file.
use "$work\pop3560_kids.dta", clear
merge 1:1 pnr using "$work\pop3560moms_kids.dta"

* _merge==2: kids who die before 1980, where our outcomes start; we have to
* omit them, but they are still part of the numkids count
drop if _merge==2
drop _merge

sort pnrm
gen ageatbirth=fodyear_kid-fodyear 
label var ageatbirth "Mom's age at birth"

* childless women: women in the mothers file that match no child row
merge m:1 pnrm using "$work\pop3560moms.dta"

* drop the unmatched woman row when her pnr occurs exactly twice in the data
* NOTE(review): presumably the second row is her own record as a child of a
* sample mother - confirm
bysort pnr: drop if _merge==2 & _N==2

gen nokids=1 if _merge==2
drop _merge
label var nokids "No children observed"

* unmatched women carry no child count
replace numkids=0 if numkids==.
order pnr pnrm pnrf
compress
save "$work\pop3560_fertility.dta", replace

* 5 percent of women are childless - checked against external sources
* for 7% we lack father info
* women have on average 2.4 children, 2.58 if you exclude the childless
* average age at birth is 26

********************************************************************************************
* save the pnrs of the kids to merge on outcomes
* Only the child-level variables are loaded from the fertility file.
* NOTE(review): the fertility file also holds the rows of childless women
* (nokids==1), whose pnr is the woman's own identifier; they are not
* filtered out here - confirm that this is intended.
use pnr foed_dagkid sex_kid fodreg_kid fodgroup_kid fodyear_kid mdoed using "$work\pop3560_fertility.dta", clear

save "$work\pop3560_kidssample.dta", replace

**********************************************************************************************


**********************************************************************************************
****** Data set of moms
**********************************************************************************************
* One row per woman with: age at first birth, an indicator for any child
* without a registered father, number of kids, and the childless flag.

use "$work\pop3560_fertility.dta", clear
* from here on pnr identifies the woman, not the child
rename pnr pnr_child
rename pnrm pnr
rename pnrf pnr_father

replace nokids=0 if nokids==.

* NOTE(review): rows of childless women have an empty pnr_father as well, so
* they are counted as dad_missing=1 - confirm that this is intended.
gen dad_missing = pnr_father==""

***keep one obs per mom
bysort pnr: egen ageat_firstbirth = min(ageatbirth)
by pnr: egen dad_evermissing = max(dad_missing)
by pnr: keep if _n==1

keep pnr ageat_firstbirth dad_evermissing numkids nokids

save "$work\pop3560_fertility_tomerge.dta", replace

**********************************************************************************************
* merge moms to preschool pop data to keep relevant moms
**********************************************************************************************
use "$work\pop_daycare_popmunic_yrapproval.dta", clear

*** keep only relevant cohorts of females (1935-1957)
keep if male==0 & inrange(fodyear, 1935, 1957)

* merge to fertility data
merge 1:1 pnr using "$work\pop3560_fertility_tomerge.dta"
*_merge = 2 for cohorts born in 1958-1960
keep if _merge!=2
drop _merge

* merge on nurse data (parish level) and give every municipality the
* latest nurse date found among its rows
merge m:1 Sognekode_IM using "$work\nurseprogram.dta"
bysort Kommune_ID: egen nurse_date1=max(nurse_date)

drop nurse_date
rename nurse_date1 nurse_date 

* Hand-set nurse-program dates for 28 municipalities, flagged by imputenurse.
* Municipality codes and names (non-ASCII letters transliterated):
*   120621 Augustenborg      120653 Bogense          120622 Broager
*   119265 Faxe              120180 Hammel Voldby Sby 118980 Herlev
*   119289 Haarlev Himlingoeje  118983 Hoeje Taastrup  120573 Hoejer
*   120362 Ikast             120577 Loegumkloster    120631 Nordborg
*   119590 Noerre Aaby       119659 Ringe            120707 Rudkoebing
*   120701 Skagen            120677 Skaelskoer       120587 Skaerbaek
*   120636 Sottrup           120722 Store Heddinge   120208 Them
*   120590 Tinglev           120549 Toftlund         120551 Tystrup
*   119601 Vejlby-Strib      120554 Vojens           119886 Aars
*   119354 Aarslev
gen muni=Kommune_ID

gen imputenurse=0

* nurse date set to 1 January 1950
replace nurse_date=date("1/1/1950", "MDY") if inlist(muni, 119265, 118980, 119289, 118983, 120701, ///
	120677, 120722, 120551, 120554, 119354)

* nurse date set to 1 January 1960
replace nurse_date=date("1/1/1960", "MDY") if muni==120631

* nurse date set to 1 January 1964
replace nurse_date=date("1/1/1964", "MDY") if inlist(muni, 120621, 120622, 119659, 120636, 119886)

* nurse date set to missing
replace nurse_date=. if inlist(muni, 120653, 120180, 120573, 120362, 120577, 119590, ///
	120707, 120587, 120208, 120590, 120549, 119601)

* flag all municipalities touched above
replace imputenurse=1 if inlist(muni, 119265, 118980, 119289, 118983, 120701, ///
	120677, 120722, 120551, 120554, 119354)
replace imputenurse=1 if muni==120631
replace imputenurse=1 if inlist(muni, 120621, 120622, 119659, 120636, 119886)
replace imputenurse=1 if inlist(muni, 120653, 120180, 120573, 120362, 120577, 119590, ///
	120707, 120587, 120208, 120590, 120549, 119601)

drop muni

* days between the woman's date of birth and the nurse date
gen DOB_minusnurse = foed_dag - nurse_date

* 1 for individuals born after the nurse date; 0 otherwise, including when
* the difference is missing (nurse date missing for munis without a program)
gen nurse_treat = (DOB_minusnurse>0 & DOB_minusnurse<.)


***************** COLLAPSE TO MUNI X YEAR LEVEL ********************

* N is counted in the collapse to give the cell size
gen N=1
global fertility nokids numkids ageat_firstbirth dad_evermissing

* numeric county code from the county name
encode Amtsnavn, gen(county)

**regress fertility outcomes on women's month of birth dummies
gen mob = month(foed_dag)

* municipality x year cell identifier, absorbed in the regressions
egen muni_yr = group(Kommune_ID year)

* For every outcome: keep the original as <outcome>_raw and replace <outcome>
* by predict's d statistic (the estimated absorbed muni_yr effect) for the
* estimation sample.
foreach outcome of global fertility {
	areg `outcome' i.mob, absorb(muni_yr)
	predict `outcome'_res if e(sample), d
	rename `outcome' `outcome'_raw
	rename `outcome'_res `outcome'
}

* for all women by municipality and year(age 3 exposure)
collapse ///
	(mean) county rural numinst numslots numinstperpop numslotsperpop  popmunic nurse_treat imputenurse *_raw $fertility ///
	(max) operating ///
	(count) N ///
	, by(Kommune_ID year) fast


*Municipality controls
* Two control files are merged on Kommune_ID; municipalities that occur only
* in a control file are dropped. The first merge indicator is kept as
* muni_merge.
*
*preserve
*merge m:1 Kommune_ID using "$work\municdata_2930.dta"
*tab _merge
*restore
merge m:1 Kommune_ID using "$work\munic_controls_recode2930.dta"
tab _merge

rename _merge muni_merge

keep if muni_merge!=2

merge m:1 Kommune_ID using "$work\munic_controls_recode2021.dta"
tab _merge
keep if _merge!=2
drop _merge


rename Kommune_ID muni
rename popmunic munipop

* Rows without a year cannot be placed in the panel. They are removed before
* the years are counted and the leads are built, so that they neither count
* towards totyears nor get picked up as the "next year" of a municipality
* (missing values sort last within muni).
drop if year==.

*indicator for a balanced panel
bys muni: gen totyears = _N
gen balanced = totyears==23
**munis with balanced data in every year

***** exposure at ages 4-7
* leads of the age-3 measures within municipality; only defined for balanced
* municipalities, where consecutive rows are consecutive years
sort muni year
foreach var of varlist numinstperpop operating {
by muni: gen `var'_age4 = `var'[_n+1] if balanced==1
by muni: gen `var'_age5 = `var'[_n+2] if balanced==1
by muni: gen `var'_age6 = `var'[_n+3] if balanced==1
by muni: gen `var'_age7 = `var'[_n+4] if balanced==1
}

* share of the five years (ages 3-7) with an operating preschool, based on
* the first age at which operating equals 1; the lead variables are missing
* for unbalanced municipalities, so the later lines cannot match there
gen frac_operating = 0 if balanced==1
replace frac_operating = 1 if operating==1 & balanced==1
replace frac_operating = 4/5 if operating==0 & operating_age4==1
replace frac_operating = 3/5 if operating==0 & operating_age4==0 & operating_age5==1
replace frac_operating = 2/5 if operating==0 & operating_age4==0 & operating_age5==0 & operating_age6==1
replace frac_operating = 1/5 if operating==0 & operating_age4==0 & operating_age5==0 & operating_age6==0 & operating_age7==1

* rowmean ignores missing components
egen numinstperpop_age3_7 = rowmean(numinstperpop numinstperpop_age4 numinstperpop_age5 numinstperpop_age6 numinstperpop_age7)

************************ Munis with ever open daycare
bys muni: egen everopen = max(operating)
drop if everopen==.

codebook muni if everopen==1
codebook muni if everopen==0

keep if everopen==1

save "$work\analysis_daycare_fertility_everopen.dta", replace


**always operating
sort muni year
by muni: egen alwaysopen = min(operating)

***** label key treatment variables
label var numinstperpop "Num. centers per 1000 pop at age 3"
label var operating "Any Approved Preschool at Age 3"
label var frac_operating "Frac. Yrs age 3-7 Exposed to Preschool"
label var numinstperpop_age3_7 "Avg. num. centers per 1000 pop over age 3-7"



**** short sample: cohorts born up to 1949 (year<=1952)
cap drop shortsample totyears2 balanced2
gen shortsample = (year<=1952)
bys muni: egen totyears2 = total(shortsample)
* A municipality is balanced in the short sample when it is observed in every
* year up to 1952 that occurs in the data; the required count is taken from
* the data instead of being hard-coded as 20.
* NOTE(review): the full panel is balanced at 23 years for cohorts 1935-1957;
* if year is birth year + 3 (as the comments suggest), only 15 of these years
* are <=1952, so the hard-coded 20 could never be met - confirm.
quietly levelsof year if shortsample==1, local(shortyears)
local nshortyears : word count `shortyears'
gen balanced2 = totyears2==`nshortyears'

**** interaction with nurse program
* after the collapse nurse_treat is a cell mean; any positive share counts as
* treated (the missing check keeps a missing value from being turned into 1)
replace nurse_treat = 1 if nurse_treat>0 & nurse_treat<.
cap drop daycare_nurse
gen daycare_nurse = operating*nurse_treat

label var nurse_treat "NHV at Birth"
label var daycare_nurse "Preschool x NHV"


*** which municipalities ever have a nurse program
bys muni: egen ever_nurse = max(nurse_treat)
*there are 18 municipalities that never have nurse program in our data


*** HELPNURSE SAMPLE: drop years after 1952 for the 28 munis that have worse program data (i.e. cohorts born after 1949)
* MIW not necessary if we believe in the books on childcare that also have rough info on the nurse program - I have marked the imputed values with
* an imputenurse indicator
* helpnurse is 1 for all rows of munis without imputed dates and for the rows
* up to 1952 of munis with imputed dates; it is missing otherwise
gen helpnurse=1 if imputenurse==0
replace helpnurse=1 if imputenurse==1 & year<=1952

save "$work\analysis_daycare_onlymoms_everopen.dta", replace






