
* stata -b do "/homes/nber/davidcal/cens1940.work/master_dofile_1920census_full_NBER_20150201.do" &

*clear all
* Start from an empty dataset; -capture- suppresses any error from -clear-.
capture clear

* Do not pause output for --more-- prompts (the header shows this file is run with -stata -b-).
set more off

* Root of the bulk-storage area that holds the large census files.
global Kbulk "/disk/bulkw/keriksso"

* Relative paths used below (e.g. "FB Index/...") resolve against this directory.
cd "/homes/nber/davidcal/cens1940.work"

* Load the appended dataset; every FB-index file below is merged into it.
use "$Kbulk/NBER_1920census_temp_male_nf_full.dta", clear




*******
*** generate birth decade
* Bin birthyear into 5-year groups labelled by the first year of the bin
* (1850, 1855, ..., 1935). Despite the variable name, bins are 5 years wide.
* Birth years below 1850, above 1940, or missing leave birthdecade missing.
gen birthdecade = 5 * floor(birthyear / 5) if birthyear >= 1850 & birthyear < 1940

* The last bin is closed on the right: 1940 itself is folded into the 1935 group.
replace birthdecade = 1935 if birthyear == 1940



*******
*** merge FB_index based on first names and birth year
*   FB_index calculated using full census data (by Katherine)


*****
**  A. for each foreign nation, FB_index is calculated using the entire population sample (excluding blacks)

**  specification A1:
* 	1940 full count census
* 	use all cohorts before birthyear
* 	NYSIIS name conversion
* 	merge FB_index based on name_given_nysiis and birthyear, using only indices calculated from the 1940 census
* 	FB_index excluding blacks
* 	match rate with nysiis names: 97.88%

* Attach the NYSIIS-name FB index to every person by (name_given_nysiis, birthyear).
* Many people share a key in the master data, while the index file is expected
* to hold one row per key, so the merge is m:1. (-merge m:m- pairs rows by their
* position within a key and gives arbitrary results if the using file has
* duplicate keys; m:1 stops with an error in that case instead.)
* keep(master match) discards index rows that match no person (_merge == 2);
* generate() stores the match status under its final name.
merge m:1 name_given_nysiis birthyear using "FB Index/US1940_FBIndex_N_exclblack_male.dta", ///
	keep(master match) generate(f_index_nysiis_male_1940_match)

* Give the generic FBindex column a specification-specific name so the next
* merge can bring in its own FBindex.
rename FBindex f_index_beforebirth_n_m_1940
label variable f_index_beforebirth_n_m_1940 "F_index: Male, excl black, NYSIIS names, 1940 census only"

* Indicator: index strictly above 0.5; left missing when the index is missing.
gen f_index_nm_abovehalf = (f_index_beforebirth_n_m_1940 > 0.5) if f_index_beforebirth_n_m_1940 != .



**  specification A2:
* 	1940 full count census
* 	use all cohorts before birthyear
* 	raw name
* 	merge FB_index based on name_given and birthyear, using only indices calculated from the 1940 census
* 	FB_index excluding blacks
* 	match rate with raw names: 94.24%

* Attach the raw-name FB index to every person by (name_given, birthyear).
* The index file is expected to hold one row per key, so the merge is m:1;
* -merge m:m- would pair rows by position within a key and silently produce
* arbitrary matches if the using file had duplicates, whereas m:1 errors out.
* keep(master match) discards index rows that match no person (_merge == 2).
merge m:1 name_given birthyear using "FB Index/US1940_FBIndex_exclblack_male.dta", ///
	keep(master match) generate(f_index_raw_male_1940_match)

* Rename the generic FBindex column so later merges can add their own.
rename FBindex f_index_beforebirth_r_m_1940
label variable f_index_beforebirth_r_m_1940 "F_index: Male, excl black, raw names, 1940 census only"

* Indicator: index strictly above 0.5; left missing when the index is missing.
gen f_index_rm_abovehalf = (f_index_beforebirth_r_m_1940 > 0.5) if f_index_beforebirth_r_m_1940 != .



**  specification A3:
* 	1940 full count census
* 	use only 20 years of cohorts prior to birthyear
* 	NYSIIS name conversion
* 	merge FB_index based on name_given_nysiis and birthyear, using only indices calculated from the 1940 census
* 	FB_index excluding blacks
* 	match rate with nysiis names: 97.88%

* Attach the 20-year-window NYSIIS-name FB index by (name_given_nysiis, birthyear).
* The index file is expected to hold one row per key, so the merge is m:1;
* -merge m:m- would pair rows by position within a key and silently produce
* arbitrary matches if the using file had duplicates, whereas m:1 errors out.
* keep(master match) discards index rows that match no person (_merge == 2).
merge m:1 name_given_nysiis birthyear using "FB Index/Nearest 20 Years Only/US1940_FBIndex_N.dta", ///
	keep(master match) generate(f_index_nm_1940_20y_match)

* Rename the generic FBindex column so later merges can add their own.
rename FBindex f_index_20y_n_m_1940
label variable f_index_20y_n_m_1940 "F_index: Male, excl black, NYSIIS names, 1940 census only, 20 years before birth"

* Indicator: index strictly above 0.5; left missing when the index is missing.
gen f_index_nm20_abovehalf = (f_index_20y_n_m_1940 > 0.5) if f_index_20y_n_m_1940 != .




**  specification A4:
* 	1940 full count census
* 	use only 20 years of cohorts prior to birthyear
* 	raw names (no name conversion)
* 	merge FB_index based on name_given and birthyear, using only indices calculated from the 1940 census
* 	FB_index excluding blacks
* 	match rate with raw names: 94.34%
* Move the freqUS / freqFB columns already in memory out of the way.
* NOTE(review): presumably the next index file carries columns with the same
* names, which -merge- would otherwise not overwrite -- confirm against the file.
rename freqUS freqUS_full
rename freqFB freqFB_full

* Attach the 20-year-window raw-name FB index by (name_given, birthyear).
* The index file is expected to hold one row per key, so the merge is m:1;
* -merge m:m- would pair rows by position within a key and silently produce
* arbitrary matches if the using file had duplicates, whereas m:1 errors out.
* keep(master match) discards index rows that match no person (_merge == 2).
merge m:1 name_given birthyear using "FB Index/Nearest 20 Years Only/US1940_FBIndex.dta", ///
	keep(master match) generate(f_index_rm_1940_20y_match)

* Rename the generic FBindex column so later merges can add their own.
rename FBindex f_index_20y_r_m_1940
label variable f_index_20y_r_m_1940 "F_index: Male, excl black, raw names, 1940 census only, 20 years before birth"

* Indicator: index strictly above 0.5; left missing when the index is missing.
gen f_index_rm20_abovehalf = (f_index_20y_r_m_1940 > 0.5) if f_index_20y_r_m_1940 != .





** 	B. for each foreign nation, FB_index is calculated using only that nation's foreign-born population (plus the US native population, excluding blacks)
*   this is calculated for 16 sending countries, same as JPE

* The 16 sending countries for which a country-specific index exists.
* Built in two steps only to keep the lines short; the resulting list is a
* single space-separated string in the order shown.
local var_country_jpe "Norway Sweden England Ireland Italy Austria Germany Russia"
local var_country_jpe "`var_country_jpe' Switzerland Belgium France Denmark Wales Scotland Finland Portugal"


** 	specification B1:
* 	1940 full count census
* 	use all cohorts before birthyear
* 	NYSIIS name conversion
* 	merge FB_index based on name_given_nysiis, birthyear, and hhh's country of origin, using only indices calculated from the 1940 census
* 	FB_index excluding blacks
* 	match rate with nysiis names: 97.24%


/*

* rename variables in US1940_FBIndex_N_bycountry_male.dta

preserve

use "FB Index/US1940_FBIndex_N_bycountry_male.dta", clear

rename nysiis_first name_given_nysiis

rename Irindex_N 	findex_nmc_1940_Ireland
rename Ruindex_N 	findex_nmc_1940_Russia
rename Itindex_N 	findex_nmc_1940_Italy
rename Geindex_N 	findex_nmc_1940_Germany
rename Swiindex_N 	findex_nmc_1940_Switzerland
rename Norindex_N 	findex_nmc_1940_Norway
rename Sweindex_N 	findex_nmc_1940_Sweden
rename Auindex_N 	findex_nmc_1940_Austria
rename Beindex_N 	findex_nmc_1940_Belgium
rename Frindex_N 	findex_nmc_1940_France
rename Deindex_N 	findex_nmc_1940_Denmark
rename Waindex_N 	findex_nmc_1940_Wales
rename Fiindex_N 	findex_nmc_1940_Finland
rename Scindex_N 	findex_nmc_1940_Scotland
rename Enindex_N 	findex_nmc_1940_England
rename Poindex_N 	findex_nmc_1940_Portugal

save "FB Index/US1940_FBIndex_N_bycountry_male.dta", replace

restore


* save separate US1940_FBIndex_N_bycountry_male.dta for each country, rename into f_index_nmc_1940

preserve

foreach c in `var_country_jpe' {
	use "FB Index/US1940_FBIndex_N_bycountry_male.dta", clear
	keep birthyear name_given_nysiis findex_nmc_1940_`c'
	rename findex_nmc_1940_`c' f_index_nmc_1940
	save "FB Index/US1940_FBIndex_N_bycountry_male_`c'.dta", replace
	}
	
restore	

*/


* merge in FB_index

* Attach the by-country NYSIIS-name indices (one findex_nmc_1940_<Country>
* column per country) by (name_given_nysiis, birthyear).
* The index file is expected to hold one row per key, so the merge is m:1;
* -merge m:m- would pair rows by position within a key and silently produce
* arbitrary matches if the using file had duplicates, whereas m:1 errors out.
* keep(master match) discards index rows that match no person (_merge == 2).
merge m:1 name_given_nysiis birthyear using "FB Index/US1940_FBIndex_N_bycountry_male.dta", ///
	keep(master match) generate(merge_findex_nmc_1940)

* Collapse the per-country columns into one variable: each person receives the
* index of the country recorded in birthplace_hhh_jpe. People whose
* birthplace_hhh_jpe is not one of the listed countries keep a missing value.
gen f_index_nmc_1940 = .
label variable f_index_nmc_1940 "F_index: Male, excl black, NYSIIS names, 1940 census only, separate by country"

foreach c of local var_country_jpe {
	replace f_index_nmc_1940 = findex_nmc_1940_`c' if birthplace_hhh_jpe == "`c'"
}

* The wide per-country columns are no longer needed.
drop findex_nmc_1940_*




** 	specification B2:
* 	1940 full count census
* 	use all cohorts before birthyear
* 	raw names (no name conversion)
* 	merge FB_index based on name_given, birthyear, and hhh's country of origin, using only indices calculated from the 1940 census
* 	FB_index excluding blacks
* 	match rate with raw names: 94.02%


/*

* rename variables in US1940_FBIndex_bycountry.dta

preserve

use "FB Index/US1940_FBIndex_bycountry.dta", clear

replace firstname = upper(firstname)
rename firstname name_given

rename Irindex 		findex_rmc_1940_Ireland
rename Ruindex 		findex_rmc_1940_Russia
rename Itindex 		findex_rmc_1940_Italy
rename Geindex 		findex_rmc_1940_Germany
rename Swiindex 	findex_rmc_1940_Switzerland
rename Norindex 	findex_rmc_1940_Norway
rename Sweindex 	findex_rmc_1940_Sweden
rename Auindex 		findex_rmc_1940_Austria
rename Beindex 		findex_rmc_1940_Belgium
rename Frindex 		findex_rmc_1940_France
rename Deindex 		findex_rmc_1940_Denmark
rename Waindex 		findex_rmc_1940_Wales
rename Fiindex 		findex_rmc_1940_Finland
rename Scindex 		findex_rmc_1940_Scotland
rename Enindex 		findex_rmc_1940_England
rename Poindex 		findex_rmc_1940_Portugal

save "FB Index/US1940_FBIndex_bycountry_male.dta", replace

restore

*/


* merge in FB_index

* Attach the by-country raw-name indices (one findex_rmc_1940_<Country>
* column per country) by (name_given, birthyear).
* The index file is expected to hold one row per key, so the merge is m:1;
* -merge m:m- would pair rows by position within a key and silently produce
* arbitrary matches if the using file had duplicates, whereas m:1 errors out.
* keep(master match) discards index rows that match no person (_merge == 2).
merge m:1 name_given birthyear using "FB Index/US1940_FBIndex_bycountry_male.dta", ///
	keep(master match) generate(merge_findex_rmc_1940)

* Collapse the per-country columns into one variable: each person receives the
* index of the country recorded in birthplace_hhh_jpe. People whose
* birthplace_hhh_jpe is not one of the listed countries keep a missing value.
gen f_index_rmc_1940 = .
label variable f_index_rmc_1940 "F_index: Male, excl black, raw names, 1940 census only, separate by country"

foreach c of local var_country_jpe {
	replace f_index_rmc_1940 = findex_rmc_1940_`c' if birthplace_hhh_jpe == "`c'"
}

* The wide per-country columns are no longer needed.
drop findex_rmc_1940_*

* Merge key for the state-level index: bpl divided by 100.
* NOTE(review): assumes bpl is coded in multiples of 100 for the places present
* in the index file, so that the quotient matches the file's codes -- confirm.
gen birthplace_self_code = bpl/100

**	C. state-by-state FB_index

** 	specification C1:
* 	1940 full count census
* 	use 20 cohorts before birthyear
* 	raw names
* 	merge FB_index based on name_given, birthyear, and birthplace_self_code, using only indices calculated from the 1940 census
* 	FB_index excluding blacks
* 	match rate with raw names: % (not recorded)
* 	this matching implicitly only matches individuals born in the US

* The index file is expected to hold one row per (name, birth year, state)
* key, so the merge is m:1; -merge m:m- would pair rows by position within a
* key and silently produce arbitrary matches if the using file had duplicates,
* whereas m:1 errors out.
* keep(master match) discards index rows that match no person (_merge == 2).
merge m:1 name_given birthyear birthplace_self_code using "$Kbulk/FBIndex_bystate.dta", ///
	keep(master match) generate(f_index_rms_1940_20y_match)

* Give the generic FBindex column its specification-specific name.
rename FBindex f_index_20y_rms_1940
label variable f_index_20y_rms_1940 "F_index: Male, excl black, raw names, state-by-state, 1940 census only, 20 years before birth"

* Indicator: index strictly above 0.5; left missing when the index is missing.
gen f_index_rms20_abovehalf = (f_index_20y_rms_1940 > 0.5) if f_index_20y_rms_1940 != .


* Write the dataset with all merged indices to bulk storage, overwriting any existing copy.
save "$Kbulk/Full1920_data_FBIndex_NEW.dta", replace


