****************
*** Figure 3 ***
****************


** (1) Split sample analysis

* For each split half (0/1) and each outcome: residualize the outcome on
* year_artunit fixed effects, average the residual within examiner, and save
* one row per examiner (raw_examiner_effect, cases) to
* <outcome>_examiner_raw_split_<half>.dta
forvalues s = 0/1 {
    foreach x in nonnpe NPE_portfolio litigated_v2 NPE_litigated kogan {

        * load only the requested half, restricted to rows where the
        * outcome is not system-missing
        use examiner_effect_dataset if split == `s' & `x' != ., clear

        * fixed-effects fit; xbd = linear prediction including the absorbed effect
        areg `x', absorb(year_artunit)
        predict fitted_fe, xbd
        gen double x_residual = `x' - fitted_fe

        * cases counts the rows behind each examiner mean
        gen cases = 1
        collapse (mean) raw_examiner_effect = x_residual (count) cases, by(examiner)

        * one file per outcome x half
        save `x'_examiner_raw_split_`s', replace
    }
}

* now repeat the split-sample computation, this time producing shrunk examiner effects
* For each split half (0/1) and each outcome, compute shrunk examiner effects
* and write two files:
*   <outcome>_examiner_effect_split_<s>.dta  one row per examiner, with
*       examiner_effect<s>, shrunk_examiner_effect<s> and examiner_grantn_<s>
*       (the file and variable names read by the regressions in section (2))
*   <outcome>_examiner_effect_split<s>.dta   one-row summary table
* The per-examiner file used to be saved under the summary-table name and was
* overwritten immediately, so the file section (2) opens was never produced.
forvalues s=0/1 {

foreach x in nonnpe NPE_portfolio litigated_v2 NPE_litigated ///
kogan {

    use examiner_effect_dataset, clear

    ** select random subsamples
    keep if split == `s'

    drop if `x' == .

    areg `x', absorb(year_artunit)
    * i) get predicted values and residual:
    predict x_predict, xbd
    gen double x_residual=`x'-x_predict

    * document the variance and residual variance
    sum `x', d
    gen double x_mean=r(mean)
    gen double x_sd=r(sd)
    gen double x_var=r(Var)
    sum x_residual, d
    gen double x_residual_sd=r(sd)
    gen double x_residual_var=r(Var)

    * NOTE(review): assumes examiner_year_artunit exists in examiner_effect_dataset
    drop examiner_year_artunit

    // ii) Extract the average residual for an examiner x year
    egen examiner_year=group(examiner filing_year)
    bysort examiner_year: egen double residual_examiner_share=sum(x_residual)
    bysort examiner_year: replace residual_examiner_share=residual_examiner_share/[_N]
    bysort examiner_year: gen examiner_year_grantn=[_N]


    * iii) compute the covariance between the average residual in an examiner's
    * portfolio in one year and in that examiner's next observed year
    tempname signal_cov
    preserve
    keep examiner filing_year residual_examiner_share examiner_year_grantn
    duplicates drop
    * order explicitly by filing_year within examiner so [_n+1] is the next year on file
    bysort examiner (filing_year): gen residual_examiner_share_next=residual_examiner_share[_n+1]
    bysort examiner (filing_year): gen examiner_year_grantn_n=examiner_year_grantn[_n+1]
    * frequency weight = rounded average number of rows in the two years
    gen weight=round((examiner_year_grantn+examiner_year_grantn_n)/2)
    correlate residual_examiner_share residual_examiner_share_next [fw=weight], covar // WEIGHT
    * capture the covariance before restore so it does not depend on r() surviving
    scalar `signal_cov' = r(cov_12)
    restore
    gen double x_signal_var=`signal_cov'
    gen double x_signal_sd=sqrt(`signal_cov')


    * iv) compute the variance of the idiosyncratic component, which is estimated as a remainder
    gen x_epsilon_var=x_residual_var-x_signal_var
    gen x_epsilon_sd=sqrt(x_epsilon_var)


    * v) take a simple average of the residual for each examiner
    bysort examiner: egen double residual_examiner=sum(x_residual)
    bysort examiner: replace residual_examiner=residual_examiner/[_N]
    bysort examiner: gen examiner_grant_n=[_N]
    * note that this last variable counts the total number of patents across all years and artunits,
    * i.e. n*T in Kane and Staiger's notation)

    * vi) shrinkage
    keep examiner x_residual_var x_epsilon_var x_signal_var x_mean examiner_grant_n ///
        x_signal_sd x_var x_sd residual_examiner x_epsilon_sd
    duplicates drop

    gen shrinkage_factor=x_signal_var/(x_signal_var+x_epsilon_var/(examiner_grant_n))
    gen shrunk_examiner_effect=residual_examiner*shrinkage_factor
    sum shrunk_examiner_effect, d
    * hold the percentile cut-offs in scalars; later summarize calls overwrite r()
    tempname cut75 cut90
    scalar `cut75' = r(p75)
    scalar `cut90' = r(p90)
    gen examiner_effect=residual_examiner

    gen signal_sd_effect = x_signal_sd/x_mean*100
    gen shrunk_sd_effect = r(sd)/x_mean*100
    gen p25p75_effect = (r(p75)-r(p25))/x_mean*100
    gen p5p95_effect = (r(p95)-r(p5))/x_mean*100

    * to compute the effect of replacing examiners above the p90 by examiners at the p90,
    * we compute the difference between the mean rate above p90 and the rate at p90, multiply by the mass
    * of examiners in this range (10%) and divide by the mean rate
    sum shrunk_examiner_effect if shrunk_examiner_effect>`cut90'
    gen mean_above_p90=r(mean)
    gen abovep90_effect=((mean_above_p90-`cut90')*0.1)/x_mean*100
    drop mean_above_p90

    * same computation for the top quartile (mass 25%)
    sum shrunk_examiner_effect if shrunk_examiner_effect>`cut75'
    gen mean_above_p75=r(mean)
    gen abovep75_effect=((mean_above_p75-`cut75')*0.25)/x_mean*100
    drop mean_above_p75


    gen outcome_x="`x'"

    // save computation for each examiner, with split-specific variable names so
    // the two halves can be merged 1:1 on examiner without name clashes
    rename examiner_grant_n examiner_grantn_`s'
    rename examiner_effect examiner_effect`s'
    rename shrunk_examiner_effect shrunk_examiner_effect`s'
    save `x'_examiner_effect_split_`s', replace

    // save the overall results in a smaller table (file name unchanged)
    keep x_mean x_sd  signal_sd_effect shrunk_sd_effect p25p75_effect p5p95_effect abovep90_effect abovep75_effect outcome_x
    duplicates drop
    save `x'_examiner_effect_split`s', replace

}
}


** (2) Regressions

* Start split_test.dta as an empty dataset holding only coeff, so every
* outcome's one-row result can be appended to it.
clear
gen coeff = 0
save split_test, replace


* For each outcome: merge the two split halves and the raw effects of half 1
* on examiner, run four regressions of examiner_effect0 on an effect from
* half 1, and store coefficient, robust SE, p-value and the t-statistic for
* H0: slope = 1 as one row in split_test.dta.
foreach v in nonnpe NPE_portfolio litigated_v2 NPE_litigated kogan {

    use `v'_examiner_effect_split_0, clear

    * keep only examiners present in every file
    merge 1:1 examiner using `v'_examiner_effect_split_1, keep(match) nogenerate
    merge 1:1 examiner using `v'_examiner_raw_split_1, keep(match) nogenerate

    * harmonize count names when the inputs carry examiner_appn_* instead
    capture rename examiner_appn_0 examiner_grantn_0
    capture rename examiner_appn_1 examiner_grantn_1

    * the four specifications, in output order, with their variable suffixes
    local aw "[aw=examiner_grantn_1+examiner_grantn_0]"
    local reg1 "regress examiner_effect0 shrunk_examiner_effect1, r"
    local reg2 "regress examiner_effect0 shrunk_examiner_effect1 `aw', r"
    local reg3 "regress examiner_effect0 raw_examiner_effect `aw', r"
    local reg4 "regress examiner_effect0 raw_examiner_effect if cases >= 50 `aw', r"
    local sfx1 ""
    local sfx2 "_w"
    local sfx3 "_raw"
    local sfx4 "_raw_50"

    forvalues k = 1/4 {
        clear matrix
        `reg`k''

        * slope on the single regressor, its SE, and its p-value (row 4 of r(table))
        matrix b=e(b)
        gen coeff`sfx`k''=b[1,1]
        matrix v=e(V)
        gen se`sfx`k''=sqrt(v[1,1])
        matrix table=r(table)
        gen pvalue`sfx`k''=table[4,1]
    }

    * every row now holds the same constants: collapse to a single row
    keep coeff* se* pvalue*
    duplicates drop

    * t-statistic against a slope of one
    forvalues k = 1/4 {
        gen t`sfx`k'' = (coeff`sfx`k''-1) / se`sfx`k''
    }

    gen outcome="`v'"
    append using split_test
    save split_test, replace
}


** (3) Graph


* Bar chart of the split-sample coefficients (weighted shrunk, raw, raw with
* cases >= 50) with 95% confidence intervals, for four outcomes.
use split_test, clear

* The regression loop stores the patent-value outcome as "kogan"; the old
* filter only matched "kogan_patv", which dropped that row and left the
* Patent Value bars out of the figure. Accept both keys.
keep if inlist(outcome, "NPE_portfolio", "nonnpe", "kogan", "kogan_patv", "litigated_v2")

replace outcome = "NPE Purchase" if outcome == "NPE_portfolio"
replace outcome = "Non-NPE Purchase" if outcome == "nonnpe"
replace outcome = "Patent Value" if inlist(outcome, "kogan", "kogan_patv")
replace outcome = "Non-NPE Litigation" if outcome == "litigated_v2"

keep outcome coeff_w se_w coeff_raw se_raw coeff_raw_50 se_raw_50

* number the specifications 0/1/2 so they can be reshaped long
rename coeff_w coeff0
rename se_w se0
rename coeff_raw coeff1
rename se_raw se1
rename coeff_raw_50 coeff2
rename se_raw_50 se2


* one row per outcome x specification
reshape long coeff se, i(outcome) j(spec)

* 95% confidence bounds
gen hicoeff= coeff + 1.96 * se
gen lowcoeff = coeff - 1.96 * se

* left-to-right order of the outcome groups on the x axis
gen outcome_ind = 0 if outcome == "Patent Value"
replace outcome_ind = 1 if outcome == "Non-NPE Purchase"
replace outcome_ind = 2 if outcome == "NPE Purchase"
replace outcome_ind = 3 if outcome == "Non-NPE Litigation"


* x position: three adjacent bars per outcome, then a one-unit gap; the middle
* bar of each group sits at 2, 6, 10, 14, matching the xlabel positions below
gen running_var = outcome_ind * 4 + spec + 1

twoway (bar coeff running_var if spec==0) ///
       (bar coeff running_var if spec==1) ///
       (bar coeff running_var if spec==2) ///
       (rcap hicoeff lowcoeff running_var, color(black)), ///
       legend(label(1 "Shrunk Effect") label(2 "Raw Resid") label(3 "Raw Resid (50+)") label(4 "95% CI")) ///
       xlabel(2 `""Patent Value from" "Kogan et al (2017)""' 6 "Non-PAE Purchase" 10 "PAE Purchase" 14 "Non-PAE Litigation", noticks) ///
       xtitle("") ytitle("Split Sample Coefficient")

graph save split_sample_coefficient_comparison, replace
graph export split_sample_coefficient_comparison.pdf, replace


