*****

* Root working directory for the project; every other path below is built from it
global dirwork  "~/Dropbox/Research/IntlMedDevMkts/_temp-mike"
*global dirwork  "~/Dropbox/IntlMedDevMkts/_temp-mike"
* Name of this program; used to name its own scratch subfolder
global progname     "cleaning2-data-balloons"


* The program and output folders are each assigned to their own global macro
global dirdocuments "$dirwork/_documents"
global dirresults   "$dirwork/_results"
global dirprograms  "$dirwork/_programs"
global dirscratch   "$dirwork/_scratch/$progname"

* The original and analysis subfolders are each assigned to separate global macros
* This provides some protection against accidentally destroying the original data
global dirodata     "$dirwork/_data/original"
global diradata     "$dirwork/_data/analysis"

* Create scratch folder if necessary
* capture suppresses the error raised when the folder already exists
* (it also hides any other mkdir failure, e.g. a missing parent folder)
* The path is quoted so that a root directory containing spaces still works
cap mkdir "$dirscratch"

* Close any log left open by a previous run; capture ignores the error when none is open
cap log close
*log using $dirscratch/log.txt, replace t

* Change working directory
* Relative paths used later in this file (the ../MGanalysis saves) resolve from here
cd "$dirwork"

* Do not pause output with the more prompt
set more off

*****
***** MERGE US and EU data
*****
* Load the US balloon data, then stack the EU balloon data underneath it
* Paths are quoted so they still resolve when the root directory contains spaces
use "$dirwork/_scratch/cleaning-us-data/USballoons.dta", clear
append using "$dirwork/_scratch/cleaning-eu-data/EUballoons.dta"
/*
*Save as Stata 13
*use $dirwork/_scratch/cleaning-us-data/USballoons_stata13.dta, clear
*append using $dirwork/_scratch/cleaning-eu-data/EUballoons_stata13.dta
*save ../MGanalysis/Balloons/balloons_stata13.dta, replace

use ../MGanalysis/Balloons/balloons_stata13.dta, clear
*/

*****
***** CLEANING
*****
	
	*** Renaming variables 
	* Shorten the variable name; the collapse step further down uses the short name
    rename diagnosticangiography diagnostic
		
	*** Cleaning product names (avoid creating erroneous "new" products through naming differences; this needs some collapsing and manual inspection of product names)
		* Drop rows whose product is recorded as unclassified (both capitalizations seen)
		drop if product=="Not Classified" | product=="NOT CLASSIFIED"
		****
		* Use regular expressions to remove tokens like PTCA, Balloon, OTW, RX, Monorail, NC from names
		* Each space-separated token in the list below is removed in turn, in list order
		* NOTE(review): regexr() replaces only the first match per call, so a token that
		*   appears twice in a name is removed once only
		* NOTE(review): tokens are applied as regular expressions and match anywhere in the
		*   string, so the dot in 1.2 matches any character, and short tokens such as
		*   5, and, NC can also hit inside longer words; confirm this is intended
		* NOTE(review): the manufacturer-specific fixes below appear to depend on the exact
		*   leftovers of this loop; re-check them if the list or its order changes
		local remove "PTCA Balloon OTW RX Monorail NC / - and Jocath 1.2 5 Zipper Across Leo Hypro MX LX Dilatation Catheter Flex Push Long" 	
		foreach x of local remove{
		replace product=regexr(product,"`x'", "") 
		}
        
		* Remove symbols that wouldn't work above
		* Parentheses are regular-expression metacharacters, so they are stripped with
		* subinstr() instead; the final argument . replaces every occurrence
		replace product = subinstr(product, "()","",.) 
		replace product = subinstr(product, "(","",.) 
		replace product = subinstr(product, ")","",.) 
		
		* Strip leading and trailing blanks left behind by the removals
		* (blanks in the middle of a name are not touched)
		replace product=trim(product)
		
		* Cleaning specific manufacturer issues
		* Boston Scientific - Apex, Maverick, Quantum
		* The longer pattern "XQuantum Ranger" must run before "XQuantum";
		* otherwise the shorter pattern would rewrite it first
		replace product=regexr(product,"XMaverick 2", "Maverick 2")
		replace product=regexr(product,"XMaverick", "Maverick")
		replace product=regexr(product,"XQuantum Ranger", "Ranger")
		replace product=regexr(product,"XQuantum", "Quantum Maverick")
		* Abbott Vascular 
		replace product=regexr(product,"Mini Trek", "MiniTrek")
		* Atrium
		replace product=regexr(product,"Hyperlite", "HyperLite")
		* Invatec
		* The first pattern contains two consecutive spaces, presumably left by the
		* token removal above; verify against the data
		replace product=regexr(product,"Avion  2 Bifurcated", "Avion")
		replace product=regexr(product,"Avion Plus", "Avion")
		* Translumina
		* NOTE(review): the dot in the pattern is a regex wildcard, not a literal period
		replace product=regexr(product,"Cathy No. 3", "Cathy No 3")
		
		****
		* Total of q over all rows of each manufacturer-product pair
		* (presumably q is units sold; confirm)
		bysort manufacturer product: egen sumQj = total(q)
		drop if sumQj<10 //very likely useless/spurious if sell less than 10

		* Save the cleaned data before the collapse further down
		* The path is relative to the current working directory
		save ../MGanalysis/Balloons/balloons_labid.dta, replace
		
*Collapse to US/EU level
* Indicator: 1 when country is "US", 0 otherwise
gen US=(country=="US")

* Month-by-US totals of diagnostic are built on a copy of the data and kept in a
* temporary file. A tempfile is used instead of a file named temp in the working
* directory, so no stray file is left behind and no existing temp.dta is overwritten.
tempfile diagtotals
preserve
* Step 1: one value per lab and month (mean over that lab's rows), so each lab
* contributes once no matter how many rows it has
collapse (mean) diagnostic, by(mm yyyy US lab_id)
* Step 2: add the lab-level values up within each month and US indicator
collapse (sum) diagnostic, by(mm yyyy US)
save `diagtotals'
restore

* Product-level totals of q and revenue per month and US indicator
collapse (sum) q revenue, by(mm yyyy US manufacturer product)
* Attach the month-level diagnostic total to every product row;
* nogenerate skips creating _merge (same result as merging and then dropping it)
merge m:1 mm yyyy US using `diagtotals', nogenerate

* Path is relative to the current working directory
save ../MGanalysis/Balloons/balloons.dta, replace
