## ======================================
## Table 3: Replication of J&L’s analysis with placebo tests 
## substituting Southern CA counties (excluding LA) as placebo treated units. 
## ======================================

  rm(list=ls())
  
  #Assume base R is running
  my.wd <- getSrcDirectory(function(x){x})
  
  #Check for errors
  if(grepl("error", tolower(class(my.wd)[1])) | my.wd==""){
    #Try to access working directory through R Studio API
    my.wd <- tryCatch(dirname(rstudioapi::getActiveDocumentContext()$path),
                      error = function(e) e)
    
  }
  
  #Set working directory
  setwd(my.wd)

## ======================================
## Load packages and functions
## ======================================

  #Read in function to reshape data for modeling
  source("Functions/prepModelData.R")
  source("Functions/installPackageNotFound.R")
  source("Functions/formatSig.R")

  #Libraries for modeling
  installPackageNotFound("data.table")
  installPackageNotFound("plm")
  installPackageNotFound("stargazer")
  installPackageNotFound("lmtest")
  
## ======================================
## Read in data
## ======================================
  
  #Hospitalizations
  hosp.zip3.qtr.1983.2009 <- read.csv("../Data/hosp_zip3_quarter_1983_2009.csv", stringsAsFactors = FALSE)
  
## ======================================
## Shuffle and assign LA m and v to SoCal and combine results with MI rules
## ======================================
  #Set seed
  set.seed(94063)
  
  #Set number of simulations
  n <- 1000

  #Names of m and v variables without LA county border info
  socal.m <- "m.scaled"
  socal.v <- "v.scaled"
  
  #Define la and socal zip3s
  la.zip3s <- unique(hosp.zip3.qtr.1983.2009$zip[hosp.zip3.qtr.1983.2009$prop.la > 0])
  soc.zip3s <- unique(hosp.zip3.qtr.1983.2009$zip[hosp.zip3.qtr.1983.2009$prop.socal > 0])
  
  #Create df of m and v for LA zip3s (the "true" m and v assuming equal population distribution of cities in municipalities)
  true.mv <- hosp.zip3.qtr.1983.2009[hosp.zip3.qtr.1983.2009$prop.la > 0, c("zip", "year.qtr", socal.m, socal.v)]
  
  #Create dummy variables for the interactions 1998 x la, and 1999 x la
  hosp.zip3.qtr.1983.2009$la.1998 <- as.numeric(hosp.zip3.qtr.1983.2009$year==1998)*hosp.zip3.qtr.1983.2009$prop.la
  hosp.zip3.qtr.1983.2009$la.1999 <- as.numeric(hosp.zip3.qtr.1983.2009$year>=1999)*hosp.zip3.qtr.1983.2009$prop.la
  hosp.zip3.qtr.1983.2009$soc.1998 <- as.numeric(hosp.zip3.qtr.1983.2009$year==1998)*hosp.zip3.qtr.1983.2009$prop.socal
  hosp.zip3.qtr.1983.2009$soc.1999 <- as.numeric(hosp.zip3.qtr.1983.2009$year>=1999)*hosp.zip3.qtr.1983.2009$prop.socal
  
  #Observation window lists
  obs.window.list <- list(c(1995:1999), c(1993:2009))

  #Variable name vector
  var.names <- c("I(disease.type * this.m)", "I(disease.type * this.v)", "this.m", "this.v")
  
  #Lists to hold results
  sim.param.list <- list()
  sim.se.list <- list()
  sim.p.list <- list()
  sim.t.list <- list()
  r2.list <- list()
  
  for(obs.window in obs.window.list){
    print(paste0("Running ", n, " simulations for ", min(obs.window), "-", max(obs.window), "..."))
    
    flush.console() 
    
    #Initialize progress bar
    progress.bar <- txtProgressBar(min = 0, max = n, style = 3)
    
    for(f in 1:n){
      
        #Sample from vector of zip3s from LA county
        matching.zip <- sample(la.zip3s, size = length(soc.zip3s), replace = TRUE)
        
        #Randomly assign a socal zip to a matching la zip sampled with replacement
        zip.lookup <- data.frame(placebo.zip = sample(soc.zip3s, 
                                                      size = length(soc.zip3s)),
                                 la.zip = matching.zip)
        
        #Create data frame of zip3s in the placebo county over time period represented for LA
        syn.mv <- merge(true.mv, zip.lookup, by.x = "zip", by.y = "la.zip")
        names(syn.mv)[names(syn.mv)%in%c(socal.m, socal.v)] <- c("m.soc", "v.soc")
        
        #Merge synthetic m and v to hospitalizations data
        placebo.dta <- merge(hosp.zip3.qtr.1983.2009, syn.mv[,c("placebo.zip", "year.qtr", "m.soc", "v.soc")],
                             by.x = c("zip", "year.qtr"), by.y = c("placebo.zip", "year.qtr"), all.x = TRUE)
        
        #Replace NA m and v with zero
        placebo.dta$m.soc[is.na(placebo.dta$m.soc)] <- 0
        placebo.dta$v.soc[is.na(placebo.dta$v.soc)] <- 0
        
        #Apply proportional SoCal factor
        placebo.dta$m.soc <- placebo.dta$m.soc*placebo.dta$prop.socal
        placebo.dta$v.soc <- placebo.dta$v.soc*placebo.dta$prop.socal
        
        ## Stacks foodborne illnesses on top of control illnesses and applies log transformation
        dta.agg.jl <- prepModelData(dta = placebo.dta, 
                                    fbi.name = "jl.codes.mdc6", 
                                    control.name = "control.jl.codes", 
                                    years = obs.window,
                                    control.grp = TRUE,
                                    filter = FALSE)
        
        #Run JL Model with standard m and v
        jl.model <- plm(logCount ~ I(disease.type*m.soc) + 
                          I(disease.type*v.soc) + 
                          m.soc + 
                          v.soc,
                        index = c("geo.type.pk", "year.qtr"),
                        model="within", effect="twoways", data= dta.agg.jl)
        
        #Save coefficients
        sim.param.list[[f]] <- jl.model$coefficients
        
        ## Adjust standard errors to cluster robust standard errors
        adj.test <- coeftest(jl.model, vcovHC(jl.model, type="HC0", cluster="group"))
        
        ## Save ses and p-values with CRSE
        sim.se.list[[f]] <- adj.test[,2]
        sim.t.list[[f]] <- adj.test[,3]
        sim.p.list[[f]] <- adj.test[,4]
        
        #R2 with lm
        r2.list[[f]] <- summary(lm(logCount ~ m.soc + v.soc + I(disease.type*m.soc) + I(disease.type*v.soc) +
                                     factor(year.qtr) + factor(geo.type.pk), data = dta.agg.jl))$r.squared
        
        #Increment progress bar
        setTxtProgressBar(progress.bar, f)
    }
    
    #Take median t-stats for m and v results
    median.t.stats <- c(median(unlist(lapply(sim.t.list, FUN = function(x)return(x[1])))),
                        median(unlist(lapply(sim.t.list, FUN = function(x)return(x[2])))),
                        median(unlist(lapply(sim.t.list, FUN = function(x)return(x[3])))),
                        median(unlist(lapply(sim.t.list, FUN = function(x)return(x[4])))))
    
    #How many reject the null?
    assign(paste0("rejection.rate.m.", min(obs.window), ".", max(obs.window)), mean(unlist(lapply(sim.p.list, FUN = function(x)return(x[1]<0.05)))))
    assign(paste0("rejection.rate.v.", min(obs.window), ".", max(obs.window)), mean(unlist(lapply(sim.p.list, FUN = function(x)return(x[2]<0.05)))))
  
    #Representative model
    average.dist <- unlist(lapply(sim.t.list, FUN = function(x)return(sum((x-median.t.stats)^2))))
    choose.model <- which(average.dist==min(average.dist))
    
    #Rename parameter vectors
    names(sim.param.list[[choose.model]]) <- var.names
    names(sim.se.list[[choose.model]]) <- var.names
    names(sim.p.list[[choose.model]]) <- var.names
    
    #Save in objects
    assign(paste0("imp.results.soc.", min(obs.window), ".", max(obs.window)), sim.param.list[[choose.model]])
    assign(paste0("imp.se.soc.", min(obs.window), ".", max(obs.window)), sim.se.list[[choose.model]])
    assign(paste0("imp.p.soc.", min(obs.window), ".", max(obs.window)), sim.p.list[[choose.model]])
    assign(paste0("imp.r2.soc.", min(obs.window), ".", max(obs.window)), mean(unlist(r2.list)))

    #Close progress bar
    close(progress.bar)
    flush.console() 
  }
  
  
## ==========================
## Placebo Analysis
## ========================== 
  
  #Variables for model specifications
  variable.list <- list(c("m", "v"), c("la.1999", "la.1998"), c("soc.1999", "soc.1998"))
  names(variable.list) <- c("LA Original", "LA Dummy",  "SoCal Dummy")
  
  #Lists to hold results
  model.list <- list()
  se.list <- list()
  r2.list <- list()
  
  for(obs.window in obs.window.list){
    for(k in 1:length(variable.list)){
        
        ## Reset data from placebo analysis
        dta.agg.jl <- prepModelData(dta = hosp.zip3.qtr.1983.2009, 
                                    fbi.name = "jl.codes.mdc6", 
                                    control.name = "control.jl.codes", 
                                    years = obs.window,
                                    control.grp = TRUE,
                                    filter = FALSE)
        
        #Spec
        this.spec <- names(variable.list)[k]
        
        #Create variables
        dta.agg.jl[c("this.m", "this.v")] <- dta.agg.jl[c(variable.list[[k]])]
        
        #Formula
        this.formula <- "logCount ~ I(disease.type*this.m) + I(disease.type*this.v) + this.m + this.v"
        
        #Run JL Model with standard m and v
        jl.model.run <- plm(formula(this.formula),
                            index = c("geo.type.pk", "year.qtr"),
                            model="within", effect="twoways", data= dta.agg.jl)
        
        #Save model results
        model.list[[this.spec]] <- jl.model.run
        
        ## Adjust standard errors to cluster robust standard errors
        adjusted.se <- coeftest(jl.model.run, vcov=vcovHC(jl.model.run, type="HC0", cluster="group"))
        se.list[[this.spec]] <- adjusted.se
        
        #Calculate r2 with fixed effects
        r2 <- formatSig(summary(lm(formula(paste0(this.formula,  " +
                                                  factor(year.qtr) + factor(geo.type.pk)")), data = dta.agg.jl))$r.squared, 2)
        r2.list[[this.spec]] <-  r2
    }
    
    #Assign lists to names based on observation window
    assign(paste0("model.list.la.", min(obs.window), ".", max(obs.window)), model.list)
    assign(paste0("se.list.la.", min(obs.window), ".", max(obs.window)), se.list)
    assign(paste0("r2.list.la.", min(obs.window), ".", max(obs.window)), r2.list)
    
  }

## ==========================
## Output: Table
## ========================== 
  
  #Labels
  cov.label <- c("Foodborne x mandatory disclosure post-1998", 
                 "Foodborne x voluntary disclosure post-1998",
                 "Mandatory disclosure post-1998 (Digestive)", 
                 "Voluntary disclosure post-1998 (Digestive)")
  title <-  c("Table 3: Replication of J&L analysis with placebo tests substituting Southern CA counties (excluding LA) as placebo treated units.")
  
  #Print table
  stargazer(model.list.la.1995.1999[["LA Original"]],
            model.list.la.1993.2009[["LA Original"]],
            model.list.la.1995.1999[["LA Original"]],
            model.list.la.1993.2009[["LA Original"]],
            model.list.la.1995.1999[["LA Dummy"]],
            model.list.la.1993.2009[["LA Dummy"]],
            model.list.la.1995.1999[["SoCal Dummy"]],
            model.list.la.1993.2009[["SoCal Dummy"]],
            coef = list(round(model.list.la.1995.1999[["LA Original"]]$coefficients, 2), 
                        round(model.list.la.1993.2009[["LA Original"]]$coefficients, 2),
                        round(imp.results.soc.1995.1999, 2), 
                        round(imp.results.soc.1993.2009, 2),
                        round(model.list.la.1995.1999[["LA Dummy"]]$coefficients, 2), 
                        round(model.list.la.1993.2009[["LA Dummy"]]$coefficients, 2),
                        round(model.list.la.1995.1999[["SoCal Dummy"]]$coefficients, 2), 
                        round(model.list.la.1993.2009[["SoCal Dummy"]]$coefficients, 2)),
            se = list(se.list.la.1995.1999[["LA Original"]][,2], 
                      se.list.la.1993.2009[["LA Original"]][,2], 
                      imp.se.soc.1995.1999,
                      imp.se.soc.1993.2009,
                      se.list.la.1995.1999[["LA Dummy"]][,2], 
                      se.list.la.1993.2009[["LA Dummy"]][,2], 
                      se.list.la.1995.1999[["SoCal Dummy"]][,2], 
                      se.list.la.1993.2009[["SoCal Dummy"]][,2]),
            p = list(se.list.la.1995.1999[["LA Original"]][,4], 
                     se.list.la.1993.2009[["LA Original"]][,4], 
                     imp.p.soc.1995.1999,
                     imp.p.soc.1993.2009,
                     se.list.la.1995.1999[["LA Dummy"]][,4], 
                     se.list.la.1993.2009[["LA Dummy"]][,4], 
                     se.list.la.1995.1999[["SoCal Dummy"]][,4], 
                     se.list.la.1993.2009[["SoCal Dummy"]][,4]),
            type = "text",
            covariate.labels = cov.label,
            title = title,
            digits = 2,
            dep.var.labels = "",
            dep.var.caption = "",
            notes.append = FALSE,
            column.sep.width = "1pt",
            no.space = TRUE,
            omit.stat = c("rsq", "adj.rsq", "f"),
            notes.align = "l",
            add.lines = list(c("R2", 
                               r2.list.la.1995.1999[["LA Original"]],
                               r2.list.la.1993.2009[["LA Original"]], 
                               formatSig(imp.r2.soc.1995.1999, 2),
                               formatSig(imp.r2.soc.1993.2009, 2),
                               r2.list.la.1995.1999[["LA Dummy"]],
                               r2.list.la.1993.2009[["LA Dummy"]],
                               r2.list.la.1995.1999[["SoCal Dummy"]],
                               r2.list.la.1993.2009[["SoCal Dummy"]]),
                             c("Observation period", 
                               "1995-99", 
                               "1993-2009",
                               "1995-99", 
                               "1993-2009",
                               "1995-99", 
                               "1993-2009",
                               "1995-99", 
                               "1993-2009")))

  ## ==========================
  ## Test pairwise differences between coefficients
  ## ========================== 
  
  #Object to hold results
  p.list <- list()
  
  for(obs.window in obs.window.list){
    #Label observation window
    this.window <- paste0(min(obs.window), "-", max(obs.window))
    
    #Get model info
    model.list <- get(paste0("model.list.la.", min(obs.window), ".", max(obs.window)))
    imp.results <- get(paste0("imp.results.soc.", min(obs.window), ".", max(obs.window)))
    se.list <- get(paste0("se.list.la.", min(obs.window), ".", max(obs.window)))
    imp.se <- get(paste0("imp.se.soc.", min(obs.window), ".", max(obs.window)))
    
    #Statistical test of the difference between LA and SoCal estimates - continuous
    mandatory.diff <- model.list[["LA Original"]]$coefficients[["I(disease.type * this.m)"]] - imp.results[["I(disease.type * this.m)"]]
    voluntary.diff <- model.list[["LA Original"]]$coefficients[["I(disease.type * this.v)"]] - imp.results[["I(disease.type * this.v)"]]
    mandatory.diff.se <- sqrt(se.list[["LA Original"]]["I(disease.type * this.m)",2]^2 + imp.se[["I(disease.type * this.m)"]]^2)
    voluntary.diff.se <- sqrt(se.list[["LA Original"]]["I(disease.type * this.v)",2]^2 + imp.se[["I(disease.type * this.v)"]]^2)
    mandatory.diff.p <- 2*pnorm(abs(mandatory.diff/mandatory.diff.se), lower.tail = FALSE)
    voluntary.diff.p <- 2*pnorm(abs(voluntary.diff/voluntary.diff.se), lower.tail = FALSE)
    
    #Save to list
    p.list[[this.window]][["continuous"]] <- c(formatSig(mandatory.diff.p,2),formatSig(voluntary.diff.p,2))
    
    #Statistical test of the difference between LA and SoCal estimates - dummy
    mandatory.diff <- model.list[["LA Dummy"]]$coefficients[["I(disease.type * this.m)"]] - model.list[["SoCal Dummy"]]$coefficients[["I(disease.type * this.m)"]]
    voluntary.diff <- model.list[["LA Dummy"]]$coefficients[["I(disease.type * this.v)"]] - model.list[["SoCal Dummy"]]$coefficients[["I(disease.type * this.v)"]]
    mandatory.diff.se <- sqrt(se.list[["LA Dummy"]]["I(disease.type * this.m)",2]^2 + se.list[["SoCal Dummy"]]["I(disease.type * this.m)",2]^2)
    voluntary.diff.se <- sqrt(se.list[["LA Dummy"]]["I(disease.type * this.v)",2]^2 + se.list[["SoCal Dummy"]]["I(disease.type * this.v)",2]^2)
    mandatory.diff.p <- 2*pnorm(abs(mandatory.diff/mandatory.diff.se), lower.tail = FALSE)
    voluntary.diff.p <- 2*pnorm(abs(voluntary.diff/voluntary.diff.se), lower.tail = FALSE)
    
    #Save to list
    p.list[[this.window]][["dummy"]] <- c(formatSig(mandatory.diff.p,2),formatSig(voluntary.diff.p,2))
    

  }
  
  #Display footnote
  print("Footnote 39: The following table reports p-values for pairwise comparisons of treatment effect coefficients")
  footnote.table <- data.frame(c("","LA", ""), 
             c("", "1995-99", "1993-2009"), 
             c("Mandatory", p.list$`1995-1999`$continuous[1], p.list$`1993-2009`$continuous[1]),
             c("", p.list$`1995-1999`$dummy[1], p.list$`1993-2009`$dummy[1]),
             c("Voluntary", p.list$`1995-1999`$continuous[2], p.list$`1993-2009`$continuous[2]),
             c("", p.list$`1995-1999`$dummy[2], p.list$`1993-2009`$dummy[2]))
  names(footnote.table) <- c("", "", "S.CA", "", "", "")
  print(footnote.table)
  
  