###################################################################
# Name              : ComputeCovarianceMatrixA.R
# Description       : This program computes the covariance matrices A,
#                     that is the covariance matrices of investment 
#                     and cognitive factors as predicted by the 
#                     estimated investment and production functions 
#                     where we have taken out the village-specific means 
#                     out. 
###################################################################




###################################################################
# SETTINGS 
###################################################################
rm(list=ls())                  # Clear all objects from memory
set.seed(08072018)             # Random number generator seed 

###################################################################
# DEFINE DIRECTORIES
###################################################################
# Main working directory 
dir                 <- c("AER-2015-0183_data_appendix/")          # Write name of your main working directory

# Sub-directories 
dir_anal           <- paste(dir,c("code/R"), sep="") 
dir_functions      <- paste(dir,c("code/R/functions"), sep="")
dir_pf             <- paste(dir,c("code/R/specsPF"), sep="")
dir_data           <- paste(dir,c("data"), sep="")

# Specification of the measurement model
nameFM             <- c("latentfactors_noinstruments")                    # Name of the specification
inputFM            <- paste("specsFM/",nameFM, ".R", sep="")   # Name of the file with all the details of the specification

# Directories with estimates of the model on true data 
dir_outputFM       <- paste(dir,c("output/"), nameFM, sep="")
dir_outputPF       <- paste(dir, c("output/"), nameFM, "/FirstStage_toyprice_foodprice_conflict", sep="")


###################################################################
# CALL FUNCTIONS
###################################################################
setwd(dir_functions)
source("call.data_func.R")
source("draw.factor_func.R")

##################################################################
# LOAD PACKAGES
###################################################################
library("MASS")
library("mixtools")
library("mvtnorm")
library("minpack.lm")
library("corpcor")
library("gdata")
library("Matrix")
library("ks")

###################################################################
# Step 1: Load the true data and estimates of the factor scores, 
# investment and production functions on the true data, 
# which we will use to generate the MC data
###################################################################
data               <- "measures.csv"
nV                 <- 3             # Number of variables we're generating (2 investments and one cognition)

# Load data
setwd(dir_data)                             # Open directory that contains the data (true data and bootstrap datasets)
measures <- call.data(data)

# Load details of the empirical specification
setwd(dir_anal)                             # Open director that contains the detail of the specification
source(inputFM)                             # Load  file specifying the measurement system

# Load estimates of the factor model and factor scores 
setwd(dir_outputFM)
load("fs_true.R")
scores <- list(fs_true[[5]],fs_true[[6]])

# Load data
setwd(dir_data)                             # Open directory that contains the data (true data and bootstrap datasets)
measures <- read.csv(data, header=T)
measures <- measures[order(measures$treat), ]
n        <- nrow(measures)

# Define observables 
Zall         <- cbind(measures$meanprice_juglibros_log_st, measures$FUmeanprice_log_st, measures$tpobc_pop1993_st)
X1           <- cbind(measures$ln_nkids0_st) 
Xnames       <- c("kids")

mscoresall   <- rbind(scores[[1]], scores[[2]])
keep         <- which(complete.cases(cbind(Zall,X1, mscoresall)))
keep1        <- which(complete.cases(cbind(Zall,X1, mscoresall)[which(measures$treat==1),]))
keep2        <- which(complete.cases(cbind(Zall,X1, mscoresall)[which(measures$treat==2),]))
treatnomiss  <- measures$treat[keep]


fs   <- list()
fs[[1]] <- scores[[1]][keep1,]
fs[[2]] <- scores[[2]][keep2,]

# Assemble data of initial conditions factor scores and instruments 
posIC      <- c(2,4,7,8)
IC         <- rbind(fs[[1]][,posIC], fs[[2]][,posIC])



###################################################################
# Step 2: Compute residuals from the investment 
# and production functions 
###################################################################
setwd(dir_outputPF)
load("OUTPUT_pval.R")
load("OUTPUT_IV_pval.R")

X_Invest            <- cbind(rep(1,length(keep)), treatnomiss, IC, X1[keep,], Zall[keep,] )
res_Invest          <- rbind(fs[[1]][,c(5,6)], fs[[2]][,c(5,6)]) - X_Invest %*% invest_true 

X_PF                <- cbind(rep(1,length(keep)), treatnomiss, IC, X1[keep,], rbind(fs[[1]][,c(5,6)], fs[[2]][,c(5,6)]))
res_PF              <- rbind(fs[[1]][,c(1,3)], fs[[2]][,c(1,3)]) - X_PF %*% iv_true

# Compute mean in each village 
res_data  <- cbind(res_Invest,res_PF[,1])
mean_vi   <- aggregate(cbind(res_Invest, res_PF[,1]), list(measures$cod_dane[keep]), mean)

# Demean residual by the village-level mean 
demean_res         <- matrix(0, length(keep), nV)
for (i in 1:length(keep)){
    demean_res[i,]    <- as.numeric(cbind(res_data)[i,] - mean_vi[which(mean_vi==measures$cod_dane[keep][i]),2:(1+nV)])
}


# Number of villages and number of people by village 
code_village = unique(measures$cod_dane[keep])
le         <- rep(0,length(code_village))
for (v in 1:length(code_village))    le[v] <- length(which(measures$cod_dane[keep]==code_village[v])) 


# Compute spatial covariance for each village 
cov_vi     <- list()

for (v in 1:length(code_village)){ 
cov_vi[[v]]   <- matrix(0,nV*le[v],nV*le[v])

  for (p in 1:le[v]){
    for (q in 1:le[v]){
        for (i in 1:nV){
            for (j in 1:nV){
              cov_vi[[v]][((p-1)*nV + i),((q-1)*nV + j)] <- demean_res[which(measures$cod_dane[keep]==code_village[v]),][p,i] *  
                demean_res[which(measures$cod_dane[keep]==code_village[v]),][q,j]
            
            }
        }
    }
  }
}


# Extract the diagonals nV * nV matrices and save them in an array 
avg_within_vi  <- list()

for (v in 1:length(code_village)){ 
  avg_within_vi[[v]]  <- array(0, dim=c(nV, nV,le[v]))

  seqStart      <- seq(1,le[v]*nV, nV)
 
  for (i in 1:le[v]) {
    avg_within_vi[[v]][,,i]  <- cov_vi[[v]][seqStart[i]:(seqStart[i]+nV-1), seqStart[i]:(seqStart[i]+nV-1)]
  }
} 


# Extract the off diagonals 3*3 matrices and save them in an array 
avg_across_vi  <- list()

for (v in 1:length(code_village)){ 
  
  avg_across_vi[[v]]  <- array(0, dim=c(nV, nV,le[v]*le[v] - le[v]))
  
  seqStart      <- seq(1,le[v]*nV, nV)
  vec           <- t(combn( seqStart, 2)) 
  pairs         <- rbind(t(combn( seqStart, 2)), cbind(vec[,2], vec[,1]))
  
  for (j in 1:(le[v]*le[v] - le[v])){ 
    avg_across_vi[[v]][,,j]  <- cov_vi[[v]][(pairs[j,1]:(pairs[j,1]+nV -1)), (pairs[j,2]:(pairs[j,2]++nV -1))]
  }
  
} 


# Take the averages of the covariances within people and across people for each village 
avg_within_v   <- array(0, dim=c(nV, nV,length(code_village)))
avg_across_v   <- array(0, dim=c(nV, nV,length(code_village)))

for (v in 1:length(code_village)){ 
  
  avg_within_v[,,v]   <- apply(avg_within_vi[[v]], c(1, 2),mean)
  avg_across_v[,,v]   <- apply(avg_across_vi[[v]], c(1, 2),mean)
    
} 

  
# Take the averages of the covariances within people and across people across villages 
avg_within   <- apply(avg_within_v, c(1, 2),mean)
avg_across   <- apply(avg_across_v, c(1, 2),mean)


for (v in 1:length(code_village)){ 
  print(solve(avg_across_v[,,v]))
} 

avg_across      <- diag(nV) * 0.04
###################################################################
# Step 3: Save covariance matrices 
###################################################################
setwd(dir_data)
save(avg_within, avg_across, file="covA.R")
write.csv(rbind(avg_within,avg_across), file="covA.csv")