LSE-MY457 · elena-pr · Mar 14, 2024 · Apr 20, 2024
diff --git a/.DS_Store b/.DS_Store
diff --git a/seminars/.DS_Store b/seminars/.DS_Store
diff --git a/seminars/seminar4/.DS_Store b/seminars/seminar4/.DS_Store
diff --git a/seminars/seminar4/coding/.Rhistory b/seminars/seminar4/coding/.Rhistory
@@ -0,0 +1,116 @@
+# this chunk contains code that sets global options for the entire .Rmd.
+# we use include=FALSE to suppress it from the top of the document, but it will still appear in the appendix.
+knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE, linewidth=60)
+# you can include your libraries here:
+library(tidyverse)
+# and any other options in R:
+options(scipen=999)
+# nothing in this chunk will be printed in the main document
+# except the output of the code.
+print("hello world")
+# everything in this chunk will be printed in the main document,
+# plus the output of the code.
+print("hello world")
+# everything in this chunk will be printed in the main document,
+# but the code won't run and so the output won't be included.
+print("hello world")
+# here we define alpha, so that we have an object to reference in-line
+alpha <- 12345
+# making a simple scatterplot using the cars data
+plot(cars)
+# making a simple table of the mtcars data
+knitr::kable(mtcars[1:5,], caption = "The first five rows of mtcars")
+# this chunk generates the complete code appendix.
+# eval=FALSE tells R not to re-run (``evaluate'') the code here.
+unlink("OneDrive - London School of Economics/PhD/Classes/MY457/pset_template_cache", recursive = TRUE)
+install.packages("tinytex")
+tinytex::install_tinytex()
+tinytex:::install_prebuilt()
+knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE, linewidth=60)
+# you can include your libraries here:
+library(tidyverse)
+# and any other options in R:
+options(scipen=999)
+library(readr)
+cses5 <- read_csv("OneDrive - London School of Economics/PhD/Papers/Voting Paper/Process/cses5.csv")
+View(cses5)
+head(cses5)
+summary(cses5)
+install.packages("dplyr")
+library(dplyr)
+filtered_data <- cses_data %>%
+filter(E1006_UNALPHA3 == "ITA")
+cses_data <- read.csv("cses5.csv")
+setwd("/Users/elenapro/Library/CloudStorage/OneDrive-LondonSchoolofEconomics/PhD/Classes/MY457/lse-my457.github.io")
+setwd("/Users/elenapro/Library/CloudStorage/OneDrive-LondonSchoolofEconomics/PhD/Classes/MY457/lse-my457.github.io/seminars/seminar4/coding")
+knitr::opts_chunk$set(echo = TRUE)
+library(dplyr)
+library(ggplot2)
+library(AER)
+# SIMULATE DATA
+# PARAMETERS
+N <- 10000
+U <- rnorm(N, mean = 5, sd = 3)
+b0 <- 2
+b1 <- 1.5
+# POTENTIAL OUTCOMES
+y0 <- b0 + b1 * U + rnorm(N)
+y1 <- y0 + mean(y0) + rnorm(N)
+y1[which(y0 < median(y0))] <- y1[which(y0 < median(y0))] / 2
+# CREATE DATAFRAME
+df <- cbind(y0, y1, U) %>% as_tibble()
+# GENERATE TYPES OF COMPLIANCE
+type <- rep(NA, 10000)
+type[which(y1 > median(y1))] <-
+sample(c(rep('Complier', 3500), rep('Always Taker', 1000), rep('Never Taker', 500)))
+type[which(y1 < median(y1))] <-
+sample(c(rep('Complier', 1500), rep('Always Taker', 1500), rep('Never Taker', 2000)))
+df$type <- type
+# CREATE INSTRUMENT
+df$z <- sample(c(rep(0, 5000), rep(1, 5000)))
+# CREATE TREATMENT ASSIGNMENT
+df <- df %>%
+mutate(d = case_when(type == 'Always Taker' ~ 1,
+type == 'Never Taker' ~ 0,
+(type == 'Complier' & z == 0) ~ 0,
+(type == 'Complier' & z == 1) ~ 1))
+# REAL OUTCOMES
+df <- df %>% mutate(y = case_when(d == 0 ~ y0, d == 1 ~ y1))
+# TRUE ATE
+true_ate <- t.test(df$y1, df$y0, paired = TRUE)
+true_ate
+# NAIVE "ATE"
+naive_ate <- lm(y ~ d, data = df)
+true_ate
+summary(naive_ate)
+# 1. Effect of Z on Y
+mean(df$y[df$z==1])-mean(df$y[df$z==0])
+y.on.z <- lm(y ~ z, data = df)
+summary(y.on.z)
+itt_est <- coef(y.on.z)[2]
+###
+# 2. Effect of Z on D
+mean(df$d[df$z==1])-mean(df$d[df$z==0])
+d.on.z <- lm(d ~ z, data = df)
+summary(d.on.z)
+prop_compliers <- coef(d.on.z)[2]
+###
+# 3. WALD ESTIMATE
+itt_est/prop_compliers
+# 2SLS using lm
+df$d_hat <- predict(d.on.z)
+iv_2sls_2 <- lm(y ~ d_hat, data = df)
+summary(iv_2sls_2)
+# 2SLS using ivreg
+iv_2sls <- ivreg(y ~ d | z, data = df)
+summary(iv_2sls)
+df <- df %>% mutate(y_diff = y1 - y0)
+p1 <- ggplot(data = df, aes(x = y_diff, fill = type)) +
+geom_density(alpha = 0.2) + ggtitle("All Units")
+p1
+p2 <- ggplot(data = df[df$d == 1, ], aes(x = y_diff, fill = type)) +
+geom_density(alpha = 0.2) + ggtitle("Treated Units")
+p2
+p3 <- ggplot(data = df[df$d == 0, ], aes(x = y_diff, fill = type)) +
+geom_density(alpha = 0.2) + ggtitle("Control Units")
+p3
diff --git a/seminars/seminar4/coding/Class04-InstrumentalVariables.Rmd b/seminars/seminar4/coding/Class04-InstrumentalVariables.Rmd
@@ -39,6 +39,8 @@ y0 <- b0 + b1 * U + rnorm(N)
 y1 <- y0 + mean(y0) + rnorm(N)
 y1[which(y0 < median(y0))] <- y1[which(y0 < median(y0))] / 2
 
+# y1 adds mean(y0) plus noise to y0, so the baseline treatment effect is mean(y0); y1 is then halved for units with y0 below its median
+
 # CREATE DATAFRAME
 df <- cbind(y0, y1, U) %>% as_tibble()
 
@@ -50,6 +52,8 @@ type[which(y1 < median(y1))] <-
   sample(c(rep('Complier', 1500), rep('Always Taker', 1500), rep('Never Taker', 2000)))
 df$type <- type
 
+# only three categories because defiers are ruled out by the monotonicity assumption
+
 # CREATE INSTRUMENT
 df$z <- sample(c(rep(0, 5000), rep(1, 5000)))
 
@@ -74,7 +78,7 @@ true_ate
 
 In contrast, the naive approach would be if we just regress the observed outcome on the treatment assignment indicator.
 ```{r}
-# NAIVE "ATE"
+# NAIVE "ATE" - naive comparison
 naive_ate <- lm(y ~ d, data = df)
 
 true_ate
@@ -88,15 +92,15 @@ Now, because we do not observe both potential outcomes, we need a different stra
 mean(df$y[df$z==1])-mean(df$y[df$z==0])
 y.on.z <- lm(y ~ z, data = df)
 summary(y.on.z)
-itt_est <- coef(y.on.z)[2]
+itt_est <- coef(y.on.z)[2] # intention-to-treat (ITT) effect: units are encouraged by Z but not forced to take the treatment D
 
 ###
 
 # 2. Effect of Z on D
 mean(df$d[df$z==1])-mean(df$d[df$z==0])
 d.on.z <- lm(d ~ z, data = df)
 summary(d.on.z)
-prop_compliers <- coef(d.on.z)[2]
+prop_compliers <- coef(d.on.z)[2] # proportion of compliers - important for [[Voting Paper]]
 
 ###