Add missing code to ChileSchools

idhx · Feb 25, 2021 · e29b58b · e29b58b
1 parent 1e3175d
commit e29b58b
Show file tree

Hide file tree

Showing 3 changed files with 448 additions and 0 deletions.
diff --git a/ChileSchools/chile_schools.R b/ChileSchools/chile_schools.R
@@ -0,0 +1,202 @@
+#' ---
+#' title: "Regression and Other Stories: ChileSchools"
+#' author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
+#' date: "`r format(Sys.Date())`"
+#' output:
+#'   html_document:
+#'     theme: readable
+#'     toc: true
+#'     toc_depth: 2
+#'     toc_float: true
+#'     code_download: true
+#' ---
+
+#' Code and figures for ChileSchools example. See Chapter 21 in
+#' Regression and Other Stories.
+#'
+#' Data are from
+#' 
+#' - Chay, K. Y., McEwan, P. J., and Urquiola, M. (2005). The central
+#'   role of noise in evaluating interventions that use test scores to
+#'   rank schools. American Economic Review 95, 1237–1258.
+#' 
+#' -------------
+#' 
+
+#+ setup, include=FALSE
+knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
+# switch this to TRUE to save figures in separate files
+savefigs <- FALSE
+
+#' #### Load packages
+library("rprojroot")
+root<-has_file(".ROS-Examples-root")$make_fix_file()
+library("foreign")
+library("arm")
+library("rstanarm")
+library("brms")
+library("ivpack")
+
+#' #### Load data
+#'
+#' The outcomes in these analyses are the *gain scores* between 88 and 92. 
+chile <- read.csv(root("ChileSchools/data","chile.csv"))
+print(head(chile), digits=3)
+
+#' ## Fit models
+fit_1a <- stan_glm(read92 ~ eligible + rule2, data=chile, refresh=0)
+fit_1b <- stan_glm(read92 ~ eligible + rule2, data=chile, subset = abs(rule2)<5, refresh=0)
+
+fit_2b <- stan_glm(read92 ~ eligible + rule2 + eligible:rule2, data=chile, subset = abs(rule2)<5, refresh=0)
+
+fit_3a <- stan_glm(read92 ~ eligible + rule2 + read88 + math88, data=chile, refresh=0)
+fit_3b <- stan_glm(read92 ~ eligible + rule2 + read88 + math88, data=chile, subset = abs(rule2)<5, refresh=0)
+
+chile$z_read88 <- (chile$read88 - mean(chile$read88))/sd(chile$read88)
+chile$z_math88 <- (chile$math88 - mean(chile$math88))/sd(chile$math88)
+fit_5a <- stan_glm(read92 ~ eligible + rule2 + z_read88 + z_math88 + eligible:z_read88, data=chile, refresh=0)
+fit_5b <- stan_glm(read92 ~ eligible + rule2 + z_read88 + z_math88 + eligible:z_read88, data=chile, subset = abs(rule2)<5, refresh=0)
+
+#' ## Plots
+whiteline <- function(a, b, from=-1000, to=1000){
+  curve(a + b*x, from=from, to=to, col="white", lwd=4, add=TRUE)
+  curve(a + b*x, from=from, to=to, lwd=2, add=TRUE)
+}
+
+#+ eval=FALSE, include=FALSE
+if (savefigs) pdf(root("ChileSchools/figs","rd_chile_1a.pdf"), height=3.6, width=5, colormodel="gray")
+#+
+par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
+yy <- chile$read92
+xx <- chile$rule2
+plot(y=yy, x=xx, type="n", 
+     xlab="Assignment variable",
+     ylab="Post-test", bty="l", yaxt="n")
+axis(2, seq(0,100,20))
+points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
+       cex=.7,col="gray60")
+points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
+       cex=.7)
+whiteline(coef(fit_1a)[1], coef(fit_1a)[3], from=0)
+whiteline(coef(fit_1a)[1] + coef(fit_1a)[2], coef(fit_1a)[3], to=0)
+abline(v=0, lwd=4, col="gray")
+mtext("All the data", 3, 1)
+#+ eval=FALSE, include=FALSE
+dev.off()
+
+#+ eval=FALSE, include=FALSE
+if (savefigs) pdf(root("ChileSchools/figs","rd_chile_1b.pdf"), height=3.6, width=5, colormodel="gray")
+#+
+par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
+yy <- chile$read92
+xx <- chile$rule2
+plot(y=yy, x=xx, type="n", 
+     xlab="Assignment variable",
+     ylab="Post-test", bty="l", yaxt="n", xlim=c(-5,5), xaxs="i")
+axis(2, seq(0,100,20))
+points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
+       cex=.7,col="gray60")
+points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
+       cex=.7)
+whiteline(coef(fit_1b)[1], coef(fit_1b)[3], from=0)
+whiteline(coef(fit_1b)[1] + coef(fit_1b)[2], coef(fit_1b)[3], to=0)
+abline(v=0, lwd=4, col="gray")
+mtext("Restricting to data near the cutoff", 3, 1)
+#+ eval=FALSE, include=FALSE
+dev.off()
+
+#+ eval=FALSE, include=FALSE
+if (savefigs) pdf(root("ChileSchools/figs","rd_chile_2b.pdf"), height=3.6, width=5, colormodel="gray")
+#+
+par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
+yy <- chile$read92
+xx <- chile$rule2
+plot(y=yy, x=xx, type="n", 
+     xlab="Assignment variable",
+     ylab="Ppst-test", bty="l", yaxt="n", xlim=c(-5,5), xaxs="i")
+axis(2, seq(0,100,20))
+points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
+       cex=.7,col="gray60")
+points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
+       cex=.7)
+whiteline(coef(fit_2b)[1], coef(fit_2b)[3], from=0)
+whiteline(coef(fit_2b)[1] + coef(fit_2b)[2], coef(fit_2b)[3] + coef(fit_2b)[4], to=0)
+abline(v=0, lwd=4, col="gray")
+mtext("Model with interaction", 3, 1)
+#+ eval=FALSE, include=FALSE
+dev.off()
+
+#+ eval=FALSE, include=FALSE
+if (savefigs) pdf(root("ChileSchools/figs","rd_chile_3b.pdf"), height=3.6, width=5, colormodel="gray")
+#+
+par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
+yy <- chile$read92 - coef(fit_3b)[4] * (chile$read88 - mean(chile$read88)) - coef(fit_3b)[5] * (chile$math88 - mean(chile$math88))
+xx <- chile$rule2
+plot(y=yy, x=xx, type="n", 
+     xlab="Assignment variable",
+     ylab="Adjusted outcome", bty="l", yaxt="n", xlim=c(-5,5), xaxs="i")
+axis(2, seq(-100,100,20))
+points(xx[chile$eligible==0],yy[chile$eligible==0],pch=20,
+       cex=.7,col="gray60")
+points(xx[chile$eligible==1],yy[chile$eligible==1],pch=20,
+       cex=.7)
+whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88), coef(fit_3b)[3], from=0)
+whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88) + coef(fit_3b)[2], coef(fit_3b)[3], to=0)
+abline(v=0, lwd=4, col="gray")
+mtext("Adjusting for pre-test scores", 3, 1)
+#+ eval=FALSE, include=FALSE
+dev.off()
+
+#+ eval=FALSE, include=FALSE
+if (savefigs) pdf(root("ChileSchools/figs","rd_chile_4b.pdf"), height=3.6, width=5, colormodel="gray")
+#+
+par(mar=c(3,3,2,1), mgp=c(1.7, .5, 0), tck=-.01)
+yy <- chile$read92 - coef(fit_3b)[4] * (chile$read88 - mean(chile$read88)) - coef(fit_3b)[5] * (chile$math88 - mean(chile$math88))
+xx <- chile$rule2
+n_bins <- 20
+n <- length(xx)
+halfwidth <- 5
+cutpoints <- c(seq(-halfwidth, halfwidth, length=n_bins+1))
+xx_bin <- rep(NA, n_bins)
+yy_bin <- rep(NA, n_bins)
+for (i in 1:n_bins){
+  keep <- xx > cutpoints[i] & xx <= cutpoints[i+1]
+  xx_bin[i] <- mean(xx[keep])
+  yy_bin[i] <- mean(yy[keep])
+}
+plot(y=yy_bin, x=xx_bin, type="n", 
+     xlab="Assignment variable",
+     ylab="Adjusted outcome", bty="l", yaxt="n", xlim=c(-halfwidth, halfwidth), xaxs="i")
+axis(2, seq(-100,100,1))
+points(xx_bin[xx_bin>0], yy_bin[xx_bin>0],pch=20,
+       cex=2, col="gray60")
+points(xx_bin[xx_bin<0], yy_bin[xx_bin<0],pch=20,
+       cex=2)
+whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88), coef(fit_3b)[3], from=0)
+whiteline(coef(fit_3b)[1] + coef(fit_3b)[4] *mean(chile$read88) + coef(fit_3b)[5] *mean(chile$math88) + coef(fit_3b)[2], coef(fit_3b)[3], to=0)
+abline(v=0, lwd=4, col="gray")
+mtext("Binned averages with same regression lines", 3, 1)
+#+ eval=FALSE, include=FALSE
+dev.off()
+
+#' ## Additional models
+
+#' #### Noncompliance rates
+mean(chile$p90[chile$eligible==0 & abs(chile$rule2) < 5])  # .053
+mean(chile$p90[chile$eligible==1 & abs(chile$rule2) < 5]) # .606
+
+#' #### IV model on restricted dataset
+# with brms as in IV section earlier in the chapter
+set.seed(1234)
+chile$diff_read92 <- chile$read92 - chile$read88
+rd_f1 <- bf(p90 ~ eligible)
+rd_f2 <- bf(diff_read92 ~ p90)
+#+ results='hide'
+IV_brm_rd <- brm(formula=rd_f1 + rd_f2, data = chile[abs(chile$rule2) < 5,])
+#+
+print(IV_brm_rd, digits=3)
+# t.e. is 5.08 with s.e. of .90
+
+#' #### Compare to IV regression by two-stage least squares
+summary(ivreg(formula = diff_read92 ~ p90 | eligible, data=chile, subset = abs(rule2) < 5))
+# est is 5.09 with s.e. of .88