Goal: turn estimates and graphs into camera-ready outputs โ .tex for LaTeX, .docx for Word collaborators, .html for journal supplements, .pdf / .png figures with consistent ggplot2 styling. R's modern table ecosystem (modelsummary, gtsummary, kableExtra, gt, flextable) is unusually flexible; one call โ any format.
- Regression tables โ
modelsummary(preferred),texreg,stargazer,gtsummary::tbl_regression - Summary-statistics tables โ
gtsummary,modelsummary::datasummary* - Coefficient plots โ
modelplot, hand-rolled ggplot - Event-study plots (
fixest::iplot,ggplot2) - Forest plots (subgroup / heterogeneity)
- Marginal-effect plots (
marginaleffects::plot_slopes/plot_predictions) - Binscatter (
binsreg) - RD plots (
rdplot) - CATE heatmap
- Multi-panel combined (
cowplot,patchwork) - Themes, fonts, color palettes
- Quarto / RMarkdown for narrative + code + outputs
library(modelsummary)
# LaTeX output
modelsummary(
list("(1)" = m1, "(2)" = m2, "(3)" = m3,
"(4)" = m4, "(5)" = m5, "(6)" = m6),
stars = c('*' = .1, '**' = .05, '***' = .01),
coef_map = c("training" = "Training",
"age" = "Age",
"edu" = "Education",
"tenure" = "Tenure"),
gof_map = list(
list(raw = "nobs", clean = "N", fmt = 0),
list(raw = "r.squared", clean = "Rยฒ", fmt = 3),
list(raw = "adj.r.squared", clean = "Adj. Rยฒ", fmt = 3),
list(raw = "FE: worker_id", clean = "Worker FE",fmt = 0),
list(raw = "FE: year", clean = "Year FE", fmt = 0)
),
notes = list("Cluster-robust SEs in parentheses, clustered by worker_id."),
output = "tables/table_main.tex"
)
# Word
modelsummary(list("(1)" = m1, "(3)" = m3, "(6)" = m6),
stars = TRUE, output = "tables/table_main.docx")
# HTML
modelsummary(list("(1)" = m1, "(6)" = m6),
stars = TRUE, output = "tables/table_main.html")
# Markdown
modelsummary(list(m1, m6), stars = TRUE,
output = "markdown") # prints to consolelibrary(texreg)
texreg(list(m1, m3, m6),
caption = "Effect of training on log wage",
label = "tab:main",
custom.coef.map = list("training" = "Training",
"age" = "Age", "edu" = "Education"),
stars = c(0.01, 0.05, 0.10),
file = "tables/table_main_texreg.tex")library(stargazer)
stargazer(m1$lm, m3$lm, m6$lm, # convert fixest โ lm if needed
out = "tables/table_main_stargazer.tex",
type = "latex",
star.cutoffs = c(0.10, 0.05, 0.01))library(gtsummary)
m6 %>%
tbl_regression(
label = list(training ~ "Training",
age ~ "Age",
edu ~ "Education")
) %>%
bold_p(t = 0.05) %>%
add_significance_stars() %>%
as_kable_extra(format = "latex", booktabs = TRUE) %>%
kableExtra::save_kable("tables/m6_clinical.tex")library(modelsummary)
# Quick skim
df %>% select(log_wage, age, edu, tenure, training) %>%
datasummary_skim(output = "tables/summary_skim.tex")
# Custom layout
datasummary(
log_wage + age + edu + tenure ~ N + Mean + SD + Min + Median + Max,
data = df,
output = "tables/summary_custom.tex"
)
# Stratified balance table (one call)
datasummary_balance(~ training,
data = df %>% select(training, age, edu, tenure, female),
dinm = TRUE, dinm_statistic = "p.value",
output = "tables/balance.tex")library(modelsummary)
modelplot(list("M1" = m1, "M3" = m3, "M6" = m6),
coef_map = c("training" = "Training")) +
geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5) +
labs(x = "Coefficient on training", y = "",
title = "Effect of training across specifications") +
theme_classic() + theme(legend.title = element_blank())
ggsave("figures/coefplot_main.pdf", width = 6, height = 3.5)
# Multiple coefficients + multiple models
modelplot(list("Raw" = m1, "+ unit FE" = m3, "+ unitรyear + indรyear FE" = m6),
coef_map = c("training" = "Training",
"age" = "Age", "edu" = "Education", "tenure" = "Tenure")) +
geom_vline(xintercept = 0, linetype = "dashed") +
facet_wrap(~ term, scales = "free_x")
ggsave("figures/coefplot_multi.pdf", width = 8, height = 4)library(broom)
coef_df <- imap_dfr(list("M1" = m1, "M3" = m3, "M6" = m6),
function(fit, name) {
tidy(fit, conf.int = TRUE) %>%
filter(term == "training") %>%
mutate(spec = name)
})
ggplot(coef_df, aes(estimate, spec)) +
geom_point(size = 3, color = "navy") +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high),
height = 0.2, color = "navy") +
geom_vline(xintercept = 0, linetype = "dashed") +
labs(x = "Coefficient on training", y = "",
title = "Effect of training") +
theme_classic()
ggsave("figures/coefplot_handroll.pdf", width = 6, height = 3.5)library(fixest)
# Cleanest: fixest::iplot
es <- feols(log_wage ~ i(rel_time, ref = -1) | worker_id + year,
data = df %>% filter(!is.na(first_treat)),
cluster = ~ worker_id)
# Default plot
iplot(es,
xlab = "Years relative to treatment",
ylab = "Coefficient (ATT)",
main = "Event study")
# Save iplot output (uses base graphics)
pdf("figures/event_study.pdf", width = 7, height = 4)
iplot(es,
ref.line = -0.5, pt.join = TRUE,
xlab = "Years relative to treatment",
ylab = "Coefficient (ATT)")
dev.off()
# ggplot version (more flexibility) โ pull coefficients via broom
library(broom); library(ggplot2)
es_df <- broom::tidy(es, conf.int = TRUE) %>%
filter(stringr::str_detect(term, "rel_time::")) %>%
mutate(k = as.integer(stringr::str_extract(term, "-?\\d+"))) %>%
bind_rows(tibble(term = "rel_time::-1",
estimate = 0, std.error = 0,
conf.low = 0, conf.high = 0, k = -1)) %>%
arrange(k)
ggplot(es_df, aes(k, estimate)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = -0.5, linetype = "dashed", color = "red") +
geom_line(color = "navy") +
geom_point(size = 2, color = "navy") +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
width = 0.2, color = "navy") +
labs(x = "Years relative to treatment", y = "Coefficient (ATT)") +
theme_classic()
ggsave("figures/event_study_gg.pdf", width = 7, height = 4)
# After did::att_gt
ggdid(cs) # comes with did package
ggsave("figures/csdid_event.pdf", width = 7, height = 4)library(broom)
groups <- list(
"All" = df,
"Female = 0" = df %>% filter(female == 0),
"Female = 1" = df %>% filter(female == 1),
"Young (<40)" = df %>% filter(age < 40),
"Old (โฅ40)" = df %>% filter(age >= 40),
"Manuf" = df %>% filter(industry == "manuf"),
"Service" = df %>% filter(industry == "service")
)
forest_df <- imap_dfr(groups, function(d, name) {
fit <- feols(log_wage ~ training | worker_id + year,
data = d, cluster = ~ worker_id)
tidy(fit, conf.int = TRUE) %>%
filter(term == "training") %>%
mutate(group = name, N = nobs(fit))
})
ggplot(forest_df,
aes(estimate, fct_rev(factor(group)))) +
geom_point(size = 3, color = "navy") +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high),
height = 0.2, color = "navy") +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_text(aes(label = paste0("N=", N)),
x = max(forest_df$conf.high) * 1.1, hjust = 0, size = 3) +
labs(x = "Coefficient on training", y = "") +
theme_classic() + xlim(NA, max(forest_df$conf.high) * 1.3)
ggsave("figures/forest.pdf", width = 7, height = 4)library(marginaleffects)
# Continuous moderator
m_int <- feols(log_wage ~ training * tenure + age + edu | worker_id + year,
data = df, cluster = ~ worker_id)
plot_slopes(m_int, variables = "training",
condition = list(tenure = seq(0, 20, by = 1))) +
geom_hline(yintercept = 0, linetype = "dashed") +
labs(x = "Tenure (years)", y = "Marginal effect of training",
title = "How training's effect varies with tenure")
ggsave("figures/marginsplot_tenure.pdf", width = 7, height = 4)
# Categorical moderator
plot_slopes(m_int, variables = "training", condition = "female") +
labs(x = "Female", y = "Marginal effect of training")
# Predicted Y across levels
plot_predictions(m_int,
condition = list(training = c(0,1), tenure = seq(0,20,by=2))) +
labs(x = "Tenure", y = "Predicted log wage")library(binsreg)
# Default โ modern choice (Cattaneo et al.)
bs <- binsreg(y = df$log_wage, x = df$tenure,
w = df %>% select(age, edu, female),
nbins = 20, polyreg = 1, ci = c(2, 2))
bs$bins_plot +
labs(x = "Tenure", y = "Residualized log wage",
title = "Wage vs. tenure, residualized on age + edu + female") +
theme_classic()
ggsave("figures/binscatter.pdf", width = 7, height = 4)
# By group
binsreg(y = df$log_wage, x = df$tenure, by = df$training,
w = df %>% select(age, edu),
nbins = 20, polyreg = 1)$bins_plot +
labs(color = "Treatment")
ggsave("figures/binscatter_bygroup.pdf", width = 7, height = 4)library(rdrobust)
# Default
rdplot(y = df$outcome, x = df$running_var, c = 0,
p = 1, binselect = "esmv",
title = "Effect of eligibility on earnings",
y.label = "Earnings", x.label = "Eligibility score (centered)")
# Save
pdf("figures/rd_main.pdf", width = 7, height = 4)
rdplot(y = df$outcome, x = df$running_var, c = 0)
dev.off()
# Density (manipulation) plot
library(rddensity)
rdd <- rddensity(X = df$running_var, c = 0)
rdplotdensity(rdd, X = df$running_var)
ggsave("figures/rd_density.pdf", width = 6, height = 4)library(grf)
# Build a 2-D grid holding other vars at median
grid <- expand_grid(
age = seq(quantile(df$age, .05), quantile(df$age, .95), length.out = 30),
tenure = seq(quantile(df$tenure, .05), quantile(df$tenure, .95), length.out = 30)
) %>%
mutate(edu = median(df$edu),
firm_size = median(df$firm_size))
X_grid <- grid %>% select(age, edu, tenure, firm_size) %>% as.matrix()
grid$tau <- predict(cf, newdata = X_grid)$predictions
ggplot(grid, aes(age, tenure, fill = tau)) +
geom_tile() +
scale_fill_gradient2(low = "darkred", mid = "white", high = "navy",
midpoint = 0, name = "CATE") +
labs(x = "Age", y = "Tenure", title = "Heterogeneous treatment effect") +
theme_classic()
ggsave("figures/cate_heatmap.pdf", width = 7, height = 5)library(cowplot)
library(patchwork)
# cowplot
p_combo <- plot_grid(p_kde, p_ecdf, p_qq, p_corr,
labels = "auto", ncol = 2)
ggsave("figures/combined_distributions.pdf", p_combo, width = 10, height = 8)
# patchwork (more flexible)
(p1 + p2) / (p3 + p4) +
plot_annotation(title = "Distribution diagnostics",
tag_levels = 'A')
ggsave("figures/combined_patchwork.pdf", width = 10, height = 8)Set once at the top of 08_tables_figures.R:
library(ggplot2)
library(showtext) # font embedding
# Global theme
theme_set(
theme_classic(base_size = 11, base_family = "sans") +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold"),
plot.subtitle = element_text(color = "gray30"),
strip.background = element_rect(fill = "gray95", color = NA),
panel.grid.major.y = element_line(color = "gray90", linetype = "dotted")
)
)
# Default colors โ colorblind-safe palette (viridis)
options(ggplot2.discrete.colour = c("#1f77b4","#d62728","#2ca02c","#9467bd","#ff7f0e"),
ggplot2.discrete.fill = c("#1f77b4","#d62728","#2ca02c","#9467bd","#ff7f0e"))
# Font embedding
font_add_google("EB Garamond", "garamond")
showtext_auto()
# Save high-res
ggsave("figures/x.pdf", width = 7, height = 4, device = cairo_pdf)
ggsave("figures/x.png", width = 7, height = 4, dpi = 300)For collaborators or reproducible research, render the entire empirical pipeline to a single PDF / HTML / Word document:
---
title: "Effect of Training on Wages โ Replication"
author: "Bryce Wang"
date: today
format:
pdf:
documentclass: article
fontfamily: ebgaramond
fontsize: 11pt
geometry:
- margin=1in
toc: false
number-sections: true
keep-tex: true
html:
theme: cosmo
toc: true
toc-location: left
execute:
echo: false
warning: false
---#| include: false
library(tidyverse); library(fixest); library(modelsummary)
library(here)
df <- readRDS(here("data","analysis.rds"))
#| label: tbl-main
#| tbl-cap: "Effect of training on log wage"
modelsummary(list("(1)" = m1, "(3)" = m3, "(6)" = m6),
stars = TRUE, output = "kableExtra")
#| label: fig-event
#| fig-cap: "Event study"
#| fig-width: 7
#| fig-height: 4
iplot(es)
Render with quarto render main.qmd โ produces a single self-contained document, tables / figures inline.
project/
โโโ tables/
โ โโโ summary_full.tex
โ โโโ balance.tex
โ โโโ table_main.tex
โ โโโ table_main.docx
โ โโโ subsamples.tex
โ โโโ outcome_ladder.tex
โ โโโ mediation.tex
โ โโโ cate_blp.tex
โโโ figures/
โ โโโ corr_heatmap.pdf
โ โโโ distributions.pdf
โ โโโ trend_did.pdf
โ โโโ panel_coverage.pdf
โ โโโ coefplot_main.pdf
โ โโโ event_study.pdf
โ โโโ csdid_event.pdf
โ โโโ forest.pdf
โ โโโ marginsplot_tenure.pdf
โ โโโ binscatter.pdf
โ โโโ rd_main.pdf
โ โโโ rd_density.pdf
โ โโโ bacon.pdf
โ โโโ honestdid.pdf
โ โโโ ri.pdf
โ โโโ spec_curve.pdf
โ โโโ loo.pdf
โ โโโ cate_tenure.pdf
โ โโโ cate_heatmap.pdf
โ โโโ combined.pdf
โโโ logs/
โ โโโ 04_diagnostics.txt
โโโ main.qmd # renders to PDF / HTML / Word
Everything regenerated by Rscript main.R or quarto render main.qmd. No manual Excel formatting, no PowerPoint screenshots, no LaTeX hand-editing.