-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy path3-resultsFramework.R
More file actions
160 lines (120 loc) · 8.77 KB
/
3-resultsFramework.R
File metadata and controls
160 lines (120 loc) · 8.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
## ======================================================================
## This file loads the results of the meta-analyses (which were generated in 2-analysisFramework.R)
## and computes summaries of them (such as mean error, MSE, coverage, etc.)
## ======================================================================
# To run this file:
# source("3-resultsFramework.R", echo=TRUE)
source("0-start.R")

# Collect all result files produced by 2-analysisFramework.R and stack
# them into one long-format data frame.
analysisFiles <- list.files("analysisPartsRev2", pattern=".*\\.RData", full.names=TRUE)
print(paste0("Collecting results from ", length(analysisFiles), " analysis files."))

# Load each file in turn; every .RData file contains exactly one data
# frame named `res`, which we stash under the file name as list key.
res_list <- list()
for (i in seq_along(analysisFiles)) {
  current_file <- analysisFiles[i]
  print(current_file)
  load(current_file)  # defines `res`
  res_list[[current_file]] <- res
}
res.final <- bind_rows(res_list)
str(res.final)

# persist the combined long-format data set
save(res.final, file="dataFiles/res.final.RData")
#load(file="dataFiles/res.final.RData")
# Show conditions: how many distinct meta-analyses fall into each
# simulation cell (k x delta x qrpEnv x censor x tau)?
tab <- res.final %>%
  group_by(k, delta, qrpEnv, censor, tau) %>%
  summarise(n.MA=length(unique(id)))
print(tab, n=432)
all(tab$n.MA == 1000)

res.final <- droplevels(res.final)

# reshape long format to wide format (one column per term_variable combination)
res.wide <- dcast(res.final, id + condition + k + delta + qrpEnv + censor + tau + method ~ term + variable, value.var="value")
head(res.wide, 16)

# helper for the plot labels below: build a factor whose labels prepend a
# prefix to the level values; by default levels appear in order of occurrence
label_factor <- function(values, prefix, lev=unique(values)) {
  factor(values, levels=lev, labels=paste0(prefix, lev))
}
res.wide$delta.label  <- label_factor(res.wide$delta,  "delta = ")
res.wide$k.label      <- label_factor(res.wide$k,      "k = ", lev=sort(unique(res.wide$k)))
res.wide$qrp.label    <- label_factor(res.wide$qrpEnv, "QRP = ")
res.wide$censor.label <- label_factor(res.wide$censor, "Publication bias = ")
res.wide$tau.label    <- label_factor(res.wide$tau,    "tau = ")
res.wide$estimator_type <- factor(res.wide$estimator_type, levels=c(1, 2, 3, 4), labels=c("WAAP", "WLS", "PET", "PEESE"))

# sanity check: each condition should have 1000 sims
tab <- res.wide %>%
  group_by(k, delta, qrpEnv, censor, tau) %>%
  dplyr::select(id) %>%
  dplyr::summarise(n.MA=length(unique(id)))
print(tab, n=50)
save(res.wide, file="dataFiles/res.wide.RData", compress="gzip")
#load(file="dataFiles/res.wide.RData")
# ---------------------------------------------------------------------
# save a reduced version that applies some filters
# this is legacy code, we do not apply any filters any more. So res.wide.red is simply a copy of res.wide
res.wide.red <- res.wide
# ---------------------------------------------------------------------
# For hypothesis test: Add H0.rejection rule & whether the CI is consistent with zero (i.e., includes zero)
# merge two p-value columns into one (p-curve uses "skewtest_p.value", all other use "b0_p.value")
res.wide.red$p.value <- res.wide.red$b0_p.value
res.wide.red$p.value[res.wide.red$method %in% c("pcurve.evidence", "pcurve.hack", "pcurve.lack")] <- res.wide.red$skewtest_p.value[res.wide.red$method %in% c("pcurve.evidence", "pcurve.hack", "pcurve.lack")]
res.wide.red <- res.wide.red %>% select(-b0_p.value, -skewtest_p.value)
# compute H0 rejection: we have three cases:
# 1. Any H0 rejection (i.e., p < .05): variable 'H0.reject'
# 2. Posified H0 rejection (i.e., p < .05 AND estimate in correct direction). Significant p-values in the wrong direction are treated as "no H0 rejection": variable 'H0.reject.pos' --> this is the main variable for our analyses, and how we think the hypothesis test should be treated in practice
# 3. H0 rejection in wrong direction (i.e., p < .05 AND estimate in wrong direction): variable 'H0.reject.wrongSign'
# The p-curve skewness tests only uses directionally consistent studies for computation, there is no estimate; estimate is set to NA there. Therefore, we treat this only as 'H0.reject.pos'
# p-uniform does a one-sided test by default. The CI is based on profile likelihood and is *two-sided*. The p-value of p-uniform has been doubled in MA-methods/5-p-uniform.R. Therefore, only 'H0.reject.pos' exists for p-uniform.
# Finally, p-uniform has the special case where the estimate is set to zero when the average p value of the statistically significant studies is larger than .025 (in about 10% of all simulations). In this case, the function returns an estimate (i.e., exactly zero), but no p-value and no CI. This case is treated as H0.reject.pos = FALSE.
# Hence, H0.reject.pos and the CI of p-uniform do not give exactly the same result, as in the special case we *do not* reject H0, but have no CI.
res.wide.red$H0.reject <- (res.wide.red$p.value < .05)
# undirected rejection is undefined for the one-sided / estimate-free methods
res.wide.red$H0.reject[res.wide.red$method %in% c("puniform", "pcurve.evidence", "pcurve.hack", "pcurve.lack")] <- NA
# NA estimates (p-curve skewness tests) count as "correct direction" by construction (see note above)
res.wide.red$H0.reject.pos <- (res.wide.red$p.value < .05) & (is.na(res.wide.red$b0_estimate) | res.wide.red$b0_estimate > 0)
res.wide.red$H0.reject.pos[is.na(res.wide.red$p.value)] <- NA # if no p-value is provided, set to NA
# special case: puniform returns code 2 = "set to zero if avg. p-value > .025"
# this is not a H0 rejection, but without providing a p-value
res.wide.red[res.wide.red$method=="puniform" & res.wide.red$b0_comment == 2, "H0.reject.pos"] <- FALSE
res.wide.red$H0.reject.wrongSign <- (res.wide.red$p.value < .05) & (res.wide.red$b0_estimate < 0)
res.wide.red$H0.reject.wrongSign[is.na(res.wide.red$p.value)] <- NA
res.wide.red$H0.reject.wrongSign[res.wide.red$method %in% c("puniform", "pcurve.evidence", "pcurve.hack", "pcurve.lack")] <- NA
# consisZero computation
# consisZero is TRUE when the CI *includes* zero (lower bound below 0 AND upper bound above 0),
# i.e., the estimate is "consistent with zero"
# consisZero.pos is TRUE when the lower CI bound is below zero, i.e., the CI is NOT entirely
# positive; hence !consisZero.pos = CI entirely above zero = significant in the expected direction
res.wide.red$consisZero <- (0 > res.wide.red$b0_conf.low) & (0 < res.wide.red$b0_conf.high)
res.wide.red$consisZero.pos <- (0 > res.wide.red$b0_conf.low)
# sanity check: H0.reject and !consisZero should be identical except for p-curve and p-uniform
table(!res.wide.red$consisZero, res.wide.red$method, useNA="a")
table(res.wide.red$H0.reject, res.wide.red$method, useNA="a")
table(!res.wide.red$consisZero.pos, res.wide.red$method, useNA="a")
table(res.wide.red$H0.reject.pos, res.wide.red$method, useNA="a")
# --> 284 cases are still inconsistent in puniform. This happens when the CI borderline excludes 0 but p is slightly above .05 (such as .05002)
save(res.wide.red, file="dataFiles/res.wide.red.RData", compress="gzip")
#load(file="dataFiles/res.wide.red.RData")
# ---------------------------------------------------------------------
# Compute summary measures across replications

# Truncate negative values at zero; NA values pass through unchanged.
posify <- function(x) {
  replace(x, !is.na(x) & x < 0, 0)
}
# Aggregate across the ~1000 replications of each simulation cell:
# one row per condition x method combination.
summ <- res.wide.red %>% group_by(condition, k, k.label, delta, delta.label, qrpEnv, qrp.label, censor, censor.label, tau, tau.label, method) %>%
dplyr::summarise(
# mean estimate; the ".pos" variants first truncate negative estimates at zero (posify)
meanEst = mean(b0_estimate, na.rm=TRUE),
meanEst.pos = mean(posify(b0_estimate), na.rm=TRUE),
# mean error (bias) and root mean squared error relative to the true effect delta
ME = mean(b0_estimate - delta, na.rm=TRUE),
RMSE = sqrt(mean((b0_estimate - delta)^2, na.rm=TRUE)),
ME.pos = mean(posify(b0_estimate) - delta, na.rm=TRUE),
RMSE.pos = sqrt(mean((posify(b0_estimate) - delta)^2, na.rm=TRUE)),
MAD = mean(abs(b0_estimate - delta), na.rm=TRUE), # mean absolute deviation
# empirical 2.5% / 97.5% quantiles of the estimates across replications
perc2.5 = quantile(b0_estimate, probs=.025, na.rm=TRUE),
perc97.5 = quantile(b0_estimate, probs=.975, na.rm=TRUE),
perc2.5.pos = quantile(posify(b0_estimate), probs=.025, na.rm=TRUE),
perc97.5.pos = quantile(posify(b0_estimate), probs=.975, na.rm=TRUE),
# coverage: share of existing CIs that contain the true delta;
# coverage.pos restricts both numerator and denominator to positive estimates
coverage = sum(delta > b0_conf.low & delta < b0_conf.high, na.rm=TRUE) / sum(!is.na(b0_conf.high)),
coverage.pos = sum(delta > b0_conf.low & delta < b0_conf.high & b0_estimate > 0, na.rm=TRUE) / sum(!is.na(b0_conf.high) & b0_estimate > 0),
n.ci = sum(!is.na(b0_conf.high)), # number of replications that produced a CI
# rates of CIs including zero / reaching below zero (see consisZero definitions above this chain in the file)
consisZero.rate = sum(consisZero, na.rm=TRUE) / sum(!is.na(consisZero)),
consisZero.rate.pos = sum(consisZero.pos, na.rm=TRUE) / sum(!is.na(consisZero.pos)),
# rejection rates among replications where the respective flag is defined (non-NA)
H0.reject.rate = sum(H0.reject, na.rm=TRUE)/sum(!is.na(H0.reject)),
H0.reject.pos.rate = sum(H0.reject.pos, na.rm=TRUE)/sum(!is.na(H0.reject.pos)),
H0.reject.wrongSign.rate = sum(H0.reject.wrongSign, na.rm=TRUE)/sum(!is.na(H0.reject.wrongSign)),
n.p.values = sum(!is.na(H0.reject.pos)), # number of replications with a usable p-value
n.validEstimates = sum(!is.na(b0_estimate), na.rm=TRUE) # number of non-NA point estimates
)
print(summ, n=50)
# summ contains the full summary of the simulations. This object can then be used to build tables, plots, etc.
library(rio)
export(summ, file="dataFiles/summ.csv")
save(summ, file="dataFiles/summ.RData")
# also export into Shiny app
save(summ, file="Shiny/metaExplorer/summ.RData")
#load("dataFiles/summ.RData")