Skip to content

Commit 2877f5a

Browse files
authored
Merge pull request #38 from tsalo/doc/example
[DOC] Add more comprehensive estimator example
2 parents 67af81c + 0ce20b1 commit 2877f5a

File tree

6 files changed

+270
-11
lines changed

6 files changed

+270
-11
lines changed
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
2+
# ex: set sts=4 ts=4 sw=4 et:
3+
r"""
4+
.. _meta1:
5+
6+
================================================
7+
Run Estimators on a simulated dataset
8+
================================================
9+
10+
PyMARE implements a range of meta-analytic estimators.
11+
In this example, we build a simulated dataset with a known ground truth and
12+
use it to compare PyMARE's estimators.
13+
14+
.. note::
15+
The variance of the true effect is composed of both between-study variance
16+
(:math:`\tau^{2}`) and within-study variance (:math:`\sigma^{2}`).
17+
Within-study variance is generally taken from sampling variance values from
18+
individual studies (``v``), while between-study variance can be estimated
19+
via a number of methods.
20+
"""
21+
# sphinx_gallery_thumbnail_number = 3
22+
import numpy as np
23+
import matplotlib.pyplot as plt
24+
from scipy import stats
25+
import seaborn as sns
26+
27+
import pymare
28+
29+
30+
# A small function to make things easier later on
31+
def var_to_ci(y, v, n):
32+
"""Convert sampling variance to 95% CI"""
33+
term = 1.96 * np.sqrt(v) / np.sqrt(n)
34+
return y - term, y + term
35+
36+
37+
###############################################################################
38+
# Here we simulate a dataset
39+
# -----------------------------------------------------------------------------
40+
# This is a simple dataset with a one-sample design.
41+
# We are interested in estimating the true effect size from a set of one-sample
42+
# studies.
43+
N_STUDIES = 40
44+
BETWEEN_STUDY_VAR = 400 # population variance
45+
between_study_sd = np.sqrt(BETWEEN_STUDY_VAR)
46+
TRUE_EFFECT = 20
47+
sample_sizes = np.round(np.random.normal(loc=50, scale=20, size=N_STUDIES)).astype(int)
48+
within_study_vars = np.random.normal(loc=400, scale=100, size=N_STUDIES)
49+
study_means = np.random.normal(loc=TRUE_EFFECT, scale=between_study_sd, size=N_STUDIES)
50+
51+
sample_sizes[sample_sizes <= 1] = 2
52+
within_study_vars = np.abs(within_study_vars)
53+
54+
# Convert data types and match PyMARE nomenclature
55+
y = study_means
56+
X = np.ones((N_STUDIES))
57+
v = within_study_vars
58+
n = sample_sizes
59+
sd = np.sqrt(v * n)
60+
z = y / sd
61+
p = stats.norm.sf(abs(z)) * 2
62+
63+
###############################################################################
64+
# Plot variable distributions
65+
# -----------------------------------------------------------------------------
66+
fig, axes = plt.subplots(nrows=5, figsize=(16, 10))
67+
sns.distplot(y, ax=axes[0], bins=20)
68+
axes[0].set_title('y')
69+
sns.distplot(v, ax=axes[1], bins=20)
70+
axes[1].set_title('v')
71+
sns.distplot(n, ax=axes[2], bins=20)
72+
axes[2].set_title('n')
73+
sns.distplot(z, ax=axes[3], bins=20)
74+
axes[3].set_title('z')
75+
sns.distplot(p, ax=axes[4], bins=20)
76+
axes[4].set_title('p')
77+
for i in range(5):
78+
axes[i].set_yticks([])
79+
fig.tight_layout()
80+
81+
###############################################################################
82+
# Plot means and confidence intervals
83+
# -----------------------------------
84+
# Here we can show study-wise mean effect and CIs, along with the true effect
85+
# and CI corresponding to the between-study variance.
86+
fig, ax = plt.subplots(figsize=(8, 16))
87+
study_ticks = np.arange(N_STUDIES)
88+
89+
# Get 95% CI for individual studies
90+
lower_bounds, upper_bounds = var_to_ci(y, v, n)
91+
ax.scatter(y, study_ticks+1)
92+
for study in study_ticks:
93+
ax.plot((lower_bounds[study], upper_bounds[study]), (study+1, study+1), color='blue')
94+
ax.axvline(0, color='gray', alpha=0.2, linestyle='--', label='Zero')
95+
ax.axvline(np.mean(y), color='orange', alpha=0.2, label='Mean of Observed Effects')
96+
97+
# Get 95% CI for true effect
98+
lower_bound, upper_bound = var_to_ci(TRUE_EFFECT, BETWEEN_STUDY_VAR, 1)
99+
ax.scatter((TRUE_EFFECT,), (N_STUDIES+1,), color='green', label='True Effect')
100+
ax.plot((lower_bound, upper_bound), (N_STUDIES+1, N_STUDIES+1),
101+
color='green', linewidth=3, label='Between-Study 95% CI')
102+
ax.set_ylim((0, N_STUDIES+2))
103+
ax.set_xlabel('Mean (95% CI)')
104+
ax.set_ylabel('Study')
105+
ax.legend()
106+
fig.tight_layout()
107+
108+
###############################################################################
109+
# Create a Dataset object containing the data
110+
# --------------------------------------------
111+
dset = pymare.Dataset(y=y, X=None, v=v, n=n, add_intercept=True)
112+
113+
# Here is a dictionary to house results across models
114+
results = {}
115+
116+
###############################################################################
117+
# Fit models
118+
# -----------------------------------------------------------------------------
119+
# When you have ``z`` or ``p``:
120+
#
121+
# - :class:`pymare.estimators.Stouffers`
122+
# - :class:`pymare.estimators.Fishers`
123+
#
124+
# When you have ``y`` and ``v`` and don't want to estimate between-study variance:
125+
#
126+
# - :class:`pymare.estimators.WeightedLeastSquares`
127+
#
128+
# When you have ``y`` and ``v`` and want to estimate between-study variance:
129+
#
130+
# - :class:`pymare.estimators.DerSimonianLaird`
131+
# - :class:`pymare.estimators.Hedges`
132+
# - :class:`pymare.estimators.VarianceBasedLikelihoodEstimator`
133+
#
134+
# When you have ``y`` and ``n`` and want to estimate between-study variance:
135+
#
136+
# - :class:`pymare.estimators.SampleSizeBasedLikelihoodEstimator`
137+
#
138+
# When you have ``y`` and ``v`` and want a hierarchical model:
139+
#
140+
# - :class:`pymare.estimators.StanMetaRegression`
141+
142+
###############################################################################
143+
# First, we have "combination models", which combine p and/or z values
144+
# `````````````````````````````````````````````````````````````````````````````
145+
# The two combination models in PyMARE are Stouffer's and Fisher's Tests.
146+
#
147+
# Notice that these models don't use :class:`pymare.core.Dataset` objects.
148+
stouff = pymare.estimators.StoufferCombinationTest()
149+
stouff.fit(z[:, None])
150+
print('Stouffers')
151+
print('p: {}'.format(stouff.params_["p"]))
152+
print()
153+
154+
fisher = pymare.estimators.FisherCombinationTest()
155+
fisher.fit(z[:, None])
156+
print('Fishers')
157+
print('p: {}'.format(fisher.params_["p"]))
158+
159+
###############################################################################
160+
# Now we have a fixed effects model
161+
# `````````````````````````````````````````````````````````````````````````````
162+
# This estimator does not attempt to estimate between-study variance.
163+
# Instead, it takes ``tau2`` (:math:`\tau^{2}`) as an argument.
164+
wls = pymare.estimators.WeightedLeastSquares()
165+
wls.fit_dataset(dset)
166+
wls_summary = wls.summary()
167+
results['Weighted Least Squares'] = wls_summary.to_df()
168+
print('Weighted Least Squares')
169+
print(wls_summary.to_df().T)
170+
171+
###############################################################################
172+
# Methods that estimate between-study variance
173+
# `````````````````````````````````````````````````````````````````````````````
174+
# The ``DerSimonianLaird``, ``Hedges``, and ``VarianceBasedLikelihoodEstimator``
175+
# estimators all estimate between-study variance from the data, and use ``y``
176+
# and ``v``.
177+
#
178+
# ``DerSimonianLaird`` and ``Hedges`` use relatively simple methods for
179+
# estimating between-study variance, while ``VarianceBasedLikelihoodEstimator``
180+
# can use either maximum-likelihood (ML) or restricted maximum-likelihood (REML)
181+
# to iteratively estimate it.
182+
dsl = pymare.estimators.DerSimonianLaird()
183+
dsl.fit_dataset(dset)
184+
dsl_summary = dsl.summary()
185+
results['DerSimonian-Laird'] = dsl_summary.to_df()
186+
print('DerSimonian-Laird')
187+
print(dsl_summary.to_df().T)
188+
print()
189+
190+
hedge = pymare.estimators.Hedges()
191+
hedge.fit_dataset(dset)
192+
hedge_summary = hedge.summary()
193+
results['Hedges'] = hedge_summary.to_df()
194+
print('Hedges')
195+
print(hedge_summary.to_df().T)
196+
print()
197+
198+
vb_ml = pymare.estimators.VarianceBasedLikelihoodEstimator(method='ML')
199+
vb_ml.fit_dataset(dset)
200+
vb_ml_summary = vb_ml.summary()
201+
results['Variance-Based with ML'] = vb_ml_summary.to_df()
202+
print('Variance-Based with ML')
203+
print(vb_ml_summary.to_df().T)
204+
print()
205+
206+
vb_reml = pymare.estimators.VarianceBasedLikelihoodEstimator(method='REML')
207+
vb_reml.fit_dataset(dset)
208+
vb_reml_summary = vb_reml.summary()
209+
results['Variance-Based with REML'] = vb_reml_summary.to_df()
210+
print('Variance-Based with REML')
211+
print(vb_reml_summary.to_df().T)
212+
print()
213+
214+
# The ``SampleSizeBasedLikelihoodEstimator`` estimates between-study variance
215+
# using ``y`` and ``n``, but assumes within-study variance is homogenous
216+
# across studies.
217+
sb_ml = pymare.estimators.SampleSizeBasedLikelihoodEstimator(method='ML')
218+
sb_ml.fit_dataset(dset)
219+
sb_ml_summary = sb_ml.summary()
220+
results['Sample Size-Based with ML'] = sb_ml_summary.to_df()
221+
print('Sample Size-Based with ML')
222+
print(sb_ml_summary.to_df().T)
223+
print()
224+
225+
sb_reml = pymare.estimators.SampleSizeBasedLikelihoodEstimator(method='REML')
226+
sb_reml.fit_dataset(dset)
227+
sb_reml_summary = sb_reml.summary()
228+
results['Sample Size-Based with REML'] = sb_reml_summary.to_df()
229+
print('Sample Size-Based with REML')
230+
print(sb_reml_summary.to_df().T)
231+
232+
###############################################################################
233+
# What about the Stan estimator?
234+
# `````````````````````````````````````````````````````````````````````````````
235+
# We're going to skip this one here because of how computationally intensive it
236+
# is.
237+
238+
###############################################################################
239+
# Let's check out our results!
240+
# `````````````````````````````````````````````````````````````````````````````
241+
fig, ax = plt.subplots(figsize=(8, 8))
242+
243+
for i, (estimator_name, summary_df) in enumerate(results.items()):
244+
ax.scatter((summary_df.loc[0, 'estimate'],), (i+1,), label=estimator_name)
245+
ax.plot((summary_df.loc[0, 'ci_0.025'], summary_df.loc[0, 'ci_0.975']),
246+
(i+1, i+1),
247+
linewidth=3)
248+
249+
# Get 95% CI for true effect
250+
lower_bound, upper_bound = var_to_ci(TRUE_EFFECT, BETWEEN_STUDY_VAR, 1)
251+
ax.scatter((TRUE_EFFECT,), (i+2,), label='True Effect')
252+
ax.plot((lower_bound, upper_bound), (i+2, i+2),
253+
linewidth=3, label='Between-Study 95% CI')
254+
ax.set_ylim((0, i+3))
255+
ax.set_yticklabels([None] + list(results.keys()) + ['True Effect'])
256+
257+
ax.set_xlabel('Mean (95% CI)')
258+
fig.tight_layout()

examples/02_meta-analysis/plot_run_meta-analysis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
22
# ex: set sts=4 ts=4 sw=4 et:
33
"""
4-
.. _meta1:
4+
.. _meta2:
55
66
========================
77
Running a meta-analysis

pymare/effectsize/base.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
def solve_system(system, known_vars=None):
1919
"""Solve and evaluate a system of SymPy equations given known inputs.
20-
20+
2121
Args:
2222
system ([sympy.core.expr.Expr]): A list of SymPy expressions defining
2323
the system to solve.
@@ -178,7 +178,7 @@ def get(self, stat, error=True):
178178
error (bool): Specifies behavior in the event that the requested
179179
quantity cannot be computed. If True (default), raises an
180180
exception. If False, returns None.
181-
181+
182182
Returns:
183183
A float or ndarray containing the requested parameter values, if
184184
successfully computed.
@@ -239,7 +239,7 @@ def to_dataset(self, measure='RM', **kwargs):
239239
240240
Args:
241241
measure (str): The measure to map to the Dataset's y and v
242-
attributes (where y is the desired measure, and v is its
242+
attributes (where y is the desired measure, and v is its
243243
variance). Valid values include:
244244
* 'RM': Raw mean of the group.
245245
* 'SM': Standardized mean. This is often called Hedges g.
@@ -319,7 +319,7 @@ def to_dataset(self, measure='SMD', **kwargs):
319319
320320
Args:
321321
measure (str): The measure to map to the Dataset's y and v
322-
attributes (where y is the desired measure, and v is its
322+
attributes (where y is the desired measure, and v is its
323323
variance). Valid values include:
324324
* 'RMD': Raw mean difference between groups.
325325
* 'SMD': Standardized mean difference between groups. This

pymare/effectsize/expressions.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,4 @@
9494
"type": 2,
9595
"description": "Variance of SMD"
9696
}
97-
]
97+
]

pymare/estimators/estimators.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -340,10 +340,10 @@ class SampleSizeBasedLikelihoodEstimator(BaseEstimator):
340340
keyword arguments.
341341
342342
References:
343-
Sangnawakij, P., Böhning, D., Niwitpong, S. A., Adams, S., Stanton, M.,
344-
& Holling, H. (2019). Meta-analysis without study-specific variance
345-
information: Heterogeneity case. Statistical Methods in Medical Research,
346-
28(1), 196–210. https://doi.org/10.1177/0962280217718867
343+
Sangnawakij, P., Böhning, D., Niwitpong, S. A., Adams, S., Stanton, M.,
344+
& Holling, H. (2019). Meta-analysis without study-specific variance
345+
information: Heterogeneity case. Statistical Methods in Medical Research,
346+
28(1), 196–210. https://doi.org/10.1177/0962280217718867
347347
"""
348348

349349
def __init__(self, method='ml', **kwargs):

pymare/info.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@
6868
'sphinx_copybutton',
6969
'sphinx_gallery',
7070
'pillow',
71-
'matplotlib'
71+
'matplotlib',
72+
'seaborn'
7273
]
7374

7475
EXTRA_REQUIRES = {

0 commit comments

Comments
 (0)