Ecc_matters/mcmc.py at master · IamMuhammadZeeshan/Ecc_matters · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# TODO: rename "data likelihood" to "sample likelihood"
from __future__ import division, print_function


import numpy


def run_mcmc(
        intensity_fn, expval_fn, data_likelihood_samples,
        log_prior_fn,
        init_state,
        param_names,
        constants=None,
        data_likelihood_weights=None,
        args=None, kwargs=None,
        before_prior_aux_fn=None, after_prior_aux_fn=None,
        out_pos=None, out_log_prob=None,
        nsamples=100,
        rand_state=None,
        debug_log_prob=False,
        nthreads=1, pool=None, runtime_sortingfn=None,
        verbose=False,
        dtype=numpy.float64,
    ):
    """
    Run an ``emcee`` ensemble sampler over the module-level :func:`log_prob`
    and collect the walker positions and log-posterior values of every step.

    The user-supplied callables are not called here directly; they are
    forwarded, together with the data, to :func:`log_prob` through the
    sampler's ``args``.

    :param function intensity_fn:
        Called by :func:`log_prob` as
        ``intensity_fn(samples, params, aux_info, *args, **kwargs)`` once per
        element of ``data_likelihood_samples``.

    :param function expval_fn:
        Called by :func:`log_prob` as
        ``expval_fn(params, aux_info, *args, **kwargs)``; its result is
        subtracted from the log-posterior.

    :param array_like data_likelihood_samples:
        Sequence with one 2-D array of samples per event.  The second
        dimension must be the same for every event.

    :param function log_prior_fn:
        Called by :func:`log_prob` as
        ``log_prior_fn(params, aux_info, *args, **kwargs)``.

    :param array_like init_state:
        Initial walker positions.  Its length is the number of walkers, and
        each entry must have one value per free parameter.

    :param list param_names:
        Names of all parameters, free and constant.  The number of free
        parameters is ``len(param_names) - len(constants)``.

    :param dict constants: (optional)
        Maps parameter names to fixed values.  Defaults to an empty dict.

    :param array_like data_likelihood_weights: (optional)
        One entry per event, iterated in lock-step with
        ``data_likelihood_samples``.  A ``None`` entry means "unweighted".
        If omitted, every event is unweighted.

    :param list args: (optional)
        Extra positional arguments passed to every user-supplied function.

    :param dict kwargs: (optional)
        Extra keyword arguments passed to every user-supplied function.

    :param function before_prior_aux_fn: (optional)
        If given, :func:`log_prob` calls it as
        ``before_prior_aux_fn(params, *args, **kwargs)`` before evaluating
        the prior; the result is passed on as ``aux_info``.

    :param function after_prior_aux_fn: (optional)
        If given, :func:`log_prob` calls it as
        ``after_prior_aux_fn(params, aux_info, *args, **kwargs)`` after a
        finite prior was obtained; the result replaces ``aux_info``.

    :param array_like out_pos: (optional)
        Pre-allocated output of shape ``(nsamples, nwalkers, ndim)``.
        Allocated with ``dtype`` if omitted.

    :param array_like out_log_prob: (optional)
        Pre-allocated output of shape ``(nsamples, nwalkers)``.
        Allocated with ``dtype`` if omitted.

    :param int nsamples: (default 100)
        Number of sampler iterations to run.

    :param numpy.random.RandomState rand_state: (optional)
        Forwarded to ``sampler.sample`` as ``rstate0``.

    :param bool debug_log_prob: (optional)
        If true, skip sampling entirely and return the :func:`log_prob`
        function object instead (after the input checks have run).

    :param int nthreads: (optional)
        Forwarded to ``emcee.EnsembleSampler`` as ``threads``.

    :param multiprocessing.Pool pool: (optional)
        Forwarded to ``emcee.EnsembleSampler`` as ``pool``.

    :param function runtime_sortingfn: (optional)
        Forwarded to ``emcee.EnsembleSampler`` as ``runtime_sortingfn``.

    :param bool verbose: (optional)
        If true, print a progress line to ``sys.stderr`` each time the
        completed percentage grows by at least one point.

    :param type dtype: (optional)
        dtype used for any output array allocated here.

    :return: array_like, shape (n_samples, n_walkers, n_params)
        MCMC chain for each ensemble walker. Element ``[i,j,k]`` is the value of
        the ith sample in the chain, for the jth walker, for the kth free
        parameter.

    :return: array_like, shape (n_samples, n_walkers)
        Values of the (non-normalized) log-posterior function corresponding to
        each step in the MCMC chain for each ensemble walker. Element ``[i,j]``
        is the log-posterior for the ith sample in the chain, for the jth
        walker.
    """
    # NOTE(review): the ``threads``, ``runtime_sortingfn`` and ``rstate0``
    # keywords used below look like the emcee 2.x API -- confirm against the
    # installed emcee version.
    import sys
    import emcee

    # If no args or kwargs provided, set as empty list/dict
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}

    # If no constants provided, set as empty dict
    if constants is None:
        constants = {}

    # Count the number of free parameter dimensions
    ndim = len(param_names) - len(constants)

    # Count the number of walkers
    nwalkers = len(init_state)

    # The samples and weights are iterated over together, so if there are no
    # weights we still need a list of the proper size.
    if data_likelihood_weights is None:
        data_likelihood_weights = [None for _ in data_likelihood_samples]

    # Ensure samples are all 2-D and share the same second dimension.
    ndim_indiv = None
    for samples in data_likelihood_samples:
        _, D = numpy.shape(samples)

        if ndim_indiv is None:
            ndim_indiv = D

        assert ndim_indiv == D, (
            "all data_likelihood_samples must have the same number of columns"
        )

    # Ensure number of dimensions equals the number of dimensions in the initial
    # state.
    assert ndim == len(init_state[0]), (
        "init_state entries must have one value per free parameter"
    )

    # Initialize output arrays if not provided.
    # Otherwise check provided arrays have proper shape.
    if out_pos is None:
        out_pos = numpy.empty((nsamples, nwalkers, ndim), dtype=dtype)
    else:
        assert numpy.shape(out_pos) == (nsamples, nwalkers, ndim), (
            "out_pos must have shape (nsamples, nwalkers, ndim)"
        )
    if out_log_prob is None:
        out_log_prob = numpy.empty((nsamples, nwalkers), dtype=dtype)
    else:
        assert numpy.shape(out_log_prob) == (nsamples, nwalkers), (
            "out_log_prob must have shape (nsamples, nwalkers)"
        )

    # Debugging hook: hand back the posterior function instead of sampling.
    if debug_log_prob:
        return log_prob

    # Positional arguments appended after the walker position on every
    # ``log_prob`` call; the order must match ``log_prob``'s signature.
    sampler_args = (
        param_names, constants,
        intensity_fn, expval_fn, log_prior_fn,
        data_likelihood_samples, data_likelihood_weights,
        before_prior_aux_fn, after_prior_aux_fn,
        args, kwargs,
    )

    sampler = emcee.EnsembleSampler(
        nwalkers, ndim, log_prob,
        args=sampler_args,
        threads=nthreads, pool=pool, runtime_sortingfn=runtime_sortingfn,
    )
    sample_iter = sampler.sample(
        init_state,
        iterations=nsamples, rstate0=rand_state,
    )

    if verbose:
        progress_pct = 0
        def display_progress(p, s):
            print(
                "Progress: {p}%; Samples: {s}".format(p=p, s=s),
                file=sys.stderr,
            )
        display_progress(progress_pct, 0)

    for i, result in enumerate(sample_iter):
        # The first two entries of each yielded result are the walker
        # positions and their log-posterior values.
        out_pos[i] = result[0]
        out_log_prob[i] = result[1]

        if verbose:
            # ``i`` is zero-based, so ``i + 1`` samples are stored by now.
            # Multiplying before dividing avoids results such as
            # 28.999999999999996 that would skip a percent step.
            n_done = i + 1
            new_progress_pct = n_done * 100 / nsamples
            if new_progress_pct >= progress_pct + 1:
                progress_pct = int(new_progress_pct)
                display_progress(progress_pct, n_done)

    return out_pos, out_log_prob


def log_prob(
        params_free,
        param_names, constants,
        intensity_fn, expval_fn, log_prior_fn,
        data_likelihood_samples, data_likelihood_weights,
        before_prior_aux_fn, after_prior_aux_fn,
        args, kwargs,
    ):
    """
    Evaluate the (non-normalized) log-posterior at one walker position.

    The value returned is::

        log_prior + sum_over_events(log(mean(intensity))) - expval

    where, for each event, ``intensity`` is the output of ``intensity_fn`` on
    that event's samples, multiplied by the event's weights when those are not
    ``None``.

    :param array_like params_free:
        Values of the free parameters, in the order they appear in
        ``param_names`` once the constants are skipped.

    :param list param_names:
        Names of all parameters, free and constant.

    :param dict constants:
        Maps parameter names to fixed values.

    :param function intensity_fn:
        Called as ``intensity_fn(samples, params, aux_info, *args, **kwargs)``.

    :param function expval_fn:
        Called as ``expval_fn(params, aux_info, *args, **kwargs)``.

    :param function log_prior_fn:
        Called as ``log_prior_fn(params, aux_info, *args, **kwargs)``.

    :param array_like data_likelihood_samples:
        One collection of samples per event.

    :param array_like data_likelihood_weights:
        One entry per event, paired with ``data_likelihood_samples``; an
        entry of ``None`` leaves that event unweighted.

    :param function before_prior_aux_fn:
        ``None``, or a function called as
        ``before_prior_aux_fn(params, *args, **kwargs)`` whose result becomes
        ``aux_info``.

    :param function after_prior_aux_fn:
        ``None``, or a function called as
        ``after_prior_aux_fn(params, aux_info, *args, **kwargs)`` whose result
        replaces ``aux_info``.  Only called when the prior is finite.

    :param list args:
        Extra positional arguments for every user-supplied function.

    :param dict kwargs:
        Extra keyword arguments for every user-supplied function.

    :return:
        The log-posterior, or ``-numpy.inf`` whenever the prior or the final
        value is not finite.
    """
    import numpy

    # Merge the free values with the constants into one ordered list.
    params = get_params(params_free, constants, param_names)

    aux_info = None
    if before_prior_aux_fn is not None:
        aux_info = before_prior_aux_fn(params, *args, **kwargs)

    log_pi = log_prior_fn(params, aux_info, *args, **kwargs)

    # Outside the prior's support: skip the likelihood evaluation entirely.
    if not numpy.isfinite(log_pi):
        return -numpy.inf

    if after_prior_aux_fn is not None:
        aux_info = after_prior_aux_fn(params, aux_info, *args, **kwargs)

    log_events_contribution = 0.0
    iterables = zip(data_likelihood_samples, data_likelihood_weights)
    for samples, weights in iterables:
        intensity = intensity_fn(samples, params, aux_info, *args, **kwargs)

        if weights is not None:
            # Build a new array rather than using ``*=``: the in-place form
            # would modify whatever object ``intensity_fn`` returned (which
            # may be cached or shared by the caller) and fails for integer
            # intensities combined with float weights.
            intensity = intensity * weights

        log_events_contribution += numpy.log(numpy.mean(intensity))

    mean = expval_fn(params, aux_info, *args, **kwargs)

    log_post = log_pi + log_events_contribution - mean

    # NaN or +/-inf results are all reported as -inf.
    if numpy.isfinite(log_post):
        return log_post

    return -numpy.inf


def get_params(variables, constants, names):
    """
    Merge free variables and fixed constants into one ordered parameter list.

    For each entry of ``names``, in order, the result holds
    ``constants[name]`` if the name is a key of ``constants``, and otherwise
    the next unused element of ``variables``.

    :param array_like variables:
        Values of the free parameters, ordered as their names appear in
        ``names``.

    :param dict constants:
        Maps parameter names to fixed values.

    :param list names:
        Names of all parameters, free and constant.

    :return: list
        One value per entry of ``names``.

    :raises ValueError:
        If ``len(variables) + len(constants) != len(names)``, or if there are
        fewer variables than names needing one (which happens when
        ``constants`` has keys that do not appear in ``names``).
    """
    if len(variables) + len(constants) != len(names):
        raise ValueError(
            "Incorrect number of variables and constants. "
            "Expected {expected}, but got {actual}."
            .format(
                expected=len(names),
                actual=len(variables)+len(constants),
            )
        )

    # The total count can be right while the constants do not line up with
    # the names; in that case more names need a free value than there are
    # variables.  Report it up front instead of failing mid-way with an
    # out-of-range index.
    n_free = sum(1 for name in names if name not in constants)
    if n_free > len(variables):
        unknown = [key for key in constants if key not in names]
        raise ValueError(
            "Constants {unknown} do not appear in the parameter names; "
            "{n_free} names need a variable, but only {n_vars} were given."
            .format(
                unknown=unknown,
                n_free=n_free,
                n_vars=len(variables),
            )
        )

    # Hand out the free values one at a time, in order.
    free_values = iter(variables)

    return [
        constants[name] if name in constants else next(free_values)
        for name in names
    ]