probabilit/tests/test_modeling.py at 11fb3230996920fc8cba11822a45673630ccb9c9 · equinor/probabilit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
from probabilit.modeling import (
    EmpiricalDistribution,
    Constant,
    Log,
    Exp,
    Distribution,
    Floor,
    Equal,
    All,
    Min,
    Max,
)
from probabilit.distributions import Triangular
import numpy as np


class TestModelingExamples:
    def test_die_problem(self):
        """If we throw 2 die, what is the probability that each one ends up
        with the same number?"""

        die1 = Floor(1 + Distribution("uniform") * 6)
        die2 = Floor(1 + Distribution("uniform") * 6)
        equal = Equal(die1, die2)

        samples = equal.sample(999, random_state=42)

        np.testing.assert_allclose(samples.mean(), 1 / 6, atol=0.001)

    def test_estimating_pi(self):
        """Consider the unit square [0, 1]^2. The area of the square is 1.
        The area of a quarter circle is pi * r^2 / 4 = pi / 4.
        So the fraction (quarter circle area) / (total area) = pi / 4.

        Use this to estimate pi.
        """

        x = Distribution("uniform")
        y = Distribution("uniform")
        inside = x**2 + y**2 < 1
        pi_estimate = 4 * inside

        samples = pi_estimate.sample(9999, random_state=42)
        np.testing.assert_allclose(samples.mean(), np.pi, atol=0.01)

    def test_broken_stick_problem(self):
        """Consider a stick of length 1. Pick two points uniformly at random on
        the stick, and break the stick at those points. What is the probability
        that the three segments obtained in this way form a triangle?

        Of course this is the probability that no one of the short sticks is
        longer than 1/2. This probability turns out to be 1/4.

        https://sites.math.duke.edu/education/webfeatsII/gdrive/Team%20D/project/brokenstick.htm
        https://mathoverflow.net/questions/2014/if-you-break-a-stick-at-two-points-chosen-uniformly-the-probability-the-three-r
        """

        # Cuts along the stick
        cut1 = Distribution("uniform", loc=0, scale=1)
        cut2 = Distribution("uniform", loc=0, scale=1)

        # The lengths
        length1 = Min(cut1, cut2)
        length2 = Max(cut1, cut2) - Min(cut1, cut2)
        length3 = 1 - Max(cut1, cut2)

        # No one of the short sticks is longer than 1/2 <=> all are shorter
        prob = All(length1 < 1 / 2, length2 < 1 / 2, length3 < 1 / 2)

        samples = prob.sample(9999, random_state=42)
        np.testing.assert_allclose(samples.mean(), 1 / 4, atol=0.01)

    def test_mutual_fund_problem(self):
        """Suppose you save 1200 units of money per year and that the yearly
        interest rate has a   distribution `N(1.11, 0.15)`.
        How much money will you have over a 20 year horizon?

        From: https://curvo.eu/backtest/en/market-index/sp-500?currency=eur
        In the last 33 years, the S&P 500 index (in EUR) had a compound annual
        growth rate of 10.83%, a standard deviation of 15.32%, and a Sharpe ratio of 0.66.
        """

        saved_per_year = 1200
        returns = 0
        for year in range(20):
            interest = Distribution("norm", loc=1.11, scale=0.15)
            returns = returns * interest + saved_per_year
        samples = returns.sample(999, random_state=42)

        # Regression test essentially
        np.testing.assert_allclose(samples.mean(), 76583.58738496085)
        np.testing.assert_allclose(samples.std(), 33483.2245611436)

    def test_total_person_hours(self):
        """Based on Example 19.2 from Risk Analysis: A Quantitative Guide, 3rd Edition by David Vose.

        Estimate the number of person-hours requires to rivet 562 plates of a ship's hull.
        The quickest anyone has ever riveted a single plate is 3h 45min, while the worst time recorded is 5h 30min.
        Most likely value is estimated to be 4h 15min.
        What is the total person-hours?

        Naively, we could model the problem as:
        total_person_hours = 562 * Triangular(3.75, 4.25, 5.5),
        but note that the triangular distribution here models the uncertainty of an individual plate,
        but we are using it as if it were the distribution of the average time for 562 plates.

        A straight forward approach that gives the correct answer is to add 562 triangular distributions.
        """

        rng = np.random.default_rng(42)
        num_rivets = 562
        total_person_hours = 0

        for i in range(num_rivets):
            total_person_hours += Triangular(
                low=3.75, mode=4.25, high=5.5, low_perc=0, high_perc=1.0
            )

        num_samples = 1000
        res_total_person_hours = total_person_hours.sample(num_samples, rng)

        # The mean and standard deviation of a Triangular(3.75, 4.25, 5.5) are 4.5 and 0.368,
        # so by the Central Limit Theoreom we have that
        # total_person_hours = Normal(4.5 * 562, 0.368 * sqrt(562)) = Normal(2529, 8.724)
        expected_mean = 4.5 * num_rivets
        expected_std = 0.368 * np.sqrt(num_rivets)

        sample_mean = np.mean(res_total_person_hours)
        sample_std = np.std(res_total_person_hours, ddof=1)

        # Within 2% of theoretical values
        np.testing.assert_allclose(sample_mean, expected_mean, rtol=0.02)
        np.testing.assert_allclose(sample_std, expected_std, rtol=0.02)


def test_copying():
    # Create a graph
    mu = Distribution("norm", loc=0, scale=1)
    a = Distribution("norm", loc=mu, scale=Constant(0.5))

    # Create a copy
    a2 = a.copy()

    # The copy is not the same object
    assert a2 is not a

    # However, the IDs match and they are equal
    assert a2 == a and (a2._id == a._id)

    # The same holds for parents - they are copied
    assert a2.kwargs["loc"] is not a.kwargs["loc"]

    a.sample()
    assert hasattr(a, "samples_")
    assert not hasattr(a2, "samples_")

    # Now create a copy and ensure samples are copied too
    a3 = a.copy()
    assert hasattr(a3, "samples_")
    assert a3.samples_ is not a.samples_


def test_constant_arithmetic():
    # Test that converstion with int works
    two = Constant(2)
    result = two + 2
    np.testing.assert_allclose(result.sample(), 4)

    # Test that subtraction works both ways
    two = Constant(2)
    five = Constant(5)
    result1 = five - two
    result2 = 5 - two
    result3 = five - two
    np.testing.assert_allclose(result1.sample(), result2.sample())
    np.testing.assert_allclose(result1.sample(), result2.sample())
    np.testing.assert_allclose(result1.sample(), result3.sample())
    np.testing.assert_allclose(result1.sample(), 5 - 2)

    # Test that divison works both ways
    two = Constant(2)
    five = Constant(5)
    result1 = five / two
    result2 = 5 / two
    result3 = five / two
    np.testing.assert_allclose(result1.sample(), result2.sample())
    np.testing.assert_allclose(result1.sample(), result2.sample())
    np.testing.assert_allclose(result1.sample(), result3.sample())
    np.testing.assert_allclose(result1.sample(), 5 / 2)

    # Test absolute value and negation
    result = abs(-two)
    np.testing.assert_allclose(result.sample(), 2)

    # Test powers
    result = five**two
    np.testing.assert_allclose(result.sample(), 5**2)


def test_constant_expressions():
    # Test a few longer expressions
    two = Constant(2)
    five = Constant(5)
    result = two + two - five**2 + abs(-five)
    np.testing.assert_allclose(result.sample(), 2 + 2 - 5**2 + abs(-5))

    result = two / five - two**3 + Exp(5)
    np.testing.assert_allclose(result.sample(), 2 / 5 - 2**3 + np.exp(5))

    result = 1 / five - (Log(5) + Exp(Log(10)))
    np.testing.assert_allclose(result.sample(), 1 / 5 - (np.log(5) + 10))


def test_single_expression():
    # A graph with a single node is an edge-case
    samples = Constant(2).sample()
    np.testing.assert_allclose(samples, 2)


def test_constant_idempotent():
    for a in [-1, 0.0, 1.3, 3]:
        assert Constant(Constant(a)).value == Constant(a).value


def test_empirical_distribution():
    # Test that an empirical distribution can be a parameter
    location = EmpiricalDistribution(data=[1, 2, 3, 3, 3, 3])
    result = Distribution("norm", loc=location, scale=1)
    (result**2).sample(99, random_state=42)


if __name__ == "__main__":
    import pytest

    pytest.main(args=[__file__, "-v", "--capture=sys"])