From d64c93b4f135ac7e67ba79f545cc22308bd558a1 Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 09:55:14 +0200 Subject: [PATCH 01/11] generalized triangular --- src/probabilit/distributions.py | 36 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 550a237..64774a9 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -27,7 +27,7 @@ def PERT(minimum, mode, maximum, gamma=4.0): return Distribution("beta", a=a, b=b, loc=loc, scale=scale) -def Triangular(p10, mode, p90): +def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): """Find optimal scipy parametrization given (p10, mode, p90) and return Distribution("triang", loc=..., scale=..., c=...). @@ -36,21 +36,23 @@ def Triangular(p10, mode, p90): Examples -------- - >>> Triangular(p10=1, mode=5, p90=9) + >>> Triangular(low=1, mode=5, high=9) Distribution("triang", loc=-2.236068061140598, scale=14.472136057963969, c=0.5000000024295282) """ # A few comments on fitting can be found here: # https://docs.analytica.com/index.php/Triangular10_50_90 - if not (p10 < mode < p90): - raise ValueError(f"Must have {p10=} < {mode=} < {p90=}") + if not (low < mode < high): + raise ValueError(f"Must have {low=} < {mode=} < {high=}") + if not ((0 <= low_perc <= 1.0) and (0 <= high_perc <= 1.0)): + raise ValueError("Percentiles must be between 0 and 1.") # Optimize parameters - loc, scale, c = _triang_params_from_perc(p10=p10, mode=mode, p90=p90) + loc, scale, c = _triang_params_from_perc(low, mode, high, low_perc, high_perc) return Distribution("triang", loc=loc, scale=scale, c=c) -def _triang_params_from_perc(p10, mode, p90): +def _triang_params_from_perc(low, mode, high, low_perc=0.1, high_perc=0.9): """Given (p10, mode, p90), finds (shift, scale, c). Examples @@ -66,23 +68,23 @@ def _triang_params_from_perc(p10, mode, p90): >>> math.isclose(c, 0.85, rel_tol=0.001) True """ - assert p10 < mode < p90 + assert low < mode < high # Shift and scale inputs before solving optimization problem - spread = p90 - p10 - center = (p90 + p10) / 2 - p10 = (p10 - center) / spread + spread = high - low + center = (high + low) / 2 + low = (low - center) / spread mode = (mode - center) / spread - p90 = (p90 - center) / spread + high = (high - center) / spread # Given (p10, mode, p90) we need to find a scipy parametrization # in terms of (loc, scale, c). This cannot be solved analytically. - desired = np.array([p10, mode, p90]) + desired = np.array([low, mode, high]) # Initial guess - loc_initial = p10 - scale_initial = np.log(p90 - p10) - c_initial = sp.special.logit((mode - p10) / (p90 - p10)) + loc_initial = low + scale_initial = np.log(high - low) + c_initial = sp.special.logit((mode - low) / (high - low)) x0 = np.array([loc_initial, scale_initial, c_initial]) # Optimize @@ -108,7 +110,7 @@ def _triang_params_from_perc(p10, mode, p90): return float(loc_opt), float(scale_opt), float(c_opt) -def _triang_extract(triangular): +def _triang_extract(triangular, low_perc=0.1, high_perc=0.9): """Given a triangular distribution, extract (p10, mode, p90). Examples @@ -121,7 +123,7 @@ def _triang_extract(triangular): >>> p90 5.4 """ - p10, p90 = triangular.ppf([0.1, 0.9]) + p10, p90 = triangular.ppf([low_perc, high_perc]) loc = triangular.kwds.get("loc", 0) scale = triangular.kwds.get("scale", 1) c = triangular.kwds.get("c", 0.5) From b51c0fcb8d5d95fcbeed9d4bce689e3a4e1b0f3a Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 10:03:26 +0200 Subject: [PATCH 02/11] propagate parameters properly. extend test --- src/probabilit/distributions.py | 10 ++++++---- tests/test_distributions.py | 11 +++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 64774a9..1263515 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -38,6 +38,8 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): -------- >>> Triangular(low=1, mode=5, high=9) Distribution("triang", loc=-2.236068061140598, scale=14.472136057963969, c=0.5000000024295282) + >>> Triangular(low=1, mode=5, high=9, low_perc=0.25, high_perc=0.75) + Distribution("triang", loc=-8.656853751016396, scale=27.313707268205516, c=0.5000000023789611) """ # A few comments on fitting can be found here: # https://docs.analytica.com/index.php/Triangular10_50_90 @@ -89,7 +91,7 @@ def _triang_params_from_perc(low, mode, high, low_perc=0.1, high_perc=0.9): # Optimize result = sp.optimize.minimize( - _triang_objective, x0=x0, args=(desired,), method="BFGS" + _triang_objective, x0=x0, args=(desired, low_perc, high_perc), method="BFGS" ) assert result.fun < 1e-2 @@ -132,7 +134,7 @@ def _triang_extract(triangular, low_perc=0.1, high_perc=0.9): return float(p10), float(mode), float(p90) -def _triang_objective(parameters, desired): +def _triang_objective(parameters, desired, low_perc, high_perc): """Pass parameters (loc, log(scale), logit(c)) into sp.stats.triang and return the RMSE between actual and desired (p10, mode, p90).""" @@ -144,8 +146,8 @@ def _triang_objective(parameters, desired): triangular = sp.stats.triang(loc=loc, scale=scale, c=c) # Extract information - p10, mode, p90 = _triang_extract(triangular) - actual = np.array([p10, mode, p90]) + low, mode, high = _triang_extract(triangular, low_perc, high_perc) + actual = np.array([low, mode, high]) if not np.isfinite(actual).all(): return 1e3 diff --git a/tests/test_distributions.py b/tests/test_distributions.py index d42bbfd..d24afc0 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -11,14 +11,17 @@ class TestTriangular: @pytest.mark.parametrize("c", np.linspace(0.5, 0.95, num=7)) @pytest.mark.parametrize("scale", [1, 10, 100, 1000]) - def test_triang_params_from_perc(self, c, scale): + @pytest.mark.parametrize("high_perc", [0.7, 0.8, 0.9]) + def test_triang_params_from_perc(self, c, scale, high_perc): # Test round-trips loc = 0 initial = np.array([loc, scale, c]) dist = triang(loc=loc, scale=scale, c=c) - p10, mode, p90 = _triang_extract(dist) - if (p10 < mode - 0.01) and (p90 > mode + 0.01): - loc, scale, c = _triang_params_from_perc(p10, mode, p90) + p10, mode, high = _triang_extract(dist, high_perc=high_perc) + if (p10 < mode - 0.01) and (high > mode + 0.01): + loc, scale, c = _triang_params_from_perc( + p10, mode, high, high_perc=high_perc + ) final = np.array([loc, scale, c]) np.testing.assert_allclose(final, initial, atol=1e-3) From f294aea11042a6fc4626e9c9c6148e8cbc83853e Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 10:36:36 +0200 Subject: [PATCH 03/11] simplified triangular code - no need to fit mode --- src/probabilit/distributions.py | 134 +++++++++----------------------- tests/test_distributions.py | 41 ++++++---- 2 files changed, 60 insertions(+), 115 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 1263515..5f2ffb4 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -1,6 +1,4 @@ import scipy as sp -import numpy as np -import warnings from probabilit.modeling import Distribution @@ -28,7 +26,7 @@ def PERT(minimum, mode, maximum, gamma=4.0): def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): - """Find optimal scipy parametrization given (p10, mode, p90) and + """Find optimal scipy parametrization given (low, mode, high) and return Distribution("triang", loc=..., scale=..., c=...). This distribution does *not* work with composite distributions. @@ -37,9 +35,9 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): Examples -------- >>> Triangular(low=1, mode=5, high=9) - Distribution("triang", loc=-2.236068061140598, scale=14.472136057963969, c=0.5000000024295282) + Distribution("triang", loc=-2.2360679774997894, scale=14.472135954999578, c=0.5000000000000001) >>> Triangular(low=1, mode=5, high=9, low_perc=0.25, high_perc=0.75) - Distribution("triang", loc=-8.656853751016396, scale=27.313707268205516, c=0.5000000023789611) + Distribution("triang", loc=-8.656854249492383, scale=27.313708498984766, c=0.5) """ # A few comments on fitting can be found here: # https://docs.analytica.com/index.php/Triangular10_50_90 @@ -50,110 +48,48 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): raise ValueError("Percentiles must be between 0 and 1.") # Optimize parameters - loc, scale, c = _triang_params_from_perc(low, mode, high, low_perc, high_perc) - return Distribution("triang", loc=loc, scale=scale, c=c) - - -def _triang_params_from_perc(low, mode, high, low_perc=0.1, high_perc=0.9): - """Given (p10, mode, p90), finds (shift, scale, c). - - Examples - -------- - >>> from scipy.stats import triang - >>> import math - >>> dist = triang(loc=-5, scale=13, c=0.85) - >>> loc, scale, c = _triang_params_from_perc(*_triang_extract(dist)) - >>> math.isclose(loc, -5, rel_tol=0.001) - True - >>> math.isclose(scale, 13, rel_tol=0.001) - True - >>> math.isclose(c, 0.85, rel_tol=0.001) - True - """ - assert low < mode < high - - # Shift and scale inputs before solving optimization problem - spread = high - low - center = (high + low) / 2 - low = (low - center) / spread - mode = (mode - center) / spread - high = (high - center) / spread - - # Given (p10, mode, p90) we need to find a scipy parametrization - # in terms of (loc, scale, c). This cannot be solved analytically. - desired = np.array([low, mode, high]) - - # Initial guess - loc_initial = low - scale_initial = np.log(high - low) - c_initial = sp.special.logit((mode - low) / (high - low)) - x0 = np.array([loc_initial, scale_initial, c_initial]) - - # Optimize - result = sp.optimize.minimize( - _triang_objective, x0=x0, args=(desired, low_perc, high_perc), method="BFGS" + a, b, c = _fit_trigen_distribution( + most_likely_val=mode, + low_val=low, + high_val=high, + lower_percentile=low_perc, + upper_percentile=high_perc, ) + return Distribution("triang", loc=float(a), scale=float(b - a), c=float(c)) - assert result.fun < 1e-2 - # Issues can arise. Determining this beforehand - # is hard, so we simply try to optimize and see if we get close. - if result.fun > 1e-4: - warnings.warn(f"Optimization of triangular params did not converge:\n{result}") - - # Extract parameters - loc_opt = result.x[0] - scale_opt = np.exp(result.x[1]) - c_opt = sp.special.expit(result.x[2]) - - # Shift and scale problem back - loc_opt = loc_opt * spread + center - scale_opt = scale_opt * spread - - return float(loc_opt), float(scale_opt), float(c_opt) - - -def _triang_extract(triangular, low_perc=0.1, high_perc=0.9): - """Given a triangular distribution, extract (p10, mode, p90). - - Examples - -------- - >>> from scipy.stats import triang - >>> dist = triang(loc=-5, scale=13, c=0.6) - >>> p10, mode, p90 = _triang_extract(dist) - >>> mode - 2.8 - >>> p90 - 5.4 - """ - p10, p90 = triangular.ppf([low_perc, high_perc]) - loc = triangular.kwds.get("loc", 0) - scale = triangular.kwds.get("scale", 1) - c = triangular.kwds.get("c", 0.5) - mode = loc + scale * c - return float(p10), float(mode), float(p90) +def _fit_trigen_distribution( + most_likely_val, low_val, high_val, lower_percentile=0.10, upper_percentile=0.90 +): + def trigen_cdf(x, a, b, mode): + """Calculate CDF of Trigen distribution at point x""" + if x <= mode: + return ((x - a) ** 2) / ((b - a) * (mode - a)) + else: + return 1 - ((b - x) ** 2) / ((b - a) * (b - mode)) + def equations(params): + """System of equations to solve for a and b""" + a, b = params -def _triang_objective(parameters, desired, low_perc, high_perc): - """Pass parameters (loc, log(scale), logit(c)) into sp.stats.triang - and return the RMSE between actual and desired (p10, mode, p90).""" + # Calculate CDFs at the given percentile values + cdf_low = trigen_cdf(low_val, a, b, most_likely_val) + cdf_high = trigen_cdf(high_val, a, b, most_likely_val) - loc, scale, c = parameters - scale = np.exp(scale) # Scale must be positive - c = np.clip(sp.special.expit(c), 0, 1) # C must be between 0 and 1 + # Return the difference from target percentiles + return (cdf_low - lower_percentile, cdf_high - upper_percentile) - # Create distribution - triangular = sp.stats.triang(loc=loc, scale=scale, c=c) + # Initial guesses for a and b + a0 = low_val - abs(most_likely_val - low_val) + b0 = high_val + abs(high_val - most_likely_val) - # Extract information - low, mode, high = _triang_extract(triangular, low_perc, high_perc) - actual = np.array([low, mode, high]) + # Solve the system of equations + a_fitted, b_fitted = sp.optimize.fsolve(equations, (a0, b0)) - if not np.isfinite(actual).all(): - return 1e3 + # Calculate the relative position of the mode + c = (most_likely_val - a_fitted) / (b_fitted - a_fitted) - # RMSE - return np.sqrt(np.sum((desired - actual) ** 2)) / scale + return a_fitted, b_fitted, c def _pert_to_beta(minimum, mode, maximum, gamma=4.0): diff --git a/tests/test_distributions.py b/tests/test_distributions.py index d24afc0..311f271 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -1,6 +1,5 @@ from probabilit.distributions import ( - _triang_params_from_perc, - _triang_extract, + _fit_trigen_distribution, _pert_to_beta, ) import pytest @@ -9,21 +8,31 @@ class TestTriangular: - @pytest.mark.parametrize("c", np.linspace(0.5, 0.95, num=7)) - @pytest.mark.parametrize("scale", [1, 10, 100, 1000]) - @pytest.mark.parametrize("high_perc", [0.7, 0.8, 0.9]) - def test_triang_params_from_perc(self, c, scale, high_perc): + @pytest.mark.parametrize("c", [0.2, 0.5, 0.7]) + @pytest.mark.parametrize("loc", [-1, 0, 1]) + @pytest.mark.parametrize("scale", [1, 10, 25]) + def test_triang_params_from_perc(self, c, loc, scale): # Test round-trips - loc = 0 - initial = np.array([loc, scale, c]) - dist = triang(loc=loc, scale=scale, c=c) - p10, mode, high = _triang_extract(dist, high_perc=high_perc) - if (p10 < mode - 0.01) and (high > mode + 0.01): - loc, scale, c = _triang_params_from_perc( - p10, mode, high, high_perc=high_perc - ) - final = np.array([loc, scale, c]) - np.testing.assert_allclose(final, initial, atol=1e-3) + a = loc + b = loc + scale + most_likely_val = loc + c * scale + lower_percentile = 0.1 + upper_percentile = 0.8 + + # Get parameters to optimize toward + distr = triang(loc=loc, scale=scale, c=c) + target_low, target_high = distr.ppf([lower_percentile, upper_percentile]) + + # Found parameters + a_f, b_f, c_f = _fit_trigen_distribution( + most_likely_val=most_likely_val, + low_val=target_low, + high_val=target_high, + lower_percentile=lower_percentile, + upper_percentile=upper_percentile, + ) + + np.testing.assert_allclose([a_f, b_f, c_f], [a, b, c], atol=1e-8) class TestPERT: From ede388c339e0891b4c82365ad37e0a19d7a6d672 Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 10:39:33 +0200 Subject: [PATCH 04/11] simplified variable names --- src/probabilit/distributions.py | 26 ++++++++++++-------------- tests/test_distributions.py | 18 +++++++++--------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 5f2ffb4..69adeab 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -49,18 +49,16 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): # Optimize parameters a, b, c = _fit_trigen_distribution( - most_likely_val=mode, - low_val=low, - high_val=high, - lower_percentile=low_perc, - upper_percentile=high_perc, + mode=mode, + low=low, + high=high, + low_perc=low_perc, + high_perc=high_perc, ) return Distribution("triang", loc=float(a), scale=float(b - a), c=float(c)) -def _fit_trigen_distribution( - most_likely_val, low_val, high_val, lower_percentile=0.10, upper_percentile=0.90 -): +def _fit_trigen_distribution(mode, low, high, low_perc=0.10, high_perc=0.90): def trigen_cdf(x, a, b, mode): """Calculate CDF of Trigen distribution at point x""" if x <= mode: @@ -73,21 +71,21 @@ def equations(params): a, b = params # Calculate CDFs at the given percentile values - cdf_low = trigen_cdf(low_val, a, b, most_likely_val) - cdf_high = trigen_cdf(high_val, a, b, most_likely_val) + cdf_low = trigen_cdf(low, a, b, mode) + cdf_high = trigen_cdf(high, a, b, mode) # Return the difference from target percentiles - return (cdf_low - lower_percentile, cdf_high - upper_percentile) + return (cdf_low - low_perc, cdf_high - high_perc) # Initial guesses for a and b - a0 = low_val - abs(most_likely_val - low_val) - b0 = high_val + abs(high_val - most_likely_val) + a0 = low - abs(mode - low) + b0 = high + abs(high - mode) # Solve the system of equations a_fitted, b_fitted = sp.optimize.fsolve(equations, (a0, b0)) # Calculate the relative position of the mode - c = (most_likely_val - a_fitted) / (b_fitted - a_fitted) + c = (mode - a_fitted) / (b_fitted - a_fitted) return a_fitted, b_fitted, c diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 311f271..8e2648e 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -15,21 +15,21 @@ def test_triang_params_from_perc(self, c, loc, scale): # Test round-trips a = loc b = loc + scale - most_likely_val = loc + c * scale - lower_percentile = 0.1 - upper_percentile = 0.8 + mode = loc + c * scale + low_perc = 0.1 + high_perc = 0.8 # Get parameters to optimize toward distr = triang(loc=loc, scale=scale, c=c) - target_low, target_high = distr.ppf([lower_percentile, upper_percentile]) + target_low, target_high = distr.ppf([low_perc, high_perc]) # Found parameters a_f, b_f, c_f = _fit_trigen_distribution( - most_likely_val=most_likely_val, - low_val=target_low, - high_val=target_high, - lower_percentile=lower_percentile, - upper_percentile=upper_percentile, + mode=mode, + low=target_low, + high=target_high, + low_perc=low_perc, + high_perc=high_perc, ) np.testing.assert_allclose([a_f, b_f, c_f], [a, b, c], atol=1e-8) From 636f56df0073a227555dd4687a0e6bc8557364cc Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 11:50:20 +0200 Subject: [PATCH 05/11] more doctests and comments --- src/probabilit/distributions.py | 34 ++++++++++++++++++++++++--------- tests/test_distributions.py | 12 +++++------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 69adeab..9f1951a 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -1,3 +1,5 @@ +import numpy as np +import warnings import scipy as sp from probabilit.modeling import Distribution @@ -48,17 +50,29 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): raise ValueError("Percentiles must be between 0 and 1.") # Optimize parameters - a, b, c = _fit_trigen_distribution( + loc, scale, c = _fit_trigen_distribution( mode=mode, low=low, high=high, low_perc=low_perc, high_perc=high_perc, ) - return Distribution("triang", loc=float(a), scale=float(b - a), c=float(c)) + return Distribution("triang", loc=loc, scale=scale, c=c) def _fit_trigen_distribution(mode, low, high, low_perc=0.10, high_perc=0.90): + """Returns a tuple (loc, scale, c) to be used with scipy. + + Examples + -------- + >>> _fit_trigen_distribution(8, 3, 10, low_perc=0.10, high_perc=0.90) + (-0.20798609791776668, 12.538002235459674, 0.6546486388959271) + >>> _fit_trigen_distribution(8, 3, 10, low_perc=0.4, high_perc=0.6) + (-27.630133666236873, 65.82946735440106, 0.5412490044073675) + >>> _fit_trigen_distribution(8, 3, 10, low_perc=0, high_perc=1.0) + (3.0000000087355345, 6.999999995484782, 0.7142857134985173) + """ + def trigen_cdf(x, a, b, mode): """Calculate CDF of Trigen distribution at point x""" if x <= mode: @@ -77,17 +91,19 @@ def equations(params): # Return the difference from target percentiles return (cdf_low - low_perc, cdf_high - high_perc) - # Initial guesses for a and b + # Initial guesses for a and b, the lower and upper bounds for support a0 = low - abs(mode - low) b0 = high + abs(high - mode) # Solve the system of equations - a_fitted, b_fitted = sp.optimize.fsolve(equations, (a0, b0)) - - # Calculate the relative position of the mode - c = (mode - a_fitted) / (b_fitted - a_fitted) - - return a_fitted, b_fitted, c + a, b = sp.optimize.fsolve(equations, (a0, b0)) + rmse = np.sqrt(np.sum(np.array(equations([a, b])) ** 2)) + if rmse > 1e-6: + warnings.warn(f"Optimization of Triangular params has {rmse=}") + + # Calculate the relative position of the mode, return (loc, scale, c) + c = (mode - a) / (b - a) + return float(a), float(b - a), float(c) def _pert_to_beta(minimum, mode, maximum, gamma=4.0): diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 8e2648e..05ecc4a 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -8,15 +8,13 @@ class TestTriangular: - @pytest.mark.parametrize("c", [0.2, 0.5, 0.7]) + @pytest.mark.parametrize("c", [0.25, 0.5, 0.7]) @pytest.mark.parametrize("loc", [-1, 0, 1]) @pytest.mark.parametrize("scale", [1, 10, 25]) - def test_triang_params_from_perc(self, c, loc, scale): + @pytest.mark.parametrize("low_perc", [0.05, 0.1, 0.2]) + def test_triang_params_from_perc(self, c, loc, scale, low_perc): # Test round-trips - a = loc - b = loc + scale mode = loc + c * scale - low_perc = 0.1 high_perc = 0.8 # Get parameters to optimize toward @@ -24,7 +22,7 @@ def test_triang_params_from_perc(self, c, loc, scale): target_low, target_high = distr.ppf([low_perc, high_perc]) # Found parameters - a_f, b_f, c_f = _fit_trigen_distribution( + loc_f, scale_f, c_f = _fit_trigen_distribution( mode=mode, low=target_low, high=target_high, @@ -32,7 +30,7 @@ def test_triang_params_from_perc(self, c, loc, scale): high_perc=high_perc, ) - np.testing.assert_allclose([a_f, b_f, c_f], [a, b, c], atol=1e-8) + np.testing.assert_allclose([loc_f, scale_f, c_f], [loc, scale, c], atol=1e-8) class TestPERT: From 5fcbf4b8563691416a16c0fa1d6c5742de523d1d Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 11:52:11 +0200 Subject: [PATCH 06/11] boundary check --- src/probabilit/distributions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 9f1951a..d704ba2 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -70,11 +70,13 @@ def _fit_trigen_distribution(mode, low, high, low_perc=0.10, high_perc=0.90): >>> _fit_trigen_distribution(8, 3, 10, low_perc=0.4, high_perc=0.6) (-27.630133666236873, 65.82946735440106, 0.5412490044073675) >>> _fit_trigen_distribution(8, 3, 10, low_perc=0, high_perc=1.0) - (3.0000000087355345, 6.999999995484782, 0.7142857134985173) + (3.0000000177479746, 7.000000005695321, 0.7142857111691342) """ def trigen_cdf(x, a, b, mode): """Calculate CDF of Trigen distribution at point x""" + if not (a <= x <= b): + return x * 0 if x <= mode: return ((x - a) ** 2) / ((b - a) * (mode - a)) else: From d1c5ec4bd4d172d60a50f7b3cfcda57108265bab Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 11:52:51 +0200 Subject: [PATCH 07/11] limits --- src/probabilit/distributions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index d704ba2..4149307 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -75,7 +75,7 @@ def _fit_trigen_distribution(mode, low, high, low_perc=0.10, high_perc=0.90): def trigen_cdf(x, a, b, mode): """Calculate CDF of Trigen distribution at point x""" - if not (a <= x <= b): + if not (a < x < b): return x * 0 if x <= mode: return ((x - a) ** 2) / ((b - a) * (mode - a)) From 755d80c2cd026ae7e5b1cbdbda1925a3e2540098 Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 11:57:56 +0200 Subject: [PATCH 08/11] better initial guess for fsolve. verified in a test --- src/probabilit/distributions.py | 10 ++++++---- tests/test_distributions.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 4149307..b7249f7 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -70,13 +70,15 @@ def _fit_trigen_distribution(mode, low, high, low_perc=0.10, high_perc=0.90): >>> _fit_trigen_distribution(8, 3, 10, low_perc=0.4, high_perc=0.6) (-27.630133666236873, 65.82946735440106, 0.5412490044073675) >>> _fit_trigen_distribution(8, 3, 10, low_perc=0, high_perc=1.0) - (3.0000000177479746, 7.000000005695321, 0.7142857111691342) + (3.00000001531508, 6.999999869614844, 0.7142857254024537) """ def trigen_cdf(x, a, b, mode): """Calculate CDF of Trigen distribution at point x""" - if not (a < x < b): + if x <= a: return x * 0 + if x >= b: + return x * 0 + 1.0 if x <= mode: return ((x - a) ** 2) / ((b - a) * (mode - a)) else: @@ -94,8 +96,8 @@ def equations(params): return (cdf_low - low_perc, cdf_high - high_perc) # Initial guesses for a and b, the lower and upper bounds for support - a0 = low - abs(mode - low) - b0 = high + abs(high - mode) + a0 = low - abs(high - low) + b0 = high + abs(high - low) # Solve the system of equations a, b = sp.optimize.fsolve(equations, (a0, b0)) diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 05ecc4a..81cbd59 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -8,10 +8,10 @@ class TestTriangular: - @pytest.mark.parametrize("c", [0.25, 0.5, 0.7]) + @pytest.mark.parametrize("c", [0.1, 0.5, 0.7]) @pytest.mark.parametrize("loc", [-1, 0, 1]) @pytest.mark.parametrize("scale", [1, 10, 25]) - @pytest.mark.parametrize("low_perc", [0.05, 0.1, 0.2]) + @pytest.mark.parametrize("low_perc", [0.01, 0.05, 0.1, 0.2]) def test_triang_params_from_perc(self, c, loc, scale, low_perc): # Test round-trips mode = loc + c * scale From f2f5a705a4cc6f8792f81f4bf3a05a8b967e2607 Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 12:17:04 +0200 Subject: [PATCH 09/11] renamings. more tests --- src/probabilit/distributions.py | 27 ++++++++++++++------------- tests/test_distributions.py | 28 +++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index b7249f7..b523648 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -46,13 +46,13 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): if not (low < mode < high): raise ValueError(f"Must have {low=} < {mode=} < {high=}") - if not ((0 <= low_perc <= 1.0) and (0 <= high_perc <= 1.0)): + if not ((0 < low_perc <= 1.0) and (0 <= high_perc < 1.0)): raise ValueError("Percentiles must be between 0 and 1.") # Optimize parameters - loc, scale, c = _fit_trigen_distribution( - mode=mode, + loc, scale, c = _fit_triangular_distribution( low=low, + mode=mode, high=high, low_perc=low_perc, high_perc=high_perc, @@ -60,21 +60,22 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): return Distribution("triang", loc=loc, scale=scale, c=c) -def _fit_trigen_distribution(mode, low, high, low_perc=0.10, high_perc=0.90): +def _fit_triangular_distribution(low, mode, high, low_perc=0.10, high_perc=0.90): """Returns a tuple (loc, scale, c) to be used with scipy. Examples -------- - >>> _fit_trigen_distribution(8, 3, 10, low_perc=0.10, high_perc=0.90) + >>> _fit_triangular_distribution(3, 8, 10, low_perc=0.10, high_perc=0.90) (-0.20798609791776668, 12.538002235459674, 0.6546486388959271) - >>> _fit_trigen_distribution(8, 3, 10, low_perc=0.4, high_perc=0.6) + >>> _fit_triangular_distribution(3, 8, 10, low_perc=0.4, high_perc=0.6) (-27.630133666236873, 65.82946735440106, 0.5412490044073675) - >>> _fit_trigen_distribution(8, 3, 10, low_perc=0, high_perc=1.0) + >>> _fit_triangular_distribution(3, 8, 10, low_perc=0, high_perc=1.0) (3.00000001531508, 6.999999869614844, 0.7142857254024537) + """ - def trigen_cdf(x, a, b, mode): - """Calculate CDF of Trigen distribution at point x""" + def triangular_cdf(x, a, b, mode): + """Calculate CDF of triangular distribution at point x""" if x <= a: return x * 0 if x >= b: @@ -89,15 +90,15 @@ def equations(params): a, b = params # Calculate CDFs at the given percentile values - cdf_low = trigen_cdf(low, a, b, mode) - cdf_high = trigen_cdf(high, a, b, mode) + cdf_low = triangular_cdf(low, a, b, mode) + cdf_high = triangular_cdf(high, a, b, mode) # Return the difference from target percentiles return (cdf_low - low_perc, cdf_high - high_perc) # Initial guesses for a and b, the lower and upper bounds for support - a0 = low - abs(high - low) - b0 = high + abs(high - low) + a0 = low - abs(mode - low) + b0 = high + abs(high - mode) # Solve the system of equations a, b = sp.optimize.fsolve(equations, (a0, b0)) diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 81cbd59..5b39047 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -1,5 +1,5 @@ from probabilit.distributions import ( - _fit_trigen_distribution, + _fit_triangular_distribution, _pert_to_beta, ) import pytest @@ -12,7 +12,7 @@ class TestTriangular: @pytest.mark.parametrize("loc", [-1, 0, 1]) @pytest.mark.parametrize("scale", [1, 10, 25]) @pytest.mark.parametrize("low_perc", [0.01, 0.05, 0.1, 0.2]) - def test_triang_params_from_perc(self, c, loc, scale, low_perc): + def test_triangular_roundstrips(self, c, loc, scale, low_perc): # Test round-trips mode = loc + c * scale high_perc = 0.8 @@ -22,7 +22,7 @@ def test_triang_params_from_perc(self, c, loc, scale, low_perc): target_low, target_high = distr.ppf([low_perc, high_perc]) # Found parameters - loc_f, scale_f, c_f = _fit_trigen_distribution( + loc_f, scale_f, c_f = _fit_triangular_distribution( mode=mode, low=target_low, high=target_high, @@ -32,6 +32,28 @@ def test_triang_params_from_perc(self, c, loc, scale, low_perc): np.testing.assert_allclose([loc_f, scale_f, c_f], [loc, scale, c], atol=1e-8) + @pytest.mark.parametrize("delta", [0.001, 0.01, 0.1, 0.2, 0.3, 0.4]) + def test_triangular_roundstrips_squeeze(self, delta): + loc = 0 + scale = 10 + c = 0.8 + mode = loc + c * scale + + # Get parameters to optimize toward + distr = triang(loc=loc, scale=scale, c=c) + target_low, target_high = distr.ppf([delta, 1 - delta]) + + # Found parameters + loc_f, scale_f, c_f = _fit_triangular_distribution( + mode=mode, + low=target_low, + high=target_high, + low_perc=delta, + high_perc=1 - delta, + ) + + np.testing.assert_allclose([loc_f, scale_f, c_f], [loc, scale, c], atol=1e-8) + class TestPERT: @pytest.mark.parametrize("gamma", [1, 3, 4, 7]) From 6698c06fbf77cc473ed07bf656b2a5e8bef97d2c Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Wed, 6 Aug 2025 12:20:14 +0200 Subject: [PATCH 10/11] doctest --- src/probabilit/distributions.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index b523648..91d6d44 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -66,12 +66,11 @@ def _fit_triangular_distribution(low, mode, high, low_perc=0.10, high_perc=0.90) Examples -------- >>> _fit_triangular_distribution(3, 8, 10, low_perc=0.10, high_perc=0.90) - (-0.20798609791776668, 12.538002235459674, 0.6546486388959271) + (-0.207..., 12.53..., 0.65...) >>> _fit_triangular_distribution(3, 8, 10, low_perc=0.4, high_perc=0.6) - (-27.630133666236873, 65.82946735440106, 0.5412490044073675) + (-27.63..., 65.82..., 0.54...) >>> _fit_triangular_distribution(3, 8, 10, low_perc=0, high_perc=1.0) - (3.00000001531508, 6.999999869614844, 0.7142857254024537) - + (3.00..., 6.99..., 0.71...) """ def triangular_cdf(x, a, b, mode): From 11fb3230996920fc8cba11822a45673630ccb9c9 Mon Sep 17 00:00:00 2001 From: Tommy Odland Date: Fri, 8 Aug 2025 12:19:30 +0200 Subject: [PATCH 11/11] allow lower and higher percentile to be 0 and 1 in Triangular --- src/probabilit/distributions.py | 25 ++++++++++++++++--------- tests/test_modeling.py | 11 ++++++----- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/probabilit/distributions.py b/src/probabilit/distributions.py index 91d6d44..f15341d 100644 --- a/src/probabilit/distributions.py +++ b/src/probabilit/distributions.py @@ -40,23 +40,30 @@ def Triangular(low, mode, high, low_perc=0.1, high_perc=0.9): Distribution("triang", loc=-2.2360679774997894, scale=14.472135954999578, c=0.5000000000000001) >>> Triangular(low=1, mode=5, high=9, low_perc=0.25, high_perc=0.75) Distribution("triang", loc=-8.656854249492383, scale=27.313708498984766, c=0.5) + >>> Triangular(low=1, mode=5, high=9, low_perc=0, high_perc=1) + Distribution("triang", loc=1, scale=8, c=0.5) """ # A few comments on fitting can be found here: # https://docs.analytica.com/index.php/Triangular10_50_90 if not (low < mode < high): raise ValueError(f"Must have {low=} < {mode=} < {high=}") - if not ((0 < low_perc <= 1.0) and (0 <= high_perc < 1.0)): + if not ((0 <= low_perc <= 1.0) and (0 <= high_perc <= 1.0)): raise ValueError("Percentiles must be between 0 and 1.") - # Optimize parameters - loc, scale, c = _fit_triangular_distribution( - low=low, - mode=mode, - high=high, - low_perc=low_perc, - high_perc=high_perc, - ) + # No need to optimize if low and high are boundaries of distribution support + if np.isclose(low_perc, 0.0) and np.isclose(high_perc, 1.0): + loc, scale, c = low, high - low, (mode - low) / (high - low) + + else: + # Optimize parameters + loc, scale, c = _fit_triangular_distribution( + low=low, + mode=mode, + high=high, + low_perc=low_perc, + high_perc=high_perc, + ) return Distribution("triang", loc=loc, scale=scale, c=c) diff --git a/tests/test_modeling.py b/tests/test_modeling.py index 5bd6d37..aabd160 100644 --- a/tests/test_modeling.py +++ b/tests/test_modeling.py @@ -113,10 +113,10 @@ def test_total_person_hours(self): for i in range(num_rivets): total_person_hours += Triangular( - low=3.75, mode=4.25, high=5.5, low_perc=0.00001, high_perc=0.99999 + low=3.75, mode=4.25, high=5.5, low_perc=0, high_perc=1.0 ) - num_samples = 10000 + num_samples = 1000 res_total_person_hours = total_person_hours.sample(num_samples, rng) # The mean and standard deviation of a Triangular(3.75, 4.25, 5.5) are 4.5 and 0.368, @@ -126,10 +126,11 @@ def test_total_person_hours(self): expected_std = 0.368 * np.sqrt(num_rivets) sample_mean = np.mean(res_total_person_hours) - sample_std = np.std(res_total_person_hours) + sample_std = np.std(res_total_person_hours, ddof=1) - assert abs(sample_mean - expected_mean) < 0.3 - assert abs(sample_std - expected_std) < 0.1 + # Within 2% of theoretical values + np.testing.assert_allclose(sample_mean, expected_mean, rtol=0.02) + np.testing.assert_allclose(sample_std, expected_std, rtol=0.02) def test_copying():