1- import scipy as sp
21import numpy as np
32import warnings
3+ import scipy as sp
44from probabilit .modeling import Distribution
55
66
@@ -27,129 +27,87 @@ def PERT(minimum, mode, maximum, gamma=4.0):
2727 return Distribution ("beta" , a = a , b = b , loc = loc , scale = scale )
2828
2929
30- def Triangular (p10 , mode , p90 ):
31- """Find optimal scipy parametrization given (p10 , mode, p90 ) and
30+ def Triangular (low , mode , high , low_perc = 0.1 , high_perc = 0.9 ):
31+ """Find optimal scipy parametrization given (low , mode, high ) and
3232 return Distribution("triang", loc=..., scale=..., c=...).
3333
3434 This distribution does *not* work with composite distributions.
3535 The arguments must be numbers, they cannot be other distributions.
3636
3737 Examples
3838 --------
39- >>> Triangular(p10=1, mode=5, p90=9)
40- Distribution("triang", loc=-2.236068061140598, scale=14.472136057963969, c=0.5000000024295282)
39+ >>> Triangular(low=1, mode=5, high=9)
40+ Distribution("triang", loc=-2.2360679774997894, scale=14.472135954999578, c=0.5000000000000001)
41+ >>> Triangular(low=1, mode=5, high=9, low_perc=0.25, high_perc=0.75)
42+ Distribution("triang", loc=-8.656854249492383, scale=27.313708498984766, c=0.5)
4143 """
4244 # A few comments on fitting can be found here:
4345 # https://docs.analytica.com/index.php/Triangular10_50_90
4446
45- if not (p10 < mode < p90 ):
46- raise ValueError (f"Must have { p10 = } < { mode = } < { p90 = } " )
47+ if not (low < mode < high ):
48+ raise ValueError (f"Must have { low = } < { mode = } < { high = } " )
49+ if not ((0 < low_perc <= 1.0 ) and (0 <= high_perc < 1.0 )):
50+ raise ValueError ("Percentiles must be between 0 and 1." )
4751
4852 # Optimize parameters
49- loc , scale , c = _triang_params_from_perc (p10 = p10 , mode = mode , p90 = p90 )
50- return Distribution ("triang" , loc = loc , scale = scale , c = c )
51-
52-
53- def _triang_params_from_perc (p10 , mode , p90 ):
54- """Given (p10, mode, p90), finds (shift, scale, c).
55-
56- Examples
57- --------
58- >>> from scipy.stats import triang
59- >>> import math
60- >>> dist = triang(loc=-5, scale=13, c=0.85)
61- >>> loc, scale, c = _triang_params_from_perc(*_triang_extract(dist))
62- >>> math.isclose(loc, -5, rel_tol=0.001)
63- True
64- >>> math.isclose(scale, 13, rel_tol=0.001)
65- True
66- >>> math.isclose(c, 0.85, rel_tol=0.001)
67- True
68- """
69- assert p10 < mode < p90
70-
71- # Shift and scale inputs before solving optimization problem
72- spread = p90 - p10
73- center = (p90 + p10 ) / 2
74- p10 = (p10 - center ) / spread
75- mode = (mode - center ) / spread
76- p90 = (p90 - center ) / spread
77-
78- # Given (p10, mode, p90) we need to find a scipy parametrization
79- # in terms of (loc, scale, c). This cannot be solved analytically.
80- desired = np .array ([p10 , mode , p90 ])
81-
82- # Initial guess
83- loc_initial = p10
84- scale_initial = np .log (p90 - p10 )
85- c_initial = sp .special .logit ((mode - p10 ) / (p90 - p10 ))
86- x0 = np .array ([loc_initial , scale_initial , c_initial ])
87-
88- # Optimize
89- result = sp .optimize .minimize (
90- _triang_objective , x0 = x0 , args = (desired ,), method = "BFGS"
53+ loc , scale , c = _fit_triangular_distribution (
54+ low = low ,
55+ mode = mode ,
56+ high = high ,
57+ low_perc = low_perc ,
58+ high_perc = high_perc ,
9159 )
92-
93- assert result .fun < 1e-2
94- # Issues can arise. Determining this beforehand
95- # is hard, so we simply try to optimize and see if we get close.
96- if result .fun > 1e-4 :
97- warnings .warn (f"Optimization of triangular params did not converge:\n { result } " )
98-
99- # Extract parameters
100- loc_opt = result .x [0 ]
101- scale_opt = np .exp (result .x [1 ])
102- c_opt = sp .special .expit (result .x [2 ])
103-
104- # Shift and scale problem back
105- loc_opt = loc_opt * spread + center
106- scale_opt = scale_opt * spread
107-
108- return float (loc_opt ), float (scale_opt ), float (c_opt )
60+ return Distribution ("triang" , loc = loc , scale = scale , c = c )
10961
11062
111- def _triang_extract ( triangular ):
112- """Given a triangular distribution, extract (p10, mode, p90) .
63+ def _fit_triangular_distribution ( low , mode , high , low_perc = 0.10 , high_perc = 0.90 ):
64+ """Returns a tuple (loc, scale, c) to be used with scipy .
11365
11466 Examples
11567 --------
116- >>> from scipy.stats import triang
117- >>> dist = triang(loc=-5, scale=13, c=0.6)
118- >>> p10, mode, p90 = _triang_extract(dist)
119- >>> mode
120- 2.8
121- >>> p90
122- 5.4
68+ >>> _fit_triangular_distribution(3, 8, 10, low_perc=0.10, high_perc=0.90)
69+ (-0.207..., 12.53..., 0.65...)
70+ >>> _fit_triangular_distribution(3, 8, 10, low_perc=0.4, high_perc=0.6)
71+ (-27.63..., 65.82..., 0.54...)
72+ >>> _fit_triangular_distribution(3, 8, 10, low_perc=0, high_perc=1.0)
73+ (3.00..., 6.99..., 0.71...)
12374 """
124- p10 , p90 = triangular .ppf ([0.1 , 0.9 ])
125- loc = triangular .kwds .get ("loc" , 0 )
126- scale = triangular .kwds .get ("scale" , 1 )
127- c = triangular .kwds .get ("c" , 0.5 )
128- mode = loc + scale * c
129-
130- return float (p10 ), float (mode ), float (p90 )
131-
132-
133- def _triang_objective (parameters , desired ):
134- """Pass parameters (loc, log(scale), logit(c)) into sp.stats.triang
135- and return the RMSE between actual and desired (p10, mode, p90)."""
136-
137- loc , scale , c = parameters
138- scale = np .exp (scale ) # Scale must be positive
139- c = np .clip (sp .special .expit (c ), 0 , 1 ) # C must be between 0 and 1
140-
141- # Create distribution
142- triangular = sp .stats .triang (loc = loc , scale = scale , c = c )
143-
144- # Extract information
145- p10 , mode , p90 = _triang_extract (triangular )
146- actual = np .array ([p10 , mode , p90 ])
147-
148- if not np .isfinite (actual ).all ():
149- return 1e3
15075
151- # RMSE
152- return np .sqrt (np .sum ((desired - actual ) ** 2 )) / scale
76+ def triangular_cdf (x , a , b , mode ):
77+ """Calculate CDF of triangular distribution at point x"""
78+ if x <= a :
79+ return x * 0
80+ if x >= b :
81+ return x * 0 + 1.0
82+ if x <= mode :
83+ return ((x - a ) ** 2 ) / ((b - a ) * (mode - a ))
84+ else :
85+ return 1 - ((b - x ) ** 2 ) / ((b - a ) * (b - mode ))
86+
87+ def equations (params ):
88+ """System of equations to solve for a and b"""
89+ a , b = params
90+
91+ # Calculate CDFs at the given percentile values
92+ cdf_low = triangular_cdf (low , a , b , mode )
93+ cdf_high = triangular_cdf (high , a , b , mode )
94+
95+ # Return the difference from target percentiles
96+ return (cdf_low - low_perc , cdf_high - high_perc )
97+
98+ # Initial guesses for a and b, the lower and upper bounds for support
99+ a0 = low - abs (mode - low )
100+ b0 = high + abs (high - mode )
101+
102+ # Solve the system of equations
103+ a , b = sp .optimize .fsolve (equations , (a0 , b0 ))
104+ rmse = np .sqrt (np .sum (np .array (equations ([a , b ])) ** 2 ))
105+ if rmse > 1e-6 :
106+ warnings .warn (f"Optimization of Triangular params has { rmse = } " )
107+
108+ # Calculate the relative position of the mode, return (loc, scale, c)
109+ c = (mode - a ) / (b - a )
110+ return float (a ), float (b - a ), float (c )
153111
154112
155113def _pert_to_beta (minimum , mode , maximum , gamma = 4.0 ):
0 commit comments