ENH Add beta fragility and gpd risk estimates (#118)
* ENH Add beta_fragility_heuristic and gpd_risk_estimates functions

* Fix formatting according to PEP8/flake8

* Fix PEP8 warning W503 line break before binary operator

* Use numpy.around for consistency between Python 2 and 3

* Fix length of zero lists returned from gpd_risk_estimates

* Clarify variable names, fix thresholds, use proper list lengths

* Fix broken merge
- old tests work
- new tests still failing

* Have tests assume input is aligned
- fixes TypeError: cannot concatenate a non-NDFrame object
- also, switch the functions from SIMPLE_STAT_FUNCS to FACTOR_STAT_FUNCS

* Fix formatting to prevent flake8 errors

* remove merge backup files

* remove one more merge backup file

* Switch to np.zeros and fix comment formatting

* fix ReST formatting for notes headers
jeyoor authored and twiecki committed Sep 30, 2019
1 parent 4cb0508 commit b561c5f
Showing 3 changed files with 373 additions and 4 deletions.
4 changes: 4 additions & 0 deletions empyrical/__init__.py
@@ -29,6 +29,10 @@
beta,
beta_aligned,
cagr,
beta_fragility_heuristic,
beta_fragility_heuristic_aligned,
gpd_risk_estimates,
gpd_risk_estimates_aligned,
calmar_ratio,
capture,
conditional_value_at_risk,
325 changes: 322 additions & 3 deletions empyrical/stats.py
@@ -18,8 +18,10 @@
import math
import pandas as pd
import numpy as np
from scipy import stats
from math import pow
from scipy import stats, optimize
from six import iteritems
from sys import float_info

from .utils import nanmean, nanstd, nanmin, up, down, roll, rolling_window
from .periods import ANNUALIZATION_FACTORS, APPROX_BDAYS_PER_YEAR
@@ -1527,8 +1529,7 @@ def tail_ratio(returns):


def capture(returns, factor_returns, period=DAILY):
"""
Compute capture ratio.
"""Compute capture ratio.
Parameters
----------
@@ -1561,6 +1562,322 @@ def capture(returns, factor_returns, period=DAILY):
annual_return(factor_returns, period=period))


def beta_fragility_heuristic(returns, factor_returns):
"""Estimate fragility to drops in beta.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
Returns
-------
float, np.nan
The beta fragility of the strategy.
Note
----
A negative return value indicates potential losses
could follow volatility in beta.
The magnitude of the negative value indicates the size of
the potential loss.
.. seealso::
`A New Heuristic Measure of Fragility and
Tail Risks: Application to Stress Testing`
https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
An IMF Working Paper describing the heuristic
"""
if len(returns) < 3 or len(factor_returns) < 3:
return np.nan

return beta_fragility_heuristic_aligned(
*_aligned_series(returns, factor_returns))
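
# Usage sketch (illustrative only; the synthetic series, seed, and
# parameter values below are hypothetical, chosen just to show the call):
#
#     import numpy as np
#     import pandas as pd
#     from empyrical import beta_fragility_heuristic
#
#     rng = np.random.RandomState(42)
#     index = pd.date_range('2019-01-01', periods=252, freq='B')
#     strategy = pd.Series(rng.normal(0.0005, 0.010, 252), index=index)
#     benchmark = pd.Series(rng.normal(0.0004, 0.012, 252), index=index)
#     fragility = beta_fragility_heuristic(strategy, benchmark)
#     # a negative value flags potential losses when beta shifts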


def beta_fragility_heuristic_aligned(returns, factor_returns):
"""Estimate fragility to drops in beta
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
Returns
-------
float, np.nan
The beta fragility of the strategy.
Note
----
If they are pd.Series, expects returns and factor_returns have already
been aligned on their labels. If np.ndarray, these arguments should have
the same shape.
.. seealso::
`A New Heuristic Measure of Fragility and
Tail Risks: Application to Stress Testing`
https://www.imf.org/external/pubs/ft/wp/2012/wp12216.pdf
An IMF Working Paper describing the heuristic
"""
if len(returns) < 3 or len(factor_returns) < 3:
return np.nan

# combine returns and factor returns into pairs
returns_series = pd.Series(returns)
factor_returns_series = pd.Series(factor_returns)
pairs = pd.concat([returns_series, factor_returns_series], axis=1)
pairs.columns = ['returns', 'factor_returns']

# exclude any rows where returns are nan
pairs = pairs.dropna()
# sort by beta
pairs = pairs.sort_values(by='factor_returns')

# find the three vectors, using median of 3
start_index = 0
mid_index = int(np.around(len(pairs) / 2, 0))
end_index = len(pairs) - 1

(start_returns, start_factor_returns) = pairs.iloc[start_index]
(mid_returns, mid_factor_returns) = pairs.iloc[mid_index]
(end_returns, end_factor_returns) = pairs.iloc[end_index]

factor_returns_range = (end_factor_returns - start_factor_returns)
start_returns_weight = 0.5
end_returns_weight = 0.5

# find weights for the start and end returns
# using a convex combination
if factor_returns_range != 0:
start_returns_weight = \
(mid_factor_returns - start_factor_returns) / \
factor_returns_range
end_returns_weight = \
(end_factor_returns - mid_factor_returns) / \
factor_returns_range

# calculate fragility heuristic
heuristic = (start_returns_weight*start_returns) + \
(end_returns_weight*end_returns) - mid_returns

return heuristic
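
# Worked example of the heuristic above (illustrative numbers): after
# sorting by factor returns, suppose the extreme and median pairs are
#     (start_returns, start_factor_returns) = (-0.05, -0.04)
#     (mid_returns, mid_factor_returns) = (0.00, 0.00)
#     (end_returns, end_factor_returns) = (0.02, 0.03)
# Then factor_returns_range = 0.03 - (-0.04) = 0.07, so
#     start_returns_weight = (0.00 - (-0.04)) / 0.07 ~= 0.571
#     end_returns_weight = (0.03 - 0.00) / 0.07 ~= 0.429
#     heuristic ~= 0.571 * (-0.05) + 0.429 * 0.02 - 0.00 ~= -0.020
# The negative result signals fragility to moves in the factor.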


def gpd_risk_estimates(returns, var_p=0.01):
"""Estimate VaR and ES using the Generalized Pareto Distribution (GPD)
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
var_p : float
The percentile to use for estimating the VaR and ES
Returns
-------
[threshold, scale_param, shape_param, var_estimate, es_estimate]
: list[float]
threshold - the threshold used to cut off exceedance tail losses
scale_param - a parameter (often denoted by sigma, capturing the
scale, related to variance)
shape_param - a parameter (often denoted by xi, capturing the shape or
type of the distribution)
var_estimate - an estimate for the VaR for the given percentile
es_estimate - an estimate for the ES for the given percentile
Note
----
.. seealso::
`An Application of Extreme Value Theory for
Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`_
A paper describing how to use the Generalized Pareto
Distribution to estimate VaR and ES.
"""
if len(returns) < 3:
result = np.zeros(5)
if isinstance(returns, pd.Series):
result = pd.Series(result)
return result
return gpd_risk_estimates_aligned(returns, var_p)


def gpd_risk_estimates_aligned(returns, var_p=0.01):
"""Estimate VaR and ES using the Generalized Pareto Distribution (GPD)
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
var_p : float
The percentile to use for estimating the VaR and ES
Returns
-------
[threshold, scale_param, shape_param, var_estimate, es_estimate]
: list[float]
threshold - the threshold used to cut off exceedance tail losses
scale_param - a parameter (often denoted by sigma, capturing the
scale, related to variance)
shape_param - a parameter (often denoted by xi, capturing the shape or
type of the distribution)
var_estimate - an estimate for the VaR for the given percentile
es_estimate - an estimate for the ES for the given percentile
Note
----
.. seealso::
`An Application of Extreme Value Theory for
Measuring Risk <https://link.springer.com/article/10.1007/s10614-006-9025-7>`_
A paper describing how to use the Generalized Pareto
Distribution to estimate VaR and ES.
"""
result = np.zeros(5)
if len(returns) >= 3:

DEFAULT_THRESHOLD = 0.2
MINIMUM_THRESHOLD = 1e-9
returns_array = pd.Series(returns).to_numpy()
flipped_returns = -1 * returns_array
losses = flipped_returns[flipped_returns > 0]
threshold = DEFAULT_THRESHOLD
finished = False
scale_param = 0
shape_param = 0
while not finished and threshold > MINIMUM_THRESHOLD:
losses_beyond_threshold = \
losses[losses >= threshold]
param_result = \
gpd_loglikelihood_minimizer_aligned(losses_beyond_threshold)
if (param_result[0] is not False and
param_result[1] is not False):
scale_param = param_result[0]
shape_param = param_result[1]
var_estimate = gpd_var_calculator(threshold, scale_param,
shape_param, var_p,
len(losses),
len(losses_beyond_threshold))
# a positive shape parameter is required for fat tails
# a positive VaR estimate is required for losses of some kind
if (shape_param > 0 and var_estimate > 0):
finished = True
if (not finished):
threshold = threshold / 2
if (finished):
es_estimate = gpd_es_calculator(var_estimate, threshold,
scale_param, shape_param)
result = np.array([threshold, scale_param, shape_param,
var_estimate, es_estimate])
if isinstance(returns, pd.Series):
result = pd.Series(result)
return result
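
# Usage sketch (illustrative only; the synthetic returns and seed are
# hypothetical, and the heavy-tailed t draws keep the fitted shape
# parameter positive so the loop above can finish):
#
#     import numpy as np
#     import pandas as pd
#     from empyrical import gpd_risk_estimates
#
#     rng = np.random.RandomState(7)
#     returns = pd.Series(0.01 * rng.standard_t(df=3, size=500))
#     threshold, scale, shape, var_est, es_est = gpd_risk_estimates(
#         returns, var_p=0.01)
#     # var_est and es_est estimate the 1%-tail VaR and expected shortfall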


def gpd_es_calculator(var_estimate, threshold, scale_param,
shape_param):
result = 0
if ((1 - shape_param) != 0):
# this formula is from Gilli and Kellezi pg. 8
var_ratio = (var_estimate/(1 - shape_param))
param_ratio = ((scale_param - (shape_param * threshold)) /
(1 - shape_param))
result = var_ratio + param_ratio
return result


def gpd_var_calculator(threshold, scale_param, shape_param,
probability, total_n, exceedance_n):
result = 0
if (exceedance_n > 0 and shape_param > 0):
# this formula is from Gilli and Kellezi pg. 12
param_ratio = scale_param / shape_param
prob_ratio = (total_n/exceedance_n) * probability
result = threshold + (param_ratio *
(pow(prob_ratio, -shape_param) - 1))
return result
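
# The two calculators above evaluate the Gilli and Kellezi formulas
#     VaR_p = u + (sigma / xi) * (((n / N_u) * p) ** (-xi) - 1)
#     ES_p = VaR_p / (1 - xi) + (sigma - xi * u) / (1 - xi)
# where u is the threshold, sigma the scale, xi the shape, n the total
# number of losses, and N_u the number of exceedances beyond u. As an
# illustrative check with hypothetical values u = 0.025, sigma = 0.01,
# xi = 0.25, n = 500, N_u = 25 and p = 0.01:
#     prob_ratio = (500 / 25) * 0.01 = 0.2
#     VaR ~= 0.025 + (0.01 / 0.25) * (0.2 ** -0.25 - 1) ~= 0.0448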


def gpd_loglikelihood_minimizer_aligned(price_data):
result = [False, False]
DEFAULT_SCALE_PARAM = 1
DEFAULT_SHAPE_PARAM = 1
if (len(price_data) > 0):
gpd_loglikelihood_lambda = \
gpd_loglikelihood_factory(price_data)
optimization_results = \
optimize.minimize(gpd_loglikelihood_lambda,
[DEFAULT_SCALE_PARAM,
DEFAULT_SHAPE_PARAM],
method='Nelder-Mead')
if optimization_results.success:
resulting_params = optimization_results.x
if len(resulting_params) == 2:
result[0] = resulting_params[0]
result[1] = resulting_params[1]
return result
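
# Fitting sketch (illustrative only; the true parameters below are
# hypothetical): recovering scale and shape from synthetic exceedances
# drawn with scipy's genpareto:
#
#     from scipy.stats import genpareto
#
#     exceedances = genpareto.rvs(c=0.25, scale=0.01, size=200,
#                                 random_state=0)
#     scale_est, shape_est = \
#         gpd_loglikelihood_minimizer_aligned(exceedances)
#     # the estimates should land near scale=0.01 and shape=0.25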


def gpd_loglikelihood_factory(price_data):
return lambda params: gpd_loglikelihood(params, price_data)


def gpd_loglikelihood(params, price_data):
if (params[1] != 0):
return -gpd_loglikelihood_scale_and_shape(params[0],
params[1],
price_data)
else:
return -gpd_loglikelihood_scale_only(params[0], price_data)


def gpd_loglikelihood_scale_and_shape_factory(price_data):
# minimizing a function of two variables requires a list of params
# we are expecting the lambda below to be called as follows:
# parameters = [scale, shape]
# the final outer negative is added because scipy only minimizes
return lambda params: \
-gpd_loglikelihood_scale_and_shape(params[0],
params[1],
price_data)


def gpd_loglikelihood_scale_and_shape(scale, shape, price_data):
n = len(price_data)
result = -1 * float_info.max
if (scale != 0):
param_factor = shape / scale
if (shape != 0 and param_factor >= 0 and scale >= 0):
result = ((-n * np.log(scale)) -
(((1 / shape) + 1) *
(np.log((shape / scale * price_data) + 1)).sum()))
return result
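
# For exceedances x_i, the function above evaluates the GPD log-likelihood
#     L(sigma, xi) = -n * log(sigma)
#                    - (1 / xi + 1) * sum(log(1 + xi * x_i / sigma))
# The factories negate it so that scipy.optimize.minimize, which only
# minimizes, effectively maximizes the likelihood.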


def gpd_loglikelihood_scale_only_factory(price_data):
# the negative is added because scipy only minimizes
return lambda scale: \
-gpd_loglikelihood_scale_only(scale, price_data)


def gpd_loglikelihood_scale_only(scale, price_data):
n = len(price_data)
data_sum = price_data.sum()
result = -1 * float_info.max
if (scale >= 0):
result = ((-n*np.log(scale)) - (data_sum/scale))
return result
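
# In the xi -> 0 limit the GPD reduces to the exponential distribution,
# so the scale-only log-likelihood above simplifies to
#     L(sigma) = -n * log(sigma) - sum(x_i) / sigma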


def up_capture(returns, factor_returns, **kwargs):
"""
Compute the capture ratio for periods when the benchmark return is positive
@@ -1840,6 +2157,8 @@ def conditional_value_at_risk(returns, cutoff=0.05):
excess_sharpe,
alpha,
beta,
beta_fragility_heuristic,
gpd_risk_estimates,
capture,
up_capture,
down_capture
