|
13 | 13 | from ..Toolboxes.Max_Little import close_returns as _close_returns_c |
14 | 14 |
|
15 | 15 | def ShannonEntropy(y : ArrayLike, numBins : Union[int, list[int]] = 2, depth : Union[int, list[int]] = 3) -> Union[float, dict, None]: |
| 16 | + """ |
| 17 | + Approximate Shannon entropy of a time series. |
| 18 | +
|
| 19 | + Uses a numBins-bin encoding and depth-symbol sequences. |
| 20 | + Uniform population binning is used, and the implementation uses Michael Small's code |
| 21 | + MS_shannon.c. |
| 22 | +
|
| 23 | + Reference |
| 24 | + --------- |
| 25 | + M. Small, Applied Nonlinear Time Series Analysis: Applications in Physics, |
| 26 | + Physiology, and Finance (book) World Scientific, Nonlinear Science Series A, |
| 27 | + Vol. 52 (2005). |
| 28 | + Michael Small's code is available at http://small.eie.polyu.edu.hk/matlab/ |
| 29 | +
|
| 30 | + In this wrapper function, you can evaluate the code at a given numBins and depth, and |
| 31 | + also across a range of depth and numBins to return statistics on how the obtained |
| 32 | + entropies change. |
| 33 | +
|
| 34 | + Parameters |
| 35 | + ---------- |
| 36 | + y : array-like |
| 37 | + The input time series. |
| 38 | + numBins : int or list of int, optional |
| 39 | + The number of bins to discretize the time series into (i.e., alphabet size). |
| 40 | + depth : int or list of int, optional |
| 41 | + The length of strings to analyze. |
| 42 | +
|
| 43 | + Returns |
| 44 | + ------- |
| 45 | + float or dict or None |
| 46 | + The normalized Shannon entropy for a given setting, or summary statistics |
| 47 | + (max, min, median, mean, std) across a range of numBins or depths. |
| 48 | + """ |
16 | 49 | y = np.asarray(y) |
17 | 50 | binRangeSize = np.size(numBins) |
18 | 51 | depthRangeSize = np.size(depth) |
@@ -52,6 +85,37 @@ def ShannonEntropy(y : ArrayLike, numBins : Union[int, list[int]] = 2, depth : U |
52 | 85 | return out |
53 | 86 |
|
54 | 87 | def DistributionEntropy(y : ArrayLike, histOrKS : str = 'hist', numBins : int = 10, olremp : float = 0) -> float: |
| 88 | + """ |
| 89 | + Distributional entropy. |
| 90 | +
|
| 91 | + Estimates entropy from the distribution of a data vector. The distribution is estimated |
| 92 | + either using a histogram with numBins bins, or as a kernel-smoothed distribution using |
| 93 | + a Gaussian kernel. |
| 94 | +
|
| 95 | + An optional additional parameter can be used to remove a proportion of the most extreme |
| 96 | + positive and negative deviations from the mean as an initial pre-processing step. |
| 97 | +
|
| 98 | + Parameters |
| 99 | + ---------- |
| 100 | + y : array-like |
| 101 | + The input time series. |
| 102 | + histOrKS : str, optional |
| 103 | + 'hist' for histogram, or 'ks' for kernel density estimation. |
| 104 | + numBins : int or float, optional |
| 105 | + For 'hist': an integer, uses a histogram with that many bins. |
| 106 | + For 'ks': a positive real number, the width parameter for kernel density estimation |
| 107 | + (can also be empty for default width parameter, optimum for Gaussian). |
| 108 | + olremp : float, optional |
| 109 | + The proportion of outliers at both extremes to remove (e.g., if olremp = 0.01, |
| 110 | + keeps only the middle 98% of data; 0 keeps all data). This parameter should be |
| 111 | + less than 0.5. If specified, returns the difference in entropy from removing |
| 112 | + the outliers. |
| 113 | +
|
| 114 | + Returns |
| 115 | + ------- |
| 116 | + float |
| 117 | + The estimated entropy, or the difference in entropy if outlier removal is used. |
| 118 | + """ |
55 | 119 | # (1) Remove outliers? |
56 | 120 | y = np.asarray(y) |
57 | 121 | if olremp != 0: |
|
0 commit comments