Skip to content

Commit 8f617c4

Browse files
committed
fix spelling mistake for source distribution MS_shannon.c
1 parent 89a4b6b commit 8f617c4

2 files changed

Lines changed: 65 additions & 1 deletion

File tree

pyhctsa/Operations/Entropy.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,39 @@
1313
from ..Toolboxes.Max_Little import close_returns as _close_returns_c
1414

1515
def ShannonEntropy(y : ArrayLike, numBins : Union[int, list[int]] = 2, depth : Union[int, list[int]] = 3) -> Union[float, dict, None]:
16+
"""
17+
Approximate Shannon entropy of a time series.
18+
19+
Uses a numBin-bin encoding and depth-symbol sequences.
20+
Uniform population binning is used, and the implementation uses Michael Small's code
21+
MS_shannon.c.
22+
23+
Reference
24+
---------
25+
M. Small, Applied Nonlinear Time Series Analysis: Applications in Physics,
26+
Physiology, and Finance (book) World Scientific, Nonlinear Science Series A,
27+
Vol. 52 (2005).
28+
Michael Small's code is available at http://small.eie.polyu.edu.hk/matlab/
29+
30+
In this wrapper function, you can evaluate the code at a given n and d, and
31+
also across a range of depth and numBin to return statistics on how the obtained
32+
entropies change.
33+
34+
Parameters
35+
----------
36+
y : array-like
37+
The input time series.
38+
numBins : int or list of int, optional
39+
The number of bins to discretize the time series into (i.e., alphabet size).
40+
depth : int or list of int, optional
41+
The length of strings to analyze.
42+
43+
Returns
44+
-------
45+
float or dict or None
46+
The normalized Shannon entropy for a given setting, or summary statistics
47+
(max, min, median, mean, std) across a range of numBins or depths.
48+
"""
1649
y = np.asarray(y)
1750
binRangeSize = np.size(numBins)
1851
depthRangeSize = np.size(depth)
@@ -52,6 +85,37 @@ def ShannonEntropy(y : ArrayLike, numBins : Union[int, list[int]] = 2, depth : U
5285
return out
5386

5487
def DistributionEntropy(y : ArrayLike, histOrKS : str = 'hist', numBins : int = 10, olremp : float = 0) -> float:
88+
"""
89+
Distributional entropy.
90+
91+
Estimates entropy from the distribution of a data vector. The distribution is estimated
92+
either using a histogram with numBins bins, or as a kernel-smoothed distribution using
93+
a Gaussian kernel.
94+
95+
An optional additional parameter can be used to remove a proportion of the most extreme
96+
positive and negative deviations from the mean as an initial pre-processing step.
97+
98+
Parameters
99+
----------
100+
y : array-like
101+
The input time series.
102+
histOrKS : str, optional
103+
'hist' for histogram, or 'ks' for kernel density estimation.
104+
numBins : int or float, optional
105+
For 'hist': an integer, uses a histogram with that many bins.
106+
For 'ks': a positive real number, the width parameter for kernel density estimation
107+
(can also be empty for default width parameter, optimum for Gaussian).
108+
olremp : float, optional
109+
The proportion of outliers at both extremes to remove (e.g., if olremp = 0.01,
110+
keeps only the middle 98% of data; 0 keeps all data). This parameter should be
111+
less than 0.5. If specified, returns the difference in entropy from removing
112+
the outliers.
113+
114+
Returns
115+
-------
116+
float
117+
The estimated entropy, or the difference in entropy if outlier removal is used.
118+
"""
55119
# (1) Remove outliers?
56120
y = np.asarray(y)
57121
if olremp != 0:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def get_libraries():
6969
shannon_entropy_module = Extension(
7070
"pyhctsa.Toolboxes.Michael_Small.shannon",
7171
sources=[
72-
"pyhctsa/Toolboxes/Michael_Small/MS_Shannon.c"
72+
"pyhctsa/Toolboxes/Michael_Small/MS_shannon.c"
7373
],
7474
include_dirs=["pyhctsa/Toolboxes/Michael_Small", np.get_include()],
7575
extra_compile_args=get_compile_args(),

0 commit comments

Comments
 (0)