|
| 1 | +import numpy as np |
| 2 | +from numpy.typing import ArrayLike |
| 3 | +from typing import Dict, Union |
| 4 | +from scipy import stats |
| 5 | +from loguru import logger |
| 6 | + |
| 7 | + |
def Withinp(x : ArrayLike, p : float = 1.0, meanOrMedian : str = 'mean') -> float:
    """
    Proportion of data points within p spread-units of the mean or median.

    Parameters:
    -----------
    x (array-like): The input data vector
    p (float): The number (proportion) of spread units on either side of the centre
    meanOrMedian (str): Which centre/spread pair to use:
        'mean'   -> mean and sample standard deviation (ddof=1)
        'median' -> median and 1.35 * interquartile range (Hazen quantiles)

    Returns:
    --------
    float: The proportion of data points x with
        centre - p * spread <= x <= centre + p * spread (bounds inclusive)

    Raises:
        ValueError: If meanOrMedian is not 'mean' or 'median'
    """
    x = np.asarray(x)
    N = len(x)

    if meanOrMedian == 'mean':
        mu = np.mean(x)
        sig = np.std(x, ddof=1)
    elif meanOrMedian == 'median':
        mu = np.median(x)
        q75 = np.percentile(x, 75, method='hazen')
        q25 = np.percentile(x, 25, method='hazen')
        # Rescaled interquartile range is the spread unit in 'median' mode.
        sig = 1.35 * (q75 - q25)
    else:
        raise ValueError(f"Unknown setting: '{meanOrMedian}'")

    # The withinp statistic: count the points inside the closed interval.
    lower = mu - p * sig
    upper = mu + p * sig
    n_within = np.count_nonzero((x >= lower) & (x <= upper))

    # Cast so the result is a builtin float, like Quantile and PLeft return.
    return float(np.divide(n_within, N))
| 41 | + |
def Unique(y : ArrayLike) -> float:
    """
    Fraction of the entries in the input that are distinct values.

    Parameters
    ----------
    y : array-like
        The input time series or data vector

    Returns
    -------
    float
        Number of distinct values divided by the length of the input
    """
    series = np.asarray(y)
    distinct_values = np.unique(series)
    n_distinct = len(distinct_values)
    n_total = len(series)
    return np.divide(n_distinct, n_total)
| 58 | + |
| 59 | + |
def Spread(y : ArrayLike, spreadMeasure : str = 'std') -> float:
    """
    Spread of the raw data vector under a chosen statistical measure.

    Part of the Distributional operations from hctsa, implementing DN_Spread.

    Parameters
    ----------
    y : array-like
        The input time series or data vector
    spreadMeasure : str, optional
        Which measure of spread to compute (default is 'std'):
            - 'std': sample standard deviation (ddof=1)
            - 'iqr': interquartile range (Hazen quantiles)
            - 'mad': mean absolute deviation about the mean
            - 'mead': median absolute deviation about the median

    Returns
    -------
    float
        The calculated spread measure

    Raises
    ------
    ValueError
        If spreadMeasure is not one of the four names above
    """
    data = np.asarray(y)

    if spreadMeasure == 'std':
        return np.std(data, ddof=1)

    if spreadMeasure == 'iqr':
        upper_quartile, lower_quartile = (
            np.quantile(data, q, method='hazen') for q in (0.75, 0.25)
        )
        return upper_quartile - lower_quartile

    if spreadMeasure == 'mad':
        # mean absolute deviation
        deviations = np.absolute(data - np.mean(data))
        return np.mean(deviations)

    if spreadMeasure == 'mead':
        # median absolute deviation
        deviations = np.absolute(data - np.median(data))
        return np.median(deviations)

    raise ValueError('spreadMeasure must be one of std, iqr, mad or mead')
| 99 | + |
def Quantile(y : ArrayLike, p : float = 0.5) -> float:
    """
    Calculates the quantile value at a specified proportion, p.

    Quantiles are computed with NumPy's 'hazen' interpolation method.

    Parameters:
        y (array-like): The input data vector
        p (float): The quantile proportion, 0 <= p <= 1 (default is 0.5,
            which is the median). Python ints/floats and NumPy integer/floating
            scalars are accepted.

    Returns:
        float: The calculated quantile value

    Raises:
        ValueError: If p is not a real number between 0 and 1 (inclusive)
    """
    y = np.asarray(y)

    # Validate first. NumPy scalars (e.g. np.int64, np.float32) are legitimate
    # proportions, and `not (0 <= p <= 1)` also rejects NaN.
    is_real_scalar = isinstance(p, (int, float, np.integer, np.floating))
    if not is_real_scalar or not (0 <= p <= 1):
        raise ValueError("p must specify a proportion, in (0,1)")

    # NOTE: this fires whenever p equals 0.5, including when it is passed
    # explicitly; the signature cannot tell that apart from the default.
    if p == 0.5:
        logger.warning("Using quantile p = 0.5 (median) by default")

    return float(np.quantile(y, float(p), method='hazen'))
| 122 | + |
def ProportionValues(x : ArrayLike, propWhat : str = 'positive') -> float:
    """
    Calculate the proportion of values meeting specific conditions in a time series.

    Parameters
    ----------
    x : array-like
        Input time series or data vector
    propWhat : str, optional (default is 'positive')
        Type of values to count:
            - 'zeros': values equal to zero
            - 'positive': values strictly greater than zero
            - 'geq0': values greater than or equal to zero

    Returns
    -------
    float
        Proportion of values meeting the specified condition, or NaN when
        the input is empty (the proportion is undefined).

    Raises
    ------
    ValueError
        If propWhat is not one of 'zeros', 'positive' or 'geq0'
    """
    x = np.asarray(x)
    N = len(x)

    if propWhat == 'zeros':
        # elements equal to zero
        mask = x == 0
    elif propWhat == 'positive':
        mask = x > 0
    elif propWhat == 'geq0':
        mask = x >= 0
    else:
        raise ValueError(f"Unknown condition to measure: {propWhat}")

    if N == 0:
        # Avoid dividing by zero: no data means no defined proportion.
        return float('nan')

    # Count in vectorized NumPy code; the builtin sum() would iterate the
    # array element by element in Python.
    return float(np.count_nonzero(mask) / N)
| 156 | + |
| 157 | + |
def PLeft(y : ArrayLike, th : float = 0.1) -> float:
    """
    Distance from the mean beyond which a given proportion of the data lie.

    Takes the (1 - th) quantile (Hazen method) of the absolute deviations
    from the mean, so that a proportion `th` of the points are further from
    the mean than that distance, and divides it by the sample standard
    deviation (ddof=1) of the input.

    Parameters
    ----------
    y : array_like
        The input data vector.
    th : float, optional
        The proportion of data lying further from the mean than the
        returned distance (default is 0.1).

    Returns
    -------
    float
        The distance from the mean normalized by the standard deviation.
    """
    series = np.asarray(y)

    # Absolute distance of every point from the mean of the series
    distances = np.abs(series - np.mean(series))

    # A proportion, th, of the data lie further than this from the mean
    cutoff = np.quantile(distances, 1-th, method='hazen')

    scale = np.std(series, ddof=1)
    return float(np.divide(cutoff, scale))
0 commit comments