diff --git a/cyeva/core/base.py b/cyeva/core/base.py
index b08c75c..cf1dec7 100644
--- a/cyeva/core/base.py
+++ b/cyeva/core/base.py
@@ -26,7 +26,7 @@
     calc_threshold_mae,
     calc_multiclass_accuracy_ratio,
     calc_multiclass_hanssen_kuipers_score,
-    calc_multiclass_heidke_skill_score
+    calc_multiclass_heidke_skill_score,
 )
 
 
@@ -36,7 +36,6 @@ class Comparison:
     def __init__(
         self, observation: Union[np.ndarray, list], forecast: Union[np.ndarray, list]
     ):
-
         if isinstance(observation, Quantity):
             observation = observation.magnitude
         if isinstance(forecast, Quantity):
@@ -216,7 +215,6 @@ def calc_multiclass_accuracy_ratio(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -232,14 +230,13 @@ def calc_multiclass_hanssen_kuipers_score(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
             forecast = self.forecast
 
         return calc_multiclass_hanssen_kuipers_score(observation, forecast)
-    
+
     @result_round_digit(4)
     def calc_multiclass_heidke_skill_score(
         self,
@@ -248,7 +245,6 @@ def calc_multiclass_heidke_skill_score(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -295,7 +291,6 @@ def calc_threshold_accuracy_ratio(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -316,7 +311,6 @@ def calc_threshold_hit_ratio(
         *args,
         **kwargs
    ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -337,7 +331,6 @@ def calc_threshold_miss_ratio(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -358,7 +351,6 @@ def calc_threshold_false_alarm_ratio(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -379,7 +371,6 @@ def calc_threshold_bias_score(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -400,7 +391,6 @@ def calc_threshold_ts(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
@@ -421,7 +411,6 @@ def calc_threshold_mae(
         *args,
         **kwargs
     ) -> float:
-
         if observation is None:
             observation = self.observation
         if forecast is None:
diff --git a/cyeva/core/precip.py b/cyeva/core/precip.py
index 57a2e8b..73c6776 100644
--- a/cyeva/core/precip.py
+++ b/cyeva/core/precip.py
@@ -209,8 +209,12 @@ def __init__(
         self.kind = kind
         self.unit = unit
         self.lev = lev
-        self.observation = (self.observation * UNITS.parse_expression(unit)).to("mm").magnitude
-        self.forecast = (self.forecast * UNITS.parse_expression(unit)).to("mm").magnitude
+        self.observation = (
+            (self.observation * UNITS.parse_expression(unit)).to("mm").magnitude
+        )
+        self.forecast = (
+            (self.forecast * UNITS.parse_expression(unit)).to("mm").magnitude
+        )
         self.df = pd.DataFrame(
             {
                 "observation": self.observation,
diff --git a/cyeva/core/statistic.py b/cyeva/core/statistic.py
index d18a164..68eac8b 100644
--- a/cyeva/core/statistic.py
+++ b/cyeva/core/statistic.py
@@ -48,6 +48,7 @@ def calc_binary_quadrant_values(
 
     return hits, misses, false_alarms, correct_rejects, total
 
+
 @assert_length
 @drop_nan
 def calc_multiclass_confusion_matrix(
@@ -80,11 +81,17 @@ class K n(F_K,O_1) n(F_K,O_2) n(F_K,O_K)
     cates = np.unique(np.concatenate([np.unique(observation), np.unique(forecast)]))
     confusion_matrix_list = []
     for obs_cate_, fcst_cate_ in product(cates, cates):
-        count_cate_ = Counter((observation==obs_cate_) & (forecast==fcst_cate_))[True]
+        count_cate_ = Counter((observation == obs_cate_) & (forecast == fcst_cate_))[
+            True
+        ]
         confusion_matrix_list.append([obs_cate_, fcst_cate_, count_cate_])
 
-    confusion_matrix = pd.DataFrame(np.array(confusion_matrix_list), columns=['observation','forecast', 'count'])
-    confusion_matrix = confusion_matrix.pivot_table('count', index='forecast',columns='observation',aggfunc='sum').astype(int)
+    confusion_matrix = pd.DataFrame(
+        np.array(confusion_matrix_list), columns=["observation", "forecast", "count"]
+    )
+    confusion_matrix = confusion_matrix.pivot_table(
+        "count", index="forecast", columns="observation", aggfunc="sum"
+    ).astype(int)
 
     assert len(observation) == np.sum(confusion_matrix.values)
 
@@ -104,21 +111,20 @@ def calc_multiclass_accuracy_ratio(
     Args:
        observation (Union[list, np.ndarray]): Multiclass observation data array
                                                that consist of class labels.
-        forecast (Union[list, np.ndarray]): Multiclass forecast data array 
+        forecast (Union[list, np.ndarray]): Multiclass forecast data array
                                             that consist of class labels.
     Returns:
         float: The accuracy(%) of multiclass forecast. Perfect score 100.
     """
-    confusion_matrix = calc_multiclass_confusion_matrix(
-        observation, forecast
-    )
+    confusion_matrix = calc_multiclass_confusion_matrix(observation, forecast)
 
     # compute the sum of hits of all categories
     all_hits = np.sum(confusion_matrix.values.diagonal())
 
     total = len(observation)
 
-    return (all_hits / total) * 100
+    return (all_hits / total) * 100
+
 
 @assert_length
 @fix_zero_division
@@ -126,76 +132,80 @@ def calc_multiclass_accuracy_ratio(
 def calc_multiclass_heidke_skill_score(
     observation: Union[list, np.ndarray], forecast: Union[list, np.ndarray]
 ) -> float:
-    """calculate the Heidke Skill Score (HSS), which measures the 
-    fraction of correct forecasts after eliminating those forecasts 
-    which would be correct due purely to random chance. 
+    """calculate the Heidke Skill Score (HSS), which measures the
+    fraction of correct forecasts after eliminating those forecasts
+    which would be correct due purely to random chance.
 
-    HSS = \frac {\frac {1} {Total} \sum\limits_{i=1}^{K} n(F_i,O_i) - 
-          \frac {1} {Total^2} \sum\limits_{i=1}^{K} N(F_i)N(O_i) } 
+    HSS = \frac {\frac {1} {Total} \sum\limits_{i=1}^{K} n(F_i,O_i) -
+          \frac {1} {Total^2} \sum\limits_{i=1}^{K} N(F_i)N(O_i) }
           {1 - \frac {1} {Total^2} \sum\limits_{i=1}^{K} N(F_i)*N(O_i)}
 
     Args:
         observation (Union[list, np.ndarray]): Multiclass observation data array
                                                that consist of class labels.
-        forecast (Union[list, np.ndarray]): Multiclass forecast data array 
+        forecast (Union[list, np.ndarray]): Multiclass forecast data array
                                             that consist of class labels.
 
     Returns:
         float: HSS score. Perfect score 1.
""" - confusion_matrix = calc_multiclass_confusion_matrix( - observation, forecast - ) + confusion_matrix = calc_multiclass_confusion_matrix(observation, forecast) total = len(observation) # compute HSS score - acc_ = np.sum(confusion_matrix.values.diagonal()) / total - reference_acc_ = np.sum(confusion_matrix.sum(axis=0).values * confusion_matrix.sum(axis=1).values) / (total**2) + acc_ = np.sum(confusion_matrix.values.diagonal()) / total + reference_acc_ = np.sum( + confusion_matrix.sum(axis=0).values * confusion_matrix.sum(axis=1).values + ) / (total**2) perfect_acc_ = 1 - hss_score_ = ( acc_ - reference_acc_ ) / (perfect_acc_ - reference_acc_) - + hss_score_ = (acc_ - reference_acc_) / (perfect_acc_ - reference_acc_) + return hss_score_ + @assert_length @fix_zero_division @drop_nan def calc_multiclass_hanssen_kuipers_score( observation: Union[list, np.ndarray], forecast: Union[list, np.ndarray] ) -> float: - """calculate the Hanssen and Kuipers Score (HSS), which is - similar to the Heidke skill score (above), except that in - the denominator the fraction of correct forecasts due to + """calculate the Hanssen and Kuipers Score (HSS), which is + similar to the Heidke skill score (above), except that in + the denominator the fraction of correct forecasts due to random chance is for an unbiased forecast. - HK = \frac {\frac {1} {Total} \sum\limits_{i=1}^{K} n(F_i,O_i) - - \frac {1} {Total^2} \sum\limits_{i=1}^{K} N(F_i)N(O_i) } + HK = \frac {\frac {1} {Total} \sum\limits_{i=1}^{K} n(F_i,O_i) - + \frac {1} {Total^2} \sum\limits_{i=1}^{K} N(F_i)N(O_i) } {1 - \frac {1} {Total^2} \sum\limits_{i=1}^{K} N(O_i)^2} Args: observation (Union[list, np.ndarray]): Multiclass observation data array that consist of class labels. - forecast (Union[list, np.ndarray]): Multiclass forecast data array + forecast (Union[list, np.ndarray]): Multiclass forecast data array that consist of class labels. Returns: float: HK score. Perfect score 1. 
""" - confusion_matrix = calc_multiclass_confusion_matrix( - observation, forecast - ) + confusion_matrix = calc_multiclass_confusion_matrix(observation, forecast) total = len(observation) # compute HK score - acc_ = np.sum(confusion_matrix.values.diagonal()) / total - reference_acc_ = np.sum(confusion_matrix.sum(axis=0).values * confusion_matrix.sum(axis=1).values) / (total**2) + acc_ = np.sum(confusion_matrix.values.diagonal()) / total + reference_acc_ = np.sum( + confusion_matrix.sum(axis=0).values * confusion_matrix.sum(axis=1).values + ) / (total**2) perfect_acc_ = 1 - unbias_reference_acc_ = np.sum(confusion_matrix.sum(axis=0).values**2) / (total**2) - hk_score_ = ( acc_ - reference_acc_ ) / (perfect_acc_ - unbias_reference_acc_) - + unbias_reference_acc_ = np.sum(confusion_matrix.sum(axis=0).values ** 2) / ( + total**2 + ) + hk_score_ = (acc_ - reference_acc_) / (perfect_acc_ - unbias_reference_acc_) + return hk_score_ + @assert_length @fix_zero_division @drop_nan diff --git a/cyeva/core/temp.py b/cyeva/core/temp.py index 4a7f138..7ea13bf 100644 --- a/cyeva/core/temp.py +++ b/cyeva/core/temp.py @@ -21,8 +21,12 @@ def __init__( super().__init__(observation, forecast) self.kind = kind self.lev = lev - self.observation = (self.observation * UNITS.parse_expression(unit)).to("degC").magnitude - self.forecast = (self.forecast * UNITS.parse_expression(unit)).to("degC").magnitude + self.observation = ( + (self.observation * UNITS.parse_expression(unit)).to("degC").magnitude + ) + self.forecast = ( + (self.forecast * UNITS.parse_expression(unit)).to("degC").magnitude + ) self.df = pd.DataFrame( { "observation": self.observation, diff --git a/cyeva/core/weather_code.py b/cyeva/core/weather_code.py index d9034ab..10461d8 100644 --- a/cyeva/core/weather_code.py +++ b/cyeva/core/weather_code.py @@ -48,7 +48,6 @@ def gather_all_factors(self): } ) - df = pd.DataFrame(result) return df diff --git a/cyeva/utils/decorators.py b/cyeva/utils/decorators.py index 3ada8c1..671262f 100644 --- a/cyeva/utils/decorators.py +++ b/cyeva/utils/decorators.py @@ -35,7 +35,6 @@ def convert_to_ndarray(func): @wraps(func) def wrapper(observation, forecast, *args, **kwargs): - if not isinstance(observation, np.ndarray) and not isinstance( observation, Number ): diff --git a/docs/source/conf.py b/docs/source/conf.py index a33c141..a5bf2f1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -61,4 +61,4 @@ # so a file named "default.css" will overwrite the builtin "default.css". 
 html_static_path = ["_static"]
 
-master_doc = 'index'
+master_doc = "index"
diff --git a/setup.py b/setup.py
index 271e4d7..0de5186 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,9 @@ def get_version(rel_path):
     url="https://github.com/caiyunapp/cyeva",
     include_package_data=True,
     package_data={"": ["*.csv", "*.config", "*.nl", "*.json"]},
-    packages=setuptools.find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
+    packages=setuptools.find_packages(
+        exclude=["*.tests", "*.tests.*", "tests.*", "tests"]
+    ),
     install_requires=required,
     classifiers=[
         "Development Status :: 4 - Beta",
diff --git a/tests/functions/case/weather_code/__init__.py b/tests/functions/case/weather_code/__init__.py
index 0477760..aedafd0 100644
--- a/tests/functions/case/weather_code/__init__.py
+++ b/tests/functions/case/weather_code/__init__.py
@@ -1,3 +1,3 @@
 from .accuracy_ratio import ACCURACY_RATE_CASE
 from .hk import HK_CASE
-from .hss import HSS_CASE
\ No newline at end of file
+from .hss import HSS_CASE
diff --git a/tests/functions/case/weather_code/accuracy_ratio.py b/tests/functions/case/weather_code/accuracy_ratio.py
index fdab0c2..5136ee4 100644
--- a/tests/functions/case/weather_code/accuracy_ratio.py
+++ b/tests/functions/case/weather_code/accuracy_ratio.py
@@ -1,6 +1,9 @@
 ACCURACY_RATE_CASE = [
     {"obs": [1, 2, 3, 4, 5], "fct": [1, 2, 3, 4, 5], "result": 100},
-    {"obs": ['A', 'B', 'C', 'D', 'E'], "fct": ['A', 'B', 'C', 'D', 'E'], "result": 100},
-    {"obs": [1]*5 + [2]*5 + [3]*5 + [4]*5 + [5]*5, "fct": [1, 2, 3, 4, 5]*5, "result": 20}
-
-]
\ No newline at end of file
+    {"obs": ["A", "B", "C", "D", "E"], "fct": ["A", "B", "C", "D", "E"], "result": 100},
+    {
+        "obs": [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5,
+        "fct": [1, 2, 3, 4, 5] * 5,
+        "result": 20,
+    },
+]
diff --git a/tests/functions/case/weather_code/hk.py b/tests/functions/case/weather_code/hk.py
index c868b0f..b3d0c95 100644
--- a/tests/functions/case/weather_code/hk.py
+++ b/tests/functions/case/weather_code/hk.py
@@ -1,4 +1,8 @@
 HK_CASE = [
     {"obs": [1, 2, 3, 4, 5], "fct": [1, 2, 3, 4, 5], "result": 1},
-    {"obs": [1]*5 + [2]*5 + [3]*5 + [4]*5 + [5]*5, "fct": [1, 2, 3, 4, 5]*5, "result": 0}
-]
\ No newline at end of file
+    {
+        "obs": [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5,
+        "fct": [1, 2, 3, 4, 5] * 5,
+        "result": 0,
+    },
+]
diff --git a/tests/functions/case/weather_code/hss.py b/tests/functions/case/weather_code/hss.py
index 2d59a8d..f291acf 100644
--- a/tests/functions/case/weather_code/hss.py
+++ b/tests/functions/case/weather_code/hss.py
@@ -1,4 +1,8 @@
 HSS_CASE = [
     {"obs": [1, 2, 3, 4, 5], "fct": [1, 2, 3, 4, 5], "result": 1},
-    {"obs": [1]*5 + [2]*5 + [3]*5 + [4]*5 + [5]*5, "fct": [1, 2, 3, 4, 5]*5, "result": 0}
-]
\ No newline at end of file
+    {
+        "obs": [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5,
+        "fct": [1, 2, 3, 4, 5] * 5,
+        "result": 0,
+    },
+]
diff --git a/tests/functions/test_base.py b/tests/functions/test_base.py
index e5e5955..6f99e8d 100644
--- a/tests/functions/test_base.py
+++ b/tests/functions/test_base.py
@@ -14,7 +14,6 @@
 
 
 def test_comparison():
-
     for case in RMSE_CASE:
         obs = case["obs"]
         fcst = case["fct"]
diff --git a/tests/functions/test_statistic.py b/tests/functions/test_statistic.py
index 61cdfe3..e48f3da 100644
--- a/tests/functions/test_statistic.py
+++ b/tests/functions/test_statistic.py
@@ -177,18 +177,15 @@ def calc_correlation_coefficient():
         else:
             assert np.isnan(_result)
 
+
 def test_calc_multiclass_confusion_matrix():
     MULTICLASS_CASE = [
+        {"obs": [1, 2, 3, 4, 5], "fct": [1, 2, 3, 4, 5], "result": np.diag([1] * 5)},
         {
-            "obs": [1, 2, 3, 4, 5],
-            "fct": [1, 2, 3, 4, 5],
-            "result": np.diag([1]*5)
+            "obs": [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5,
+            "fct": [1, 2, 3, 4, 5] * 5,
+            "result": np.ones((5, 5)),
         },
-        {
-            "obs": [1]*5 + [2]*5 + [3]*5 + [4]*5 + [5]*5,
-            "fct": [1, 2, 3, 4, 5]*5,
-            "result": np.ones((5,5))
-        }
     ]
     for case in MULTICLASS_CASE:
         obs = case["obs"]
@@ -197,23 +194,20 @@ def test_calc_multiclass_confusion_matrix():
         cm = calc_multiclass_confusion_matrix(obs, fct)
         assert (cm.values == result).all()
 
+
 def test_calc_multiclass_accuracy_ratio():
     MULTICLASS_CASE = [
+        {"obs": [1, 2, 3, 4, 5], "fct": [1, 2, 3, 4, 5], "result": 100},
         {
-            "obs": [1, 2, 3, 4, 5],
-            "fct": [1, 2, 3, 4, 5],
-            "result": 100
+            "obs": np.array(["A", "B", "C", "D", "E"]),
+            "fct": np.array(["A", "B", "C", "D", "E"]),
+            "result": 100,
         },
         {
-            "obs": np.array(['A', 'B', 'C', 'D', 'E']),
-            "fct": np.array(['A', 'B', 'C', 'D', 'E']),
-            "result": 100
+            "obs": [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5,
+            "fct": [1, 2, 3, 4, 5] * 5,
+            "result": 20,
         },
-        {
-            "obs": [1]*5 + [2]*5 + [3]*5 + [4]*5 + [5]*5,
-            "fct": [1, 2, 3, 4, 5]*5,
-            "result": 20
-        }
     ]
     for case in MULTICLASS_CASE:
         obs = case["obs"]
@@ -222,18 +216,15 @@ def test_calc_multiclass_accuracy_ratio():
         acc = calc_multiclass_accuracy_ratio(obs, fct)
         assert acc == result
 
+
 def test_calc_multiclass_hanssen_kuipers_score():
     MULTICLASS_CASE = [
+        {"obs": [1, 2, 3, 4, 5], "fct": [1, 2, 3, 4, 5], "result": 1},
         {
-            "obs": [1, 2, 3, 4, 5],
-            "fct": [1, 2, 3, 4, 5],
-            "result": 1
+            "obs": [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5,
+            "fct": [1, 2, 3, 4, 5] * 5,
+            "result": 0,
         },
-        {
-            "obs": [1]*5 + [2]*5 + [3]*5 + [4]*5 + [5]*5,
-            "fct": [1, 2, 3, 4, 5]*5,
-            "result": 0
-        }
     ]
     for case in MULTICLASS_CASE:
         obs = case["obs"]
@@ -242,22 +233,19 @@ def test_calc_multiclass_hanssen_kuipers_score():
         acc = calc_multiclass_hanssen_kuipers_score(obs, fct)
         assert acc == result
 
+
 def test_calc_multiclass_heidke_skill_score():
     MULTICLASS_CASE = [
+        {"obs": [1, 2, 3, 4, 5], "fct": [1, 2, 3, 4, 5], "result": 1},
         {
-            "obs": [1, 2, 3, 4, 5],
-            "fct": [1, 2, 3, 4, 5],
-            "result": 1
+            "obs": [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5,
+            "fct": [1, 2, 3, 4, 5] * 5,
+            "result": 0,
         },
-        {
-            "obs": [1]*5 + [2]*5 + [3]*5 + [4]*5 + [5]*5,
-            "fct": [1, 2, 3, 4, 5]*5,
-            "result": 0
-        }
     ]
     for case in MULTICLASS_CASE:
         obs = case["obs"]
         fct = case["fct"]
         result = case["result"]
         acc = calc_multiclass_heidke_skill_score(obs, fct)
-        assert acc == result
\ No newline at end of file
+        assert acc == result
diff --git a/tests/functions/test_weather_code.py b/tests/functions/test_weather_code.py
index 2b97284..a8dd6b4 100644
--- a/tests/functions/test_weather_code.py
+++ b/tests/functions/test_weather_code.py
@@ -1,11 +1,7 @@
 import numpy as np
 
 from cyeva import WeatherCodeComparison
-from .case.weather_code import (
-    ACCURACY_RATE_CASE,
-    HK_CASE,
-    HSS_CASE
-)
+from .case.weather_code import ACCURACY_RATE_CASE, HK_CASE, HSS_CASE
 
 
 def test_calc_weather_code_accuracy_ratio():
@@ -23,6 +19,7 @@ def test_calc_weather_code_accuracy_ratio():
         else:
             assert np.isnan(_result)
 
+
 def test_calc_weather_code_hanssen_kuipers_score():
     for case in HK_CASE:
         obs = case["obs"]
@@ -38,6 +35,7 @@ def test_calc_weather_code_hanssen_kuipers_score():
         else:
             assert np.isnan(_result)
 
+
 def test_calc_weather_code_heidke_skill_score():
     for case in HSS_CASE:
         obs = case["obs"]
@@ -61,4 +59,3 @@ def test_gather_all_factors():
 
         wc = WeatherCodeComparison(obs, fcst)
         wc.gather_all_factors()
-
diff --git a/tests/test_issues.py b/tests/test_issues.py
index 15f6867..a61aaa7 100644
--- a/tests/test_issues.py
+++ b/tests/test_issues.py
@@ -11,7 +11,7 @@ def test_iss33():
     wd2 = np.array([3, 3])
 
     wc = WindComparison(ws1, ws2, wd1, wd2)
-    assert np.isclose(wc.calc_rmse(kind='direction'), 6)
+    assert np.isclose(wc.calc_rmse(kind="direction"), 6)
 
 
 def test_iss36():
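
Reviewer note, not part of the patch: the statistic.py hunks only re-wrap the multiclass metric code, so behavior should be unchanged. The degenerate cases from the test files above give a quick standalone check. A minimal sketch, assuming the patched tree is importable as cyeva:

    # sanity_check_multiclass.py -- hypothetical scratch script, not in this diff
    from cyeva.core.statistic import (
        calc_multiclass_accuracy_ratio,
        calc_multiclass_hanssen_kuipers_score,
        calc_multiclass_heidke_skill_score,
    )

    # Perfect forecast: accuracy 100%, HSS and HK both reach their perfect score 1.
    obs = fct = [1, 2, 3, 4, 5]
    assert calc_multiclass_accuracy_ratio(obs, fct) == 100
    assert calc_multiclass_heidke_skill_score(obs, fct) == 1
    assert calc_multiclass_hanssen_kuipers_score(obs, fct) == 1

    # Chance-level forecast: each class is hit exactly once in five tries, so
    # accuracy is 20% while HSS and HK collapse to 0 (no skill beyond chance).
    obs = [1] * 5 + [2] * 5 + [3] * 5 + [4] * 5 + [5] * 5
    fct = [1, 2, 3, 4, 5] * 5
    assert calc_multiclass_accuracy_ratio(obs, fct) == 20
    assert calc_multiclass_heidke_skill_score(obs, fct) == 0
    assert calc_multiclass_hanssen_kuipers_score(obs, fct) == 0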
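The precip.py and temp.py hunks likewise only re-wrap the pint-based unit normalization across several lines. A standalone sketch of that pattern, with UNITS assumed to be a pint.UnitRegistry standing in for cyeva's shared registry:

    import numpy as np
    import pint

    UNITS = pint.UnitRegistry()  # stand-in; cyeva supplies its own UNITS object

    forecast = np.array([0.1, 0.5, 1.2])  # precipitation reported in centimeters
    # Attach the source unit, convert to millimeters, then strip back to a plain array.
    forecast_mm = (forecast * UNITS.parse_expression("cm")).to("mm").magnitude
    print(forecast_mm)  # [ 1.  5. 12.]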