aeon-toolkit · TonyBagnall · May 21, 2025 · Mar 24, 2025 · Mar 24, 2025 · Mar 24, 2025
@@ -0,0 +1 @@
+"""Feature Based learning clustering tests."""
@@ -0,0 +1,163 @@
+"""Tests for Catch22 Clusterer."""
+
+import numpy as np
+from sklearn import metrics
+
+from aeon.clustering.feature_based import Catch22Clusterer
+from aeon.datasets import load_basic_motions, load_gunpoint
+
+
+def test_catch24_multivariate():
+    """Test Catch24 Clusterer with univariate data."""
+    X_train, y_train = load_basic_motions(split="train")
+    X_test, y_test = load_basic_motions(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    catach24 = Catch22Clusterer(
+        random_state=1,
+    )
+    catach24.fit(X_train)
+    train_result = catach24.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = catach24.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    predict_proba = catach24.predict_proba(X_test)
+
+    assert len(predict_proba) == 20
+    assert np.array_equal(
+        test_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 3, 3, 6, 5, 3, 1, 1, 3],
+    )
+    assert np.array_equal(
+        train_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 2, 4, 3, 3, 6, 1],
+    )
+    assert train_score == 0.6684210526315789
+    assert test_score == 0.8263157894736842
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
+
+
+def test_catch24_univariate():
+    """Test Catch24 Clusterer with multivariate data."""
+    X_train, y_train = load_gunpoint(split="train")
+    X_test, y_test = load_gunpoint(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    catach24 = Catch22Clusterer(
+        random_state=1,
+    )
+    train_result = catach24.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = catach24.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    ari_test = metrics.adjusted_rand_score(y_test, test_result)
+    ari_train = metrics.adjusted_rand_score(y_train, train_result)
+    predict_proba = catach24.predict_proba(X_test)
+
+    assert len(predict_proba) == 20
+    assert ari_test == 0.036247577795508946
+    assert ari_train == 0.16466826538768986
+    assert np.array_equal(
+        test_result,
+        [1, 3, 4, 6, 7, 3, 5, 5, 6, 3, 3, 1, 3, 1, 1, 7, 3, 0, 6, 3],
+    )
+    assert np.array_equal(
+        train_result,
+        [3, 3, 7, 7, 0, 3, 2, 4, 1, 1, 6, 1, 5, 1, 3, 1, 6, 3, 1, 5],
+    )
+    assert train_score == 0.5947368421052631
+    assert test_score == 0.531578947368421
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
+
+
+def test_catch22_multivariate():
+    """Test Catch22 Clusterer with univariate data."""
+    X_train, y_train = load_basic_motions(split="train")
+    X_test, y_test = load_basic_motions(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    catach22 = Catch22Clusterer(
+        catch24=False,
+        random_state=1,
+    )
+    train_result = catach22.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = catach22.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    ari_test = metrics.adjusted_rand_score(y_test, test_result)
+    ari_train = metrics.adjusted_rand_score(y_train, train_result)
+    predict_proba = catach22.predict_proba(X_test)
+
+    assert len(predict_proba) == 20
+    assert ari_test == 0.6451612903225806
+    assert ari_train == 0.32639279684862127
+    assert len(predict_proba) == 20
+    assert np.array_equal(
+        test_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 3, 3, 6, 5, 3, 1, 1, 3],
+    )
+    assert np.array_equal(
+        train_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 2, 4, 3, 3, 6, 1],
+    )
+    assert train_score == 0.6684210526315789
+    assert test_score == 0.8263157894736842
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
+
+
+def test_catch22_univariate():
+    """Test Catch22 Clusterer with multivariate data."""
+    X_train, y_train = load_gunpoint(split="train")
+    X_test, y_test = load_gunpoint(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    catach22 = Catch22Clusterer(
+        catch24=False,
+        random_state=1,
+    )
+    train_result = catach22.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = catach22.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    ari_test = metrics.adjusted_rand_score(y_test, test_result)
+    ari_train = metrics.adjusted_rand_score(y_train, train_result)
+    predict_proba = catach22.predict_proba(X_test)
+
+    assert len(predict_proba) == 20
+    assert ari_test == 0.036247577795508946
+    assert ari_train == 0.16466826538768986
+    assert np.array_equal(
+        test_result,
+        [1, 3, 4, 6, 7, 3, 5, 5, 6, 3, 3, 1, 3, 1, 1, 7, 3, 0, 6, 3],
+    )
+    assert np.array_equal(
+        train_result,
+        [3, 3, 7, 7, 0, 3, 2, 4, 1, 1, 6, 1, 5, 1, 3, 1, 6, 3, 1, 5],
+    )
+    assert train_score == 0.5947368421052631
+    assert test_score == 0.531578947368421
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
@@ -0,0 +1,106 @@
+"""Tests for Summary Clusterer."""
+
+import numpy as np
+from sklearn import metrics
+
+from aeon.clustering.feature_based import SummaryClusterer
+from aeon.datasets import load_basic_motions, load_gunpoint
+
+
+def test_summary_univariate():
+    """Test Summary Clusterer with univariate data."""
+    X_train, y_train = load_gunpoint(split="train")
+    X_test, y_test = load_gunpoint(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    summary = SummaryClusterer(
+        random_state=1,
+    )
+    train_result = summary.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = summary.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    predict_proba = summary.predict_proba(X_test)
+    ari_test = metrics.adjusted_rand_score(y_test, test_result)
+    ari_train = metrics.adjusted_rand_score(y_train, train_result)
+
+    assert ari_test == 0.026750142287990893
+    assert ari_train == 0.18636519355943817
+    assert len(predict_proba) == 20
+    assert np.array_equal(
+        test_result,
+        [2, 0, 4, 2, 6, 0, 1, 7, 3, 0, 6, 2, 0, 2, 2, 6, 0, 6, 2, 6],
+    )
+    assert np.array_equal(
+        train_result,
+        [6, 6, 3, 6, 0, 6, 5, 4, 1, 2, 2, 2, 1, 2, 0, 2, 3, 6, 2, 7],
+    )
+    assert train_score == 0.6052631578947368
+    assert test_score == 0.5263157894736842
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
+
+
+def test_summary_multivariate():
+    """Test Summary Clusterer with multivariate data."""
+    X_train, y_train = load_basic_motions(split="train")
+    X_test, y_test = load_basic_motions(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    summary = SummaryClusterer(
+        random_state=1,
+    )
+    train_result = summary.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = summary.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    predict_proba = summary.predict_proba(X_test)
+    ari_test = metrics.adjusted_rand_score(y_test, test_result)
+    ari_train = metrics.adjusted_rand_score(y_train, train_result)
+
+    assert ari_test == 0.43478260869565216
+    assert ari_train == 0.4683038263849229
+    assert len(predict_proba) == 20
+    assert np.array_equal(
+        test_result,
+        [2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 5, 1, 5, 3, 5, 4, 4, 1, 1, 4],
+    )
+    assert np.array_equal(
+        train_result,
+        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 5, 6, 4, 7, 4, 5, 3, 1],
+    )
+    assert train_score == 0.7421052631578947
+    assert test_score == 0.7263157894736842
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
+
+
+def test_all_summary_stat():
+    """Test Summary Clusterer with all summary stat."""
+    X_train, y_train = load_basic_motions(split="train")
+    X_test, y_test = load_basic_motions(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    X_test = X_test[:num_points]
+    summary_stats_options = ["default", "percentiles", "bowley", "tukey"]
+    for summary_stat in summary_stats_options:
+        summary = SummaryClusterer(random_state=1, summary_stats=summary_stat)
+        train_result = summary.fit_predict(X_train)
+        test_result = summary.predict(X_test)
+        predict_proba = summary.predict_proba(X_test)
+        assert len(predict_proba) == 20
+        assert not np.isnan(train_result).any()
+        assert not np.isnan(test_result).any()
+        assert test_result.shape == (20,)
+        assert train_result.shape == (20,)
@@ -0,0 +1,125 @@
+"""Tests for TSFresh Clusterer."""
+
+import numpy as np
+import pytest
+from sklearn import metrics
+
+from aeon.clustering.feature_based import TSFreshClusterer
+from aeon.datasets import load_basic_motions, load_gunpoint
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tsfresh"], severity="none"),
+    reason="TSFresh soft dependency unavailable.",
+)
+def test_tsfresh_univariate():
+    """Test TSFresh Clusterer with univariate data."""
+    X_train, y_train = load_gunpoint(split="train")
+    X_test, y_test = load_gunpoint(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    tsfresh = TSFreshClusterer(
+        random_state=1,
+        n_clusters=2,
+    )
+    train_result = tsfresh.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = tsfresh.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    predict_proba = tsfresh.predict_proba(X_test)
+    ari_test = metrics.adjusted_rand_score(y_test, test_result)
+    ari_train = metrics.adjusted_rand_score(y_train, train_result)
+
+    assert ari_test == 0.0
+    assert ari_train == 0.02240325865580448
+    assert len(predict_proba) == 20
+    assert np.array_equal(
+        train_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
+    )
+    assert np.array_equal(
+        test_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+    )
+    assert train_score == 0.49473684210526314
+    assert test_score == 0.4789473684210526
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tsfresh"], severity="none"),
+    reason="TSFresh soft dependency unavailable.",
+)
+def test_tsfresh_multivariate():
+    """Test TSFresh Clusterer with multivariate data."""
+    X_train, y_train = load_basic_motions(split="train")
+    X_test, y_test = load_basic_motions(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    y_train = y_train[:num_points]
+    X_test = X_test[:num_points]
+    y_test = y_test[:num_points]
+
+    tsfresh = TSFreshClusterer(
+        random_state=1,
+        n_clusters=2,
+    )
+    train_result = tsfresh.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = tsfresh.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+    predict_proba = tsfresh.predict_proba(X_test)
+    ari_test = metrics.adjusted_rand_score(y_test, test_result)
+    ari_train = metrics.adjusted_rand_score(y_train, train_result)
+
+    assert ari_test == 1
+    assert ari_train == 1
+    assert len(predict_proba) == 20
+    assert np.array_equal(
+        train_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+    )
+    assert np.array_equal(
+        test_result,
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+    )
+    assert train_score == 1.0
+    assert test_score == 1.0
+    assert test_result.shape == (20,)
+    assert train_result.shape == (20,)
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies(["tsfresh"], severity="none"),
+    reason="TSFresh soft dependency unavailable.",
+)
+def test_all_fc_parameters():
+    """Test TSFresh Clusterer with all FC parameters."""
+    X_train, y_train = load_basic_motions(split="train")
+    X_test, y_test = load_basic_motions(split="test")
+    num_points = 20
+
+    X_train = X_train[:num_points]
+    X_test = X_test[:num_points]
+    fc_parameters = ["minimal", "efficient", "comprehensive"]
+    for fc in fc_parameters:
+        tsfresh = TSFreshClusterer(
+            n_clusters=2, random_state=1, default_fc_parameters=fc
+        )
+
+        train_result = tsfresh.fit_predict(X_train)
+        test_result = tsfresh.predict(X_test)
+        predict_proba = tsfresh.predict_proba(X_test)
+        assert len(predict_proba) == 20
+        assert not np.isnan(train_result).any()
+        assert not np.isnan(test_result).any()
+        assert test_result.shape == (20,)
+        assert train_result.shape == (20,)