Skip to content

[ENH] Added test cases for feature based clustering #2690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
bef0d8f
Added test cases for feature based clustering
Mar 24, 2025
9db9e92
Merge branch 'aeon-toolkit:main' into test_case_for_feature_based_clu…
Ramana-Raja Mar 24, 2025
7de55cb
Automatic `pre-commit` fixes
Ramana-Raja Mar 24, 2025
f39fdca
added docstring
Mar 24, 2025
a753660
Merge remote-tracking branch 'origin/test_case_for_feature_based_clus…
Mar 24, 2025
abaf016
Automatic `pre-commit` fixes
Ramana-Raja Mar 24, 2025
4e6df5f
added .
Mar 24, 2025
29c7adf
Merge remote-tracking branch 'origin/test_case_for_feature_based_clus…
Mar 24, 2025
2f0f89e
added .
Mar 24, 2025
882d5bb
updated tsfresh
Mar 24, 2025
05c3186
Automatic `pre-commit` fixes
Ramana-Raja Mar 24, 2025
9c12732
added pytest _check_soft_dependencies for tsfresh
Mar 24, 2025
278c6b9
Automatic `pre-commit` fixes
Ramana-Raja Mar 24, 2025
4d222c9
added more testing
Mar 27, 2025
8a59ffb
Automatic `pre-commit` fixes
Ramana-Raja Mar 27, 2025
1e5cee8
added docs
Mar 27, 2025
6539699
Merge remote-tracking branch 'origin/test_case_for_feature_based_clus…
Mar 27, 2025
e7b6d93
added docs
Mar 27, 2025
77d70a6
added pytest
Mar 27, 2025
3605dd5
Automatic `pre-commit` fixes
Ramana-Raja Mar 27, 2025
4b3d76f
improved docs
Mar 27, 2025
d7f4c38
Merge remote-tracking branch 'origin/test_case_for_feature_based_clus…
Mar 27, 2025
f1a8f88
Automatic `pre-commit` fixes
Ramana-Raja Mar 27, 2025
af4be41
improved test cases
Mar 27, 2025
ee74e23
Merge remote-tracking branch 'origin/test_case_for_feature_based_clus…
Mar 27, 2025
0644583
changes made as requested by moderators
Apr 4, 2025
559ffb1
changes made as requested by moderators
Apr 4, 2025
9c0a1c1
Automatic `pre-commit` fixes
Ramana-Raja Apr 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions aeon/clustering/feature_based/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Feature Based learning clustering tests."""
163 changes: 163 additions & 0 deletions aeon/clustering/feature_based/tests/test_catch22.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""Tests for Catch22 Clusterer."""

import numpy as np
from sklearn import metrics

from aeon.clustering.feature_based import Catch22Clusterer
from aeon.datasets import load_basic_motions, load_gunpoint


def test_catch24_multivariate():
    """Test Catch24 Clusterer with multivariate data.

    Clusters the first 20 BasicMotions cases of each split with
    ``random_state=1`` and compares the predicted labels and Rand scores
    against hard-coded expected values.
    """
    num_points = 20
    X_train, y_train = load_basic_motions(split="train")
    X_test, y_test = load_basic_motions(split="test")

    X_train, y_train = X_train[:num_points], y_train[:num_points]
    X_test, y_test = X_test[:num_points], y_test[:num_points]

    # ``catch24`` is not passed here, so the estimator's own default applies.
    clusterer = Catch22Clusterer(random_state=1)
    train_result = clusterer.fit_predict(X_train)
    train_score = metrics.rand_score(y_train, train_result)
    test_result = clusterer.predict(X_test)
    test_score = metrics.rand_score(y_test, test_result)
    predict_proba = clusterer.predict_proba(X_test)

    assert len(predict_proba) == num_points
    assert np.array_equal(
        test_result,
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 3, 3, 6, 5, 3, 1, 1, 3],
    )
    assert np.array_equal(
        train_result,
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 2, 4, 3, 3, 6, 1],
    )
    # Scores are computed floats: compare with a tolerance rather than ``==``
    # so a last-bit rounding difference does not fail the test.
    assert np.isclose(train_score, 0.6684210526315789)
    assert np.isclose(test_score, 0.8263157894736842)
    assert test_result.shape == (num_points,)
    assert train_result.shape == (num_points,)


def test_catch24_univariate():
    """Test Catch24 Clusterer with univariate data.

    Clusters the first 20 GunPoint cases of each split with
    ``random_state=1`` and compares the predicted labels, Rand scores and
    adjusted Rand scores against hard-coded expected values.
    """
    num_points = 20
    X_train, y_train = load_gunpoint(split="train")
    X_test, y_test = load_gunpoint(split="test")

    X_train, y_train = X_train[:num_points], y_train[:num_points]
    X_test, y_test = X_test[:num_points], y_test[:num_points]

    # ``catch24`` is not passed here, so the estimator's own default applies.
    clusterer = Catch22Clusterer(random_state=1)
    train_result = clusterer.fit_predict(X_train)
    train_score = metrics.rand_score(y_train, train_result)
    test_result = clusterer.predict(X_test)
    test_score = metrics.rand_score(y_test, test_result)
    ari_test = metrics.adjusted_rand_score(y_test, test_result)
    ari_train = metrics.adjusted_rand_score(y_train, train_result)
    predict_proba = clusterer.predict_proba(X_test)

    assert len(predict_proba) == num_points
    # Scores are computed floats: compare with a tolerance rather than ``==``
    # so a last-bit rounding difference does not fail the test.
    assert np.isclose(ari_test, 0.036247577795508946)
    assert np.isclose(ari_train, 0.16466826538768986)
    assert np.array_equal(
        test_result,
        [1, 3, 4, 6, 7, 3, 5, 5, 6, 3, 3, 1, 3, 1, 1, 7, 3, 0, 6, 3],
    )
    assert np.array_equal(
        train_result,
        [3, 3, 7, 7, 0, 3, 2, 4, 1, 1, 6, 1, 5, 1, 3, 1, 6, 3, 1, 5],
    )
    assert np.isclose(train_score, 0.5947368421052631)
    assert np.isclose(test_score, 0.531578947368421)
    assert test_result.shape == (num_points,)
    assert train_result.shape == (num_points,)


def test_catch22_multivariate():
    """Test Catch22 Clusterer with multivariate data.

    Clusters the first 20 BasicMotions cases of each split with
    ``catch24=False`` and ``random_state=1``, then compares the predicted
    labels, Rand scores and adjusted Rand scores against hard-coded
    expected values.
    """
    num_points = 20
    X_train, y_train = load_basic_motions(split="train")
    X_test, y_test = load_basic_motions(split="test")

    X_train, y_train = X_train[:num_points], y_train[:num_points]
    X_test, y_test = X_test[:num_points], y_test[:num_points]

    clusterer = Catch22Clusterer(catch24=False, random_state=1)
    train_result = clusterer.fit_predict(X_train)
    train_score = metrics.rand_score(y_train, train_result)
    test_result = clusterer.predict(X_test)
    test_score = metrics.rand_score(y_test, test_result)
    ari_test = metrics.adjusted_rand_score(y_test, test_result)
    ari_train = metrics.adjusted_rand_score(y_train, train_result)
    predict_proba = clusterer.predict_proba(X_test)

    assert len(predict_proba) == num_points
    # Scores are computed floats: compare with a tolerance rather than ``==``
    # so a last-bit rounding difference does not fail the test.
    assert np.isclose(ari_test, 0.6451612903225806)
    assert np.isclose(ari_train, 0.32639279684862127)
    assert np.array_equal(
        test_result,
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 3, 3, 6, 5, 3, 1, 1, 3],
    )
    assert np.array_equal(
        train_result,
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 2, 4, 3, 3, 6, 1],
    )
    assert np.isclose(train_score, 0.6684210526315789)
    assert np.isclose(test_score, 0.8263157894736842)
    assert test_result.shape == (num_points,)
    assert train_result.shape == (num_points,)


def test_catch22_univariate():
    """Test Catch22 Clusterer with univariate data.

    Clusters the first 20 GunPoint cases of each split with
    ``catch24=False`` and ``random_state=1``, then compares the predicted
    labels, Rand scores and adjusted Rand scores against hard-coded
    expected values.
    """
    num_points = 20
    X_train, y_train = load_gunpoint(split="train")
    X_test, y_test = load_gunpoint(split="test")

    X_train, y_train = X_train[:num_points], y_train[:num_points]
    X_test, y_test = X_test[:num_points], y_test[:num_points]

    clusterer = Catch22Clusterer(catch24=False, random_state=1)
    train_result = clusterer.fit_predict(X_train)
    train_score = metrics.rand_score(y_train, train_result)
    test_result = clusterer.predict(X_test)
    test_score = metrics.rand_score(y_test, test_result)
    ari_test = metrics.adjusted_rand_score(y_test, test_result)
    ari_train = metrics.adjusted_rand_score(y_train, train_result)
    predict_proba = clusterer.predict_proba(X_test)

    assert len(predict_proba) == num_points
    # Scores are computed floats: compare with a tolerance rather than ``==``
    # so a last-bit rounding difference does not fail the test.
    assert np.isclose(ari_test, 0.036247577795508946)
    assert np.isclose(ari_train, 0.16466826538768986)
    assert np.array_equal(
        test_result,
        [1, 3, 4, 6, 7, 3, 5, 5, 6, 3, 3, 1, 3, 1, 1, 7, 3, 0, 6, 3],
    )
    assert np.array_equal(
        train_result,
        [3, 3, 7, 7, 0, 3, 2, 4, 1, 1, 6, 1, 5, 1, 3, 1, 6, 3, 1, 5],
    )
    assert np.isclose(train_score, 0.5947368421052631)
    assert np.isclose(test_score, 0.531578947368421)
    assert test_result.shape == (num_points,)
    assert train_result.shape == (num_points,)
106 changes: 106 additions & 0 deletions aeon/clustering/feature_based/tests/test_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Tests for Summary Clusterer."""

import numpy as np
from sklearn import metrics

from aeon.clustering.feature_based import SummaryClusterer
from aeon.datasets import load_basic_motions, load_gunpoint


def test_summary_univariate():
    """Test Summary Clusterer with univariate data.

    Clusters the first 20 GunPoint cases of each split with
    ``random_state=1`` and compares the predicted labels, Rand scores and
    adjusted Rand scores against hard-coded expected values.
    """
    num_points = 20
    X_train, y_train = load_gunpoint(split="train")
    X_test, y_test = load_gunpoint(split="test")

    X_train, y_train = X_train[:num_points], y_train[:num_points]
    X_test, y_test = X_test[:num_points], y_test[:num_points]

    summary = SummaryClusterer(random_state=1)
    train_result = summary.fit_predict(X_train)
    train_score = metrics.rand_score(y_train, train_result)
    test_result = summary.predict(X_test)
    test_score = metrics.rand_score(y_test, test_result)
    predict_proba = summary.predict_proba(X_test)
    ari_test = metrics.adjusted_rand_score(y_test, test_result)
    ari_train = metrics.adjusted_rand_score(y_train, train_result)

    # Scores are computed floats: compare with a tolerance rather than ``==``
    # so a last-bit rounding difference does not fail the test.
    assert np.isclose(ari_test, 0.026750142287990893)
    assert np.isclose(ari_train, 0.18636519355943817)
    assert len(predict_proba) == num_points
    assert np.array_equal(
        test_result,
        [2, 0, 4, 2, 6, 0, 1, 7, 3, 0, 6, 2, 0, 2, 2, 6, 0, 6, 2, 6],
    )
    assert np.array_equal(
        train_result,
        [6, 6, 3, 6, 0, 6, 5, 4, 1, 2, 2, 2, 1, 2, 0, 2, 3, 6, 2, 7],
    )
    assert np.isclose(train_score, 0.6052631578947368)
    assert np.isclose(test_score, 0.5263157894736842)
    assert test_result.shape == (num_points,)
    assert train_result.shape == (num_points,)


def test_summary_multivariate():
    """Test Summary Clusterer with multivariate data.

    Clusters the first 20 BasicMotions cases of each split with
    ``random_state=1`` and compares the predicted labels, Rand scores and
    adjusted Rand scores against hard-coded expected values.
    """
    num_points = 20
    X_train, y_train = load_basic_motions(split="train")
    X_test, y_test = load_basic_motions(split="test")

    X_train, y_train = X_train[:num_points], y_train[:num_points]
    X_test, y_test = X_test[:num_points], y_test[:num_points]

    summary = SummaryClusterer(random_state=1)
    train_result = summary.fit_predict(X_train)
    train_score = metrics.rand_score(y_train, train_result)
    test_result = summary.predict(X_test)
    test_score = metrics.rand_score(y_test, test_result)
    predict_proba = summary.predict_proba(X_test)
    ari_test = metrics.adjusted_rand_score(y_test, test_result)
    ari_train = metrics.adjusted_rand_score(y_train, train_result)

    # Scores are computed floats: compare with a tolerance rather than ``==``
    # so a last-bit rounding difference does not fail the test.
    assert np.isclose(ari_test, 0.43478260869565216)
    assert np.isclose(ari_train, 0.4683038263849229)
    assert len(predict_proba) == num_points
    assert np.array_equal(
        test_result,
        [2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 5, 1, 5, 3, 5, 4, 4, 1, 1, 4],
    )
    assert np.array_equal(
        train_result,
        [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 5, 6, 4, 7, 4, 5, 3, 1],
    )
    assert np.isclose(train_score, 0.7421052631578947)
    assert np.isclose(test_score, 0.7263157894736842)
    assert test_result.shape == (num_points,)
    assert train_result.shape == (num_points,)


def test_all_summary_stat():
    """Test Summary Clusterer with all summary stat.

    For each ``summary_stats`` option, fits on the first 20 BasicMotions
    training cases and checks that the train/test labels contain no NaN and
    that labels and ``predict_proba`` have one entry per case.
    """
    num_points = 20
    # Labels are not needed here: only output shape and validity are checked.
    X_train, _ = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    X_train = X_train[:num_points]
    X_test = X_test[:num_points]

    summary_stats_options = ["default", "percentiles", "bowley", "tukey"]
    for summary_stat in summary_stats_options:
        summary = SummaryClusterer(random_state=1, summary_stats=summary_stat)
        train_result = summary.fit_predict(X_train)
        test_result = summary.predict(X_test)
        predict_proba = summary.predict_proba(X_test)

        # The option name is attached to every assertion so a failure
        # identifies which configuration broke.
        assert len(predict_proba) == num_points, summary_stat
        assert not np.isnan(train_result).any(), summary_stat
        assert not np.isnan(test_result).any(), summary_stat
        assert test_result.shape == (num_points,), summary_stat
        assert train_result.shape == (num_points,), summary_stat
125 changes: 125 additions & 0 deletions aeon/clustering/feature_based/tests/test_tsfresh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Tests for TSFresh Clusterer."""

import numpy as np
import pytest
from sklearn import metrics

from aeon.clustering.feature_based import TSFreshClusterer
from aeon.datasets import load_basic_motions, load_gunpoint
from aeon.utils.validation._dependencies import _check_soft_dependencies


@pytest.mark.skipif(
    not _check_soft_dependencies(["tsfresh"], severity="none"),
    reason="TSFresh soft dependency unavailable.",
)
def test_tsfresh_univariate():
    """Test TSFresh Clusterer with univariate data.

    Clusters the first 20 GunPoint cases of each split into two clusters
    with ``random_state=1`` and compares the predicted labels, Rand scores
    and adjusted Rand scores against hard-coded expected values. Skipped
    when the ``tsfresh`` soft dependency check fails.
    """
    num_points = 20
    X_train, y_train = load_gunpoint(split="train")
    X_test, y_test = load_gunpoint(split="test")

    X_train, y_train = X_train[:num_points], y_train[:num_points]
    X_test, y_test = X_test[:num_points], y_test[:num_points]

    tsfresh = TSFreshClusterer(random_state=1, n_clusters=2)
    train_result = tsfresh.fit_predict(X_train)
    train_score = metrics.rand_score(y_train, train_result)
    test_result = tsfresh.predict(X_test)
    test_score = metrics.rand_score(y_test, test_result)
    predict_proba = tsfresh.predict_proba(X_test)
    ari_test = metrics.adjusted_rand_score(y_test, test_result)
    ari_train = metrics.adjusted_rand_score(y_train, train_result)

    # Scores are computed floats: compare with a tolerance rather than ``==``
    # so a last-bit rounding difference does not fail the test.
    assert np.isclose(ari_test, 0.0)
    assert np.isclose(ari_train, 0.02240325865580448)
    assert len(predict_proba) == num_points
    assert np.array_equal(
        train_result,
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
    )
    assert np.array_equal(
        test_result,
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    )
    assert np.isclose(train_score, 0.49473684210526314)
    assert np.isclose(test_score, 0.4789473684210526)
    assert test_result.shape == (num_points,)
    assert train_result.shape == (num_points,)


@pytest.mark.skipif(
    not _check_soft_dependencies(["tsfresh"], severity="none"),
    reason="TSFresh soft dependency unavailable.",
)
def test_tsfresh_multivariate():
    """Check TSFreshClusterer on multivariate (BasicMotions) data.

    Two clusters are fitted on the first 20 training cases with
    ``random_state=1``; the labels and scores for both splits are compared
    against hard-coded expected values.
    """
    num_points = 20
    train_X, train_y = load_basic_motions(split="train")
    test_X, test_y = load_basic_motions(split="test")
    train_X, train_y = train_X[:num_points], train_y[:num_points]
    test_X, test_y = test_X[:num_points], test_y[:num_points]

    clusterer = TSFreshClusterer(random_state=1, n_clusters=2)
    train_result = clusterer.fit_predict(train_X)
    test_result = clusterer.predict(test_X)
    predict_proba = clusterer.predict_proba(test_X)

    train_score = metrics.rand_score(train_y, train_result)
    test_score = metrics.rand_score(test_y, test_result)
    ari_train = metrics.adjusted_rand_score(train_y, train_result)
    ari_test = metrics.adjusted_rand_score(test_y, test_result)

    # Expected labelling for both splits: ten cases in cluster 0, then ten
    # cases in cluster 1.
    expected_labels = [0] * 10 + [1] * 10

    assert ari_test == 1
    assert ari_train == 1
    assert len(predict_proba) == 20
    assert np.array_equal(train_result, expected_labels)
    assert np.array_equal(test_result, expected_labels)
    assert train_score == 1.0
    assert test_score == 1.0
    assert test_result.shape == (20,)
    assert train_result.shape == (20,)


@pytest.mark.skipif(
    not _check_soft_dependencies(["tsfresh"], severity="none"),
    reason="TSFresh soft dependency unavailable.",
)
def test_all_fc_parameters():
    """Test TSFresh Clusterer with all FC parameters.

    For each ``default_fc_parameters`` preset, fits two clusters on the
    first 20 BasicMotions training cases and checks that the train/test
    labels contain no NaN and that labels and ``predict_proba`` have one
    entry per case. Skipped when the ``tsfresh`` soft dependency check
    fails.
    """
    num_points = 20
    # Labels are not needed here: only output shape and validity are checked.
    X_train, _ = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    X_train = X_train[:num_points]
    X_test = X_test[:num_points]

    fc_parameters = ["minimal", "efficient", "comprehensive"]
    for fc in fc_parameters:
        tsfresh = TSFreshClusterer(
            n_clusters=2, random_state=1, default_fc_parameters=fc
        )

        train_result = tsfresh.fit_predict(X_train)
        test_result = tsfresh.predict(X_test)
        predict_proba = tsfresh.predict_proba(X_test)

        # The preset name is attached to every assertion so a failure
        # identifies which configuration broke.
        assert len(predict_proba) == num_points, fc
        assert not np.isnan(train_result).any(), fc
        assert not np.isnan(test_result).any(), fc
        assert test_result.shape == (num_points,), fc
        assert train_result.shape == (num_points,), fc