
Commit d828fd2

MatthewMiddlehurst, chrisholder, and pre-commit-ci-lite[bot] authored
Distance clustering paper webpage (#125)
* remove todo
* unskip notebooks
* experiment kwargs, normalisation and n_clusters
* skeleton docs
* kwargs tests and fixes
* back to aeon 3
* test threading
* scaler
* threaded
* kwargs
* RIST to rocket
* results csv
* temp distance page
* distance webpage
* coverage and fixes
* fixes
* bug fix plot alignment
* removed extra costmatrix
* [pre-commit.ci lite] apply automatic fixes
* Revert "[ENH] Cluster experiment update (#124)"

This reverts commit 47142b9.

---------

Co-authored-by: Chris Holder <[email protected]>
Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
1 parent 60a6f56 commit d828fd2

18 files changed: +1544 −8 lines

Diff for: tsml_eval/experiments/tests/__init__.py

+5 −4

@@ -11,17 +11,18 @@
 from pathlib import Path
 
 _CLASSIFIER_RESULTS_PATH = (
-    os.path.dirname(Path(__file__).parent.parent) + "/test_output/classification/"
+    os.path.dirname(Path(__file__).parent.parent.parent)
+    + "/test_output/classification/"
 )
 
 _CLUSTERER_RESULTS_PATH = (
-    os.path.dirname(Path(__file__).parent.parent) + "/test_output/clustering/"
+    os.path.dirname(Path(__file__).parent.parent.parent) + "/test_output/clustering/"
 )
 
 _FORECASTER_RESULTS_PATH = (
-    os.path.dirname(Path(__file__).parent.parent) + "/test_output/forecasting/"
+    os.path.dirname(Path(__file__).parent.parent.parent) + "/test_output/forecasting/"
 )
 
 _REGRESSOR_RESULTS_PATH = (
-    os.path.dirname(Path(__file__).parent.parent) + "/test_output/regression/"
+    os.path.dirname(Path(__file__).parent.parent.parent) + "/test_output/regression/"
 )
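This change resolves each results path one directory higher: `os.path.dirname(Path(__file__).parent.parent)` pointed inside the `tsml_eval` package, while the extra `.parent` points at the project root. A minimal sketch of the difference, assuming a hypothetical checkout at `/repo`:

import os
from pathlib import Path

# stand-in for the real __file__ of tsml_eval/experiments/tests/__init__.py
module_file = Path("/repo/tsml_eval/experiments/tests/__init__.py")

old_base = os.path.dirname(module_file.parent.parent)         # "/repo/tsml_eval"
new_base = os.path.dirname(module_file.parent.parent.parent)  # "/repo"

print(old_base + "/test_output/classification/")  # before: inside the package
print(new_base + "/test_output/classification/")  # after: at the repository root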
Diff for: tsml_eval/publications/y2023/distance_based_clustering/__init__.py

@@ -1 +1,15 @@
 """Files for distance-based clustering publication."""
+
+__all__ = [
+    "_set_distance_clusterer",
+    "_run_experiment",
+    "distance_based_clusterers",
+]
+
+from tsml_eval.publications.y2023.distance_based_clustering.run_distance_experiments import (  # noqa: E501
+    _run_experiment,
+)
+from tsml_eval.publications.y2023.distance_based_clustering.set_distance_clusterer import (  # noqa: E501
+    _set_distance_clusterer,
+    distance_based_clusterers,
+)
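With these re-exports, the publication's helpers can be imported from the package root. A minimal sketch of that usage; note the diff does not show how `_set_distance_clusterer` is called, so the single-name-argument pattern below is an assumption:

from tsml_eval.publications.y2023.distance_based_clustering import (
    _set_distance_clusterer,
    distance_based_clusterers,
)

# names of the clusterers benchmarked in the publication
print(distance_based_clusterers)

# assumed call pattern: build an estimator from one of those names
clusterer = _set_distance_clusterer(distance_based_clusterers[0])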
@@ -0,0 +1,151 @@

"""Alignment path plotting utilities."""

import numpy as np
from aeon.distances import cost_matrix as compute_cost_matrix
from aeon.distances._distance import alignment_path, pairwise_distance
from aeon.utils.validation._dependencies import _check_soft_dependencies


def _path_mask(cost_matrix, path, ax, theme=None):  # pragma: no cover
    _check_soft_dependencies("matplotlib")

    import matplotlib.colors as colorplt

    if theme is None:
        theme = colorplt.LinearSegmentedColormap.from_list("", ["#c9cacb", "white"])

    # shade cells by status: 1.0 = on the alignment path, 0.0 = masked (inf cost),
    # 0.25 = reachable but off the path; assumes a square cost matrix
    plot_matrix = np.zeros_like(cost_matrix)
    max_size = max(cost_matrix.shape)
    for i in range(max_size):
        for j in range(max_size):
            if (i, j) in path:
                plot_matrix[i, j] = 1.0
            elif cost_matrix[i, j] == np.inf:
                plot_matrix[i, j] = 0.0
            else:
                plot_matrix[i, j] = 0.25

    # annotate every cell with its (rounded) cost
    for i in range(max_size):
        for j in range(max_size):
            c = cost_matrix[j, i]
            ax.text(i, j, str(round(c, 2)), va="center", ha="center", size=10)

    ax.matshow(plot_matrix, cmap=theme)


def _pairwise_path(x, y, metric):  # pragma: no cover
    # fallback for metrics with no alignment path: pair index i with index i
    pw_matrix = pairwise_distance(x, y, metric=metric)
    path = []
    for i in range(pw_matrix.shape[0]):
        for j in range(pw_matrix.shape[1]):
            if i == j:
                path.append((i, j))
    return path, pw_matrix.trace(), pw_matrix


def _plot_path(  # pragma: no cover
    x: np.ndarray,
    y: np.ndarray,
    metric: str,
    dist_kwargs: dict = None,
    title: str = "",
    plot_over_pw: bool = False,
):
    _check_soft_dependencies("matplotlib")

    import matplotlib.pyplot as plt

    if dist_kwargs is None:
        dist_kwargs = {}
    try:
        path, dist = alignment_path(x, y, metric=metric, **dist_kwargs)
        cost_matrix = compute_cost_matrix(x, y, metric=metric, **dist_kwargs)

        # shift the path to match the padded lcss cost matrix
        if metric == "lcss":
            _path = []
            for tup in path:
                _path.append(tuple(x + 1 for x in tup))
            path = _path

        if plot_over_pw is True:
            if metric == "lcss":
                pw = pairwise_distance(x, y, metric="euclidean")
                cost_matrix = np.zeros_like(cost_matrix)
                cost_matrix[1:, 1:] = pw
            else:
                pw = pairwise_distance(x, y, metric="squared")
                cost_matrix = pw
    except NotImplementedError:
        path, dist, cost_matrix = _pairwise_path(x, y, metric)

    plt.figure(1, figsize=(8, 8))
    x_size = x.shape[0]

    # definitions for the axes
    left, bottom = 0.01, 0.1
    w_ts = h_ts = 0.2
    left_h = left + w_ts + 0.02
    width = height = 0.65
    bottom_h = bottom + height + 0.02

    rect_s_y = [left, bottom, w_ts, height]
    rect_gram = [left_h, bottom, width, height]
    rect_s_x = [left_h, bottom_h, width, h_ts]

    ax_gram = plt.axes(rect_gram)
    ax_s_x = plt.axes(rect_s_x)
    ax_s_y = plt.axes(rect_s_y)

    _path_mask(cost_matrix, path, ax_gram)
    ax_gram.axis("off")
    ax_gram.autoscale(False)
    # ax_gram.plot([j for (i, j) in path], [i for (i, j) in path], "w-",
    # linewidth=3.)

    ax_s_x.plot(np.arange(x_size), y, "b-", linewidth=3.0, color="#818587")
    ax_s_x.axis("off")
    ax_s_x.set_xlim((0, x_size - 1))

    ax_s_y.plot(-x, np.arange(x_size), "b-", linewidth=3.0, color="#818587")
    ax_s_y.axis("off")
    ax_s_y.set_ylim((0, x_size - 1))

    ax_s_x.set_title(title, size=10)

    return plt


def _plot_alignment(  # pragma: no cover
    x, y, metric, dist_kwargs: dict = None, title: str = ""
):
    _check_soft_dependencies("matplotlib")

    import matplotlib.pyplot as plt

    if dist_kwargs is None:
        dist_kwargs = {}
    try:
        path, dist = alignment_path(x, y, metric=metric, **dist_kwargs)
    except NotImplementedError:
        path, dist, cost_matrix = _pairwise_path(x, y, metric)

    plt.figure(1, figsize=(8, 8))

    plt.plot(x, "b-", color="black")
    plt.plot(y, "g-", color="black")

    # dashed connectors between aligned points
    for positions in path:
        try:
            plt.plot(
                [positions[0], positions[1]],
                [x[positions[0]], y[positions[1]]],
                "--",
                color="#818587",
            )
        except Exception:
            continue
    plt.title(title)

    plt.tight_layout()
    return plt
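Both helpers return the `matplotlib.pyplot` module, so the caller can show or save the figure. A short usage sketch on two synthetic series; the import path is an assumption, since the commit page does not show this file's name, and "dtw" is one of the aeon metrics with an exact alignment path:

import numpy as np

# Hypothetical import path: the new file's name is not shown on the commit page.
from tsml_eval.publications.y2023.distance_based_clustering.plot_alignment import (
    _plot_alignment,
    _plot_path,
)

x = np.sin(np.linspace(0, 2 * np.pi, 20))
y = np.sin(np.linspace(0.5, 2 * np.pi + 0.5, 20))

# cost matrix with the warping path overlaid, plus the two series on the margins
plt = _plot_path(x, y, metric="dtw", title="DTW warping path")
plt.show()

# the two series with dashed lines connecting aligned points
plt = _plot_alignment(x, y, metric="dtw", title="DTW alignment")
plt.show()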
@@ -0,0 +1,175 @@

"""Code for tuning clusterer parameters used in the publication."""

import sys

import numpy as np
from aeon.clustering.k_means import TimeSeriesKMeans
from sklearn.metrics import davies_bouldin_score


# used primarily for dtw and wdtw
def _tune_window(metric, train_X, n_clusters):  # pragma: no cover
    best_w = 0
    best_score = sys.float_info.max
    for w in np.arange(0.0, 0.2, 0.01):
        cls = TimeSeriesKMeans(
            metric=metric, distance_params={"window": w}, n_clusters=n_clusters
        )
        cls.fit(train_X)
        preds = cls.predict(train_X)
        clusters = len(np.unique(preds))
        if clusters <= 1:
            # a single cluster cannot be scored, treat as worst case
            score = sys.float_info.max
        else:
            score = davies_bouldin_score(train_X, preds)
        print(f" Number of clusters = {clusters} window = {w} score = {score}")
        if score < best_score:
            best_score = score
            best_w = w
    print("best window =", best_w, " with score ", best_score)
    return best_w


def _tune_msm(train_X, n_clusters):  # pragma: no cover
    best_c = 0
    best_score = sys.float_info.max
    for c in np.arange(0.0, 5.0, 0.25):
        cls = TimeSeriesKMeans(
            metric="msm", distance_params={"c": c}, n_clusters=n_clusters
        )
        cls.fit(train_X)
        preds = cls.predict(train_X)
        clusters = len(np.unique(preds))
        if clusters <= 1:
            score = sys.float_info.max
        else:
            score = davies_bouldin_score(train_X, preds)
        print(f" Number of clusters = {clusters} c parameter = {c} score = {score}")
        if score < best_score:
            best_score = score
            best_c = c
    print("best c =", best_c, " with score ", best_score)
    return best_c


def _tune_wdtw(train_X, n_clusters):  # pragma: no cover
    best_g = 0
    best_score = sys.float_info.max
    for g in np.arange(0.0, 1.0, 0.05):
        cls = TimeSeriesKMeans(
            metric="wdtw", distance_params={"g": g}, n_clusters=n_clusters
        )
        cls.fit(train_X)
        preds = cls.predict(train_X)
        clusters = len(np.unique(preds))
        if clusters <= 1:
            score = sys.float_info.max
        else:
            score = davies_bouldin_score(train_X, preds)
        print(f" Number of clusters = {clusters} g parameter = {g} score = {score}")
        if score < best_score:
            best_score = score
            best_g = g
    print("best g =", best_g, " with score ", best_score)
    return best_g


def _tune_twe(train_X, n_clusters):  # pragma: no cover
    best_nu = 0
    best_lambda = 0
    best_score = sys.float_info.max
    for nu in np.arange(0.0, 1.0, 0.25):
        for lam in np.arange(0.0, 1.0, 0.2):
            cls = TimeSeriesKMeans(
                metric="twe",
                distance_params={"nu": nu, "lmbda": lam},
                n_clusters=n_clusters,
            )
            cls.fit(train_X)
            preds = cls.predict(train_X)
            clusters = len(np.unique(preds))
            if clusters <= 1:
                score = sys.float_info.max
            else:
                score = davies_bouldin_score(train_X, preds)
            print(
                f" Number of clusters = {clusters} nu param = {nu} lambda param "
                f"= {lam} score = {score}"
            )  # noqa
            if score < best_score:
                best_score = score
                best_nu = nu
                best_lambda = lam
    print("best nu =", best_nu, f" lambda = {best_lambda} score ", best_score)  # noqa
    return best_nu, best_lambda


def _tune_erp(train_X, n_clusters):  # pragma: no cover
    best_g = 0
    best_score = sys.float_info.max
    for g in np.arange(0.0, 2.0, 0.2):
        cls = TimeSeriesKMeans(
            metric="erp", distance_params={"g": g}, n_clusters=n_clusters
        )
        cls.fit(train_X)
        preds = cls.predict(train_X)
        clusters = len(np.unique(preds))
        if clusters <= 1:
            score = sys.float_info.max
        else:
            score = davies_bouldin_score(train_X, preds)
        print(f" Number of clusters = {clusters} g parameter = {g} score = {score}")
        if score < best_score:
            best_score = score
            best_g = g
    print("best g =", best_g, " with score ", best_score)
    return best_g


def _tune_edr(train_X, n_clusters):  # pragma: no cover
    best_e = 0
    best_score = sys.float_info.max
    for e in np.arange(0.0, 0.2, 0.01):
        cls = TimeSeriesKMeans(
            metric="edr", distance_params={"epsilon": e}, n_clusters=n_clusters
        )
        cls.fit(train_X)
        preds = cls.predict(train_X)
        clusters = len(np.unique(preds))
        if clusters <= 1:
            score = sys.float_info.max
        else:
            score = davies_bouldin_score(train_X, preds)
        print(
            f" Number of clusters = {clusters} epsilon parameter = {e} score = {score}"
        )
        if score < best_score:
            best_score = score
            best_e = e
    print("best e =", best_e, " with score ", best_score)  # noqa
    return best_e


def _tune_lcss(train_X, n_clusters):  # pragma: no cover
    best_e = 0
    best_score = sys.float_info.max
    for e in np.arange(0.0, 0.2, 0.01):
        cls = TimeSeriesKMeans(
            metric="lcss", distance_params={"epsilon": e}, n_clusters=n_clusters
        )
        cls.fit(train_X)
        preds = cls.predict(train_X)
        clusters = len(np.unique(preds))
        if clusters <= 1:
            score = sys.float_info.max
        else:
            score = davies_bouldin_score(train_X, preds)
        print(
            f" Number of clusters = {clusters} epsilon parameter = {e} score = {score}"
        )
        if score < best_score:
            best_score = score
            best_e = e
    print("best e =", best_e, " with score ", best_score)
    return best_e
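Each tuner sweeps a single distance parameter for TimeSeriesKMeans and keeps the value with the lowest Davies-Bouldin score on the training data. A minimal usage sketch on synthetic data; the module name, data shape, and n_clusters below are illustrative assumptions, not taken from the commit:

import numpy as np

# Hypothetical import path: the commit does not show this file's name.
from tsml_eval.publications.y2023.distance_based_clustering.tune_parameters import (
    _tune_msm,
    _tune_window,
)

rng = np.random.default_rng(0)
train_X = rng.normal(size=(30, 50))  # 30 univariate series of length 50

best_window = _tune_window("dtw", train_X, n_clusters=3)  # sweeps w in [0.0, 0.2)
best_c = _tune_msm(train_X, n_clusters=3)  # sweeps c in [0.0, 5.0)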
