deeptime-ml
diff --git a/‎CMakeLists.txt
Lines changed: 1 addition & 1 deletion b/‎CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎deeptime/clustering/__init__.py
Lines changed: 0 additions & 2 deletions b/‎deeptime/clustering/__init__.py
Lines changed: 0 additions & 2 deletions
diff --git a/‎deeptime/clustering/_cluster_model.py
Lines changed: 3 additions & 2 deletions b/‎deeptime/clustering/_cluster_model.py
Lines changed: 3 additions & 2 deletions
diff --git a/‎deeptime/clustering/_kmeans.py
Lines changed: 11 additions & 10 deletions b/‎deeptime/clustering/_kmeans.py
Lines changed: 11 additions & 10 deletions
diff --git a/‎deeptime/clustering/_metric.py
Lines changed: 35 additions & 7 deletions b/‎deeptime/clustering/_metric.py
Lines changed: 35 additions & 7 deletions
diff --git a/‎deeptime/clustering/_regspace.py
Lines changed: 3 additions & 4 deletions b/‎deeptime/clustering/_regspace.py
Lines changed: 3 additions & 4 deletions
diff --git a/‎deeptime/clustering/include/bits/kmeans_bits.h
Lines changed: 15 additions & 26 deletions b/‎deeptime/clustering/include/bits/kmeans_bits.h
Lines changed: 15 additions & 26 deletions
diff --git a/‎deeptime/clustering/include/bits/metric_base_bits.h
Lines changed: 4 additions & 18 deletions b/‎deeptime/clustering/include/bits/metric_base_bits.h
Lines changed: 4 additions & 18 deletions
@@ -16,7 +16,7 @@ else()
     add_compile_options(-Wall -Wextra -pedantic -Werror)
 endif()
 
-include("${PYBIND_CMAKE_DIR}/pybind11Config.cmake")
+find_package(pybind11 REQUIRED)
 
 set(common_includes "${CMAKE_CURRENT_LIST_DIR}/deeptime/src/include")
 
 
@@ -47,13 +47,11 @@
     :toctree: generated/
     :template: class_nomodule.rst
 
-    Metric
     metrics
     MetricRegistry
 """
 
 from ._metric import metrics, MetricRegistry
-from ._clustering_bindings import Metric
 from ._kmeans import KMeans, MiniBatchKMeans, KMeansModel
 from ._regspace import RegularSpace
 from ._box import BoxDiscretization, BoxDiscretizationModel
 
@@ -1,7 +1,7 @@
 import numpy as np
 from deeptime.base import Model, Transformer
 
-from . import _clustering_bindings as _bd, metrics
+from . import metrics
 from ..util.parallel import handle_n_jobs
 
 
@@ -117,5 +117,6 @@ def transform(self, data, n_jobs=None) -> np.ndarray:
         n_jobs = handle_n_jobs(n_jobs)
         if data.ndim == 1:
             data = data[..., None]
-        dtraj = _bd.assign(data, self.cluster_centers, n_jobs, metrics[self.metric]())
+        impl = metrics[self.metric]
+        dtraj = impl.assign(data, self.cluster_centers, n_jobs)
         return dtraj
@@ -6,7 +6,7 @@
 
 from ..base import EstimatorTransformer
 from ._cluster_model import ClusterModel
-from . import _clustering_bindings as _bd, metrics
+from . import metrics
 
 from ..util.parallel import handle_n_jobs
 
@@ -41,9 +41,8 @@ def kmeans_plusplus(data, n_clusters: int, metric: str = 'euclidean', callback=N
     .. footbibliography::
     """
     n_jobs = handle_n_jobs(n_jobs)
-    metric = metrics[metric]()
-    return _bd.kmeans.init_centers_kmpp(data, k=n_clusters, random_seed=seed, n_threads=n_jobs,
-                                        callback=callback, metric=metric)
+    impl = metrics[metric]
+    return impl.kmeans.init_centers_kmpp(data, k=n_clusters, random_seed=seed, n_threads=n_jobs, callback=callback)
 
 
 class KMeansModel(ClusterModel):
@@ -132,7 +131,8 @@ def score(self, data: np.ndarray, n_jobs: Optional[int] = None) -> float:
             the inertia
         """
         n_jobs = handle_n_jobs(n_jobs)
-        return _bd.kmeans.cost_function(data, self.cluster_centers, n_jobs, metrics[self.metric]())
+        impl = metrics[self.metric]
+        return impl.kmeans.cost_function(data, self.cluster_centers, n_jobs)
 
 
 class KMeans(EstimatorTransformer):
@@ -425,9 +425,10 @@ def fit(self, data, initial_centers=None, callback_init_centers=None, callback_l
 
         # run k-means with all the data
         converged = False
-        cluster_centers, code, iterations, cost = _bd.kmeans.cluster_loop(
+        impl = metrics[self.metric]
+        cluster_centers, code, iterations, cost = impl.kmeans.cluster_loop(
             data, self.initial_centers.copy(), n_jobs, self.max_iter,
-            self.tolerance, callback_loop, metrics[self.metric]())
+            self.tolerance, callback_loop)
         if code == 0:
             converged = True
         else:
@@ -514,9 +515,9 @@ def partial_fit(self, data, n_jobs=None):
                                       tolerance=self.tolerance, inertias=np.array([float('inf')]))
         if data.ndim == 1:
             data = data[:, np.newaxis]
-        metric_instance = metrics[self.metric]()
-        self._model._cluster_centers = _bd.kmeans.cluster(data, self._model.cluster_centers, n_jobs, metric_instance)[0]
-        cost = _bd.kmeans.cost_function(data, self._model.cluster_centers, n_jobs, metric_instance)
+        impl = metrics[self.metric]
+        self._model._cluster_centers = impl.kmeans.cluster(data, self._model.cluster_centers, n_jobs)[0]
+        cost = impl.kmeans.cost_function(data, self._model.cluster_centers, n_jobs)
 
         rel_change = np.abs(cost - self._model.inertia) / cost if cost != 0.0 else 0.0
         self._model._inertias = np.append(self._model._inertias, cost)
 
@@ -8,25 +8,53 @@ class MetricRegistry:
     If a custom metric is implemented, it can be registered through a call to
     :meth:`register <deeptime.clustering.MetricRegistry.register>`.
 
-    Note that the registry should not be instantiated directly but rather be accessed
-    through :data:`metrics <deeptime.clustering.metrics>`.
+    .. note::
+
+        The registry should not be instantiated directly but rather be accessed
+        through the :data:`metrics <deeptime.clustering.metrics>` singleton.
+
+
+    .. rubric:: Adding a new metric
+
+    A new metric may be added by linking against the deeptime clustering C++ library (the directory is provided by
+    ``deeptime.capi_includes(inc_clustering=True)``) and subsequently exposing the clustering algorithms with your
+    custom metric, for example:
+
+    .. code-block:: cpp
+
+        #include "register_clustering.h"
+
+        PYBIND11_MODULE(bindings, m) {  // module name must match the name of your compiled extension
+            m.doc() = "module containing clustering algorithms.";
+            auto customModule = m.def_submodule("custom");
+            deeptime::clustering::registerClusteringImplementation<Custom>(customModule);
+        }
+
+    and registering it with the deeptime library through
+
+    .. code-block:: python
+
+        import deeptime
+        import bindings  # this is your compiled extension, rename as appropriate
+
+        deeptime.clustering.metrics.register("custom", bindings.custom)
     """
 
     def __init__(self):
         self._registered = None
-        self.register("euclidean", _bd.EuclideanMetric)
+        self.register("euclidean", _bd.euclidean)
 
-    def register(self, name: str, clazz):
+    def register(self, name: str, impl):
         r""" Adds a new metric to the registry.
 
         Parameters
         ----------
         name : str
             The name of the metric.
-        clazz : class
-            Reference to the class of the metric.
+        impl : module
+            Reference to the implementation module.
         """
-        self._mapping[name] = clazz
+        self._mapping[name] = impl
 
     @property
     def available(self) -> Tuple[str]:
 
@@ -3,7 +3,6 @@
 import numpy as np
 
 from . import metrics
-from ._clustering_bindings import regspace as _regspace_ext
 from ._cluster_model import ClusterModel
 from ..base import Estimator
 
@@ -142,14 +141,14 @@ def fetch_model(self) -> ClusterModel:
 
     def partial_fit(self, data, n_jobs=None):
         r""" Fits data to an existing model. See :meth:`fit`. """
+        impl = metrics[self.metric]
         n_jobs = self.n_jobs if n_jobs is None else handle_n_jobs(n_jobs)
         if data.ndim == 1:
             data = data[:, np.newaxis]
         try:
-            metric = metrics[self.metric]()
-            _regspace_ext.cluster(data, self._clustercenters, self.dmin, self.max_centers, n_jobs, metric)
+            impl.regspace.cluster(data, self._clustercenters, self.dmin, self.max_centers, n_jobs)
             self._converged = True
-        except _regspace_ext.MaxCentersReachedException:
+        except impl.regspace.MaxCentersReachedException:
             warnings.warn('Maximum number of cluster centers reached.'
                           ' Consider increasing max_centers or choose'
                           ' a larger minimum distance, dmin.')
 
@@ -11,14 +11,9 @@ namespace deeptime {
 namespace clustering {
 namespace kmeans {
 
-template<typename T>
+template<typename Metric, typename T>
 inline std::tuple<np_array<T>, np_array<int>> cluster(const np_array_nfc<T> &np_chunk,
-                                                      const np_array_nfc<T> &np_centers, int n_threads,
-                                                      const Metric *metric) {
-    if (metric == nullptr) {
-        metric = default_metric();
-    }
-
+                                                      const np_array_nfc<T> &np_centers, int n_threads) {
     if (np_chunk.ndim() != 2) {
         throw std::runtime_error(R"(Number of dimensions of "chunk" ain't 2.)");
     }
@@ -51,9 +46,9 @@ inline std::tuple<np_array<T>, np_array<int>> cluster(const np_array_nfc<T> &np_
         for (pybind11::ssize_t i = 0; i < n_frames; ++i) {
             int argMinDist = 0;
             {
-                T minDist = metric->compute(&chunk(i, 0), &centers(0, 0), dim);
+                T minDist = Metric::template compute(&chunk(i, 0), &centers(0, 0), dim);
                 for (std::size_t j = 1; j < n_centers; ++j) {
-                    auto dist = metric->compute(&chunk(i, 0), &centers(j, 0), dim);
+                    auto dist = Metric::template compute(&chunk(i, 0), &centers(j, 0), dim);
                     if (dist < minDist) {
                         minDist = dist;
                         argMinDist = j;
@@ -77,7 +72,7 @@ inline std::tuple<np_array<T>, np_array<int>> cluster(const np_array_nfc<T> &np_
         for (pybind11::ssize_t i = 0; i < n_frames; ++i) {
             std::vector<T> dists(n_centers);
             for (std::size_t j = 0; j < n_centers; ++j) {
-                dists[j] = metric->compute(&chunk(i, 0), &centers(j, 0), dim);
+                dists[j] = Metric::template compute(&chunk(i, 0), &centers(j, 0), dim);
             }
 #pragma omp flush(dists)
 
@@ -106,9 +101,9 @@ inline std::tuple<np_array<T>, np_array<int>> cluster(const np_array_nfc<T> &np_
                 for (auto i = begin; i < end; ++i) {
                     std::size_t argMinDist = 0;
                     {
-                        T minDist = metric->compute(&chunk(i, 0), &centers(0, 0), dim);
+                        T minDist = Metric::template compute(&chunk(i, 0), &centers(0, 0), dim);
                         for (std::size_t j = 1; j < n_centers; ++j) {
-                            auto dist = metric->compute(&chunk(i, 0), &centers(j, 0), dim);
+                            auto dist = Metric::template compute(&chunk(i, 0), &centers(j, 0), dim);
                             if(dist < minDist) {
                                 minDist = dist;
                                 argMinDist = j;
@@ -151,13 +146,10 @@ inline std::tuple<np_array<T>, np_array<int>> cluster(const np_array_nfc<T> &np_
     return std::make_tuple(newCenters, std::move(assignments));
 }
 
-template<typename T>
+template<typename Metric, typename T>
 inline std::tuple<np_array_nfc<T>, int, int, np_array<T>> cluster_loop(
         const np_array_nfc<T> &np_chunk, const np_array_nfc<T> &np_centers,
-        int n_threads, int max_iter, T tolerance, py::object &callback, const Metric *metric) {
-    if (metric == nullptr) {
-        metric = default_metric();
-    }
+        int n_threads, int max_iter, T tolerance, py::object &callback) {
     int it = 0;
     bool converged = false;
     T rel_change;
@@ -168,10 +160,10 @@ inline std::tuple<np_array_nfc<T>, int, int, np_array<T>> cluster_loop(
     inertias.reserve(max_iter);
 
     do {
-        auto clusterResult = cluster<T>(np_chunk, currentCenters, n_threads, metric);
+        auto clusterResult = cluster<Metric>(np_chunk, currentCenters, n_threads);
         currentCenters = std::get<0>(clusterResult);
         const auto &assignments = std::get<1>(clusterResult);
-        auto cost = costFunction(np_chunk, currentCenters, assignments, n_threads, metric);
+        auto cost = costFunction<Metric>(np_chunk, currentCenters, assignments, n_threads);
         inertias.push_back(cost);
         rel_change = (cost != 0.0) ? std::abs(cost - prev_cost) / cost : 0;
         prev_cost = cost;
@@ -193,12 +185,9 @@ inline std::tuple<np_array_nfc<T>, int, int, np_array<T>> cluster_loop(
     return std::make_tuple(currentCenters, res, it, npInertias);
 }
 
-template<typename T>
+template<typename Metric, typename T>
 inline T costFunction(const np_array_nfc<T> &np_data, const np_array_nfc<T> &np_centers,
-                      const np_array<int> &assignments, int n_threads, const Metric *metric) {
-    if(metric == nullptr) {
-        metric = default_metric();
-    }
+                      const np_array<int> &assignments, int n_threads) {
     auto data = np_data.template unchecked<2>();
     auto centers = np_centers.template unchecked<2>();
 
@@ -210,9 +199,9 @@ inline T costFunction(const np_array_nfc<T> &np_data, const np_array_nfc<T> &np_
     omp_set_num_threads(n_threads);
     #endif
 
-    #pragma omp parallel for reduction(+:value) default(none) firstprivate(n_frames, metric, data, centers, assignmentsPtr, dim)
+    #pragma omp parallel for reduction(+:value) default(none) firstprivate(n_frames, data, centers, assignmentsPtr, dim)
     for (std::size_t i = 0; i < n_frames; i++) {
-        auto l = metric->compute(&data(i, 0), &centers(assignmentsPtr[i], 0), dim);
+        auto l = Metric::template compute(&data(i, 0), &centers(assignmentsPtr[i], 0), dim);
         {
             value += l * l;
         }
 
@@ -10,24 +10,10 @@
 #include <omp.h>
 #endif
 
-template<>
-inline float Metric::compute_squared<float>(const float* xs, const float* ys, std::size_t dim) const {
-    return compute_squared_f(xs, ys, dim);
-}
-
-template<>
-inline double Metric::compute_squared<double>(const double* xs, const double* ys, std::size_t dim) const {
-    return compute_squared_d(xs, ys, dim);
-}
-
-template<typename T>
+template<typename Metric, typename T>
 inline py::array_t<int> assign_chunk_to_centers(const np_array_nfc<T>& chunk,
                                                 const np_array_nfc<T>& centers,
-                                                int n_threads,
-                                                const Metric* metric) {
-    if (metric == nullptr) {
-        metric = default_metric();
-    }
+                                                int n_threads) {
     if (chunk.ndim() != 2) {
         throw std::invalid_argument("provided chunk does not have two dimensions.");
     }
@@ -57,12 +43,12 @@ inline py::array_t<int> assign_chunk_to_centers(const np_array_nfc<T>& chunk,
     omp_set_num_threads(n_threads);
 #endif
 
-    #pragma omp parallel default(none) firstprivate(N_frames, N_centers, centers_buff, input_dim, metric, chunk_buff, dtraj_buff, dists)
+    #pragma omp parallel default(none) firstprivate(N_frames, N_centers, centers_buff, input_dim, chunk_buff, dtraj_buff, dists)
     {
         #pragma omp for
         for(size_t i = 0; i < N_frames; ++i) {
             for(size_t j = 0; j < N_centers; ++j) {
-                dists[j] = metric->compute(&chunk_buff(i, 0), &centers_buff(j, 0), input_dim);
+                dists[j] = Metric::template compute<T>(&chunk_buff(i, 0), &centers_buff(j, 0), input_dim);
             }
 
             {