11 changes: 7 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ commands:

jobs:
# Build scikit-tree from source
build_scikit-tree:
build_scikit_tree:
<<: *defaults
steps:
- checkout
Expand Down Expand Up @@ -186,13 +186,16 @@ jobs:
- ~/sktree

workflows:
commit:
default:
jobs:
- build_scikit-tree
- build_scikit_tree:
name: build_scikit_tree
- build_docs:
name: build_docs
requires:
- build_scikit-tree
- build_scikit_tree
- docs-deploy:
name: docs-deploy
requires:
- build_docs
filters:
Expand Down
6 changes: 0 additions & 6 deletions .github/workflows/circle_artifacts.yml
Expand Up @@ -3,12 +3,6 @@ on: [status]

permissions: read-all

# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
# github actions workflow:
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
# permissions:
# statuses: write

jobs:
circleci_artifacts_redirector_job:
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions docs/whats_new/v0.1.rst
Expand Up @@ -34,6 +34,7 @@ Changelog
- |Feature| All tree types can compute similarity and dissimilarity matrices, by `Sambit Panda`_ and `Adam Li`_ (:pr:`64`)
- |Feature| MORF trees now can normalize by feature weight per sample per feature column, by `Adam Li`_ (:pr:`67`)
- |Feature| Implementation of ObliqueDecisionTreeRegressor, PatchObliqueDecisionTreeRegressor, ObliqueRandomForestRegressor, PatchObliqueRandomForestRegressor, by `SUKI-O`_ (:pr:`72`)
- |Feature| A general-kernel MORF is now implemented where users can pass in a kernel library, by `Adam Li`_ (:pr:`70`)

Code and Documentation Contributors
-----------------------------------
Expand Down
106 changes: 106 additions & 0 deletions examples/plot_kernel_decision_tree.py
@@ -0,0 +1,106 @@
"""
======================================
Custom Kernel Decision Tree Classifier
======================================

This example shows how to build a manifold oblique decision tree classifier using
a custom library of user-defined kernels/filters, such as Gaussian or Gabor
kernels.

The example demonstrates the classifier on a 2D dataset with structured images
as samples: the downsampled MNIST dataset, where each original 28x28 image is
downsampled to 14x14 and flattened into a 196-dimensional vector. The dataset is
then split into a training and a testing set.

See :ref:`sphx_glr_auto_examples_plot_projection_matrices` for more information on
projection matrices and the way they can be sampled.
"""
import matplotlib.pyplot as plt

# %%
# Importing the necessary modules
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from sktree.tree import KernelDecisionTreeClassifier

# %%
# Load the Dataset
# ----------------
# We need to load the dataset and split it into training and testing sets.

# Load the dataset
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

# Downsample the dataset
X = X.reshape((-1, 28, 28))
X = X[:, ::2, ::2]
X = X.reshape((-1, 196))

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# %%
# Setting up the Custom Kernel Decision Tree Model
# -------------------------------------------------
# To set up the custom kernel decision tree model, we need to define typical hyperparameters
# for the decision tree classifier, such as the maximum depth of the tree and the minimum
number of samples required to split an internal node. For the kernel decision tree
model, we also need to define the kernel function and its parameters.

max_depth = 10
min_samples_split = 2

# Next, we define the hyperparameters for the custom kernels that we will use.
# For example, if we want to use a Gaussian kernel with a sigma of 1.0 and a size of 3x3:
kernel_function = "gaussian"
kernel_params = {"sigma": 1.0, "size": (3, 3)}

# We can then fit the custom kernel decision tree model to the training set:
clf = KernelDecisionTreeClassifier(
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    data_dims=(14, 14),
    min_patch_dims=(1, 1),
    max_patch_dims=(14, 14),
    dim_contiguous=(True, True),
    boundary=None,
    n_classes=10,
    kernel_function=kernel_function,
    n_kernels=500,
    store_kernel_library=True,
)

# Fit the decision tree classifier using the custom kernel
clf.fit(X_train, y_train)

# %%
# Evaluating the Custom Kernel Decision Tree Model
# ------------------------------------------------
# To evaluate the custom kernel decision tree model, we can use the testing set.
# We can also inspect the important kernels that the tree selected.

# Predict the labels for the testing set
y_pred = clf.predict(X_test)

# Compute the accuracy score
accuracy = accuracy_score(y_test, y_pred)

print(f"Kernel decision tree model obtained an accuracy of {accuracy:.3f} on MNIST.")

# Get the important kernels from the decision tree classifier
important_kernels = clf.kernel_arr_
kernel_dims = clf.kernel_dims_
kernel_params = clf.kernel_params_
kernel_library = clf.kernel_library_

# Plot the important kernels
fig, axes = plt.subplots(
    nrows=len(important_kernels), ncols=1, figsize=(6, 4 * len(important_kernels)), squeeze=False
)
axes = axes.flatten()
for i, kernel in enumerate(important_kernels):
    axes[i].imshow(kernel, cmap="gray")
    axes[i].set_title("Kernel {}".format(i + 1))
plt.show()
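The example passes `kernel_function="gaussian"` with `sigma=1.0` and a 3x3 size, but never shows what such a filter looks like. As a rough sketch of what the library might generate internally (the `gaussian_kernel` helper below is hypothetical, not part of sktree's API), a normalized 2D Gaussian kernel of that shape can be built with NumPy alone:

```python
import numpy as np


def gaussian_kernel(size, sigma):
    """Build a 2D Gaussian kernel of shape `size` (rows, cols), normalized to sum to 1."""
    rows, cols = size
    # Center the coordinate grid so the peak lands in the middle of the kernel
    r = np.arange(rows) - (rows - 1) / 2.0
    c = np.arange(cols) - (cols - 1) / 2.0
    rr, cc = np.meshgrid(r, c, indexing="ij")
    k = np.exp(-(rr**2 + cc**2) / (2.0 * sigma**2))
    return k / k.sum()


kernel = gaussian_kernel((3, 3), sigma=1.0)
print(kernel.shape)  # (3, 3)
print(round(kernel.sum(), 6))  # 1.0
```

Normalizing the kernel to sum to 1 keeps the filtered response on the same scale as the input pixels, which is the usual convention for smoothing filters.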
37 changes: 37 additions & 0 deletions examples/plot_projection_matrices.py
Expand Up @@ -265,3 +265,40 @@

fig.suptitle("2D Discontiguous Patch Visualization")
plt.show()

# %%
# Finally, we sample a 2D patch that is discontiguous in both dimensions: the
# rows and the columns selected for each patch need not be adjacent.
dim_contiguous = np.array((False, False))

splitter = BestPatchSplitterTester(
    criterion,
    max_features,
    min_samples_leaf,
    min_weight_leaf,
    random_state,
    min_patch_dims,
    max_patch_dims,
    dim_contiguous,
    data_dims,
    boundary,
    feature_weight,
)
splitter.init_test(X, y, sample_weight)

# sample the projection matrix that consists of 2D discontiguous patches
proj_mat = splitter.sample_projection_matrix()

# Visualize 2D patches
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(12, 8), sharex=True, sharey=True, squeeze=True)
axs = axs.flatten()
for idx, ax in enumerate(axs):
    ax.imshow(proj_mat[idx, :].reshape(data_dims), cmap="viridis")
    ax.set(
        xlim=(-1, data_dims[1]),
        ylim=(-1, data_dims[0]),
        title=f"Patch {idx}",
    )

fig.suptitle("2D Discontiguous In All Dims Patch Visualization")
plt.show()
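The actual patch sampling happens inside the library's splitter, but the idea behind a patch that is "discontiguous in all dims" can be sketched in plain NumPy (the `sample_discontiguous_patch` helper below is an illustration, not sktree's implementation): pick the required number of rows and columns anywhere in the grid, with no adjacency constraint, and mark their intersection.

```python
import numpy as np

rng = np.random.default_rng(0)


def sample_discontiguous_patch(data_dims, patch_dims, rng):
    """Pick patch_dims[0] rows and patch_dims[1] cols at random (no adjacency
    required) and return a boolean mask over the data grid marking their
    intersection."""
    rows = rng.choice(data_dims[0], size=patch_dims[0], replace=False)
    cols = rng.choice(data_dims[1], size=patch_dims[1], replace=False)
    mask = np.zeros(data_dims, dtype=bool)
    # np.ix_ selects the cross product of the chosen rows and columns
    mask[np.ix_(rows, cols)] = True
    return mask


mask = sample_discontiguous_patch((8, 8), (2, 3), rng)
print(mask.sum())  # 6 — a 2x3 set of pixels whose rows/cols may be scattered
```

A contiguous patch would instead draw a single top-left corner and take an adjacent block; the discontiguous variant trades that spatial locality for the ability to combine far-apart pixels in one projection.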
2 changes: 2 additions & 0 deletions sktree/tree/__init__.py
@@ -1,4 +1,5 @@
from ._classes import (
    KernelDecisionTreeClassifier,
    ObliqueDecisionTreeClassifier,
    ObliqueDecisionTreeRegressor,
    PatchObliqueDecisionTreeClassifier,
Expand All @@ -14,4 +15,5 @@
    "ObliqueDecisionTreeRegressor",
    "PatchObliqueDecisionTreeClassifier",
    "PatchObliqueDecisionTreeRegressor",
    "KernelDecisionTreeClassifier",
]