wwu-mmll
diff --git a/‎.github/dependabot.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/dependabot.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/documentation_build_and_update.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/documentation_build_and_update.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/documentation_deployment.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/documentation_deployment.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/python-deploy_to_pypi.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/python-deploy_to_pypi.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/python-test_and_deploy.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/python-test_and_deploy.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/advanced/connectome_based_predictive_modeling_example.py‎
Lines changed: 34 additions & 0 deletions b/‎examples/advanced/connectome_based_predictive_modeling_example.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎examples/advanced/gpboost.py‎
Lines changed: 85 additions & 0 deletions b/‎examples/advanced/gpboost.py‎
Lines changed: 85 additions & 0 deletions
diff --git a/‎examples/basic/classification_custom.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/basic/classification_custom.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/basic/regression.py‎
Lines changed: 1 addition & 0 deletions b/‎examples/basic/regression.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/optimizer/meta_optimizer.py‎
Lines changed: 6 additions & 5 deletions b/‎examples/optimizer/meta_optimizer.py‎
Lines changed: 6 additions & 5 deletions
@@ -7,9 +7,11 @@ version: 2
 updates:
   - package-ecosystem: "github-actions" # See documentation for possible values
     directory: "/" # Location of package manifests
+    target-branch: "develop"
     schedule:
       interval: "daily"
   - package-ecosystem: "pip"
     directory: "/"
+    target-branch: "develop"
     schedule:
       interval: "daily"
@@ -13,7 +13,7 @@ jobs:
           fetch-depth: 0
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.9'
 
 
@@ -16,7 +16,7 @@ jobs:
           fetch-depth: 0
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.9'
 
 
@@ -13,7 +13,7 @@ jobs:
       with:
         fetch-depth: 0
     - name: Set up Python 3.10.8
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.10.8
     - name: Install pypa/build
 
@@ -25,7 +25,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python 3.9
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.9
     - name: Install dependencies
@@ -50,7 +50,7 @@ jobs:
       with:
         fetch-depth: 0
     - name: Set up Python 3.9
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.9
     - name: Install pypa/build
 
@@ -0,0 +1,34 @@
+"""
+Connectome-based predictive modeling
+
+CPM is a method described in the following Nature Protocols article: https://www.nature.com/articles/nprot.2016.178
+It has been used in a number of publications to predict behavior from connectivity data.
+CPM works similarly to a feature selection method. First, relevant edges (connectivity values) are identified through
+correlation analysis. Every edge is correlated with the predictive target. Only significant edges will be used in the
+subsequent steps. Next, the edge values for all significant positive and for all significant negative correlations are
+summed to create two new features. Lastly, these two features are used as input to another classifier.
+
+In this example, no connectivity data is used, but the method will still work.
+This example is just supposed to show how to use CPM as a feature selection and integration tool in PHOTONAI.
+"""
+
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import KFold
+
+from photonai import Hyperpipe, PipelineElement
+
+
+X, y = load_breast_cancer(return_X_y=True)  # tabular stand-in: per the module docstring, no connectivity data is used
+# Shuffled 5-fold CV with fixed seed 15 for both the outer and the inner loop; balanced accuracy picks the best config.
+pipe = Hyperpipe("cpm_feature_selection_pipe",
+                  outer_cv=KFold(n_splits=5, shuffle=True, random_state=15),
+                  inner_cv=KFold(n_splits=5, shuffle=True, random_state=15),
+                  metrics=["balanced_accuracy"], best_config_metric="balanced_accuracy",
+                  project_folder='./tmp')
+# CPM step: both correlation methods and both p-value thresholds are offered as hyperparameter values.
+pipe += PipelineElement('CPMFeatureSelection', hyperparameters={'corr_method': ['pearson', 'spearman'],
+                                                                'p_threshold': [0.01, 0.05]})
+# Classifier that follows the CPM step in the pipeline.
+pipe += PipelineElement('LogisticRegression')
+# Fit the pipeline on the breast-cancer data loaded above.
+pipe.fit(X, y)
@@ -0,0 +1,85 @@
+# pip install gpboost -U
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.model_selection import GroupKFold, KFold
+from photonai.base import Hyperpipe, PipelineElement
+import numpy as np
+import pandas as pd
+import gpboost as gpb
+# from gpboost import GPBoostRegressor
+
+
+class GPBoostDataWrapper(BaseEstimator, ClassifierMixin):  # NOTE(review): ClassifierMixin, yet fit() builds a GPBoostRegressor -- confirm
+    # Adapter that hands the "clusters" keyword argument to gpboost as group_data when fitting and predicting.
+    def __init__(self):
+        self.needs_covariates = True  # presumably asks PHOTONAI to forward extra kwargs such as "clusters" -- TODO confirm
+        # self.gpmodel = gpb.GPModel(likelihood="gaussian")
+        self.gpboost = None  # replaced by a new GPBoostRegressor on every fit() call
+
+    # Fit a new GPBoostRegressor on (X, y) with a gaussian GPModel whose group_data is kwargs["clusters"]; returns self.
+    def fit(self, X, y, **kwargs):
+        self.gpboost = gpb.GPBoostRegressor()
+        if "clusters" in kwargs:
+            clst = pd.Series(kwargs["clusters"])
+            gpmodel = gpb.GPModel(likelihood="gaussian", group_data=clst)
+            self.gpboost.fit(X, y, gp_model=gpmodel)
+        else:
+            raise NotImplementedError("GPBoost needs clusters")  # "clusters" is mandatory for this wrapper
+        return self
+    # Predict for X with kwargs["clusters"] as group_data_pred; returns the "response_mean" entry of the gpboost result.
+    def predict(self, X, **kwargs):
+        clst = pd.Series(kwargs["clusters"])  # NOTE(review): a missing "clusters" raises KeyError here, unlike fit()
+        preds = self.gpboost.predict(X, group_data_pred=clst)
+        preds = preds["response_mean"]
+        return preds
+    # Always returns None. NOTE(review): presumably a persistence hook looked up by PHOTONAI -- confirm.
+    def save(self):
+        return None
+
+
+def get_gpboost_pipe(pipe_name, project_folder, split="group"):
+    # Build and return a grid-search Hyperpipe: StandardScaler followed by the GPBoostDataWrapper estimator.
+    if split == "group":
+        outercv = GroupKFold(n_splits=10)  # NOTE(review): GroupKFold needs groups at split time -- confirm how they are supplied
+    else:
+        outercv = KFold(n_splits=10)
+    # Any split value other than "group" ends up with the plain KFold above; the inner CV is always a 10-fold KFold.
+    my_pipe = Hyperpipe(pipe_name,
+                        optimizer='grid_search',
+                        metrics=['mean_absolute_error', 'mean_squared_error',
+                                 'spearman_correlation', 'pearson_correlation'],
+                        best_config_metric='mean_absolute_error',
+                        outer_cv=outercv,
+                        inner_cv=KFold(n_splits=10),
+                        calculate_metrics_across_folds=True,
+                        use_test_set=True,
+                        verbosity=1,
+                        project_folder=project_folder)
+
+    # Add transformer elements
+    my_pipe += PipelineElement("StandardScaler", hyperparameters={},
+                               test_disabled=True, with_mean=True, with_std=True)
+    # Register an instance of the custom wrapper under the element name "GPBoost"; no hyperparameters are searched.
+    my_pipe += PipelineElement.create("GPBoost", GPBoostDataWrapper(), hyperparameters={})
+
+    return my_pipe
+
+
+def get_mock_data():
+    # Create an unseeded random toy data set and return it as the tuple (X, y, clst).
+    X = np.random.randint(10, size=(200, 9))  # 200 x 9 integers drawn from [0, 10)
+    y = np.sum(X, axis=1)  # target is the row sum of X
+    clst = np.random.randint(10, size=200)  # one integer cluster id in [0, 10) per sample
+
+    return X, y, clst
+
+
+if __name__ == '__main__':
+    # Demo run: build mock data and fit the GPBoost pipeline, passing the cluster ids as the "clusters" keyword.
+
+    X, y, clst = get_mock_data()
+
+    # define project folder
+    project_folder = "./tmp/gpboost_debug"
+    # split="random" is not "group", so get_gpboost_pipe uses a plain KFold as outer CV.
+    my_pipe = get_gpboost_pipe("Test_gpboost", project_folder, split="random")
+    my_pipe.fit(X, y, clusters=clst)
@@ -5,7 +5,7 @@
 my_pipe = Hyperpipe('basic_svm_pipe',
                     inner_cv=KFold(n_splits=5),
                     outer_cv=KFold(n_splits=3),
-                    optimizer='sk_opt',
+                    optimizer='random_grid_search',
                     optimizer_params={'n_configurations': 15},
                     metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                     best_config_metric='accuracy',
 
@@ -2,6 +2,7 @@
 from photonai import RegressionPipe
 
 my_pipe = RegressionPipe('diabetes',
+                         best_config_metric='median_absolute_error',
                          add_default_pipeline_elements=True,
                          scaling=True,
                          imputation=False,
 
@@ -7,7 +7,8 @@
                     inner_cv=KFold(n_splits=5),
                     outer_cv=KFold(n_splits=3),
                     optimizer='switch',
-                    optimizer_params={'name': 'sk_opt', 'n_configurations': 50},
+                    # optimizer_params={'name': 'grid_search'},
+                    optimizer_params={'name': 'random_search', 'n_configurations': 10},
                     metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                     best_config_metric='accuracy',
                     project_folder='./tmp',
@@ -16,7 +17,7 @@
 my_pipe.add(PipelineElement('StandardScaler'))
 
 my_pipe += PipelineElement('PCA',
-                           hyperparameters={'n_components': IntegerRange(10, 30)},
+                           hyperparameters={'n_components': IntegerRange(10, 30, step=5)},
                            test_disabled=True)
 
 # set up two learning algorithms in an ensemble
@@ -25,15 +26,15 @@
 estimator_selection += PipelineElement('RandomForestClassifier',
                                        criterion='gini',
                                        hyperparameters={'min_samples_split': IntegerRange(2, 4),
-                                                        'max_features': ['auto', 'sqrt', 'log2'],
+                                                        'max_features': ['sqrt', 'log2'],
                                                         'bootstrap': [True, False]})
 estimator_selection += PipelineElement('SVC',
-                                       hyperparameters={'C': FloatRange(0.5, 25),
+                                       hyperparameters={'C': FloatRange(0.5, 25, num=10),
                                                         'kernel': ['linear', 'rbf']})
 
 my_pipe += estimator_selection
 
 X, y = load_breast_cancer(return_X_y=True)
 my_pipe.fit(X, y)
 
-my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator()
+print(my_pipe.results_handler.get_mean_of_best_validation_configs_per_estimator())