Skip to content

Commit 3dfeee0

Browse files
Merge pull request #310 from SubhadityaMukherjee/main
Adding an OpenML example
2 parents 09cd8d2 + f2de18e commit 3dfeee0

File tree

3 files changed

+160
-0
lines changed

3 files changed

+160
-0
lines changed

.github/workflows/openml.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: openml
2+
3+
on:
4+
schedule:
5+
- cron: '0 15 * * *'
6+
pull_request:
7+
paths:
8+
- 'openml/**'
9+
- '.github/workflows/openml.yml'
10+
11+
jobs:
12+
examples:
13+
if: (github.event_name == 'schedule' && github.repository == 'optuna/optuna-examples') || (github.event_name != 'schedule')
14+
runs-on: ubuntu-latest
15+
strategy:
16+
matrix:
17+
# TODO(c-bata): Add Python 3.9 here after fixing https://github.com/optuna/optuna-examples/issues/307
18+
python-version: ['3.10', '3.11', '3.12']
19+
20+
steps:
21+
- uses: actions/checkout@v4
22+
- name: setup-python${{ matrix.python-version }}
23+
uses: actions/setup-python@v5
24+
with:
25+
python-version: ${{ matrix.python-version }}
26+
- name: Install (Python)
27+
run: |
28+
python -m pip install --upgrade pip
29+
pip install --progress-bar off -U setuptools
30+
pip install git+https://github.com/optuna/optuna.git
31+
python -c 'import optuna'
32+
pip install git+https://github.com/optuna/optuna-integration.git
33+
python -c 'import optuna_integration'
34+
35+
pip install -r openml/requirements.txt
36+
- name: Run examples
37+
run: |
38+
python openml/openml_simple.py
39+
env:
40+
OMP_NUM_THREADS: 1

openml/openml_simple.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""
2+
====================================================
3+
Hyperparameter Optimization Benchmark with OpenML
4+
====================================================
5+
6+
In this tutorial, we walk through how to conduct hyperparameter optimization experiments using
7+
OpenML and Optuna.
8+
"""
9+
10+
############################################################################
11+
# Please make sure to install the dependencies with:
12+
# ``pip install -r requirements.txt``
13+
# Then we import all the necessary modules.
14+
15+
# License: MIT License
16+
import logging
17+
18+
import optuna
19+
20+
import openml
21+
from openml.extensions.sklearn import cat
22+
from openml.extensions.sklearn import cont
23+
from sklearn.compose import ColumnTransformer
24+
from sklearn.ensemble import RandomForestClassifier
25+
from sklearn.impute import SimpleImputer
26+
from sklearn.pipeline import Pipeline
27+
from sklearn.preprocessing import OneHotEncoder
28+
29+
30+
# Obtain the logger from the logging registry rather than instantiating
# ``logging.Logger`` directly: a directly constructed logger is not registered
# with the logging hierarchy and cannot be retrieved or configured by name.
logger = logging.getLogger("Experiment Logger")
# Keep the very low threshold of 1 so that the level-1 progress messages
# emitted by this script still pass the logger's level check.
logger.setLevel(1)
# With no handler attached, records fall through to logging's "last resort"
# handler, which only prints WARNING and above, so every progress message
# would be dropped silently. Attach a stream handler (once) to show them.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
31+
32+
# Set your openml api key if you want to upload your results to OpenML (eg:
33+
# https://openml.org/search?type=run&sort=date). To get one, simply make an
34+
# account (you don't need one for anything else, just to upload your results),
35+
# go to your profile and select the API-KEY.
36+
# Or log in, and navigate to https://www.openml.org/auth/api-key
37+
openml.config.apikey = ""
38+
39+
############################################################################
40+
# Prepare for preprocessors and an OpenML task
41+
# ============================================
42+
43+
# OpenML contains several key concepts which it needs to make machine learning research shareable.
44+
# A machine learning experiment consists of one or several runs, which describe the performance of
45+
# an algorithm (called a flow in OpenML), its hyperparameter settings (called a setup) on a task.
46+
# A Task is the combination of a dataset, a split and an evaluation metric. We pick a dataset from
47+
# OpenML (https://www.openml.org/d/1464) and a subsequent task (https://www.openml.org/t/10101). To
48+
# make your own dataset and task, please refer to
49+
# https://openml.github.io/openml-python/main/examples/30_extended/create_upload_tutorial.html
50+
51+
# https://www.openml.org/search?type=study&study_type=task&id=218
52+
# Seed handed to the classifier, and the OpenML task the pipeline is run on.
seed = 42
task_id = 10101

# Each entry is a (name, transformer, columns) triple for the ColumnTransformer.
# ``cont`` and ``cat`` come from ``openml.extensions.sklearn``; presumably they
# select the continuous and the categorical columns respectively (TODO confirm).
numerical_preproc = (
    "numerical",
    SimpleImputer(strategy="median"),
    cont,
)
categorical_preproc = (
    "categorical",
    OneHotEncoder(handle_unknown="ignore", sparse_output=False),
    cat,
)

# The categorical transformer stays first so the output column order is unchanged.
preproc = ColumnTransformer(transformers=[categorical_preproc, numerical_preproc])
61+
62+
############################################################################
63+
# Define a pipeline for the hyperparameter optimization (this is standard for Optuna)
64+
# ===================================================================================
65+
66+
# Optuna explanation
67+
# We follow the `Optuna <https://github.com/optuna/optuna/>`__ search space design.
68+
69+
# OpenML runs
70+
# We can simply pass the parametrized classifier to `run_model_on_task` to obtain the performance
71+
# of the pipeline
72+
# on the specified OpenML task.
73+
# If you want to share your results along with an easily reproducible pipeline, you can set an API
74+
# key and just upload your results.
75+
# You can find more examples on https://www.openml.org/
76+
77+
78+
def objective(trial: optuna.Trial) -> float:
    """Evaluate one random-forest configuration on the OpenML task.

    Samples ``max_depth`` and ``min_samples_leaf`` from ``trial``, wraps the
    classifier together with the module-level ``preproc`` in a pipeline, and
    runs that pipeline on the task identified by ``task_id``. When an OpenML
    API key is configured, the run is also published on a best-effort basis;
    a publishing failure is logged and does not abort the trial.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        The mean ``predictive_accuracy`` over the folds stored under index 0
        of ``run.fold_evaluations`` (presumably the first repeat -- TODO
        confirm against the OpenML run documentation).
    """
    clf = RandomForestClassifier(
        max_depth=trial.suggest_int("max_depth", 2, 32, log=True),
        min_samples_leaf=trial.suggest_float("min_samples_leaf", 0.0, 1.0),
        random_state=seed,
    )
    pipe = Pipeline(steps=[("preproc", preproc), ("model", clf)])
    logger.info("Running pipeline - %s", pipe)
    run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False)
    logger.info("Model has been trained - %s", run)

    # Truthiness also covers an unset (None) key, not only the empty string.
    if openml.config.apikey:
        try:
            run.publish()
            logger.info("Run was uploaded to - %s", run.openml_url)
        except Exception as e:
            # Publishing is optional: report the failure loudly but keep the
            # trial alive so the optimization itself is not interrupted.
            logger.warning("Could not publish run - %s", e)
    else:
        logger.info("If you want to publish your results to OpenML, please set an apikey")

    # Score the configuration by its average fold accuracy. Taking the best
    # single fold would be optimistically biased and noisy as an objective.
    fold_scores = list(run.fold_evaluations["predictive_accuracy"][0].values())
    accuracy = sum(fold_scores) / len(fold_scores)
    logger.info("Accuracy %s", accuracy)

    return accuracy
105+
106+
107+
############################################################################
108+
# Optimize the pipeline
109+
# =====================
110+
# Seed the sampler with the module-level ``seed`` so that the sequence of
# suggested hyperparameters is reproducible between executions. TPESampler is
# named explicitly only to pass the seed.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=seed),
)
# Level 0 can never pass the logger's threshold, so log at INFO instead.
logger.info("Study %s", study)
study.optimize(objective, n_trials=15)
113+
114+
############################################################################
115+
# Visualize the optimization history
116+
# ==================================
117+
fig = optuna.visualization.plot_optimization_history(study)
118+
fig.show()

openml/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
openml>=0.15.1
2+
plotly

0 commit comments

Comments
 (0)