115 changes: 67 additions & 48 deletions causaltune/optimiser.py
@@ -1,14 +1,14 @@
import copy
import warnings
from typing import List, Optional, Union
from typing import List, Optional, Union, Any
from collections import defaultdict
import time

import traceback
import pandas as pd
import numpy as np
from sklearn.linear_model import _base
from flaml import tune
from hiertunehub import create_tuner

from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split
@@ -174,6 +174,7 @@ def __init__(
self._settings["tuner"]["resources_per_trial"] = (
resources_per_trial if resources_per_trial is not None else {"cpu": 0.5}
)
self._settings["tuner"]["algo"] = None
self._settings["try_init_configs"] = try_init_configs
self._settings["include_experimental_estimators"] = include_experimental_estimators

@@ -200,7 +201,7 @@ def __init__(
self._settings["propensity_model"] = propensity_model
self._settings["outcome_model"] = outcome_model

self.results = None
self.tuner = None
self._best_estimators = defaultdict(lambda: (float("-inf"), None))

self.original_estimator_list = estimator_list
@@ -288,6 +289,8 @@ def fit(
encoder_type: Optional[str] = None,
encoder_outcome: Optional[str] = None,
use_ray: Optional[bool] = None,
framework: Optional[str] = "flaml",
algo: Any = None,
):
"""Performs AutoML on list of causal inference estimators
- If estimator has a search space specified in its parameters, HPO is performed on the whole model.
@@ -307,6 +310,8 @@ def fit(
preprocess (bool): preprocess CausalityDataset if needed.
encoder_type (Optional[str]): Categorical Encoder for preprocessing
encoder_outcome (Optional[str]): Categorical Encoder target for preprocessing: TargetEncoder, WOE.
framework (Optional[str]): Framework to use for HPO; choices are "flaml", "hyperopt" and "optuna".
algo (Any): Search algorithm to use for HPO; each framework has its own set of algorithms to choose from.

Returns:
None
@@ -436,6 +441,7 @@ def fit(
self._settings["tuner"]["time_budget_s"] = (
2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"]
)
self._settings["tuner"]["algo"] = algo

cmtb = self._settings["component_models"]["time_budget"]

@@ -474,47 +480,60 @@ def fit(
else []
)

if resume and self.results:
# pull out configs and resume_scores from previous trials:
for _, result in self.results.results.items():
self.resume_scores.append(result[self.metric])
self.resume_cfg.append(result["config"])
# append init_cfgs that have not yet been evaluated
for cfg in init_cfg:
self.resume_cfg.append(cfg) if cfg not in self.resume_cfg else None
try:
self.results = tune.run(
self._tune_with_config,
search_space,
metric=self.metric,
# use_ray=self.use_ray,
cost_attr="evaluation_cost",
points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
mode=("min" if self.metric in metrics_to_minimize() else "max"),
# resources_per_trial= {"cpu": 1} if self.use_ray else None,
low_cost_partial_config={},
**self._settings["tuner"],
)
# TODO: integrate resume and init cfg
# if resume and self.results:
# # pull out configs and resume_scores from previous trials:
# for _, result in self.results.results.items():
# self.resume_scores.append(result[self.metric])
# self.resume_cfg.append(result["config"])
# # append init_cfgs that have not yet been evaluated
# for cfg in init_cfg:
# self.resume_cfg.append(cfg) if cfg not in self.resume_cfg else None

self.tuner = create_tuner(
self._tune_with_config,
search_space,
metric=self.metric,
mode=("min" if self.metric in metrics_to_minimize() else "max"),
framework=framework,
**self.cfg.parse_tuner_params(self._settings["tuner"], framework),
)

if self.results.get_best_trial() is None:
raise Exception(
"Optimization failed! Did you set large enough time_budget and components_budget?"
)
except Exception:
# we must have an older FLAML version that doesn't support the cost_attr parameter
self.results = tune.run(
self._tune_with_config,
search_space,
metric=self.metric,
points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
mode=("min" if self.metric in metrics_to_minimize() else "max"),
low_cost_partial_config={},
**self._settings["tuner"],
)
# print("Optimization failed!\n", traceback.format_exc())
# raise e
self.tuner.run()

# try:
# self.results = tune.run(
# self._tune_with_config,
# search_space,
# metric=self.metric,
# # use_ray=self.use_ray,
# cost_attr="evaluation_cost",
# points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
# evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
# mode=("min" if self.metric in metrics_to_minimize() else "max"),
# # resources_per_trial= {"cpu": 1} if self.use_ray else None,
# low_cost_partial_config={},
# **self._settings["tuner"],
# )
#
# if self.results.get_best_trial() is None:
# raise Exception(
# "Optimization failed! Did you set large enough time_budget and components_budget?"
# )
# except Exception:
# # we must have an older FLAML version that doesn't support the cost_attr parameter
# self.results = tune.run(
# self._tune_with_config,
# search_space,
# metric=self.metric,
# points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
# evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
# mode=("min" if self.metric in metrics_to_minimize() else "max"),
# low_cost_partial_config={},
# **self._settings["tuner"],
# )
# # print("Optimization failed!\n", traceback.format_exc())
# # raise e
self.update_summary_scores()

def update_summary_scores(self):
@@ -523,7 +542,7 @@ def update_summary_scores(self):
Returns:
None
"""
self.scores = Scorer.best_score_by_estimator(self.results.results, self.metric)
self.scores = Scorer.best_score_by_estimator(self.tuner.results, self.metric)
# now inject the separately saved model objects
for est_name in self.scores:
# Todo: Check approximate scores for OrthoIV (possibly other IV estimators)
@@ -699,7 +718,7 @@ def best_estimator(self) -> str:
Returns:
None
"""
return self.results.best_result["estimator_name"]
return self.tuner.best_result["estimator_name"]

@property
def model(self):
@@ -708,7 +727,7 @@ def model(self):
Returns:
CausalEstimator
"""
return self.results.best_result["estimator"].estimator
return self.tuner.best_result["estimator"].estimator

def best_model_for_estimator(self, estimator_name):
"""Return the best model found for a particular estimator.
@@ -730,7 +749,7 @@ def best_config(self):
Returns:
(dict): the best configuration
"""
return self.results.best_config
return self.tuner.best_params

@property
def best_config_per_estimator(self):
@@ -749,7 +768,7 @@ def best_score(self):
"""
Returns:
(float): the best score found."""
return self.results.best_result[self.metric]
return self.tuner.best_result[self.metric]

def effect(self, df, *args, **kwargs):
"""Heterogeneous Treatment Effects for data df
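For orientation, a minimal usage sketch of the new tuner-backed flow. This is a sketch only: it assumes CausalityDataset is importable from causaltune.data_utils as in the current package layout, df and the column names are placeholders, and algo=None lets the chosen framework fall back to its default search algorithm.

    from causaltune import CausalTune
    from causaltune.data_utils import CausalityDataset

    # df, "treatment" and "outcome" are placeholder data / column names
    cd = CausalityDataset(data=df, treatment="treatment", outcomes=["outcome"])
    ct = CausalTune(metric="energy_distance", components_time_budget=60)
    # framework selects the HPO backend ("flaml", "hyperopt" or "optuna");
    # algo=None uses that backend's default search algorithm
    ct.fit(cd, framework="optuna", algo=None)
    print(ct.best_estimator, ct.best_score)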
8 changes: 4 additions & 4 deletions causaltune/score/scoring.py
@@ -1321,7 +1321,7 @@ def make_scores(
return out

@staticmethod
def best_score_by_estimator(scores: Dict[str, dict], metric: str) -> Dict[str, dict]:
def best_score_by_estimator(scores: list[dict], metric: str) -> Dict[str, dict]:
"""Obtain best score for each estimator.

Args:
@@ -1333,19 +1333,19 @@ def best_score_by_estimator(scores: Dict[str, dict], metric: str) -> Dict[str, d

"""

for k, v in scores.items():
for v in scores:
if "estimator_name" not in v:
raise ValueError(
f"Malformed scores dict, 'estimator_name' field missing " f"in{k}, {v}"
)

estimator_names = sorted(
list(set([v["estimator_name"] for v in scores.values() if "estimator_name" in v]))
list(set([v["estimator_name"] for v in scores if "estimator_name" in v]))
)
best = {}
for name in estimator_names:
est_scores = [
v for v in scores.values() if "estimator_name" in v and v["estimator_name"] == name
v for v in scores if "estimator_name" in v and v["estimator_name"] == name
]
best[name] = (
min(est_scores, key=lambda x: x[metric])
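For illustration of the signature change above: scores is now a flat list of per-trial dicts rather than a mapping keyed by trial id. A minimal sketch with made-up trial entries, assuming energy_distance is among the metrics that are minimized:

    trials = [
        {"estimator_name": "backdoor.econml.dml.LinearDML", "energy_distance": 0.21},
        {"estimator_name": "backdoor.econml.dml.LinearDML", "energy_distance": 0.18},
        {"estimator_name": "backdoor.propensity_score_weighting", "energy_distance": 0.25},
    ]
    best = Scorer.best_score_by_estimator(trials, "energy_distance")
    # best["backdoor.econml.dml.LinearDML"] is the trial with energy_distance == 0.18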
29 changes: 29 additions & 0 deletions causaltune/search/params.py
@@ -7,6 +7,7 @@
import warnings
from econml.inference import BootstrapInference # noqa F401
from sklearn import linear_model
from hiertunehub import SearchSpace

from causaltune.utils import clean_config
from causaltune.search.component import model_from_cfg, joint_config
@@ -161,8 +162,36 @@ def search_space(
data_size, outcome_estimator_list
)

out = SearchSpace.from_flaml(out, name="estimator_name")

return out

@staticmethod
def parse_tuner_params(params: dict, framework: str) -> dict:
if framework == "flaml":
return {
"num_samples": params["num_samples"],
"time_budget_s": params["time_budget_s"],
"verbose": params["verbose"],
"resources_per_trial": params["resources_per_trial"],
"search_alg": params["algo"],
}
elif framework == "hyperopt":
return {
"max_evals": params["num_samples"] if params["num_samples"] != -1 else None,
"timeout": params["time_budget_s"],
"verbose": params["verbose"],
"algo": params["algo"],
}
elif framework == "optuna":
return {
"n_trials": params["num_samples"] if params["num_samples"] != -1 else None,
"timeout": params["time_budget_s"],
"sampler": params["algo"],
}
else:
raise ValueError(f"Framework {framework} not supported")

def default_configs(
self,
estimator_list: Iterable[str],
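To make the mapping in parse_tuner_params concrete, a small sketch. Assumptions: the dict mirrors the keys stored in self._settings["tuner"] in optimiser.py, and the enclosing class is the param service that optimiser.py references as self.cfg, here assumed to be SimpleParamService.

    tuner_settings = {
        "num_samples": -1,                    # -1 (no trial limit) maps to None for hyperopt/optuna
        "time_budget_s": 300,
        "verbose": 0,
        "resources_per_trial": {"cpu": 0.5},
        "algo": None,                         # None -> the framework's default search algorithm
    }
    hyperopt_kwargs = SimpleParamService.parse_tuner_params(tuner_settings, "hyperopt")
    # -> {"max_evals": None, "timeout": 300, "verbose": 0, "algo": None}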
3 changes: 3 additions & 0 deletions setup.py
@@ -34,6 +34,9 @@
"wise-pizza",
"seaborn",
"category_encoders==2.6.3",
"hiertunehub",
"hyperopt",
"optuna"
],
extras_require={
"test": [