115 changes: 67 additions & 48 deletions causaltune/optimiser.py
@@ -1,14 +1,14 @@
import copy
import warnings
from typing import List, Optional, Union
from typing import List, Optional, Union, Any
from collections import defaultdict
import time

import traceback
import pandas as pd
import numpy as np
from sklearn.linear_model import _base
from flaml import tune
from hiertunehub import create_tuner

from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split
@@ -174,6 +174,7 @@ def __init__(
self._settings["tuner"]["resources_per_trial"] = (
resources_per_trial if resources_per_trial is not None else {"cpu": 0.5}
)
self._settings["tuner"]["algo"] = None
self._settings["try_init_configs"] = try_init_configs
self._settings["include_experimental_estimators"] = include_experimental_estimators

@@ -200,7 +201,7 @@ def __init__(
self._settings["propensity_model"] = propensity_model
self._settings["outcome_model"] = outcome_model

self.results = None
self.tuner = None
self._best_estimators = defaultdict(lambda: (float("-inf"), None))

self.original_estimator_list = estimator_list
@@ -288,6 +289,8 @@ def fit(
encoder_type: Optional[str] = None,
encoder_outcome: Optional[str] = None,
use_ray: Optional[bool] = None,
framework: Optional[str] = "flaml",
algo: Any = None,
):
"""Performs AutoML on list of causal inference estimators
- If estimator has a search space specified in its parameters, HPO is performed on the whole model.
@@ -307,6 +310,8 @@ def fit(
preprocess (bool): preprocess CausalityDataset if needed.
encoder_type (Optional[str]): Categorical Encoder for preprocessing
encoder_outcome (Optional[str]): Categorical Encoder target for preprocessing: TargetEncoder, WOE.
framework (Optional[str]): Framework to use for HPO; choices are "flaml", "hyperopt" and "optuna".
algo (Any): Search algorithm to use for HPO; each framework has its own set of algorithms to choose from.

Returns:
None
@@ -436,6 +441,7 @@ def fit(
self._settings["tuner"]["time_budget_s"] = (
2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"]
)
self._settings["tuner"]["algo"] = algo

cmtb = self._settings["component_models"]["time_budget"]

@@ -474,47 +480,60 @@ def fit(
else []
)

if resume and self.results:
# pull out configs and resume_scores from previous trials:
for _, result in self.results.results.items():
self.resume_scores.append(result[self.metric])
self.resume_cfg.append(result["config"])
# append init_cfgs that have not yet been evaluated
for cfg in init_cfg:
self.resume_cfg.append(cfg) if cfg not in self.resume_cfg else None
try:
self.results = tune.run(
self._tune_with_config,
search_space,
metric=self.metric,
# use_ray=self.use_ray,
cost_attr="evaluation_cost",
points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
mode=("min" if self.metric in metrics_to_minimize() else "max"),
# resources_per_trial= {"cpu": 1} if self.use_ray else None,
low_cost_partial_config={},
**self._settings["tuner"],
)
# TODO: integrate resume and init cfg
# if resume and self.results:
# # pull out configs and resume_scores from previous trials:
# for _, result in self.results.results.items():
# self.resume_scores.append(result[self.metric])
# self.resume_cfg.append(result["config"])
# # append init_cfgs that have not yet been evaluated
# for cfg in init_cfg:
# self.resume_cfg.append(cfg) if cfg not in self.resume_cfg else None

self.tuner = create_tuner(
self._tune_with_config,
search_space,
metric=self.metric,
mode=("min" if self.metric in metrics_to_minimize() else "max"),
framework=framework,
**self.cfg.parse_tuner_params(self._settings["tuner"], framework),
)

if self.results.get_best_trial() is None:
raise Exception(
"Optimization failed! Did you set large enough time_budget and components_budget?"
)
except Exception:
# we must have an older FLAML version that doesn't support the cost_attr parameter
self.results = tune.run(
self._tune_with_config,
search_space,
metric=self.metric,
points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
mode=("min" if self.metric in metrics_to_minimize() else "max"),
low_cost_partial_config={},
**self._settings["tuner"],
)
# print("Optimization failed!\n", traceback.format_exc())
# raise e
self.tuner.run()

# try:
# self.results = tune.run(
# self._tune_with_config,
# search_space,
# metric=self.metric,
# # use_ray=self.use_ray,
# cost_attr="evaluation_cost",
# points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
# evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
# mode=("min" if self.metric in metrics_to_minimize() else "max"),
# # resources_per_trial= {"cpu": 1} if self.use_ray else None,
# low_cost_partial_config={},
# **self._settings["tuner"],
# )
#
# if self.results.get_best_trial() is None:
# raise Exception(
# "Optimization failed! Did you set large enough time_budget and components_budget?"
# )
# except Exception:
# # we must have an older FLAML version that doesn't support the cost_attr parameter
# self.results = tune.run(
# self._tune_with_config,
# search_space,
# metric=self.metric,
# points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
# evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
# mode=("min" if self.metric in metrics_to_minimize() else "max"),
# low_cost_partial_config={},
# **self._settings["tuner"],
# )
# # print("Optimization failed!\n", traceback.format_exc())
# # raise e
self.update_summary_scores()

def update_summary_scores(self):
@@ -523,7 +542,7 @@ def update_summary_scores(self):
Returns:
None
"""
self.scores = Scorer.best_score_by_estimator(self.results.results, self.metric)
self.scores = Scorer.best_score_by_estimator(self.tuner.results, self.metric)
# now inject the separately saved model objects
for est_name in self.scores:
# Todo: Check approximate scores for OrthoIV (possibly other IV estimators)
@@ -699,7 +718,7 @@ def best_estimator(self) -> str:
Returns:
None
"""
return self.results.best_result["estimator_name"]
return self.tuner.best_result["estimator_name"]

@property
def model(self):
@@ -708,7 +727,7 @@ def model(self):
Returns:
CausalEstimator
"""
return self.results.best_result["estimator"].estimator
return self.tuner.best_result["estimator"].estimator

def best_model_for_estimator(self, estimator_name):
"""Return the best model found for a particular estimator.
@@ -730,7 +749,7 @@ def best_config(self):
Returns:
(dict): the best configuration
"""
return self.results.best_config
return self.tuner.best_params

@property
def best_config_per_estimator(self):
@@ -749,7 +768,7 @@ def best_score(self):
"""
Returns:
(float): the best score found."""
return self.results.best_result[self.metric]
return self.tuner.best_result[self.metric]

def effect(self, df, *args, **kwargs):
"""Heterogeneous Treatment Effects for data df
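For orientation, a minimal usage sketch of the new tuner-backed flow. This is a sketch only: it assumes CausalityDataset is importable from causaltune.data_utils as in the current package layout, df and the column names are placeholders, and algo=None lets the chosen framework fall back to its default search algorithm.

    from causaltune import CausalTune
    from causaltune.data_utils import CausalityDataset

    # df, "treatment" and "outcome" are placeholder data / column names
    cd = CausalityDataset(data=df, treatment="treatment", outcomes=["outcome"])
    ct = CausalTune(metric="energy_distance", components_time_budget=60)
    # framework selects the HPO backend ("flaml", "hyperopt" or "optuna");
    # algo=None uses that backend's default search algorithm
    ct.fit(cd, framework="optuna", algo=None)
    print(ct.best_estimator, ct.best_score)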
8 changes: 4 additions & 4 deletions causaltune/score/scoring.py
@@ -1321,7 +1321,7 @@ def make_scores(
return out

@staticmethod
def best_score_by_estimator(scores: Dict[str, dict], metric: str) -> Dict[str, dict]:
def best_score_by_estimator(scores: list[dict], metric: str) -> Dict[str, dict]:
"""Obtain best score for each estimator.

Args:
@@ -1333,19 +1333,19 @@ def best_score_by_estimator(scores: Dict[str, dict], metric: str) -> Dict[str, d

"""

for k, v in scores.items():
for v in scores:
if "estimator_name" not in v:
raise ValueError(
f"Malformed scores dict, 'estimator_name' field missing " f"in{k}, {v}"
)

estimator_names = sorted(
list(set([v["estimator_name"] for v in scores.values() if "estimator_name" in v]))
list(set([v["estimator_name"] for v in scores if "estimator_name" in v]))
)
best = {}
for name in estimator_names:
est_scores = [
v for v in scores.values() if "estimator_name" in v and v["estimator_name"] == name
v for v in scores if "estimator_name" in v and v["estimator_name"] == name
]
best[name] = (
min(est_scores, key=lambda x: x[metric])
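For illustration of the signature change above: scores is now a flat list of per-trial dicts rather than a mapping keyed by trial id. A minimal sketch with made-up trial entries, assuming energy_distance is among the metrics that are minimized:

    trials = [
        {"estimator_name": "backdoor.econml.dml.LinearDML", "energy_distance": 0.21},
        {"estimator_name": "backdoor.econml.dml.LinearDML", "energy_distance": 0.18},
        {"estimator_name": "backdoor.propensity_score_weighting", "energy_distance": 0.25},
    ]
    best = Scorer.best_score_by_estimator(trials, "energy_distance")
    # best["backdoor.econml.dml.LinearDML"] is the trial with energy_distance == 0.18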
29 changes: 29 additions & 0 deletions causaltune/search/params.py
@@ -7,6 +7,7 @@
import warnings
from econml.inference import BootstrapInference # noqa F401
from sklearn import linear_model
from hiertunehub import SearchSpace

from causaltune.utils import clean_config
from causaltune.search.component import model_from_cfg, joint_config
@@ -161,8 +162,36 @@ def search_space(
data_size, outcome_estimator_list
)

out = SearchSpace.from_flaml(out, name="estimator_name")

return out

@staticmethod
def parse_tuner_params(params: dict, framework: str) -> dict:
if framework == "flaml":
return {
"num_samples": params["num_samples"],
"time_budget_s": params["time_budget_s"],
"verbose": params["verbose"],
"resources_per_trial": params["resources_per_trial"],
"search_alg": params["algo"],
}
elif framework == "hyperopt":
return {
"max_evals": params["num_samples"] if params["num_samples"] != -1 else None,
"timeout": params["time_budget_s"],
"verbose": params["verbose"],
"algo": params["algo"],
}
elif framework == "optuna":
return {
"n_trials": params["num_samples"] if params["num_samples"] != -1 else None,
"timeout": params["time_budget_s"],
"sampler": params["algo"],
}
else:
raise ValueError(f"Framework {framework} not supported")

def default_configs(
self,
estimator_list: Iterable[str],
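To make the mapping in parse_tuner_params concrete, a small sketch. Assumptions: the dict mirrors the keys stored in self._settings["tuner"] in optimiser.py, and the enclosing class is the param service that optimiser.py references as self.cfg, here assumed to be SimpleParamService.

    tuner_settings = {
        "num_samples": -1,                    # -1 (no trial limit) maps to None for hyperopt/optuna
        "time_budget_s": 300,
        "verbose": 0,
        "resources_per_trial": {"cpu": 0.5},
        "algo": None,                         # None -> the framework's default search algorithm
    }
    hyperopt_kwargs = SimpleParamService.parse_tuner_params(tuner_settings, "hyperopt")
    # -> {"max_evals": None, "timeout": 300, "verbose": 0, "algo": None}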
3 changes: 3 additions & 0 deletions setup.py
@@ -34,6 +34,9 @@
"wise-pizza",
"seaborn",
"category_encoders==2.6.3",
"hiertunehub",
"hyperopt",
"optuna"
],
extras_require={
"test": [