Merge branch 'feature/batch_bo_issue_1229' of https://github.com/automl/SMAC3 into feature/batch_bo_issue_1229

daphne12345 · daphne12345 · commit 4b8e801bd519 · 2025-06-12T13:06:42.000+02:00
diff --git a/.github/workflows/recent_reminder.yml b/.github/workflows/recent_reminder.yml
@@ -32,7 +32,7 @@ jobs:
         echo "$(<recent_issues.md) <br />" >> mail.html
     - name: Send mail
       id: mail
-      uses: dawidd6/action-send-mail@v4
+      uses: dawidd6/action-send-mail@v5
       with:
         server_address: ${{secrets.MAIL_SERVER_ADDRESS}}
         server_port: ${{secrets.MAIL_SERVER_PORT}}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # 2.3.1
 
+## Bugfixes
+- Addressing situations where the acquisition function suggests configurations that have already been sampled in prior iterations (#1216)
+
 ## Misc
 - New SMAC logo
 - Fix doc link in README
@@ -11,6 +14,15 @@
 ## Improvements
 - Submit trials to runners in SMBO instead of running configs directly (#937)
 
+## Improvements
+- `target_function` becomes optional in Facade when using ask and tell exclusively (#946)
+
+## Documentation
+- Ask and tell without initial design and warmstarting
+
+## Bugfixes 
+- Ask and tell without initial design may no longer return a config from the initial design - if it is not "removed".  
+
 # 2.3.0
 
 ## Features
diff --git a/docs/advanced_usage/5_ask_and_tell.md b/docs/advanced_usage/5_ask_and_tell.md
@@ -15,5 +15,7 @@ and report the results of the trial.
     different budgets, they, obviously, can not be considered. However, all user-provided configurations will flow 
     into the intensification process.
 
+Note: if you exclusively use the ask-and-tell interface and never call `smac.optimize()`, SMAC is no longer
+responsible for evaluating the trials, and therefore the Facade no longer requires the `target_function` argument.
 
 Please have a look at our [ask-and-tell example](../examples/1%20Basics/3_ask_and_tell.md).
diff --git a/examples/1_basics/3_ask_and_tell.py b/examples/1_basics/3_ask_and_tell.py
@@ -2,6 +2,10 @@
 # Flags: doc-Runnable
 
 This examples show how to use the Ask-and-Tell interface.
+
+Note that the ask-and-tell interface will still use the initial design specified in the facade.
+Should you wish to add your own evaluated configurations instead or deactivate the initial
+design altogether, please refer to the warmstarting example in conjunction with this one.
 """
 
 from ConfigSpace import Configuration, ConfigurationSpace, Float
@@ -52,7 +56,7 @@ def train(self, config: Configuration, seed: int = 0) -> float:
     # Now we use SMAC to find the best hyperparameters
     smac = HyperparameterOptimizationFacade(
         scenario,
-        model.train,
+        target_function=model.train,
         intensifier=intensifier,
         overwrite=True,
     )
@@ -68,7 +72,14 @@ def train(self, config: Configuration, seed: int = 0) -> float:
         smac.tell(info, value)
 
     # After calling ask+tell, we can still optimize
-    # Note: SMAC will optimize the next 90 trials because 10 trials already have been evaluated
+    # Note: SMAC will optimize the next 90 trials because 10 trials already have been evaluated.
+    # If, however, we choose not to call optimize (e.g., because we want to manage the heavy
+    # computation of model.train completely outside SMAC, but still use SMAC to suggest new
+    # configurations), then n_trials will only be relevant for the initial design in combination
+    # with the initial design's max_ratio! In fact, in an ask+tell-only case, we could even set
+    # target_function=None in the constructor, because SMAC wouldn't even need to know
+    # what the target function is. But that would prevent us from calling optimize and validate
+    # later on.
     incumbent = smac.optimize()
 
     # Get cost of default configuration
diff --git a/examples/1_basics/8_warmstart.py b/examples/1_basics/8_warmstart.py
@@ -56,15 +56,19 @@ def evaluate(self, config: Configuration, seed: int = 0) -> float:
     intensifier = HyperparameterOptimizationFacade.get_intensifier(scenario, max_config_calls=1)
     smac = HyperparameterOptimizationFacade(
         scenario,
-        task.evaluate,
+        target_function=task.evaluate,
         intensifier=intensifier,
         overwrite=True,
 
         # Modify the initial design to use our custom initial design
         initial_design=HyperparameterOptimizationFacade.get_initial_design(
             scenario, 
-            n_configs=0,  # Do not use the default initial design
-            additional_configs=configurations  # Use the configurations previously evaluated as initial design
+            n_configs=0,  # Do not use the default initial design at all
+
+            # You can pass the configurations as additional_configs, which will specify their
+            # origin to be the initial design. However, this is not necessary and we can just
+            # smac.tell the configurations.
+            # additional_configs=configurations  # Use the configurations previously evaluated as initial design
                                                # This only passes the configurations but not the cost!
                                                # So in order to actually use the custom, pre-evaluated initial design
                                                # we need to tell those trials, like below.
@@ -80,4 +84,6 @@ def evaluate(self, config: Configuration, seed: int = 0) -> float:
         smac.tell(info, value)
 
     # Optimize as usual
-    smac.optimize()
+    # Note that, since we added three configurations, n_trials for the remaining optimization
+    # is effectively 27 in optimize().
+    smac.optimize()
diff --git a/smac/facade/abstract_facade.py b/smac/facade/abstract_facade.py
@@ -58,9 +58,12 @@ class AbstractFacade:
     ----------
     scenario : Scenario
         The scenario object, holding all environmental information.
-    target_function : Callable | str | AbstractRunner
+    target_function : Callable | str | AbstractRunner | None, defaults to None
         This function is called internally to judge a trial's performance. If a string is passed,
         it is assumed to be a script. In this case, ``TargetFunctionScriptRunner`` is used to run the script.
+        In the rare case that only ``ask`` and ``tell`` (and not ``optimize``) are used to optimize
+        the hyperparameters, the target_function argument can be None, because SMAC is no longer
+        in charge of evaluating the configurations and thus does not need to know about it.
     model : AbstractModel | None, defaults to None
         The surrogate model.
     acquisition_function : AbstractAcquisitionFunction | None, defaults to None
@@ -105,7 +108,7 @@ class AbstractFacade:
     def __init__(
         self,
         scenario: Scenario,
-        target_function: Callable | str | AbstractRunner,
+        target_function: Callable | str | AbstractRunner | None = None,
         *,
         model: AbstractModel | None = None,
         acquisition_function: AbstractAcquisitionFunction | None = None,
@@ -176,8 +179,10 @@ def __init__(
         self._overwrite = overwrite
 
         # Prepare the algorithm executer
-        runner: AbstractRunner
-        if isinstance(target_function, AbstractRunner):
+        runner: AbstractRunner | None
+        if isinstance(target_function, AbstractRunner) or target_function is None:
+            # in case the target_function is None (e.g. we purely use ask & tell)
+            # we let smbo.optimize raise an error
             runner = target_function
         elif isinstance(target_function, str):
             runner = TargetFunctionScriptRunner(
@@ -193,7 +198,7 @@ def __init__(
             )
 
         # In case of multiple jobs, we need to wrap the runner again using DaskParallelRunner
-        if (n_workers := scenario.n_workers) > 1 or dask_client is not None:
+        if ((n_workers := scenario.n_workers) > 1 or dask_client is not None) and runner is not None:
             if dask_client is not None and n_workers > 1:
                 logger.warning(
                     "Provided `dask_client`. Ignore `scenario.n_workers`, directly set `n_workers` in `dask_client`."
@@ -265,7 +270,7 @@ def meta(self) -> dict[str, Any]:
 
         meta = {
             "facade": {"name": self.__class__.__name__},
-            "runner": self._runner.meta,
+            "runner": self._runner.meta if self._runner is not None else None,
             "model": self._model.meta,
             "acquisition_maximizer": self._acquisition_maximizer.meta,
             "acquisition_function": self._acquisition_function.meta,
diff --git a/smac/initial_design/abstract_initial_design.py b/smac/initial_design/abstract_initial_design.py
@@ -82,7 +82,11 @@ def __init__(
             )
 
         # If the number of configurations is too large, we reduce it
-        _n_configs = int(max(1, min(self._n_configs, (max_ratio * scenario.n_trials))))
+        if self._n_configs > 1:
+            _n_configs = int(max(1, min(self._n_configs, (max_ratio * scenario.n_trials))))
+        else:
+            _n_configs = self._n_configs
+
         if self._n_configs != _n_configs:
             logger.info(
                 f"Reducing the number of initial configurations from {self._n_configs} to "
diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py
@@ -17,6 +17,7 @@
 from smac.acquisition.maximizer.random_search import RandomSearch
 from smac.callback.callback import Callback
 from smac.initial_design import AbstractInitialDesign
+from smac.main.exceptions import ConfigurationSpaceExhaustedException
 from smac.model.abstract_model import AbstractModel
 from smac.model.gaussian_process import GaussianProcess
 from smac.model.random_forest import RandomForest
@@ -119,15 +120,16 @@ def _set_components(
 
         self._initial_design_configs = initial_design.select_configurations()
         if len(self._initial_design_configs) == 0:
-            raise RuntimeError("SMAC needs initial configurations to work.")
+            # raise RuntimeError("SMAC needs initial configurations to work.")
+            logger.warning("No initial configurations were sampled.")
 
     @property
     def meta(self) -> dict[str, Any]:
         """Returns the meta data of the created object."""
         return {
             "name": self.__class__.__name__,
             "retrain_after": self._retrain_after,
-            "retries": self._max_new_config_tries,
+            "max_new_config_tries": self._max_new_config_tries,
             "min_trials": self._min_trials,
         }
 
@@ -261,10 +263,31 @@ def __iter__(self) -> Iterator[Configuration]:
 
                     # We exit the loop if we have tried to add the same configuration too often
                     if failed_counter == self._max_new_config_tries:
-                        logger.warning(
-                            f"Could not return a new configuration after {self._max_new_config_tries} retries." ""
-                        )
-                        return
+                        logger.warning(f"Could not return a new configuration after {failed_counter} retries.")
+                        break
+
+            # if we don't have enough configurations, we want to sample random configurations
+            if not retrain:
+                logger.warning(
+                    "Did not find enough configuration from the acquisition function. Sampling random configurations."
+                )
+                random_configs_retries = 0
+                while counter < self._retrain_after and random_configs_retries < self._max_new_config_tries:
+                    config = self._scenario.configspace.sample_configuration()
+                    if config not in self._processed_configs:
+                        counter += 1
+                        config.origin = "Random Search (max retries, no candidates)"
+                        self._processed_configs.append(config)
+                        self._call_callbacks_on_end(config)
+                        yield config
+                        retrain = counter == self._retrain_after
+                        self._call_callbacks_on_start()
+                    else:
+                        random_configs_retries += 1
+
+                    if random_configs_retries == self._max_new_config_tries:
+                        logger.warning(f"Could not return a new configuration after {random_configs_retries} retries.")
+                        raise ConfigurationSpaceExhaustedException()
 
     def _call_callbacks_on_start(self) -> None:
         for callback in self._callbacks:
diff --git a/smac/main/exceptions.py b/smac/main/exceptions.py
@@ -0,0 +1,7 @@
+class ConfigurationSpaceExhaustedException(Exception):
+    """Exception indicating that the configuration space is exhausted and no more configurations
+    can be sampled. This is usually raised when the maximum number of configurations has been
+    reached or when the configuration space has been fully explored.
+    """
+
+    pass
diff --git a/smac/main/smbo.py b/smac/main/smbo.py
@@ -40,8 +40,10 @@ class SMBO:
     ----------
     scenario : Scenario
         The scenario object, holding all environmental information.
-    runner : AbstractRunner
+    runner : AbstractRunner | None
         The runner (containing the target function) is called internally to judge a trial's performance.
+        In the rare case that ``optimize`` is never called and SMBO is operated with ``ask`` and ``tell`` only,
+        the runner is allowed to be None.
     runhistory : Runhistory
         The runhistory stores all trials.
     intensifier : AbstractIntensifier
@@ -60,7 +62,7 @@ class SMBO:
     def __init__(
         self,
         scenario: Scenario,
-        runner: AbstractRunner,
+        runner: AbstractRunner | None,
         runhistory: RunHistory,
         intensifier: AbstractIntensifier,
         overwrite: bool = False,
@@ -290,6 +292,11 @@ def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configur
             callback.on_start(self)
 
         dask_data_to_scatter = {}
+        if self._runner is None:
+            raise ValueError(
+                "Runner is not set in SMBO. Likely issue is that the target_function was not set in the Facade."
+            )
+
         if isinstance(self._runner, DaskParallelRunner) and data_to_scatter is not None:
             dask_data_to_scatter = dict(data_to_scatter=self._runner._client.scatter(data_to_scatter, broadcast=True))
         elif data_to_scatter is not None:
@@ -435,6 +442,12 @@ def _add_results(self) -> None:
         """Adds results from the runner to the runhistory. Although most of the functionality could be written
         in the tell method, we separate it here to make it accessible for the automatic optimization procedure only.
         """
+        if self._runner is None:
+            raise ValueError(
+                "Runner is not set in SMBO. Likely issue is that the target_function was not set "
+                "in the Facade. So we cannot query the runner for results."
+            )
+
         # Check if there is any result
         for trial_info, trial_value in self._runner.iter_results():
             # Add the results of the run to the run history
@@ -578,6 +591,11 @@ def validate(
             The averaged cost of the configuration. In case of multi-fidelity, the cost of each objective is
             averaged.
         """
+        if self._runner is None:
+            raise ValueError(
+                "Runner is not set in SMBO. Likely issue is that the target_function was not set in the Facade."
+            )
+
         if seed is None:
             seed = self._scenario.seed
 
diff --git a/smac/model/random_forest/random_forest.py b/smac/model/random_forest/random_forest.py
@@ -100,6 +100,7 @@ def __init__(
         self._eps_purity = eps_purity
         self._max_nodes = max_nodes
         self._bootstrapping = bootstrapping
+        self._rf = None
 
         # This list well be read out by save_iteration() in the solver
         # self._hypers = [