From 5fda24a892de349d5dd4dd12724beba750f5e95e Mon Sep 17 00:00:00 2001 From: Richa Gadgil Date: Fri, 6 Mar 2026 15:42:44 -0800 Subject: [PATCH] Fix ValueError in FillMissingParameters with empty experiment data (#4973) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/4973 `FillMissingParameters.transform_experiment_data` crashes with `ValueError: Columns must be same length as key` when `arm_data` has zero rows (e.g., during initial Sobol trial generation before any trials have completed). This happens because `DataFrame.apply(func, axis=1)` on an empty DataFrame returns an empty DataFrame rather than an empty Series. Assigning that DataFrame to a single column then fails. Replace the row-wise `arm_data.apply(...)` with `p.compute_array(arm_data)` for each derived parameter, removing the `DataFrame.apply` call that failed on empty DataFrames; a regression test covers the empty case. Failing run: https://www.internalfb.com/mlhub/flow/1044918711/overview Reviewed By: yuhuishi-convect, saitcakmak Differential Revision: D95266027 --- .../transforms/fill_missing_parameters.py | 10 +--- .../tests/test_fill_missing_parameters.py | 51 ++++++++++++++++++- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/ax/adapter/transforms/fill_missing_parameters.py b/ax/adapter/transforms/fill_missing_parameters.py index 2d5967b4eeb..ae8849a8036 100644 --- a/ax/adapter/transforms/fill_missing_parameters.py +++ b/ax/adapter/transforms/fill_missing_parameters.py @@ -95,16 +95,8 @@ def transform_experiment_data( # to ensure correctness, since they are deterministic functions of # other parameters. 
if self._derived_parameters: - tunable_cols = [ - c - for c in arm_data.columns - if c != "metadata" and c not in self._derived_parameters - ] for p_name, p in self._derived_parameters.items(): - arm_data[p_name] = arm_data.apply( - lambda row, p=p: p.compute({col: row[col] for col in tunable_cols}), - axis=1, - ) + arm_data[p_name] = p.compute_array(arm_data) return ExperimentData( arm_data=arm_data, observation_data=experiment_data.observation_data, diff --git a/ax/adapter/transforms/tests/test_fill_missing_parameters.py b/ax/adapter/transforms/tests/test_fill_missing_parameters.py index 36ee261ba4a..82f7b16cd76 100644 --- a/ax/adapter/transforms/tests/test_fill_missing_parameters.py +++ b/ax/adapter/transforms/tests/test_fill_missing_parameters.py @@ -8,11 +8,12 @@ from copy import deepcopy +import pandas as pd from ax.adapter.base import DataLoaderConfig -from ax.adapter.data_utils import extract_experiment_data +from ax.adapter.data_utils import ExperimentData, extract_experiment_data from ax.adapter.transforms.fill_missing_parameters import FillMissingParameters from ax.core.observation import ObservationFeatures -from ax.core.parameter import ParameterType, RangeParameter +from ax.core.parameter import DerivedParameter, ParameterType, RangeParameter from ax.core.search_space import SearchSpace from ax.utils.common.testutils import TestCase from ax.utils.testing.core_stubs import get_experiment_with_observations @@ -172,3 +173,49 @@ def test_deprecated_config_behavior_still_works(self) -> None: ] result = t.transform_observation_features(deepcopy(observation_features)) self.assertEqual(result, expected) + + def test_transform_experiment_data_empty_with_derived_parameters(self) -> None: + """Test that transform_experiment_data works with empty data and + derived parameters. 
Regression test for a bug where + DataFrame.apply(func, axis=1) on an empty DataFrame returns an empty + DataFrame rather than an empty Series, causing a ValueError on column + assignment.""" + search_space = SearchSpace( + parameters=[ + RangeParameter( + name="x", + parameter_type=ParameterType.FLOAT, + lower=0.0, + upper=10.0, + ), + RangeParameter( + name="y", + parameter_type=ParameterType.FLOAT, + lower=0.0, + upper=10.0, + ), + DerivedParameter( + name="z", + parameter_type=ParameterType.FLOAT, + expression_str="x + y", + ), + ] + ) + # Construct empty ExperimentData with the right columns but no rows. + empty_arm_data = pd.DataFrame( + { + "x": pd.Series(dtype=float), + "y": pd.Series(dtype=float), + "metadata": pd.Series(dtype=object), + } + ) + empty_observation_data = pd.DataFrame() + experiment_data = ExperimentData( + arm_data=empty_arm_data, + observation_data=empty_observation_data, + ) + t = FillMissingParameters(search_space=search_space) + # This should not raise ValueError. + transformed_data = t.transform_experiment_data(experiment_data=experiment_data) + self.assertEqual(len(transformed_data.arm_data), 0) + self.assertIn("z", transformed_data.arm_data.columns)