Skip to content

Commit 3e8e4e7

Browse files
Richa Gadgil authored and meta-codesync[bot] committed
Fix ValueError in FillMissingParameters with empty experiment data (#4973)
Summary: Pull Request resolved: #4973. `FillMissingParameters.transform_experiment_data` crashes with `ValueError: Columns must be same length as key` when `arm_data` has zero rows (e.g., during initial Sobol trial generation before any trials have completed). This happens because `DataFrame.apply(func, axis=1)` on an empty DataFrame returns an empty DataFrame rather than an empty Series; assigning that DataFrame to a single column then fails. Replace `arm_data.apply(...)` with a list comprehension over `arm_data.iterrows()`, which correctly produces an empty list for empty DataFrames. Failing run: https://www.internalfb.com/mlhub/flow/1044918711/overview. Passing run: https://www.internalfb.com/mlhub/flow/1045916604/overview. Reviewed By: yuhuishi-convect, saitcakmak. Differential Revision: D95266027. fbshipit-source-id: 41cffa6fe9aa169a4fae727a135e2856197bfd64
1 parent 9a79809 commit 3e8e4e7

File tree

2 files changed

+50
-11
lines changed

2 files changed

+50
-11
lines changed

ax/adapter/transforms/fill_missing_parameters.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -95,16 +95,8 @@ def transform_experiment_data(
9595
# to ensure correctness, since they are deterministic functions of
9696
# other parameters.
9797
if self._derived_parameters:
98-
tunable_cols = [
99-
c
100-
for c in arm_data.columns
101-
if c != "metadata" and c not in self._derived_parameters
102-
]
10398
for p_name, p in self._derived_parameters.items():
104-
arm_data[p_name] = arm_data.apply(
105-
lambda row, p=p: p.compute({col: row[col] for col in tunable_cols}),
106-
axis=1,
107-
)
99+
arm_data[p_name] = p.compute_array(arm_data)
108100
return ExperimentData(
109101
arm_data=arm_data,
110102
observation_data=experiment_data.observation_data,

ax/adapter/transforms/tests/test_fill_missing_parameters.py

Lines changed: 49 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,12 @@
88

99
from copy import deepcopy
1010

11+
import pandas as pd
1112
from ax.adapter.base import DataLoaderConfig
12-
from ax.adapter.data_utils import extract_experiment_data
13+
from ax.adapter.data_utils import ExperimentData, extract_experiment_data
1314
from ax.adapter.transforms.fill_missing_parameters import FillMissingParameters
1415
from ax.core.observation import ObservationFeatures
15-
from ax.core.parameter import ParameterType, RangeParameter
16+
from ax.core.parameter import DerivedParameter, ParameterType, RangeParameter
1617
from ax.core.search_space import SearchSpace
1718
from ax.utils.common.testutils import TestCase
1819
from ax.utils.testing.core_stubs import get_experiment_with_observations
@@ -172,3 +173,49 @@ def test_deprecated_config_behavior_still_works(self) -> None:
172173
]
173174
result = t.transform_observation_features(deepcopy(observation_features))
174175
self.assertEqual(result, expected)
176+
177+
def test_transform_experiment_data_empty_with_derived_parameters(self) -> None:
178+
"""Test that transform_experiment_data works with empty data and
179+
derived parameters. Regression test for a bug where
180+
DataFrame.apply(func, axis=1) on an empty DataFrame returns an empty
181+
DataFrame rather than an empty Series, causing a ValueError on column
182+
assignment."""
183+
search_space = SearchSpace(
184+
parameters=[
185+
RangeParameter(
186+
name="x",
187+
parameter_type=ParameterType.FLOAT,
188+
lower=0.0,
189+
upper=10.0,
190+
),
191+
RangeParameter(
192+
name="y",
193+
parameter_type=ParameterType.FLOAT,
194+
lower=0.0,
195+
upper=10.0,
196+
),
197+
DerivedParameter(
198+
name="z",
199+
parameter_type=ParameterType.FLOAT,
200+
expression_str="x + y",
201+
),
202+
]
203+
)
204+
# Construct empty ExperimentData with the right columns but no rows.
205+
empty_arm_data = pd.DataFrame(
206+
{
207+
"x": pd.Series(dtype=float),
208+
"y": pd.Series(dtype=float),
209+
"metadata": pd.Series(dtype=object),
210+
}
211+
)
212+
empty_observation_data = pd.DataFrame()
213+
experiment_data = ExperimentData(
214+
arm_data=empty_arm_data,
215+
observation_data=empty_observation_data,
216+
)
217+
t = FillMissingParameters(search_space=search_space)
218+
# This should not raise ValueError.
219+
transformed_data = t.transform_experiment_data(experiment_data=experiment_data)
220+
self.assertEqual(len(transformed_data.arm_data), 0)
221+
self.assertIn("z", transformed_data.arm_data.columns)

0 commit comments

Comments
 (0)