Skip to content

Commit e24e156

Browse files
ItsMrLin and meta-codesync[bot]
authored and committed
Add DerivedMetric base class for metrics computed from other metrics (facebook#4950)
Summary: Pull Request resolved: facebook#4950 Introduces `DerivedMetric`, a base `Metric` subclass for metrics whose values depend on other metrics being fetched first. This enables a two-phase data-fetch pattern where base metrics are fetched and cached before derived metrics are computed. Key changes: 1. **`DerivedMetric` base class** (`ax/core/derived_metric.py`): Declares `input_metric_names` — names of metrics that must be available before this metric can be computed. Subclasses override `fetch_trial_data` to define the derivation logic. 2. **Two-phase fetching in `Experiment`**: `_lookup_or_fetch_trials_results` now separates base metrics from derived metrics, fetches base metrics first and attaches them to the cache, then fetches derived metrics. This ensures derived metrics can read their inputs via `trial.lookup_data()`. 3. **Test coverage**: Unit tests for the base class (init, validation, clone, summary_dict) and integration tests verifying the two-phase fetch in `Experiment`. A concrete subclass (`ExpressionDerivedMetric`) for expression-based derivation and storage registration is added in a follow-up diff. Reviewed By: lena-kashtelyan, saitcakmak Differential Revision: D92749156 fbshipit-source-id: 2e845bcbcc11798eea4de4f304e3713781ec8ba0
1 parent e6961dc commit e24e156

File tree

3 files changed

+424
-14
lines changed

3 files changed

+424
-14
lines changed

ax/core/derived_metric.py

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
#
4+
# This source code is licensed under the MIT license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# pyre-strict
8+
9+
"""
10+
DerivedMetric: A metric computed from other metrics.
11+
12+
``DerivedMetric`` is a base class for metrics whose values depend on other
13+
metrics being fetched first. The experiment's data-fetch loop uses
14+
``isinstance(m, DerivedMetric)`` to guarantee that all base metric data is
15+
attached to the cache before any derived metric's ``fetch_trial_data`` runs.
16+
17+
.. note:: **Transform compatibility.**
18+
Derived metrics are computed *before* any adapter transforms run.
19+
Transforms that modify metric values (e.g. ``Relativize``, ``Log``) will
20+
be applied to the already-computed derived value, **not** to its inputs
21+
individually. This means a derived metric ``log(a) - log(b)`` followed
22+
by a ``Log`` transform would double-log the result. Avoid using
23+
transforms that overlap with operations already baked into the derivation.
24+
"""
25+
26+
from __future__ import annotations
27+
28+
from logging import Logger
29+
from typing import Any
30+
31+
import pandas as pd
32+
from ax.core.metric import Metric
33+
from ax.exceptions.core import UserInputError
34+
from ax.utils.common.logger import get_logger
35+
36+
37+
logger: Logger = get_logger(__name__)
38+
39+
40+
class DerivedMetric(Metric):
    """Base class for metrics that depend on other metrics.

    A ``DerivedMetric`` declares the names of metrics whose data must be
    available before this metric can be computed. The experiment's two-phase
    fetch loop (see ``Experiment._lookup_or_fetch_trials_results``) separates
    derived metrics from base metrics and fetches base metrics first.

    Subclasses must override ``fetch_trial_data`` to define how the derived
    value is produced.

    Attributes:
        input_metric_names: Names of metrics that must be fetched first.
    """

    def __init__(
        self,
        name: str,
        input_metric_names: list[str],
        lower_is_better: bool | None = None,
        properties: dict[str, Any] | None = None,
    ) -> None:
        """Initialize a derived metric.

        Args:
            name: Name of this metric.
            input_metric_names: Names of the metrics whose data must be
                fetched before this metric can be computed. Must be non-empty.
            lower_is_better: Whether lower values of this metric are better.
            properties: Optional dictionary of this metric's properties.

        Raises:
            UserInputError: If ``input_metric_names`` is empty.
        """
        if not input_metric_names:
            raise UserInputError(
                f"DerivedMetric '{name}' must declare at least one input "
                f"metric in input_metric_names."
            )
        super().__init__(
            name=name,
            lower_is_better=lower_is_better,
            properties=properties,
        )
        # Defensive copy: storing the caller's list by reference would let
        # later external mutation silently change this metric's declared
        # dependencies (and the contents of `summary_dict`).
        self._input_metric_names: list[str] = list(input_metric_names)

    @property
    def input_metric_names(self) -> list[str]:
        """Names of metrics that this metric depends on."""
        return self._input_metric_names

    @staticmethod
    def _lookup_metric_values_for_arm(
        arm_df: pd.DataFrame,
        metric_name: str,
    ) -> pd.DataFrame:
        """Look up rows for *metric_name* by ``metric_name`` or
        ``metric_signature`` column.

        Args:
            arm_df: Data rows for a single arm, expected to contain
                ``metric_name`` and ``metric_signature`` columns.
            metric_name: Name (or signature) of the metric to select.

        Returns:
            The subset of ``arm_df`` whose ``metric_name`` or
            ``metric_signature`` equals ``metric_name``.
        """
        return arm_df[
            (arm_df["metric_name"] == metric_name)
            | (arm_df["metric_signature"] == metric_name)
        ]

    @property
    def summary_dict(self) -> dict[str, Any]:
        """Fields of this metric's configuration that will appear
        in the ``Summary`` analysis table.
        """
        return {
            **super().summary_dict,
            "input_metric_names": self._input_metric_names,
        }

ax/core/experiment.py

Lines changed: 81 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
from ax.core.base_trial import BaseTrial
3131
from ax.core.batch_trial import BatchTrial
3232
from ax.core.data import combine_data_rows_favoring_recent, Data
33+
from ax.core.derived_metric import DerivedMetric
3334
from ax.core.experiment_status import ExperimentStatus
3435
from ax.core.generator_run import GeneratorRun
3536
from ax.core.llm_provider import LLMMessage
@@ -1045,19 +1046,94 @@ def _lookup_or_fetch_trials_results(
10451046
logger.debug("No trials are in a state expecting data.")
10461047
return {}
10471048
metrics_to_fetch = list(metrics or self.metrics.values())
1048-
metrics_by_class = self._metrics_by_class(metrics=metrics_to_fetch)
1049+
1050+
# Separate base metrics from derived metrics.
1051+
# Derived metrics must be fetched after base metrics because they
1052+
# depend on base metric data being available in the cache.
1053+
base_metrics: list[Metric] = [
1054+
m for m in metrics_to_fetch if not isinstance(m, DerivedMetric)
1055+
]
1056+
derived_metrics: list[Metric] = [
1057+
m for m in metrics_to_fetch if isinstance(m, DerivedMetric)
1058+
]
1059+
1060+
results: dict[int, dict[str, MetricFetchResult]] = {}
1061+
contains_new_data = False
1062+
1063+
# Phase 1: Fetch all base (non-derived) metrics first.
1064+
if base_metrics:
1065+
base_results, base_new = self._fetch_metrics_by_class(
1066+
trials=trials,
1067+
metrics=base_metrics,
1068+
**kwargs,
1069+
)
1070+
results = base_results
1071+
contains_new_data = base_new
1072+
1073+
# Attach base metric results to the cache BEFORE fetching derived
1074+
# metrics so they can access base data via lookup_data().
1075+
if base_new and derived_metrics:
1076+
self._try_attach_fetch_results(base_results)
1077+
1078+
# Phase 2: Fetch derived metrics (they look up base data from cache).
1079+
if derived_metrics:
1080+
derived_results, derived_new = self._fetch_metrics_by_class(
1081+
trials=trials,
1082+
metrics=derived_metrics,
1083+
**kwargs,
1084+
)
1085+
for trial_index, trial_metrics in derived_results.items():
1086+
results.setdefault(trial_index, {}).update(trial_metrics)
1087+
contains_new_data = contains_new_data or derived_new
1088+
1089+
# Attach all results (base + derived).
1090+
if contains_new_data:
1091+
self._try_attach_fetch_results(results)
1092+
1093+
return results
1094+
1095+
def _try_attach_fetch_results(
1096+
self,
1097+
results: dict[int, dict[str, MetricFetchResult]],
1098+
) -> None:
1099+
"""Attach fetch results to the experiment cache, logging on error."""
1100+
try:
1101+
self.attach_fetch_results(results=results)
1102+
except ValueError as e:
1103+
logger.error(
1104+
f"Encountered ValueError {e} while attaching results. "
1105+
"Proceeding and returning results fetched without attaching."
1106+
)
1107+
1108+
def _fetch_metrics_by_class(
1109+
self,
1110+
trials: list[BaseTrial],
1111+
metrics: list[Metric],
1112+
**kwargs: Any,
1113+
) -> tuple[dict[int, dict[str, MetricFetchResult]], bool]:
1114+
"""Fetch metrics grouped by class.
1115+
1116+
Args:
1117+
trials: List of trials to fetch data for.
1118+
metrics: List of metrics to fetch.
1119+
**kwargs: Additional keyword arguments passed to fetch methods.
1120+
1121+
Returns:
1122+
A tuple of (results dict, contains_new_data bool).
1123+
"""
1124+
metrics_by_class = self._metrics_by_class(metrics=metrics)
10491125

10501126
results: dict[int, dict[str, MetricFetchResult]] = {}
10511127
contains_new_data = False
10521128

1053-
for metric_cls, metrics in metrics_by_class.items():
1054-
first_metric_of_group = metrics[0]
1129+
for _metric_cls, cls_metrics in metrics_by_class.items():
1130+
first_metric_of_group = cls_metrics[0]
10551131
(
10561132
new_fetch_results,
10571133
new_results_contains_new_data,
10581134
) = first_metric_of_group.fetch_data_prefer_lookup(
10591135
experiment=self,
1060-
metrics=metrics_by_class[metric_cls],
1136+
metrics=cls_metrics,
10611137
trials=trials,
10621138
**kwargs,
10631139
)
@@ -1077,16 +1153,7 @@ def _lookup_or_fetch_trials_results(
10771153
for trial in trials
10781154
}
10791155

1080-
if contains_new_data:
1081-
try:
1082-
self.attach_fetch_results(results=results)
1083-
except ValueError as e:
1084-
logger.error(
1085-
f"Encountered ValueError {e} while attaching results. Proceeding "
1086-
"and returning Results fetched without attaching."
1087-
)
1088-
1089-
return results
1156+
return results, contains_new_data
10901157

10911158
@copy_doc(BaseTrial.fetch_data)
10921159
def _fetch_trial_data(

0 commit comments

Comments
 (0)