99from typing import Sequence
1010
1111import numpy as np
12+ import numpy .typing as npt
1213import pandas as pd
1314import torch
1415from ax .adapter .base import Adapter
@@ -540,6 +541,54 @@ def _extract_generation_node_name(trial: BaseTrial, arm: Arm) -> str:
540541 return Keys .UNKNOWN_GENERATION_NODE .value
541542
542543
def _get_scalarized_constraint_mean_and_sem(
    df: pd.DataFrame,
    constraint: ScalarizedOutcomeConstraint,
) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]:
    """
    Collapse the per-metric means and SEMs of a ScalarizedOutcomeConstraint
    into a single mean / SEM pair per row of ``df``.

    The component metrics are treated as independent, which gives:
        combined_mean = sum(weight_i * mean_i)
        combined_sem = sqrt(sum((weight_i * sem_i)^2))

    Args:
        df: DataFrame expected to hold a "{metric_name}_mean" column and,
            optionally, a "{metric_name}_sem" column for every component
            metric of the constraint.
        constraint: The ScalarizedOutcomeConstraint whose ``metric_weights``
            (metric, weight) pairs define the linear combination.

    Returns:
        Tuple of (combined_mean, combined_sem) numpy arrays with one entry
        per row of ``df``. If the mean column of any component metric is
        absent, the mean is all NaN and the SEM is all zeros. A missing SEM
        column, or a NaN SEM entry, contributes zero variance.
    """
    num_rows = len(df)
    available_columns = df.columns
    mean_total = np.zeros(num_rows)
    variance_total = np.zeros(num_rows)

    for metric, weight in constraint.metric_weights:
        mean_key = f"{metric.name}_mean"
        if mean_key not in available_columns:
            # Without every component mean the combination is undefined;
            # follow the surrounding convention of mean=NaN, sem=0.
            return np.full(num_rows, np.nan), np.zeros(num_rows)
        mean_total += weight * df[mean_key].values

        sem_key = f"{metric.name}_sem"
        if sem_key in available_columns:
            # Unknown (NaN) SEMs are counted as zero uncertainty.
            sem_values = df[sem_key].fillna(0).values
        else:
            sem_values = np.zeros(num_rows)

        # Var(w * X) = w^2 * Var(X); variances add under independence.
        variance_total += (weight**2) * (sem_values**2)

    return mean_total, np.sqrt(variance_total)
590+
591+
543592def _prepare_p_feasible (
544593 df : pd .DataFrame ,
545594 status_quo_df : pd .DataFrame | None ,
@@ -571,34 +620,27 @@ def _prepare_p_feasible(
571620 return pd .Series (np .ones (len (df )))
572621
573622 # If an arm is missing data for a metric leave the mean as NaN.
574- oc_names = []
575- for oc in outcome_constraints :
576- if isinstance (oc , ScalarizedOutcomeConstraint ):
577- # take the str representation of the scalarized outcome constraint
578- oc_names .append (str (oc ))
579- else :
580- oc_names .append (oc .metric .name )
581-
582- assert len (oc_names ) == len (outcome_constraints )
583-
584623 means = []
585624 sigmas = []
586- for i , oc_name in enumerate (oc_names ):
587- df_constraint = none_throws (rel_df if outcome_constraints [i ].relative else df )
588- # TODO[T235432214]: currently we are leaving the mean as NaN if the constraint
589- # is on ScalarizedOutcomeConstraint but we should be able to calculate it by
590- # setting the mean to be weights * individual metrics and sem to be
591- # sqrt(sum((weights * individual_sems)^2)), assuming independence.
592- if f"{ oc_name } _mean" in df_constraint .columns :
593- means .append (df_constraint [f"{ oc_name } _mean" ].tolist ())
625+ for oc in outcome_constraints :
626+ df_constraint = none_throws (rel_df if oc .relative else df )
594627
628+ if isinstance (oc , ScalarizedOutcomeConstraint ):
629+ mean , sem = _get_scalarized_constraint_mean_and_sem (df_constraint , oc )
630+ means .append (mean .tolist ())
631+ sigmas .append (sem .tolist ())
595632 else :
596- means .append ([float ("nan" )] * len (df_constraint ))
597- sigmas .append (
598- (df_constraint [f"{ oc_name } _sem" ].fillna (0 )).tolist ()
599- if f"{ oc_name } _sem" in df_constraint .columns
600- else [0 ] * len (df )
601- )
633+ metric_name = oc .metric .name
634+ if f"{ metric_name } _mean" in df_constraint .columns :
635+ means .append (df_constraint [f"{ metric_name } _mean" ].tolist ())
636+ else :
637+ means .append ([float ("nan" )] * len (df_constraint ))
638+
639+ sigmas .append (
640+ (df_constraint [f"{ metric_name } _sem" ].fillna (0 )).tolist ()
641+ if f"{ metric_name } _sem" in df_constraint .columns
642+ else [0 ] * len (df )
643+ )
602644
603645 con_lower_inds = [
604646 i
@@ -665,28 +707,27 @@ def _prepare_p_feasible_per_constraint(
665707 if len (outcome_constraints ) == 0 :
666708 return pd .DataFrame (index = df .index )
667709
668- oc_names = []
669- for oc in outcome_constraints :
670- if isinstance (oc , ScalarizedOutcomeConstraint ):
671- oc_names .append (str (oc ))
672- else :
673- oc_names .append (oc .metric .name )
674-
675710 result_df = pd .DataFrame (index = df .index )
676711 # Compute probability for each constraint individually
677- for oc_name , oc in zip ( oc_names , outcome_constraints ) :
712+ for oc in outcome_constraints :
678713 df_constraint = none_throws (rel_df if oc .relative else df )
679714
680- # Get mean and sigma for this constraint
681- if f" { oc_name } _mean" in df_constraint . columns :
682- mean = df_constraint [ f" { oc_name } _mean" ]. values
715+ if isinstance ( oc , ScalarizedOutcomeConstraint ):
716+ mean , sigma = _get_scalarized_constraint_mean_and_sem ( df_constraint , oc )
717+ oc_display_name = str ( oc )
683718 else :
684- mean = np .nan * np .ones (len (df_constraint ))
719+ metric_name = oc .metric .name
720+ oc_display_name = metric_name
685721
686- if f"{ oc_name } _sem" in df_constraint .columns :
687- sigma = df_constraint [f"{ oc_name } _sem" ].fillna (0 ).values
688- else :
689- sigma = np .zeros (len (df ))
722+ if f"{ metric_name } _mean" in df_constraint .columns :
723+ mean = df_constraint [f"{ metric_name } _mean" ].values
724+ else :
725+ mean = np .full (len (df_constraint ), np .nan )
726+
727+ if f"{ metric_name } _sem" in df_constraint .columns :
728+ sigma = df_constraint [f"{ metric_name } _sem" ].fillna (0 ).values
729+ else :
730+ sigma = np .zeros (len (df ))
690731
691732 # Convert to torch tensors (shape: [n_arms, 1])
692733 mean_tensor = torch .tensor (mean , dtype = torch .double ).unsqueeze (- 1 )
@@ -706,7 +747,7 @@ def _prepare_p_feasible_per_constraint(
706747
707748 # Convert back to numpy and store in result dataframe
708749 prob = log_prob .exp ().squeeze ().numpy ()
709- result_df [f"p_feasible_{ oc_name } " ] = prob
750+ result_df [f"p_feasible_{ oc_display_name } " ] = prob
710751
711752 return result_df
712753
0 commit comments