10 changes: 7 additions & 3 deletions causaltune/optimiser.py
@@ -94,6 +94,7 @@ def __init__(
test_size=None,
num_samples=-1,
propensity_model="dummy",
propensity_automl_estimators: Optional[List[str]] = None,
outcome_model="nested",
components_task="regression",
components_verbose=0,
@@ -185,6 +186,7 @@ def __init__(
self._settings["component_models"]["n_jobs"] = components_njobs
self._settings["component_models"]["time_budget"] = components_time_budget
self._settings["component_models"]["eval_method"] = "holdout"
self._settings["propensity_automl_estimators"] = propensity_automl_estimators

if 0 < train_size < 1:
component_test_size = 1 - train_size
@@ -224,9 +226,11 @@ def init_propensity_model(self, propensity_model: str):
if propensity_model == "dummy":
self.propensity_model = DummyClassifier(strategy="prior")
elif propensity_model == "auto":
self.propensity_model = AutoML(
**{**self._settings["component_models"], "task": "classification"}
)
automl_args = {**self._settings["component_models"], "task": "classification"}
if self._settings["propensity_automl_estimators"]:
automl_args["estimator_list"] = self._settings["propensity_automl_estimators"]

self.propensity_model = AutoML(**automl_args)
elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"):
self.propensity_model = propensity_model
else:
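With this change a caller can restrict which learners the propensity AutoML search considers. A minimal usage sketch, assuming the constructor shown above is CausalTune's and that the listed strings are valid FLAML estimator names (both are assumptions, not confirmed by this diff):

    from causaltune import CausalTune

    ct = CausalTune(
        propensity_model="auto",
        # hypothetical learner subset, forwarded to AutoML as estimator_list
        propensity_automl_estimators=["lgbm", "rf"],
    )
    # init_propensity_model then builds roughly:
    # AutoML(**{**component_settings, "task": "classification",
    #           "estimator_list": ["lgbm", "rf"]})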
2 changes: 1 addition & 1 deletion causaltune/remote.py
@@ -7,6 +7,6 @@ def remote_exec(function, args, use_ray=False):
else:
from joblib import Parallel, delayed

return Parallel(n_jobs=2, backend="threading")(delayed(function)(*args) for i in range(1))[
return Parallel(n_jobs=1, backend="threading")(delayed(function)(*args) for i in range(1))[
0
]
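For reference, with n_jobs=1 and a single-element generator the joblib call above amounts to one sequential, in-process invocation of the function; a standalone sketch:

    from joblib import Parallel, delayed

    def add(x, y):
        return x + y

    # Effectively equivalent to add(2, 3) run on the current thread.
    result = Parallel(n_jobs=1, backend="threading")(delayed(add)(2, 3) for _ in range(1))[0]
    assert result == 5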
143 changes: 143 additions & 0 deletions causaltune/score/bite.py
@@ -0,0 +1,143 @@
from typing import List, Optional

import numpy as np
import pandas as pd
from scipy.stats import kendalltau


def bite(
working_df: pd.DataFrame,
treatment_name: str,
outcome_name: str,
min_N: int = 10,
max_N: int = 1000,
num_N: int = 20,
N_values: Optional[List[int]] = None,
clip_propensity: float = 0.05,
) -> float:
max_N = int(min(max_N, len(working_df) / 10))
if N_values is None:
N_values = exponential_spacing(min_N, max_N, num_N)
# Calculate weights with clipping to avoid extremes
working_df["weights"] = np.where(
working_df[treatment_name] == 1,
1 / np.clip(working_df["propensity"], clip_propensity, 1 - clip_propensity),
1 / np.clip(1 - working_df["propensity"], clip_propensity, 1 - clip_propensity),
)

kendall_tau_values = []

for N in N_values:
iter_df = working_df.copy()

try:
# Ensure enough unique values for binning
unique_ites = np.unique(iter_df["estimated_ITE"])
if len(unique_ites) < N:
continue

# Create bins
iter_df["ITE_bin"] = pd.qcut(
iter_df["estimated_ITE"], q=N, labels=False, duplicates="drop"
)

# Compute bin statistics
bin_stats = []
for bin_idx in iter_df["ITE_bin"].unique():
bin_data = iter_df[iter_df["ITE_bin"] == bin_idx]

# Skip if bin is too small
if len(bin_data) < 2:
continue

naive_est = compute_naive_estimate(bin_data, treatment_name, outcome_name)

# Only compute average ITE if weights are valid
bin_weights = bin_data["weights"].values
if bin_weights.sum() > 0 and not np.isnan(naive_est):
try:
avg_est_ite = np.average(bin_data["estimated_ITE"], weights=bin_weights)
bin_stats.append(
{
"ITE_bin": bin_idx,
"naive_estimate": naive_est,
"average_estimated_ITE": avg_est_ite,
}
)
except ZeroDivisionError:
continue

# Calculate Kendall's Tau if we have enough valid bins
bin_stats_df = pd.DataFrame(bin_stats)
if len(bin_stats_df) >= 2:
tau, _ = kendalltau(
bin_stats_df["naive_estimate"],
bin_stats_df["average_estimated_ITE"],
)
if not np.isnan(tau):
kendall_tau_values.append(tau)

except (ValueError, ZeroDivisionError):
continue

# Return final score
if len(kendall_tau_values) == 0:
return -np.inf # Return -inf for failed computations

# top_3_taus = sorted(kendall_tau_values, reverse=True)[:3]
return np.mean(kendall_tau_values)


def compute_naive_estimate(
group_data: pd.DataFrame, treatment_name: str, outcome_name: str
) -> float:
"""Compute naive estimate for a group with safeguards against edge cases."""
treated = group_data[group_data[treatment_name] == 1]
control = group_data[group_data[treatment_name] == 0]

if len(treated) == 0 or len(control) == 0:
return np.nan

treated_weights = treated["weights"].values
control_weights = control["weights"].values

# Check if weights sum to 0 or if all weights are 0
if (
treated_weights.sum() == 0
or control_weights.sum() == 0
or not (treated_weights > 0).any()
or not (control_weights > 0).any()
):
return np.nan

# Weighted averages with explicit handling of edge cases
try:
y1 = np.average(treated[outcome_name], weights=treated_weights)
y0 = np.average(control[outcome_name], weights=control_weights)
return y1 - y0
except ZeroDivisionError:
return np.nan


def exponential_spacing(start, end, num_points):
"""
Generate approximately exponentially spaced integers between start and end.

Parameters:
start (int): The starting value.
end (int): The ending value.
num_points (int): Number of integers to generate.

Returns:
list: A list of approximately exponentially spaced integers.
"""
# Use a logarithmic scale for exponential spacing
log_start = np.log(start)
log_end = np.log(end)
log_space = np.linspace(log_start, log_end, num_points)

# Exponentiate back and round to nearest integers
spaced_integers = np.round(np.exp(log_space)).astype(int)

# Ensure unique integers
return list(np.unique(spaced_integers))
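A minimal usage sketch for the new helper on synthetic data. The column names "propensity" and "estimated_ITE" are part of the contract bite() assumes; the frame, the treatment/outcome names and the injected effect below are illustrative only:

    import numpy as np
    import pandas as pd
    from causaltune.score.bite import bite

    rng = np.random.default_rng(0)
    n = 2000
    df = pd.DataFrame({
        "T": rng.integers(0, 2, n),
        "propensity": rng.uniform(0.1, 0.9, n),
        "estimated_ITE": rng.normal(size=n),
    })
    # outcome loosely consistent with the estimated ITEs, so the ranking has signal
    df["y"] = rng.normal(size=n) + df["T"] * df["estimated_ITE"]

    score = bite(df, treatment_name="T", outcome_name="y")
    print(score)  # mean Kendall's tau across the bin counts; higher is better

    # exponential_spacing(10, 1000, 5) -> [10, 32, 100, 316, 1000]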
Empty file added causaltune/score/frobenius.py
138 changes: 32 additions & 106 deletions causaltune/score/scoring.py
@@ -14,17 +14,17 @@
from causaltune.score.thompson import thompson_policy, extract_means_stds
from causaltune.thirdparty.causalml import metrics
from causaltune.score.erupt import ERUPT
from .bite import bite
from causaltune.utils import treatment_values, psw_joint_weights

import dcor

from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors

from scipy.stats import kendalltau

from sklearn.preprocessing import StandardScaler

logger = logging.getLogger(__name__)


class DummyEstimator:
def __init__(self, cate_estimate: np.ndarray, effect_intervals: Optional[np.ndarray] = None):
@@ -93,7 +93,7 @@ def __init__(
Access methods and attributes via `CausalTune.scorer`.

"""

logger.info("Initializing Scorer")
self.problem = problem
self.multivalue = multivalue
self.causal_model = copy.deepcopy(causal_model)
@@ -142,6 +142,26 @@ def __init__(
+ self.psw_estimator._observed_common_causes_names,
)

def inverse_propensity_score(self, df: pd.DataFrame, clip: float = 0.05) -> np.ndarray:
"""
Calculate the inverse propensity score weights for the given dataframe.

Args:
df (pandas.DataFrame): input dataframe
clip (float): clipping value for propensity scores
"""

propensity_model = self.psw_estimator.estimator.propensity_model
p = propensity_model.predict_proba(
df[self.causal_model.get_effect_modifiers() + self.causal_model.get_common_causes()]
)
treatment = df[self.psw_estimator._treatment_name].values
ex_ante_p = p[np.arange(p.shape[0]), treatment]

psw = 1.0 / np.clip(ex_ante_p, clip, 1 - clip)

return psw

def ate(self, df: pd.DataFrame) -> tuple:
"""
Calculate the Average Treatment Effect. Provide naive std estimates in
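The row-wise indexing in the new inverse_propensity_score method picks, for each unit, the predicted probability of the treatment it actually received; a tiny standalone sketch of that step:

    import numpy as np

    p = np.array([[0.8, 0.2],
                  [0.3, 0.7]])    # predict_proba output, one column per treatment value
    treatment = np.array([0, 1])  # observed treatment per row
    ex_ante_p = p[np.arange(p.shape[0]), treatment]  # -> array([0.8, 0.7])
    psw = 1.0 / np.clip(ex_ante_p, 0.05, 0.95)       # clipped inverse propensity weights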
@@ -308,6 +328,7 @@ def frobenius_norm_score(

# Get data splits and check validity
Y0X, treatment_name, split_test_by = self._Y0_X_potential_outcomes(estimate, df)

Y0X_1 = Y0X[Y0X[split_test_by] == 1]
Y0X_0 = Y0X[Y0X[split_test_by] == 0]

@@ -320,8 +341,8 @@
# Normalize features
select_cols = estimate.estimator._effect_modifier_names + ["yhat"]
scaler = StandardScaler()
Y0X_1_normalized = scaler.fit_transform(Y0X_1[select_cols])
Y0X_0_normalized = scaler.transform(Y0X_0[select_cols])
Y0X_0_normalized = scaler.fit_transform(Y0X_0[select_cols])
Y0X_1_normalized = scaler.transform(Y0X_1[select_cols])

# Calculate pairwise differences
differences_xy = Y0X_1_normalized[:, np.newaxis, :] - Y0X_0_normalized[np.newaxis, :, :]
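The reordered lines fit the scaler on Y0X_0 and reuse its mean/std to transform Y0X_1, so both groups end up on the same scale rather than being normalized independently; a small standalone illustration of the pattern:

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    group_0 = np.array([[1.0, 10.0], [3.0, 30.0]])
    group_1 = np.array([[2.0, 20.0], [4.0, 40.0]])

    scaler = StandardScaler()
    group_0_normalized = scaler.fit_transform(group_0)  # mean/std learned from group_0 only
    group_1_normalized = scaler.transform(group_1)      # same parameters applied to group_1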
@@ -906,7 +927,7 @@ def codec_score(estimate: CausalEstimate, df: pd.DataFrame) -> float:
if standard_deviations < 0.01:
return np.inf

return Scorer.codec(Y, Z, X)
return abs(Scorer.codec(Y, Z, X))

@staticmethod
def auc_make_score(
@@ -924,7 +945,7 @@ def auc_make_score(
float: area under the uplift curve

"""

print("running auuc_score")
est = estimate.estimator
new_df = pd.DataFrame()
new_df["y"] = df[est._outcome_name]
@@ -1041,8 +1062,6 @@ def bite_score(
Returns:
float: The BITE score. Higher values indicate better model performance.
"""
if N_values is None:
N_values = list(range(10, 21)) + list(range(25, 51, 5)) + list(range(60, 101, 10))

est = estimate.estimator
treatment_name = est._treatment_name
@@ -1068,102 +1087,9 @@
else:
raise ValueError("Propensity model is not available.")

# Calculate weights with clipping to avoid extremes
working_df["weights"] = np.where(
working_df[treatment_name] == 1,
1 / np.clip(working_df["propensity"], 0.05, 0.95),
1 / np.clip(1 - working_df["propensity"], 0.05, 0.95),
)

kendall_tau_values = []

def compute_naive_estimate(group_data):
"""Compute naive estimate for a group with safeguards against edge cases."""
treated = group_data[group_data[treatment_name] == 1]
control = group_data[group_data[treatment_name] == 0]

if len(treated) == 0 or len(control) == 0:
return np.nan

treated_weights = treated["weights"].values
control_weights = control["weights"].values

# Check if weights sum to 0 or if all weights are 0
if (
treated_weights.sum() == 0
or control_weights.sum() == 0
or not (treated_weights > 0).any()
or not (control_weights > 0).any()
):
return np.nan

# Weighted averages with explicit handling of edge cases
try:
y1 = np.average(treated[outcome_name], weights=treated_weights)
y0 = np.average(control[outcome_name], weights=control_weights)
return y1 - y0
except ZeroDivisionError:
return np.nan

for N in N_values:
iter_df = working_df.copy()

try:
# Ensure enough unique values for binning
unique_ites = np.unique(iter_df["estimated_ITE"])
if len(unique_ites) < N:
continue

# Create bins
iter_df["ITE_bin"] = pd.qcut(
iter_df["estimated_ITE"], q=N, labels=False, duplicates="drop"
)

# Compute bin statistics
bin_stats = []
for bin_idx in iter_df["ITE_bin"].unique():
bin_data = iter_df[iter_df["ITE_bin"] == bin_idx]

# Skip if bin is too small
if len(bin_data) < 2:
continue

naive_est = compute_naive_estimate(bin_data)

# Only compute average ITE if weights are valid
bin_weights = bin_data["weights"].values
if bin_weights.sum() > 0 and not np.isnan(naive_est):
try:
avg_est_ite = np.average(bin_data["estimated_ITE"], weights=bin_weights)
bin_stats.append(
{
"ITE_bin": bin_idx,
"naive_estimate": naive_est,
"average_estimated_ITE": avg_est_ite,
}
)
except ZeroDivisionError:
continue

# Calculate Kendall's Tau if we have enough valid bins
bin_stats_df = pd.DataFrame(bin_stats)
if len(bin_stats_df) >= 2:
tau, _ = kendalltau(
bin_stats_df["naive_estimate"],
bin_stats_df["average_estimated_ITE"],
)
if not np.isnan(tau):
kendall_tau_values.append(tau)

except (ValueError, ZeroDivisionError):
continue

# Return final score
if len(kendall_tau_values) == 0:
return -np.inf # Return -inf for failed computations

top_3_taus = sorted(kendall_tau_values, reverse=True)[:3]
return np.mean(top_3_taus)
# Calculate the BITE score
bite_score = bite(working_df, treatment_name, outcome_name)
return bite_score

def make_scores(
self,