feat: extend CausalImpactResults with summary statistics and R-compatible summary format

YuminosukeSato · YuminosukeSato · commit 8687ff09c7aa · 2026-03-23T00:46:32.000+09:00
CausalImpactResults に13個の新フィールドを追加し、summary() 出力を
R CausalImpact と同等のフォーマットに改修。

Why: v0.1.0の summary() 出力には「-」プレースホルダーが残っており、
Actual行やPrediction (s.d.)行が表示できない状態だった。

Changes:
- analysis.py: actual, predictions_sd, average_prediction_sd/lower/upper,
  cumulative_prediction_sd/lower/upper, average_effect_sd,
  cumulative_effect_sd, relative_effect_sd/lower/upper の13フィールド追加
- analysis.py: compute_effects() に cross-sample 集約計算を追加
  (n_samples=1 の ddof=1 NaN を 0 にクランプ)
- summary.py: R互換フォーマット実装 (Actual行、Prediction(s.d.)行、
  3つのCI行、Posterior prob. 常時表示)
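- test_summary.py: Absolute effect の 95% CI 行を "95% CI" の先頭一致検索ではなく
  行インデックス [8] で取得するよう変更 (新フォーマットでは 95% CI 行が複数あるため)。
  R互換フォーマットの各行を検証するテストとフィクスチャの新フィールドを追加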
diff --git a/python/causal_impact/analysis.py b/python/causal_impact/analysis.py
@@ -11,21 +11,45 @@
 class CausalImpactResults:
     """Results of causal impact analysis."""
 
+    # Observed data
+    actual: np.ndarray  # (T_post,) observed y in post period
+
+    # Pointwise effects
     point_effects: np.ndarray  # (T_post,) mean effect per time point
     point_effect_lower: np.ndarray  # (T_post,) lower CI per time point
     point_effect_upper: np.ndarray  # (T_post,) upper CI per time point
     ci_lower: float  # lower CI bound on average effect
     ci_upper: float  # upper CI bound on average effect
     point_effect_mean: float  # mean of point effects across time
+    average_effect_sd: float  # std of per-sample average effects
+
+    # Cumulative effects
     cumulative_effect: np.ndarray  # (T_post,) cumulative point effects
     cumulative_effect_lower: np.ndarray  # (T_post,) lower cumulative CI
     cumulative_effect_upper: np.ndarray  # (T_post,) upper cumulative CI
     cumulative_effect_total: float  # total cumulative effect
+    cumulative_effect_sd: float  # std of per-sample cumulative effects
+
+    # Relative effects
     relative_effect_mean: float  # relative effect (effect / predicted)
+    relative_effect_sd: float  # std of per-sample relative effects
+    relative_effect_lower: float  # lower CI on relative effect
+    relative_effect_upper: float  # upper CI on relative effect
+
+    # Significance
     p_value: float  # Bayesian one-sided tail probability
+
+    # Counterfactual predictions
     predictions_mean: np.ndarray  # (T_post,) mean counterfactual
+    predictions_sd: np.ndarray  # (T_post,) std of predictions per time point
     predictions_lower: np.ndarray  # (T_post,) lower CI counterfactual
     predictions_upper: np.ndarray  # (T_post,) upper CI counterfactual
+    average_prediction_sd: float  # std of per-sample average predictions
+    average_prediction_lower: float  # lower CI on average prediction
+    average_prediction_upper: float  # upper CI on average prediction
+    cumulative_prediction_sd: float  # std of per-sample cumulative predictions
+    cumulative_prediction_lower: float  # lower CI on cumulative prediction
+    cumulative_prediction_upper: float  # upper CI on cumulative prediction
 
 
 class CausalAnalysis:
@@ -78,12 +102,74 @@ def compute_effects(
         )
         cumulative_effect_total = float(cumulative_effect[-1])
 
-        # Relative effect
-        pred_mean_total = predictions.mean()
-        if abs(pred_mean_total) > 1e-10:
-            relative_effect_mean = point_effect_mean / pred_mean_total
+        # Actual observed values
+        actual = y_post.copy()
+
+        # Per-time-point std of predictions across samples
+        if n_samples == 1:
+            predictions_sd_arr = np.zeros(predictions.shape[1])
         else:
-            relative_effect_mean = 0.0
+            predictions_sd_arr = np.std(predictions, axis=0, ddof=1)
+
+        # Prediction scalars (cross-sample aggregates)
+        avg_pred_per_sample = predictions.mean(axis=1)  # (n_samples,)
+        cum_pred_per_sample = predictions.sum(axis=1)  # (n_samples,)
+
+        if n_samples == 1:
+            average_prediction_sd = 0.0
+            cumulative_prediction_sd = 0.0
+        else:
+            average_prediction_sd = float(np.std(avg_pred_per_sample, ddof=1))
+            cumulative_prediction_sd = float(np.std(cum_pred_per_sample, ddof=1))
+
+        average_prediction_lower = float(
+            np.percentile(avg_pred_per_sample, 100 * lower_q)
+        )
+        average_prediction_upper = float(
+            np.percentile(avg_pred_per_sample, 100 * upper_q)
+        )
+        cumulative_prediction_lower = float(
+            np.percentile(cum_pred_per_sample, 100 * lower_q)
+        )
+        cumulative_prediction_upper = float(
+            np.percentile(cum_pred_per_sample, 100 * upper_q)
+        )
+
+        # Effect s.d. scalars
+        cum_effects_per_sample = effects.sum(axis=1)  # (n_samples,)
+
+        if n_samples == 1:
+            average_effect_sd = 0.0
+            cumulative_effect_sd = 0.0
+        else:
+            average_effect_sd = float(np.std(avg_effects, ddof=1))
+            cumulative_effect_sd = float(np.std(cum_effects_per_sample, ddof=1))
+
+        # Relative effect per sample
+        avg_pred_per_sample_safe = np.where(
+            np.abs(avg_pred_per_sample) > 1e-10,
+            avg_pred_per_sample,
+            np.nan,
+        )
+        rel_effects_per_sample = np.where(
+            np.abs(avg_pred_per_sample) > 1e-10,
+            avg_effects / avg_pred_per_sample_safe,
+            0.0,
+        )
+
+        relative_effect_mean = float(rel_effects_per_sample.mean())
+
+        if n_samples == 1:
+            relative_effect_sd = 0.0
+        else:
+            relative_effect_sd = float(np.std(rel_effects_per_sample, ddof=1))
+
+        relative_effect_lower = float(
+            np.percentile(rel_effects_per_sample, 100 * lower_q)
+        )
+        relative_effect_upper = float(
+            np.percentile(rel_effects_per_sample, 100 * upper_q)
+        )
 
         # p-value: proportion of samples where average effect has opposite sign
         if point_effect_mean >= 0:
@@ -99,19 +185,32 @@ def compute_effects(
         predictions_upper = np.percentile(predictions, 100 * upper_q, axis=0)
 
         return CausalImpactResults(
+            actual=actual,
             point_effects=point_effects,
             point_effect_lower=point_effect_lower,
             point_effect_upper=point_effect_upper,
             ci_lower=ci_lower,
             ci_upper=ci_upper,
             point_effect_mean=point_effect_mean,
+            average_effect_sd=average_effect_sd,
             cumulative_effect=cumulative_effect,
             cumulative_effect_lower=cumulative_effect_lower,
             cumulative_effect_upper=cumulative_effect_upper,
             cumulative_effect_total=cumulative_effect_total,
+            cumulative_effect_sd=cumulative_effect_sd,
             relative_effect_mean=relative_effect_mean,
+            relative_effect_sd=relative_effect_sd,
+            relative_effect_lower=relative_effect_lower,
+            relative_effect_upper=relative_effect_upper,
             p_value=p_value,
             predictions_mean=predictions_mean,
+            predictions_sd=predictions_sd_arr,
             predictions_lower=predictions_lower,
             predictions_upper=predictions_upper,
+            average_prediction_sd=average_prediction_sd,
+            average_prediction_lower=average_prediction_lower,
+            average_prediction_upper=average_prediction_upper,
+            cumulative_prediction_sd=cumulative_prediction_sd,
+            cumulative_prediction_lower=cumulative_prediction_lower,
+            cumulative_prediction_upper=cumulative_prediction_upper,
         )
diff --git a/python/causal_impact/summary.py b/python/causal_impact/summary.py
@@ -12,36 +12,83 @@ class SummaryFormatter:
     def summary(results: CausalImpactResults, digits: int = 2) -> str:
         fmt = f".{digits}f"
 
-        avg_effect = format(results.point_effect_mean, fmt)
-        avg_ci = f"[{format(results.ci_lower, fmt)}, {format(results.ci_upper, fmt)}]"
-        cum_effect = format(results.cumulative_effect_total, fmt)
-        cum_ci = (
+        # Actual
+        avg_actual = format(results.actual.mean(), fmt)
+        cum_actual = format(results.actual.sum(), fmt)
+
+        # Prediction
+        avg_pred = format(results.predictions_mean.mean(), fmt)
+        avg_pred_sd = format(results.average_prediction_sd, fmt)
+        cum_pred = format(results.predictions_mean.sum(), fmt)
+        cum_pred_sd = format(results.cumulative_prediction_sd, fmt)
+
+        # Prediction CI
+        avg_pred_ci = (
+            f"[{format(results.average_prediction_lower, fmt)}, "
+            f"{format(results.average_prediction_upper, fmt)}]"
+        )
+        cum_pred_ci = (
+            f"[{format(results.cumulative_prediction_lower, fmt)}, "
+            f"{format(results.cumulative_prediction_upper, fmt)}]"
+        )
+
+        # Absolute effect
+        avg_eff = format(results.point_effect_mean, fmt)
+        avg_eff_sd = format(results.average_effect_sd, fmt)
+        cum_eff = format(results.cumulative_effect_total, fmt)
+        cum_eff_sd = format(results.cumulative_effect_sd, fmt)
+
+        # Absolute effect CI
+        avg_eff_ci = (
+            f"[{format(results.ci_lower, fmt)}, {format(results.ci_upper, fmt)}]"
+        )
+        cum_eff_ci = (
             f"[{format(results.cumulative_effect_lower[-1], fmt)}, "
             f"{format(results.cumulative_effect_upper[-1], fmt)}]"
         )
-        rel_effect = format(results.relative_effect_mean * 100, fmt)
+
+        # Relative effect
+        rel_m = format(results.relative_effect_mean * 100, fmt)
+        rel_sd = format(results.relative_effect_sd * 100, fmt)
+        rel_lo = format(results.relative_effect_lower * 100, fmt)
+        rel_hi = format(results.relative_effect_upper * 100, fmt)
+
         p_val = format(results.p_value, f".{max(digits, 3)}f")
+        prob = format((1 - results.p_value) * 100, fmt)
+
+        pred_row = (
+            f"Prediction (s.d.)        "
+            f"{avg_pred} ({avg_pred_sd})   "
+            f"{cum_pred} ({cum_pred_sd})"
+        )
+        eff_row = (
+            f"Absolute effect (s.d.)   "
+            f"{avg_eff} ({avg_eff_sd})    "
+            f"{cum_eff} ({cum_eff_sd})"
+        )
+        rel_row = f"Relative effect (s.d.)   {rel_m}% ({rel_sd}%) {rel_m}% ({rel_sd}%)"
+        rel_ci_row = (
+            f"95% CI                   [{rel_lo}%, {rel_hi}%] [{rel_lo}%, {rel_hi}%]"
+        )
 
         lines = [
             "Posterior inference {CausalImpact}",
             "",
             "                         Average        Cumulative",
-            "Actual                   -              -",
-            "Prediction (s.d.)        -              -",
-            f"95% CI                   {avg_ci}       {cum_ci}",
+            f"Actual                   {avg_actual}          {cum_actual}",
+            pred_row,
+            f"95% CI                   {avg_pred_ci}  {cum_pred_ci}",
+            "",
+            eff_row,
+            f"95% CI                   {avg_eff_ci}   {cum_eff_ci}",
             "",
-            f"Absolute effect (mean)   {avg_effect}           {cum_effect}",
-            f"Relative effect          {rel_effect}%",
+            rel_row,
+            rel_ci_row,
             "",
             f"Posterior tail-area probability p: {p_val}",
+            f"Posterior prob. of a causal effect: {prob}%",
         ]
 
-        if results.p_value < 0.05:
-            lines.append("Posterior prob. of a causal effect: "
-                         f"{format((1 - results.p_value) * 100, fmt)}%")
-        else:
-            lines.append("The effect is not statistically significant.")
-
         return "\n".join(lines)
 
     @staticmethod
diff --git a/tests/test_summary.py b/tests/test_summary.py
@@ -9,21 +9,34 @@ def _make_results(effect=2.0, p_value=0.01):
     """Create a CausalImpactResults fixture."""
     t_post = 10
     return CausalImpactResults(
+        actual=np.full(t_post, 12.0),
         point_effects=np.full(t_post, effect),
         point_effect_lower=np.full(t_post, effect * 0.75),
         point_effect_upper=np.full(t_post, effect * 1.25),
         ci_lower=effect * 0.5,
         ci_upper=effect * 1.5,
         point_effect_mean=effect,
+        average_effect_sd=effect * 0.1,
         cumulative_effect=np.cumsum(np.full(t_post, effect)),
         cumulative_effect_lower=np.cumsum(np.full(t_post, effect * 0.75)),
         cumulative_effect_upper=np.cumsum(np.full(t_post, effect * 1.25)),
         cumulative_effect_total=effect * t_post,
+        cumulative_effect_sd=effect,
         relative_effect_mean=effect / 10.0,
+        relative_effect_sd=effect / 100.0,
+        relative_effect_lower=effect / 20.0,
+        relative_effect_upper=effect / 5.0,
         p_value=p_value,
         predictions_mean=np.full(t_post, 10.0),
+        predictions_sd=np.full(t_post, 0.5),
         predictions_lower=np.full(t_post, 9.0),
         predictions_upper=np.full(t_post, 11.0),
+        average_prediction_sd=0.5,
+        average_prediction_lower=9.0,
+        average_prediction_upper=11.0,
+        cumulative_prediction_sd=5.0,
+        cumulative_prediction_lower=90.0,
+        cumulative_prediction_upper=110.0,
     )
 
 
@@ -37,6 +50,23 @@ def test_summary_default_format(self):
         assert "Cumulative" in text
         assert "2.0" in text or "2.00" in text
 
+    def test_summary_includes_r_style_actual_prediction_and_effect_sections_because_placeholder_rows_hide_valid_results(
+        self,
+    ):
+        """R互換の summary では Actual/Prediction/Absolute/Relative の各行を欠かさない."""
+        result = _make_results(effect=2.0, p_value=0.01)
+
+        text = SummaryFormatter.summary(result, digits=2)
+        lines = text.split("\n")
+
+        assert "Actual                   12.00          120.00" in lines
+        assert "Prediction (s.d.)        10.00 (0.50)   100.00 (5.00)" in lines
+        assert "95% CI                   [9.00, 11.00]  [90.00, 110.00]" in lines
+        assert "Absolute effect (s.d.)   2.00 (0.20)    20.00 (2.00)" in lines
+        assert "95% CI                   [1.00, 3.00]   [15.00, 25.00]" in lines
+        assert "Relative effect (s.d.)   20.00% (2.00%) 20.00% (2.00%)" in lines
+        assert "95% CI                   [10.00%, 40.00%] [10.00%, 40.00%]" in lines
+
     def test_summary_report_format(self):
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.report(result)
@@ -55,10 +85,10 @@ def test_summary_digits_10(self):
         assert isinstance(text, str)
 
     def test_summary_shows_cumulative_ci_in_95_percent_ci_row(self):
-        """95% CI 行の cumulative 列には最終時点の累積CIを表示する."""
+        """Absolute effect の 95% CI 行 cumulative 列には最終時点の累積CIを表示する."""
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.summary(result, digits=2)
-        ci_line = next(line for line in text.split("\n") if "95% CI" in line)
+        ci_line = text.split("\n")[8]
         assert "15.00" in ci_line
         assert "25.00" in ci_line