From 8687ff09c7aad7e7b03c7c3f24e6b9d81339dd8f Mon Sep 17 00:00:00 2001
From: YuminosukeSato <coffcoffeecoffee@gmail.com>
Date: Mon, 23 Mar 2026 00:46:32 +0900
Subject: [PATCH 1/3] feat: extend CausalImpactResults with summary statistics
 and R-compatible summary format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CausalImpactResults に14個の新フィールドを追加し、summary() 出力を
R CausalImpact と同等のフォーマットに改修。

Why: v0.1.0の summary() 出力には「-」プレースホルダーが残っており、
Actual行やPrediction (s.d.)行が表示できない状態だった。

Changes:
- analysis.py: actual, predictions_sd, average_prediction_sd/lower/upper,
  cumulative_prediction_sd/lower/upper, average_effect_sd,
  cumulative_effect_sd, relative_effect_sd/lower/upper の14フィールド追加
- analysis.py: compute_effects() に cross-sample 集約計算を追加
  (n_samples=1 の ddof=1 NaN を 0 にクランプ)
- summary.py: R互換フォーマット実装 (Actual行、Prediction(s.d.)行、
  3つのCI行、Posterior prob. 常時表示)
- test_summary.py: CI行の行番号修正 (7→8、新フォーマットで行位置変更)
---
 python/causal_impact/analysis.py | 109 +++++++++++++++++++++++++++++--
 python/causal_impact/summary.py  |  79 +++++++++++++++++-----
 tests/test_summary.py            |  34 +++++++++-
 3 files changed, 199 insertions(+), 23 deletions(-)

diff --git a/python/causal_impact/analysis.py b/python/causal_impact/analysis.py
index 9ce9364..2e9e14e 100644
--- a/python/causal_impact/analysis.py
+++ b/python/causal_impact/analysis.py
@@ -11,21 +11,45 @@
 class CausalImpactResults:
     """Results of causal impact analysis."""
 
+    # Observed data
+    actual: np.ndarray  # (T_post,) observed y in post period
+
+    # Pointwise effects
     point_effects: np.ndarray  # (T_post,) mean effect per time point
     point_effect_lower: np.ndarray  # (T_post,) lower CI per time point
     point_effect_upper: np.ndarray  # (T_post,) upper CI per time point
     ci_lower: float  # lower CI bound on average effect
     ci_upper: float  # upper CI bound on average effect
     point_effect_mean: float  # mean of point effects across time
+    average_effect_sd: float  # std of per-sample average effects
+
+    # Cumulative effects
     cumulative_effect: np.ndarray  # (T_post,) cumulative point effects
     cumulative_effect_lower: np.ndarray  # (T_post,) lower cumulative CI
     cumulative_effect_upper: np.ndarray  # (T_post,) upper cumulative CI
     cumulative_effect_total: float  # total cumulative effect
+    cumulative_effect_sd: float  # std of per-sample cumulative effects
+
+    # Relative effects
     relative_effect_mean: float  # relative effect (effect / predicted)
+    relative_effect_sd: float  # std of per-sample relative effects
+    relative_effect_lower: float  # lower CI on relative effect
+    relative_effect_upper: float  # upper CI on relative effect
+
+    # Significance
     p_value: float  # Bayesian one-sided tail probability
+
+    # Counterfactual predictions
     predictions_mean: np.ndarray  # (T_post,) mean counterfactual
+    predictions_sd: np.ndarray  # (T_post,) std of predictions per time point
     predictions_lower: np.ndarray  # (T_post,) lower CI counterfactual
     predictions_upper: np.ndarray  # (T_post,) upper CI counterfactual
+    average_prediction_sd: float  # std of per-sample average predictions
+    average_prediction_lower: float  # lower CI on average prediction
+    average_prediction_upper: float  # upper CI on average prediction
+    cumulative_prediction_sd: float  # std of per-sample cumulative predictions
+    cumulative_prediction_lower: float  # lower CI on cumulative prediction
+    cumulative_prediction_upper: float  # upper CI on cumulative prediction
 
 
 class CausalAnalysis:
@@ -78,12 +102,74 @@ def compute_effects(
         )
         cumulative_effect_total = float(cumulative_effect[-1])
 
-        # Relative effect
-        pred_mean_total = predictions.mean()
-        if abs(pred_mean_total) > 1e-10:
-            relative_effect_mean = point_effect_mean / pred_mean_total
+        # Actual observed values
+        actual = y_post.copy()
+
+        # Per-time-point std of predictions across samples
+        if n_samples == 1:
+            predictions_sd_arr = np.zeros(predictions.shape[1])
         else:
-            relative_effect_mean = 0.0
+            predictions_sd_arr = np.std(predictions, axis=0, ddof=1)
+
+        # Prediction scalars (cross-sample aggregates)
+        avg_pred_per_sample = predictions.mean(axis=1)  # (n_samples,)
+        cum_pred_per_sample = predictions.sum(axis=1)  # (n_samples,)
+
+        if n_samples == 1:
+            average_prediction_sd = 0.0
+            cumulative_prediction_sd = 0.0
+        else:
+            average_prediction_sd = float(np.std(avg_pred_per_sample, ddof=1))
+            cumulative_prediction_sd = float(np.std(cum_pred_per_sample, ddof=1))
+
+        average_prediction_lower = float(
+            np.percentile(avg_pred_per_sample, 100 * lower_q)
+        )
+        average_prediction_upper = float(
+            np.percentile(avg_pred_per_sample, 100 * upper_q)
+        )
+        cumulative_prediction_lower = float(
+            np.percentile(cum_pred_per_sample, 100 * lower_q)
+        )
+        cumulative_prediction_upper = float(
+            np.percentile(cum_pred_per_sample, 100 * upper_q)
+        )
+
+        # Effect s.d. scalars
+        cum_effects_per_sample = effects.sum(axis=1)  # (n_samples,)
+
+        if n_samples == 1:
+            average_effect_sd = 0.0
+            cumulative_effect_sd = 0.0
+        else:
+            average_effect_sd = float(np.std(avg_effects, ddof=1))
+            cumulative_effect_sd = float(np.std(cum_effects_per_sample, ddof=1))
+
+        # Relative effect per sample
+        avg_pred_per_sample_safe = np.where(
+            np.abs(avg_pred_per_sample) > 1e-10,
+            avg_pred_per_sample,
+            np.nan,
+        )
+        rel_effects_per_sample = np.where(
+            np.abs(avg_pred_per_sample) > 1e-10,
+            avg_effects / avg_pred_per_sample_safe,
+            0.0,
+        )
+
+        relative_effect_mean = float(rel_effects_per_sample.mean())
+
+        if n_samples == 1:
+            relative_effect_sd = 0.0
+        else:
+            relative_effect_sd = float(np.std(rel_effects_per_sample, ddof=1))
+
+        relative_effect_lower = float(
+            np.percentile(rel_effects_per_sample, 100 * lower_q)
+        )
+        relative_effect_upper = float(
+            np.percentile(rel_effects_per_sample, 100 * upper_q)
+        )
 
         # p-value: proportion of samples where average effect has opposite sign
         if point_effect_mean >= 0:
@@ -99,19 +185,32 @@ def compute_effects(
         predictions_upper = np.percentile(predictions, 100 * upper_q, axis=0)
 
         return CausalImpactResults(
+            actual=actual,
             point_effects=point_effects,
             point_effect_lower=point_effect_lower,
             point_effect_upper=point_effect_upper,
             ci_lower=ci_lower,
             ci_upper=ci_upper,
             point_effect_mean=point_effect_mean,
+            average_effect_sd=average_effect_sd,
             cumulative_effect=cumulative_effect,
             cumulative_effect_lower=cumulative_effect_lower,
             cumulative_effect_upper=cumulative_effect_upper,
             cumulative_effect_total=cumulative_effect_total,
+            cumulative_effect_sd=cumulative_effect_sd,
             relative_effect_mean=relative_effect_mean,
+            relative_effect_sd=relative_effect_sd,
+            relative_effect_lower=relative_effect_lower,
+            relative_effect_upper=relative_effect_upper,
             p_value=p_value,
             predictions_mean=predictions_mean,
+            predictions_sd=predictions_sd_arr,
             predictions_lower=predictions_lower,
             predictions_upper=predictions_upper,
+            average_prediction_sd=average_prediction_sd,
+            average_prediction_lower=average_prediction_lower,
+            average_prediction_upper=average_prediction_upper,
+            cumulative_prediction_sd=cumulative_prediction_sd,
+            cumulative_prediction_lower=cumulative_prediction_lower,
+            cumulative_prediction_upper=cumulative_prediction_upper,
         )
diff --git a/python/causal_impact/summary.py b/python/causal_impact/summary.py
index 2f0185f..ed7c1fe 100644
--- a/python/causal_impact/summary.py
+++ b/python/causal_impact/summary.py
@@ -12,36 +12,83 @@ class SummaryFormatter:
     def summary(results: CausalImpactResults, digits: int = 2) -> str:
         fmt = f".{digits}f"
 
-        avg_effect = format(results.point_effect_mean, fmt)
-        avg_ci = f"[{format(results.ci_lower, fmt)}, {format(results.ci_upper, fmt)}]"
-        cum_effect = format(results.cumulative_effect_total, fmt)
-        cum_ci = (
+        # Actual
+        avg_actual = format(results.actual.mean(), fmt)
+        cum_actual = format(results.actual.sum(), fmt)
+
+        # Prediction
+        avg_pred = format(results.predictions_mean.mean(), fmt)
+        avg_pred_sd = format(results.average_prediction_sd, fmt)
+        cum_pred = format(results.predictions_mean.sum(), fmt)
+        cum_pred_sd = format(results.cumulative_prediction_sd, fmt)
+
+        # Prediction CI
+        avg_pred_ci = (
+            f"[{format(results.average_prediction_lower, fmt)}, "
+            f"{format(results.average_prediction_upper, fmt)}]"
+        )
+        cum_pred_ci = (
+            f"[{format(results.cumulative_prediction_lower, fmt)}, "
+            f"{format(results.cumulative_prediction_upper, fmt)}]"
+        )
+
+        # Absolute effect
+        avg_eff = format(results.point_effect_mean, fmt)
+        avg_eff_sd = format(results.average_effect_sd, fmt)
+        cum_eff = format(results.cumulative_effect_total, fmt)
+        cum_eff_sd = format(results.cumulative_effect_sd, fmt)
+
+        # Absolute effect CI
+        avg_eff_ci = (
+            f"[{format(results.ci_lower, fmt)}, {format(results.ci_upper, fmt)}]"
+        )
+        cum_eff_ci = (
             f"[{format(results.cumulative_effect_lower[-1], fmt)}, "
             f"{format(results.cumulative_effect_upper[-1], fmt)}]"
         )
-        rel_effect = format(results.relative_effect_mean * 100, fmt)
+
+        # Relative effect
+        rel_m = format(results.relative_effect_mean * 100, fmt)
+        rel_sd = format(results.relative_effect_sd * 100, fmt)
+        rel_lo = format(results.relative_effect_lower * 100, fmt)
+        rel_hi = format(results.relative_effect_upper * 100, fmt)
+
         p_val = format(results.p_value, f".{max(digits, 3)}f")
+        prob = format((1 - results.p_value) * 100, fmt)
+
+        pred_row = (
+            f"Prediction (s.d.)        "
+            f"{avg_pred} ({avg_pred_sd})   "
+            f"{cum_pred} ({cum_pred_sd})"
+        )
+        eff_row = (
+            f"Absolute effect (s.d.)   "
+            f"{avg_eff} ({avg_eff_sd})    "
+            f"{cum_eff} ({cum_eff_sd})"
+        )
+        rel_row = f"Relative effect (s.d.)   {rel_m}% ({rel_sd}%) {rel_m}% ({rel_sd}%)"
+        rel_ci_row = (
+            f"95% CI                   [{rel_lo}%, {rel_hi}%] [{rel_lo}%, {rel_hi}%]"
+        )
 
         lines = [
             "Posterior inference {CausalImpact}",
             "",
             "                         Average        Cumulative",
-            "Actual                   -              -",
-            "Prediction (s.d.)        -              -",
-            f"95% CI                   {avg_ci}       {cum_ci}",
+            f"Actual                   {avg_actual}          {cum_actual}",
+            pred_row,
+            f"95% CI                   {avg_pred_ci}  {cum_pred_ci}",
+            "",
+            eff_row,
+            f"95% CI                   {avg_eff_ci}   {cum_eff_ci}",
             "",
-            f"Absolute effect (mean)   {avg_effect}           {cum_effect}",
-            f"Relative effect          {rel_effect}%",
+            rel_row,
+            rel_ci_row,
             "",
             f"Posterior tail-area probability p: {p_val}",
+            f"Posterior prob. of a causal effect: {prob}%",
         ]
 
-        if results.p_value < 0.05:
-            lines.append("Posterior prob. of a causal effect: "
-                         f"{format((1 - results.p_value) * 100, fmt)}%")
-        else:
-            lines.append("The effect is not statistically significant.")
-
         return "\n".join(lines)
 
     @staticmethod
diff --git a/tests/test_summary.py b/tests/test_summary.py
index e0c28b8..d9885f0 100644
--- a/tests/test_summary.py
+++ b/tests/test_summary.py
@@ -9,21 +9,34 @@ def _make_results(effect=2.0, p_value=0.01):
     """Create a CausalImpactResults fixture."""
     t_post = 10
     return CausalImpactResults(
+        actual=np.full(t_post, 12.0),
         point_effects=np.full(t_post, effect),
         point_effect_lower=np.full(t_post, effect * 0.75),
         point_effect_upper=np.full(t_post, effect * 1.25),
         ci_lower=effect * 0.5,
         ci_upper=effect * 1.5,
         point_effect_mean=effect,
+        average_effect_sd=effect * 0.1,
         cumulative_effect=np.cumsum(np.full(t_post, effect)),
         cumulative_effect_lower=np.cumsum(np.full(t_post, effect * 0.75)),
         cumulative_effect_upper=np.cumsum(np.full(t_post, effect * 1.25)),
         cumulative_effect_total=effect * t_post,
+        cumulative_effect_sd=effect,
         relative_effect_mean=effect / 10.0,
+        relative_effect_sd=effect / 100.0,
+        relative_effect_lower=effect / 20.0,
+        relative_effect_upper=effect / 5.0,
         p_value=p_value,
         predictions_mean=np.full(t_post, 10.0),
+        predictions_sd=np.full(t_post, 0.5),
         predictions_lower=np.full(t_post, 9.0),
         predictions_upper=np.full(t_post, 11.0),
+        average_prediction_sd=0.5,
+        average_prediction_lower=9.0,
+        average_prediction_upper=11.0,
+        cumulative_prediction_sd=5.0,
+        cumulative_prediction_lower=90.0,
+        cumulative_prediction_upper=110.0,
     )
 
 
@@ -37,6 +50,23 @@ def test_summary_default_format(self):
         assert "Cumulative" in text
         assert "2.0" in text or "2.00" in text
 
+    def test_summary_includes_r_style_actual_prediction_and_effect_sections_because_placeholder_rows_hide_valid_results(
+        self,
+    ):
+        """R互換の summary では Actual/Prediction/Absolute/Relative の各行を欠かさない."""
+        result = _make_results(effect=2.0, p_value=0.01)
+
+        text = SummaryFormatter.summary(result, digits=2)
+        lines = text.split("\n")
+
+        assert "Actual                   12.00          120.00" in lines
+        assert "Prediction (s.d.)        10.00 (0.50)   100.00 (5.00)" in lines
+        assert "95% CI                   [9.00, 11.00]  [90.00, 110.00]" in lines
+        assert "Absolute effect (s.d.)   2.00 (0.20)    20.00 (2.00)" in lines
+        assert "95% CI                   [1.00, 3.00]   [15.00, 25.00]" in lines
+        assert "Relative effect (s.d.)   20.00% (2.00%) 20.00% (2.00%)" in lines
+        assert "95% CI                   [10.00%, 40.00%] [10.00%, 40.00%]" in lines
+
     def test_summary_report_format(self):
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.report(result)
@@ -55,10 +85,10 @@ def test_summary_digits_10(self):
         assert isinstance(text, str)
 
     def test_summary_shows_cumulative_ci_in_95_percent_ci_row(self):
-        """95% CI 行の cumulative 列には最終時点の累積CIを表示する."""
+        """Absolute effect の 95% CI 行 cumulative 列には最終時点の累積CIを表示する."""
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.summary(result, digits=2)
-        ci_line = next(line for line in text.split("\n") if "95% CI" in line)
+        ci_line = text.split("\n")[8]
         assert "15.00" in ci_line
         assert "25.00" in ci_line
 

From 3666d53866b14830d193c5d9d33cd2b5235ebd82 Mon Sep 17 00:00:00 2001
From: YuminosukeSato <coffcoffeecoffee@gmail.com>
Date: Mon, 23 Mar 2026 00:49:32 +0900
Subject: [PATCH 2/3] fix(ci): update test_plot fixture and shorten long test
 names for E501

- test_plot.py: add 13 new CausalImpactResults fields to
  _make_results_with_index() fixture
- test_summary.py: shorten test method name to fix E501
- test_analysis.py: shorten two test method names to fix E501
---
 tests/test_analysis.py | 48 ++++++++++++++++++++++++++++++++++++++++++
 tests/test_plot.py     | 13 ++++++++++++
 tests/test_summary.py  |  6 ++----
 3 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 120432a..3646ad2 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -107,6 +107,54 @@ def test_relative_effect_percentage(self):
         # Relative effect ≈ 2.0/10.0 = 20%
         assert abs(result.relative_effect_mean - 0.2) < 0.1
 
+    def test_summary_stats_use_posterior_sample_aggregates(self):
+        """summary行はposterior sample集約値(mean/sd/CI)を保持する."""
+        y_post = np.array([10.0, 10.0])
+        predictions = np.array(
+            [
+                [8.0, 8.0],
+                [10.0, 6.0],
+                [11.0, 7.0],
+            ]
+        )
+
+        result = CausalAnalysis.compute_effects(
+            y_post=y_post,
+            predictions=predictions,
+            alpha=0.0,
+        )
+
+        assert np.array_equal(result.actual, y_post)
+        assert np.allclose(result.predictions_sd, np.std(predictions, axis=0, ddof=1))
+        assert result.average_prediction_sd == np.sqrt(1.0 / 3.0)
+        assert result.average_prediction_lower == 8.0
+        assert result.average_prediction_upper == 9.0
+        assert result.cumulative_prediction_sd == np.sqrt(4.0 / 3.0)
+        assert result.cumulative_prediction_lower == 16.0
+        assert result.cumulative_prediction_upper == 18.0
+        assert result.average_effect_sd == np.sqrt(1.0 / 3.0)
+        assert result.cumulative_effect_sd == np.sqrt(4.0 / 3.0)
+        assert result.relative_effect_lower == 1.0 / 9.0
+        assert result.relative_effect_upper == 0.25
+
+    def test_single_sample_degenerates_sd_to_zero(self):
+        """posterior sampleが1本ならs.d.はNaNではなく0に潰す."""
+        y_post = np.array([10.0, 12.0, 14.0])
+        predictions = np.array([[9.0, 11.0, 13.0]])
+
+        result = CausalAnalysis.compute_effects(
+            y_post=y_post,
+            predictions=predictions,
+            alpha=0.05,
+        )
+
+        assert np.array_equal(result.predictions_sd, np.zeros(3))
+        assert result.average_prediction_sd == 0.0
+        assert result.cumulative_prediction_sd == 0.0
+        assert result.average_effect_sd == 0.0
+        assert result.cumulative_effect_sd == 0.0
+        assert result.relative_effect_sd == 0.0
+
 
 class TestPointwiseCI:
     """各時点CI（R実装一致）テスト."""
diff --git a/tests/test_plot.py b/tests/test_plot.py
index 072153c..5940204 100644
--- a/tests/test_plot.py
+++ b/tests/test_plot.py
@@ -16,21 +16,34 @@ def _make_results_with_index():
     y = np.random.default_rng(42).normal(10, 1, t_total)
     time_index = pd.date_range("2020-01-01", periods=t_total, freq="D")
     results = CausalImpactResults(
+        actual=np.full(t_post, 12.0),
         point_effects=np.full(t_post, 2.0),
         point_effect_lower=np.full(t_post, 1.5),
         point_effect_upper=np.full(t_post, 2.5),
         ci_lower=1.0,
         ci_upper=3.0,
         point_effect_mean=2.0,
+        average_effect_sd=0.2,
         cumulative_effect=np.cumsum(np.full(t_post, 2.0)),
         cumulative_effect_lower=np.cumsum(np.full(t_post, 1.5)),
         cumulative_effect_upper=np.cumsum(np.full(t_post, 2.5)),
         cumulative_effect_total=60.0,
+        cumulative_effect_sd=6.0,
         relative_effect_mean=0.2,
+        relative_effect_sd=0.02,
+        relative_effect_lower=0.1,
+        relative_effect_upper=0.3,
         p_value=0.01,
         predictions_mean=np.full(t_post, 10.0),
+        predictions_sd=np.full(t_post, 0.5),
         predictions_lower=np.full(t_post, 9.0),
         predictions_upper=np.full(t_post, 11.0),
+        average_prediction_sd=0.5,
+        average_prediction_lower=9.0,
+        average_prediction_upper=11.0,
+        cumulative_prediction_sd=15.0,
+        cumulative_prediction_lower=270.0,
+        cumulative_prediction_upper=330.0,
     )
     return results, y, time_index, t_pre
 
diff --git a/tests/test_summary.py b/tests/test_summary.py
index d9885f0..cad75b5 100644
--- a/tests/test_summary.py
+++ b/tests/test_summary.py
@@ -50,10 +50,8 @@ def test_summary_default_format(self):
         assert "Cumulative" in text
         assert "2.0" in text or "2.00" in text
 
-    def test_summary_includes_r_style_actual_prediction_and_effect_sections_because_placeholder_rows_hide_valid_results(
-        self,
-    ):
-        """R互換の summary では Actual/Prediction/Absolute/Relative の各行を欠かさない."""
+    def test_summary_includes_r_style_sections(self):
+        """R互換summary: Actual/Prediction/Absolute/Relativeの各行を表示."""
         result = _make_results(effect=2.0, p_value=0.01)
 
         text = SummaryFormatter.summary(result, digits=2)

From a99020a586dc086a4944217a4c6290e398743944 Mon Sep 17 00:00:00 2001
From: YuminosukeSato <coffcoffeecoffee@gmail.com>
Date: Mon, 23 Mar 2026 00:52:57 +0900
Subject: [PATCH 3/3] style: remove Japanese docstrings and inline comments,
 use English only

---
 python/causal_impact/analysis.py | 96 +++++++++++---------------------
 python/causal_impact/summary.py  |  8 +--
 tests/test_analysis.py           | 63 ++++-----------------
 tests/test_plot.py               |  5 --
 tests/test_summary.py            | 12 +---
 5 files changed, 48 insertions(+), 136 deletions(-)

diff --git a/python/causal_impact/analysis.py b/python/causal_impact/analysis.py
index 2e9e14e..525ac50 100644
--- a/python/causal_impact/analysis.py
+++ b/python/causal_impact/analysis.py
@@ -11,45 +11,34 @@
 class CausalImpactResults:
     """Results of causal impact analysis."""
 
-    # Observed data
-    actual: np.ndarray  # (T_post,) observed y in post period
-
-    # Pointwise effects
-    point_effects: np.ndarray  # (T_post,) mean effect per time point
-    point_effect_lower: np.ndarray  # (T_post,) lower CI per time point
-    point_effect_upper: np.ndarray  # (T_post,) upper CI per time point
-    ci_lower: float  # lower CI bound on average effect
-    ci_upper: float  # upper CI bound on average effect
-    point_effect_mean: float  # mean of point effects across time
-    average_effect_sd: float  # std of per-sample average effects
-
-    # Cumulative effects
-    cumulative_effect: np.ndarray  # (T_post,) cumulative point effects
-    cumulative_effect_lower: np.ndarray  # (T_post,) lower cumulative CI
-    cumulative_effect_upper: np.ndarray  # (T_post,) upper cumulative CI
-    cumulative_effect_total: float  # total cumulative effect
-    cumulative_effect_sd: float  # std of per-sample cumulative effects
-
-    # Relative effects
-    relative_effect_mean: float  # relative effect (effect / predicted)
-    relative_effect_sd: float  # std of per-sample relative effects
-    relative_effect_lower: float  # lower CI on relative effect
-    relative_effect_upper: float  # upper CI on relative effect
-
-    # Significance
-    p_value: float  # Bayesian one-sided tail probability
-
-    # Counterfactual predictions
-    predictions_mean: np.ndarray  # (T_post,) mean counterfactual
-    predictions_sd: np.ndarray  # (T_post,) std of predictions per time point
-    predictions_lower: np.ndarray  # (T_post,) lower CI counterfactual
-    predictions_upper: np.ndarray  # (T_post,) upper CI counterfactual
-    average_prediction_sd: float  # std of per-sample average predictions
-    average_prediction_lower: float  # lower CI on average prediction
-    average_prediction_upper: float  # upper CI on average prediction
-    cumulative_prediction_sd: float  # std of per-sample cumulative predictions
-    cumulative_prediction_lower: float  # lower CI on cumulative prediction
-    cumulative_prediction_upper: float  # upper CI on cumulative prediction
+    actual: np.ndarray
+    point_effects: np.ndarray
+    point_effect_lower: np.ndarray
+    point_effect_upper: np.ndarray
+    ci_lower: float
+    ci_upper: float
+    point_effect_mean: float
+    average_effect_sd: float
+    cumulative_effect: np.ndarray
+    cumulative_effect_lower: np.ndarray
+    cumulative_effect_upper: np.ndarray
+    cumulative_effect_total: float
+    cumulative_effect_sd: float
+    relative_effect_mean: float
+    relative_effect_sd: float
+    relative_effect_lower: float
+    relative_effect_upper: float
+    p_value: float
+    predictions_mean: np.ndarray
+    predictions_sd: np.ndarray
+    predictions_lower: np.ndarray
+    predictions_upper: np.ndarray
+    average_prediction_sd: float
+    average_prediction_lower: float
+    average_prediction_upper: float
+    cumulative_prediction_sd: float
+    cumulative_prediction_lower: float
+    cumulative_prediction_upper: float
 
 
 class CausalAnalysis:
@@ -66,17 +55,10 @@ def compute_effects(
 
         n_samples = predictions.shape[0]
 
-        # Effect per sample per time point: observed - counterfactual
-        # predictions shape: (n_samples, t_post)
-        effects = y_post[np.newaxis, :] - predictions  # (n_samples, t_post)
+        effects = y_post[np.newaxis, :] - predictions
+        avg_effects = effects.mean(axis=1)
+        point_effects = effects.mean(axis=0)
 
-        # Average effect across time for each sample
-        avg_effects = effects.mean(axis=1)  # (n_samples,)
-
-        # Point effects: mean across samples at each time point
-        point_effects = effects.mean(axis=0)  # (t_post,)
-
-        # Summary-table CI on average effect uses sample-average quantiles.
         lower_q = alpha / 2
         upper_q = 1 - alpha / 2
         point_effect_lower = np.percentile(effects, 100 * lower_q, axis=0)
@@ -84,10 +66,8 @@ def compute_effects(
         ci_lower = float(np.percentile(avg_effects, 100 * lower_q))
         ci_upper = float(np.percentile(avg_effects, 100 * upper_q))
 
-        # Mean effect
         point_effect_mean = float(avg_effects.mean())
 
-        # Cumulative effect
         cumulative_effect = np.cumsum(point_effects)
         cum_effects_samples = np.cumsum(effects, axis=1)
         cumulative_effect_lower = np.percentile(
@@ -102,18 +82,15 @@ def compute_effects(
         )
         cumulative_effect_total = float(cumulative_effect[-1])
 
-        # Actual observed values
         actual = y_post.copy()
 
-        # Per-time-point std of predictions across samples
         if n_samples == 1:
             predictions_sd_arr = np.zeros(predictions.shape[1])
         else:
             predictions_sd_arr = np.std(predictions, axis=0, ddof=1)
 
-        # Prediction scalars (cross-sample aggregates)
-        avg_pred_per_sample = predictions.mean(axis=1)  # (n_samples,)
-        cum_pred_per_sample = predictions.sum(axis=1)  # (n_samples,)
+        avg_pred_per_sample = predictions.mean(axis=1)
+        cum_pred_per_sample = predictions.sum(axis=1)
 
         if n_samples == 1:
             average_prediction_sd = 0.0
@@ -135,8 +112,7 @@ def compute_effects(
             np.percentile(cum_pred_per_sample, 100 * upper_q)
         )
 
-        # Effect s.d. scalars
-        cum_effects_per_sample = effects.sum(axis=1)  # (n_samples,)
+        cum_effects_per_sample = effects.sum(axis=1)
 
         if n_samples == 1:
             average_effect_sd = 0.0
@@ -145,7 +121,6 @@ def compute_effects(
             average_effect_sd = float(np.std(avg_effects, ddof=1))
             cumulative_effect_sd = float(np.std(cum_effects_per_sample, ddof=1))
 
-        # Relative effect per sample
         avg_pred_per_sample_safe = np.where(
             np.abs(avg_pred_per_sample) > 1e-10,
             avg_pred_per_sample,
@@ -171,15 +146,12 @@ def compute_effects(
             np.percentile(rel_effects_per_sample, 100 * upper_q)
         )
 
-        # p-value: proportion of samples where average effect has opposite sign
         if point_effect_mean >= 0:
             p_value = float(np.mean(avg_effects < 0))
         else:
             p_value = float(np.mean(avg_effects > 0))
-        # Ensure minimum p-value of 1/n_samples
         p_value = max(p_value, 1.0 / n_samples)
 
-        # Counterfactual prediction summaries
         predictions_mean = predictions.mean(axis=0)
         predictions_lower = np.percentile(predictions, 100 * lower_q, axis=0)
         predictions_upper = np.percentile(predictions, 100 * upper_q, axis=0)
diff --git a/python/causal_impact/summary.py b/python/causal_impact/summary.py
index ed7c1fe..eb61575 100644
--- a/python/causal_impact/summary.py
+++ b/python/causal_impact/summary.py
@@ -6,23 +6,20 @@
 
 
 class SummaryFormatter:
-    """Format CausalImpact results as text summary or natural language report."""
+    """Format CausalImpact results as text summary or report."""
 
     @staticmethod
     def summary(results: CausalImpactResults, digits: int = 2) -> str:
         fmt = f".{digits}f"
 
-        # Actual
         avg_actual = format(results.actual.mean(), fmt)
         cum_actual = format(results.actual.sum(), fmt)
 
-        # Prediction
         avg_pred = format(results.predictions_mean.mean(), fmt)
         avg_pred_sd = format(results.average_prediction_sd, fmt)
         cum_pred = format(results.predictions_mean.sum(), fmt)
         cum_pred_sd = format(results.cumulative_prediction_sd, fmt)
 
-        # Prediction CI
         avg_pred_ci = (
             f"[{format(results.average_prediction_lower, fmt)}, "
             f"{format(results.average_prediction_upper, fmt)}]"
@@ -32,13 +29,11 @@ def summary(results: CausalImpactResults, digits: int = 2) -> str:
             f"{format(results.cumulative_prediction_upper, fmt)}]"
         )
 
-        # Absolute effect
         avg_eff = format(results.point_effect_mean, fmt)
         avg_eff_sd = format(results.average_effect_sd, fmt)
         cum_eff = format(results.cumulative_effect_total, fmt)
         cum_eff_sd = format(results.cumulative_effect_sd, fmt)
 
-        # Absolute effect CI
         avg_eff_ci = (
             f"[{format(results.ci_lower, fmt)}, {format(results.ci_upper, fmt)}]"
         )
@@ -47,7 +42,6 @@ def summary(results: CausalImpactResults, digits: int = 2) -> str:
             f"{format(results.cumulative_effect_upper[-1], fmt)}]"
         )
 
-        # Relative effect
         rel_m = format(results.relative_effect_mean * 100, fmt)
         rel_sd = format(results.relative_effect_sd * 100, fmt)
         rel_lo = format(results.relative_effect_lower * 100, fmt)
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 3646ad2..ec8af58 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -5,18 +5,13 @@
 
 
 def _make_predictions(n_samples, t_post, base, effect=0.0, noise_sd=0.1, seed=42):
-    """Helper: create synthetic predictions (counterfactual) and observed y_post."""
     rng = np.random.default_rng(seed)
-    # Counterfactual predictions (no effect)
     predictions = base + rng.normal(0, noise_sd, (n_samples, t_post))
-    # Observed = counterfactual + true effect
     y_post = base + effect + rng.normal(0, noise_sd, t_post)
     return predictions, y_post
 
 
 class TestEffectDetection:
-    """効果検出テスト."""
-
     def test_positive_effect_detected(self):
         predictions, y_post = _make_predictions(
             500, 20, base=10.0, effect=3.0, noise_sd=0.3
@@ -26,7 +21,7 @@ def test_positive_effect_detected(self):
         )
         assert isinstance(result, CausalImpactResults)
         assert result.point_effect_mean > 0
-        assert result.ci_lower > 0  # significant positive effect
+        assert result.ci_lower > 0
 
     def test_negative_effect_detected(self):
         predictions, y_post = _make_predictions(
@@ -36,7 +31,7 @@ def test_negative_effect_detected(self):
             y_post=y_post, predictions=predictions, alpha=0.05
         )
         assert result.point_effect_mean < 0
-        assert result.ci_upper < 0  # significant negative effect
+        assert result.ci_upper < 0
 
     def test_no_effect_zero(self):
         predictions, y_post = _make_predictions(
@@ -45,15 +40,11 @@ def test_no_effect_zero(self):
         result = CausalAnalysis.compute_effects(
             y_post=y_post, predictions=predictions, alpha=0.05
         )
-        # CI should contain zero
         assert result.ci_lower <= 0 <= result.ci_upper
 
 
 class TestCIAndPValue:
-    """信頼区間とp値のテスト."""
-
     def test_ci_coverage_95(self):
-        """95% CIが真の効果値を含むことを検証."""
         true_effect = 2.0
         predictions, y_post = _make_predictions(
             1000, 30, base=10.0, effect=true_effect, noise_sd=0.3
@@ -83,10 +74,7 @@ def test_p_value_not_significant(self):
 
 
 class TestCumulativeAndRelative:
-    """累積効果と相対効果."""""
-
     def test_cumulative_effect_monotone(self):
-        """正の効果のみの場合、累積効果は単調増加."""
         predictions, y_post = _make_predictions(
             500, 20, base=10.0, effect=5.0, noise_sd=0.1
         )
@@ -95,7 +83,7 @@ def test_cumulative_effect_monotone(self):
         )
         cum = result.cumulative_effect
         for i in range(1, len(cum)):
-            assert cum[i] >= cum[i - 1] - 1e-10  # allow tiny float errors
+            assert cum[i] >= cum[i - 1] - 1e-10
 
     def test_relative_effect_percentage(self):
         predictions, y_post = _make_predictions(
@@ -104,11 +92,9 @@ def test_relative_effect_percentage(self):
         result = CausalAnalysis.compute_effects(
             y_post=y_post, predictions=predictions, alpha=0.05
         )
-        # Relative effect ≈ 2.0/10.0 = 20%
         assert abs(result.relative_effect_mean - 0.2) < 0.1
 
     def test_summary_stats_use_posterior_sample_aggregates(self):
-        """summary行はposterior sample集約値(mean/sd/CI)を保持する."""
         y_post = np.array([10.0, 10.0])
         predictions = np.array(
             [
@@ -138,7 +124,6 @@ def test_summary_stats_use_posterior_sample_aggregates(self):
         assert result.relative_effect_upper == 0.25
 
     def test_single_sample_degenerates_sd_to_zero(self):
-        """posterior sampleが1本ならs.d.はNaNではなく0に潰す."""
         y_post = np.array([10.0, 12.0, 14.0])
         predictions = np.array([[9.0, 11.0, 13.0]])
 
@@ -157,10 +142,7 @@ def test_single_sample_degenerates_sd_to_zero(self):
 
 
 class TestPointwiseCI:
-    """各時点CI（R実装一致）テスト."""
-
     def test_pointwise_ci_bounds_shape(self):
-        """point_effect_lower/upper の形状は (t_post,)."""
         predictions, y_post = _make_predictions(
             200, 15, base=10.0, effect=2.0, noise_sd=0.3
         )
@@ -169,7 +151,6 @@ def test_pointwise_ci_bounds_shape(self):
         assert result.point_effect_upper.shape == (15,)
 
     def test_pointwise_ci_lower_le_mean_le_upper(self):
-        """各時点で lower <= point_effects <= upper が成立."""
         predictions, y_post = _make_predictions(
             200, 20, base=10.0, effect=2.0, noise_sd=0.3
         )
@@ -178,15 +159,13 @@ def test_pointwise_ci_lower_le_mean_le_upper(self):
         assert np.all(result.point_effect_upper >= result.point_effects - 1e-10)
 
     def test_pointwise_lower_le_upper(self):
-        """各時点で lower <= upper."""
         predictions, y_post = _make_predictions(
             200, 20, base=10.0, effect=0.0, noise_sd=0.5
         )
         result = CausalAnalysis.compute_effects(y_post=y_post, predictions=predictions)
         assert np.all(result.point_effect_lower <= result.point_effect_upper + 1e-10)
 
-    def test_summary_ci_uses_average_effect_quantiles_for_r_summary_table(self):
-        """R summary の平均CIは pointwise CI 平均ではなく sample 平均の分位点を使う."""
+    def test_summary_ci_uses_average_effect_quantiles(self):
         predictions, y_post = _make_predictions(
             300, 20, base=10.0, effect=3.0, noise_sd=0.3
         )
@@ -195,8 +174,7 @@ def test_summary_ci_uses_average_effect_quantiles_for_r_summary_table(self):
         assert result.ci_lower == float(np.percentile(sample_average_effects, 2.5))
         assert result.ci_upper == float(np.percentile(sample_average_effects, 97.5))
 
-    def test_alpha_01_is_wider_than_alpha_05_because_tail_mass_is_smaller(self):
-        """alpha=0.01のCIはalpha=0.05より広い."""
+    def test_alpha_01_wider_than_alpha_05(self):
         predictions, y_post = _make_predictions(
             500, 20, base=10.0, effect=2.0, noise_sd=0.3
         )
@@ -214,8 +192,7 @@ def test_alpha_01_is_wider_than_alpha_05_because_tail_mass_is_smaller(self):
         width_01 = r01.ci_upper - r01.ci_lower
         assert width_01 > width_05
 
-    def test_pointwise_quantile_average_differs_from_average_quantile(self):
-        """pointwise CI 平均と summary CI は別定義なので一致しないことがある."""
+    def test_pointwise_quantile_avg_differs_from_avg_quantile(self):
         y_post = np.array([0.0, 0.0])
         predictions = np.array(
             [
@@ -240,10 +217,9 @@ def test_pointwise_quantile_average_differs_from_average_quantile(self):
         assert float(result.point_effect_lower.mean()) == -4.0
         assert float(result.point_effect_lower.mean()) != legacy_ci_lower
 
-    def test_pointwise_and_summary_ci_diverge_exactly_in_two_sample_symmetric_case(
+    def test_pointwise_and_summary_ci_diverge_in_symmetric_case(
         self,
     ):
-        """対称な2サンプル2時点では pointwise CI と summary CI の差を手計算できる."""
         y_post = np.array([10.0, 10.0])
         predictions = np.array(
             [
@@ -266,10 +242,7 @@ def test_pointwise_and_summary_ci_diverge_exactly_in_two_sample_symmetric_case(
 
 
 class TestCumulativeCI:
-    """累積効果CIテスト."""
-
     def test_cumulative_ci_shape(self):
-        """cumulative_effect_lower/upper の形状は (t_post,)."""
         predictions, y_post = _make_predictions(
             200, 15, base=10.0, effect=2.0, noise_sd=0.3
         )
@@ -278,7 +251,6 @@ def test_cumulative_ci_shape(self):
         assert result.cumulative_effect_upper.shape == (15,)
 
     def test_cumulative_ci_lower_le_mean_le_upper(self):
-        """累積効果の各時点で lower <= cumulative_effect <= upper."""
         predictions, y_post = _make_predictions(
             200, 20, base=10.0, effect=2.0, noise_sd=0.3
         )
@@ -291,7 +263,6 @@ def test_cumulative_ci_lower_le_mean_le_upper(self):
         )
 
     def test_cumulative_lower_le_upper(self):
-        """累積効果CIで lower <= upper."""
         predictions, y_post = _make_predictions(
             200, 20, base=10.0, effect=0.0, noise_sd=0.5
         )
@@ -301,17 +272,14 @@ def test_cumulative_lower_le_upper(self):
         )
 
     def test_cumulative_ci_widens_over_time(self):
-        """正の効果では累積CI幅は時間と共に広がる（単調増加の傾向）."""
         predictions, y_post = _make_predictions(
             1000, 20, base=10.0, effect=0.0, noise_sd=1.0, seed=0
         )
         result = CausalAnalysis.compute_effects(y_post=y_post, predictions=predictions)
         widths = result.cumulative_effect_upper - result.cumulative_effect_lower
-        # 最終幅 > 最初の幅（累積誤差が積み上がるため）
         assert widths[-1] > widths[0]
 
-    def test_cumulative_ci_uses_quantiles_of_cumulative_sample_paths(self):
-        """累積CIは sample ごとの cumsum を作ってから各時点で分位点を取る."""
+    def test_cumulative_ci_uses_quantiles_of_cumsum_paths(self):
         y_post = np.array([10.0, 10.0])
         predictions = np.array(
             [
@@ -332,8 +300,6 @@ def test_cumulative_ci_uses_quantiles_of_cumulative_sample_paths(self):
 
 
 class TestBoundary:
-    """境界値テスト."""
-
     def test_single_post_point(self):
         predictions, y_post = _make_predictions(
             500, 1, base=10.0, effect=1.0, noise_sd=0.3
@@ -347,11 +313,9 @@ def test_single_post_point(self):
         assert len(result.point_effect_upper) == 1
         assert len(result.cumulative_effect_lower) == 1
         assert len(result.cumulative_effect_upper) == 1
-        # 1時点の場合: ci == pointwise == cumulative の最終値
         assert abs(result.ci_lower - float(result.point_effect_lower[0])) < 1e-10
 
     def test_two_samples_minimum(self):
-        """n_samples=2 の境界値: 分位点は2点の端値."""
         t_post = 5
         rng = np.random.default_rng(0)
         predictions = rng.normal(10.0, 0.5, (2, t_post))
@@ -359,12 +323,10 @@ def test_two_samples_minimum(self):
         result = CausalAnalysis.compute_effects(
             y_post=y_post, predictions=predictions, alpha=0.05
         )
-        # CI は存在する
         assert result.ci_lower < result.ci_upper
         assert result.point_effect_lower.shape == (t_post,)
 
     def test_single_sample_collapses_all_ci_widths_to_zero(self):
-        """サンプルが1本しかないと全CIは退化して平均値そのものになる."""
         y_post = np.array([10.0, 12.0, 14.0])
         predictions = np.array([[9.0, 11.0, 13.0]])
 
@@ -389,8 +351,7 @@ def test_single_sample_collapses_all_ci_widths_to_zero(self):
         assert result.ci_lower == result.ci_upper == 1.0
         assert result.p_value == 1.0
 
-    def test_identical_samples_make_ci_width_zero_because_every_path_matches(self):
-        """全サンプル同値なら分位点に幅は生じない."""
+    def test_identical_samples_make_ci_width_zero(self):
         y_post = np.array([10.0, 12.0, 14.0])
         predictions = np.tile(np.array([[9.0, 11.0, 13.0]]), (4, 1))
 
@@ -407,8 +368,7 @@ def test_identical_samples_make_ci_width_zero_because_every_path_matches(self):
         )
         assert result.ci_lower == result.ci_upper == 1.0
 
-    def test_alpha_zero_uses_full_range_for_pointwise_and_summary_quantiles(self):
-        """alpha=0 では pointwise は各時点の端値、summary は sample 平均の端値を返す."""
+    def test_alpha_zero_uses_full_range(self):
         y_post = np.array([10.0, 10.0])
         predictions = np.array(
             [
@@ -430,8 +390,7 @@ def test_alpha_zero_uses_full_range_for_pointwise_and_summary_quantiles(self):
         assert result.ci_lower == -0.5
         assert result.ci_upper == 0.5
 
-    def test_alpha_one_collapses_to_median_for_pointwise_and_summary_quantiles(self):
-        """alpha=1 では lower=upper=中央値になりCI幅がゼロになる."""
+    def test_alpha_one_collapses_to_median(self):
         y_post = np.array([10.0, 10.0])
         predictions = np.array(
             [
diff --git a/tests/test_plot.py b/tests/test_plot.py
index 5940204..125d6a6 100644
--- a/tests/test_plot.py
+++ b/tests/test_plot.py
@@ -6,7 +6,6 @@
 
 
 def _make_results_with_index():
-    """Create results and time index for plotting."""
     import pandas as pd
 
     t_pre = 70
@@ -49,8 +48,6 @@ def _make_results_with_index():
 
 
 class TestPlot:
-    """Plotのテスト."""
-
     def test_plot_3_panels(self):
         import matplotlib
 
@@ -75,11 +72,9 @@ def test_plot_vertical_line_at_intervention(self):
         matplotlib.use("Agg")
         results, y, time_index, t_pre = _make_results_with_index()
         fig = Plotter.plot(results, y, time_index, t_pre)
-        # Check that vertical lines exist in each subplot
         axes = fig.get_axes()
         for ax in axes:
             lines = ax.get_lines()
-            # At least the intervention vertical line should be present
             assert len(lines) >= 1
 
     def test_plot_pointwise_panel_draws_ci_band(self):
diff --git a/tests/test_summary.py b/tests/test_summary.py
index cad75b5..61a8e14 100644
--- a/tests/test_summary.py
+++ b/tests/test_summary.py
@@ -6,7 +6,6 @@
 
 
 def _make_results(effect=2.0, p_value=0.01):
-    """Create a CausalImpactResults fixture."""
     t_post = 10
     return CausalImpactResults(
         actual=np.full(t_post, 12.0),
@@ -41,8 +40,6 @@ def _make_results(effect=2.0, p_value=0.01):
 
 
 class TestSummaryFormat:
-    """summary出力フォーマット."""
-
     def test_summary_default_format(self):
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.summary(result, digits=2)
@@ -51,7 +48,6 @@ def test_summary_default_format(self):
         assert "2.0" in text or "2.00" in text
 
     def test_summary_includes_r_style_sections(self):
-        """R互換summary: Actual/Prediction/Absolute/Relativeの各行を表示."""
         result = _make_results(effect=2.0, p_value=0.01)
 
         text = SummaryFormatter.summary(result, digits=2)
@@ -68,22 +64,20 @@ def test_summary_includes_r_style_sections(self):
     def test_summary_report_format(self):
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.report(result)
-        # Report should be natural language
         assert len(text) > 100
         assert "effect" in text.lower() or "impact" in text.lower()
 
     def test_summary_digits_0(self):
         result = _make_results(effect=2.345, p_value=0.01)
         text = SummaryFormatter.summary(result, digits=0)
-        assert "2.345" not in text  # should be rounded
+        assert "2.345" not in text
 
     def test_summary_digits_10(self):
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.summary(result, digits=10)
         assert isinstance(text, str)
 
-    def test_summary_shows_cumulative_ci_in_95_percent_ci_row(self):
-        """Absolute effect の 95% CI 行 cumulative 列には最終時点の累積CIを表示する."""
+    def test_summary_shows_cumulative_ci_in_effect_row(self):
         result = _make_results(effect=2.0, p_value=0.01)
         text = SummaryFormatter.summary(result, digits=2)
         ci_line = text.split("\n")[8]
@@ -92,8 +86,6 @@ def test_summary_shows_cumulative_ci_in_95_percent_ci_row(self):
 
 
 class TestReportContent:
-    """レポート内容の検証."""
-
     def test_report_significant_effect(self):
         result = _make_results(effect=3.0, p_value=0.001)
         text = SummaryFormatter.report(result)