Skip to content

Commit 65600d4

Browse files
committed
fix(sktime-quant): enforce rule provenance/errors, add classifier status+cache, trim heavy sample data
1 parent e753f67 commit 65600d4

8 files changed

Lines changed: 101 additions & 11653 deletions

File tree

sktime_quant/Ingest-outside-code/smalldata/^INDIAVIX.csv

Lines changed: 0 additions & 3963 deletions
This file was deleted.

sktime_quant/Ingest-outside-code/smalldata/^NSEBANK.csv

Lines changed: 0 additions & 3688 deletions
This file was deleted.

sktime_quant/Ingest-outside-code/smalldata/^NSEI.csv

Lines changed: 0 additions & 3964 deletions
This file was deleted.

sktime_quant/backtest/walkforward.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from sktime_quant.risk.metrics import max_drawdown
1919
from sktime_quant.strategy.blender import blend_signals
2020
from sktime_quant.strategy.classifier import predict_classifier_signal_at
21-
from sktime_quant.strategy.rule_dsl import evaluate_rules_signal, load_rules_yaml
21+
from sktime_quant.strategy.rule_dsl import evaluate_rules_signal
2222

2323

2424
@dataclass(slots=True)
@@ -251,12 +251,6 @@ def run(
251251
assets_list = sorted(market["asset"].astype(str).unique().tolist())
252252
strategy_mode = str(getattr(strategy_config, "mode", "forecast_only"))
253253
strategy_rules = list(getattr(strategy_config, "rules", []) or [])
254-
strategy_rules_path = getattr(strategy_config, "rules_path", None)
255-
if strategy_rules_path:
256-
try:
257-
strategy_rules = load_rules_yaml(strategy_rules_path)
258-
except Exception:
259-
pass
260254
rule_chain = str(getattr(strategy_config, "rule_chain", "any"))
261255
classifier_type = str(getattr(strategy_config, "classifier_type", "random_forest"))
262256
classifier_min_train = int(
@@ -281,6 +275,7 @@ def run(
281275
.set_index("timestamp")["close"]
282276
.astype(float)
283277
)
278+
classifier_cache: dict[tuple[str, str, str, int, float], tuple[int, float, str]] = {}
284279
y = self._series_for_asset(market, asset)
285280
if len(y) <= backtest_config.window_length + backtest_config.horizon + 2:
286281
continue
@@ -436,6 +431,7 @@ def run(
436431
signals_classifier: list[int] = []
437432
signals_blended: list[int] = []
438433
classifier_confidence_vals: list[float] = []
434+
classifier_status_vals: list[str] = []
439435
interval_sources: list[str] = []
440436

441437
for _, row in result.iterrows():
@@ -468,14 +464,25 @@ def run(
468464
chain=rule_chain,
469465
default_signal=0,
470466
)
471-
classifier_signal, cls_conf = predict_classifier_signal_at(
472-
feature_frame=strategy_feature_frame,
473-
close_series=strategy_close,
474-
cutoff=cutoff,
475-
classifier_type=classifier_type,
476-
min_train_samples=classifier_min_train,
477-
probability_threshold=classifier_prob_threshold,
467+
cache_key = (
468+
str(asset),
469+
pd.Timestamp(cutoff).isoformat(),
470+
classifier_type,
471+
classifier_min_train,
472+
round(classifier_prob_threshold, 4),
478473
)
474+
cached = classifier_cache.get(cache_key)
475+
if cached is None:
476+
cached = predict_classifier_signal_at(
477+
feature_frame=strategy_feature_frame,
478+
close_series=strategy_close,
479+
cutoff=cutoff,
480+
classifier_type=classifier_type,
481+
min_train_samples=classifier_min_train,
482+
probability_threshold=classifier_prob_threshold,
483+
)
484+
classifier_cache[cache_key] = cached
485+
classifier_signal, cls_conf, cls_status = cached
479486
blended_signal = blend_signals(
480487
forecast_signal=signal,
481488
rule_signal=rule_signal,
@@ -512,6 +519,7 @@ def run(
512519
signals_classifier.append(classifier_signal)
513520
signals_blended.append(blended_signal)
514521
classifier_confidence_vals.append(cls_conf)
522+
classifier_status_vals.append(cls_status)
515523
interval_sources.append(interval_source)
516524

517525
result["fold_return"] = fold_returns
@@ -524,6 +532,7 @@ def run(
524532
result["signal_classifier"] = signals_classifier
525533
result["signal_blended"] = signals_blended
526534
result["classifier_confidence"] = classifier_confidence_vals
535+
result["classifier_status"] = classifier_status_vals
527536
result["interval_source"] = interval_sources
528537

529538
returns = result["fold_return"].fillna(0.0)
@@ -543,12 +552,17 @@ def run(
543552

544553
excluded = False
545554
exclusion_reason = ""
555+
classifier_status_series = result["classifier_status"].astype(str)
556+
classifier_missing = bool((classifier_status_series == "sklearn_missing").any())
546557
if failure_rate > backtest_config.max_failure_rate:
547558
excluded = True
548559
exclusion_reason = "high_failure_rate"
549560
elif empirical_coverage < backtest_config.confidence_floor:
550561
excluded = True
551562
exclusion_reason = "low_empirical_coverage"
563+
elif strategy_mode in {"classifier_only", "blended"} and classifier_missing:
564+
excluded = True
565+
exclusion_reason = "classifier_unavailable"
552566

553567
metrics_rows.append(
554568
{
@@ -570,6 +584,8 @@ def run(
570584
"strategy_mode": strategy_mode,
571585
"blend_policy": blend_policy,
572586
"classifier_type": classifier_type,
587+
"classifier_unavailable": classifier_missing,
588+
"classifier_status_counts": classifier_status_series.value_counts().to_dict(),
573589
}
574590
)
575591

@@ -587,6 +603,7 @@ def run(
587603
"signal_classifier",
588604
"signal_blended",
589605
"classifier_confidence",
606+
"classifier_status",
590607
"interval_source",
591608
]
592609
].copy()

sktime_quant/pipelines/orchestrator.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
)
3434
from sktime_quant.portfolio.optimizer import AllocationResult, PortfolioEngine
3535
from sktime_quant.reporting.run_report import write_run_report
36-
from sktime_quant.strategy.rule_dsl import save_rules_yaml
36+
from sktime_quant.strategy.rule_dsl import load_rules_yaml, save_rules_yaml
3737

3838

3939
@dataclass(slots=True)
@@ -210,6 +210,24 @@ def _effective_data_config(self, cfg: AppConfig, paths: dict[str, Path]):
210210
return cfg.data
211211
return replace(cfg.data, start=prev_ts)
212212

213+
def _resolve_effective_strategy_config(self, cfg: AppConfig) -> tuple[object, list[dict[str, object]], str]:
214+
rules_source = "inline"
215+
effective_rules = list(cfg.strategy.rules or [])
216+
if cfg.strategy.rules_path:
217+
if not Path(cfg.strategy.rules_path).exists():
218+
raise ValueError(
219+
f"Failed to load strategy rules from path '{cfg.strategy.rules_path}': FileNotFoundError: file does not exist"
220+
)
221+
try:
222+
effective_rules = load_rules_yaml(cfg.strategy.rules_path)
223+
rules_source = f"path:{cfg.strategy.rules_path}"
224+
except Exception as exc:
225+
raise ValueError(
226+
f"Failed to load strategy rules from path '{cfg.strategy.rules_path}': {type(exc).__name__}: {exc}"
227+
) from exc
228+
strategy_cfg = replace(cfg.strategy, rules=effective_rules)
229+
return strategy_cfg, effective_rules, rules_source
230+
213231
def _write_incremental_state(self, cfg: AppConfig, paths: dict[str, Path], market: pd.DataFrame) -> None:
214232
if not cfg.data.incremental_mode or market.empty:
215233
return
@@ -359,6 +377,7 @@ def notify(payload: dict[str, object]) -> None:
359377

360378
notify({"stage": "start", "event": "run_start", "run_id": cfg.run_id})
361379
paths = self._artifact_paths(cfg)
380+
strategy_cfg, effective_rules, strategy_rules_source = self._resolve_effective_strategy_config(cfg)
362381
notify({"stage": "data", "event": "loading_data"})
363382
effective_data_cfg = self._effective_data_config(cfg, paths)
364383
market, exog = self.data_provider.load_history(effective_data_cfg)
@@ -373,15 +392,13 @@ def notify(payload: dict[str, object]) -> None:
373392
min_points_for_freq=cfg.execution.data_quality_min_points_for_freq,
374393
)
375394
paths["data_quality"].write_text(json.dumps(data_quality, indent=2), encoding="utf-8")
376-
strategy_payload = asdict(cfg.strategy)
395+
strategy_payload = asdict(strategy_cfg)
396+
strategy_payload["rules_source"] = strategy_rules_source
377397
paths["strategy_config"].write_text(
378398
json.dumps(strategy_payload, indent=2, default=str),
379399
encoding="utf-8",
380400
)
381-
try:
382-
save_rules_yaml(paths["strategy_rules"], cfg.strategy.rules)
383-
except Exception:
384-
pass
401+
save_rules_yaml(paths["strategy_rules"], effective_rules)
385402
notify(
386403
{
387404
"stage": "data",
@@ -418,11 +435,12 @@ def notify(payload: dict[str, object]) -> None:
418435
"model_governance_path": str(paths["model_governance"]),
419436
"strategy_config_path": str(paths["strategy_config"]),
420437
"strategy_rules_path": str(paths["strategy_rules"]),
438+
"strategy_rules_source": strategy_rules_source,
421439
"timestamp_utc": datetime.now(UTC).isoformat(),
422440
"allocation_diagnostics": {},
423441
"execution_diagnostics": self.order_exporter._empty_diagnostics(),
424442
"governance_alert_count": 0,
425-
"strategy_mode": cfg.strategy.mode,
443+
"strategy_mode": strategy_cfg.mode,
426444
"run_status": "no_new_data",
427445
"message": "No rows available after applying ingestion filters/incremental window.",
428446
}
@@ -475,7 +493,7 @@ def notify(payload: dict[str, object]) -> None:
475493
model_names=candidate_models,
476494
backtest_config=cfg.backtest,
477495
exog=exog_model,
478-
strategy_config=cfg.strategy,
496+
strategy_config=strategy_cfg,
479497
holiday_by_asset=holiday_by_asset,
480498
progress_hook=progress_hook,
481499
)
@@ -570,8 +588,9 @@ def notify(payload: dict[str, object]) -> None:
570588
"execution_diagnostics": order_diagnostics,
571589
"strategy_config_path": str(paths["strategy_config"]),
572590
"strategy_rules_path": str(paths["strategy_rules"]),
573-
"strategy_mode": cfg.strategy.mode,
574-
"strategy_blend_policy": cfg.strategy.blend_policy,
591+
"strategy_rules_source": strategy_rules_source,
592+
"strategy_mode": strategy_cfg.mode,
593+
"strategy_blend_policy": strategy_cfg.blend_policy,
575594
"forecast_update_mode": cfg.model.update_mode,
576595
"forecast_update_status_counts": forecast.predictions.get(
577596
"update_status", pd.Series(dtype=str)

sktime_quant/strategy/classifier.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,54 +39,54 @@ def predict_classifier_signal_at(
3939
classifier_type: str = "random_forest",
4040
min_train_samples: int = 30,
4141
probability_threshold: float = 0.55,
42-
) -> tuple[int, float]:
42+
) -> tuple[int, float, str]:
4343
if feature_frame is None or feature_frame.empty or close_series is None or close_series.empty:
44-
return 0, 0.0
44+
return 0, 0.0, "no_features_or_close"
4545

4646
frame = feature_frame.copy()
4747
if not isinstance(frame.index, pd.DatetimeIndex):
48-
return 0, 0.0
48+
return 0, 0.0, "invalid_feature_index"
4949
frame = frame.sort_index()
5050
close = close_series.copy().sort_index()
5151
if not isinstance(close.index, pd.DatetimeIndex):
52-
return 0, 0.0
52+
return 0, 0.0, "invalid_close_index"
5353

5454
common = frame.index.intersection(close.index)
5555
if len(common) < max(5, int(min_train_samples)):
56-
return 0, 0.0
56+
return 0, 0.0, "insufficient_common_samples"
5757

5858
frame = frame.loc[common]
5959
y_cls = _build_label_from_close(close.loc[common])
6060
data = frame.copy()
6161
data["target"] = y_cls
6262
data = data.dropna(how="any")
6363
if data.empty:
64-
return 0, 0.0
64+
return 0, 0.0, "empty_after_dropna"
6565

6666
cutoff = pd.Timestamp(cutoff)
6767
train = data[data.index < cutoff]
6868
test = data[data.index == cutoff]
6969
if test.empty:
7070
prior = data[data.index <= cutoff]
7171
if prior.empty:
72-
return 0, 0.0
72+
return 0, 0.0, "no_test_row"
7373
test = prior.tail(1)
7474
train = data[data.index < test.index[0]]
7575

7676
if len(train) < max(5, int(min_train_samples)):
77-
return 0, 0.0
77+
return 0, 0.0, "insufficient_train_samples"
7878

7979
x_cols = [c for c in train.columns if c != "target"]
8080
if not x_cols:
81-
return 0, 0.0
81+
return 0, 0.0, "no_feature_columns"
8282

8383
x_train = train[x_cols]
8484
y_train = train["target"].astype(int)
8585
x_test = test[x_cols]
8686

8787
model = _make_classifier(classifier_type)
8888
if model is None:
89-
return 0, 0.0
89+
return 0, 0.0, "sklearn_missing"
9090
try:
9191
model.fit(x_train, y_train)
9292
pred = int(model.predict(x_test)[0])
@@ -98,7 +98,7 @@ def predict_classifier_signal_at(
9898
else:
9999
conf = 0.5
100100
if conf < float(probability_threshold):
101-
return 0, conf
102-
return pred if pred in {-1, 0, 1} else 0, conf
101+
return 0, conf, "low_confidence"
102+
return (pred if pred in {-1, 0, 1} else 0), conf, "ok"
103103
except Exception:
104-
return 0, 0.0
104+
return 0, 0.0, "classifier_error"

sktime_quant/tests/test_orchestrator_integration.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pandas as pd
22
import json
33
from pathlib import Path
4+
import pytest
45

56
from sktime_quant.config.schema import AppConfig
67
from sktime_quant.pipelines.orchestrator import Orchestrator
@@ -50,6 +51,7 @@ def test_orchestrator_end_to_end_csv(tmp_path):
5051
assert "strategy_mode" in summary
5152
assert "strategy_config_path" in summary
5253
assert "strategy_rules_path" in summary
54+
assert "strategy_rules_source" in summary
5355
governance = json.loads((tmp_path / "results" / "reports" / "it_run_model_governance.json").read_text(encoding="utf-8"))
5456
assert "alerts" in governance
5557

@@ -84,6 +86,29 @@ def test_orchestrator_handles_no_new_data_incremental_window(tmp_path):
8486
assert result.report_path.endswith(".md")
8587

8688

89+
def test_orchestrator_raises_on_invalid_rules_path(tmp_path):
90+
n = 80
91+
df = pd.DataFrame(
92+
{
93+
"timestamp": pd.date_range("2023-01-01", periods=n, freq="D"),
94+
"asset": ["A"] * n,
95+
"close": [100 + i * 0.1 for i in range(n)],
96+
}
97+
)
98+
csv_path = tmp_path / "market.csv"
99+
df.to_csv(csv_path, index=False)
100+
101+
cfg = AppConfig()
102+
cfg.run_id = "it_bad_rules"
103+
cfg.data.source_type = "csv"
104+
cfg.data.csv_path = str(csv_path)
105+
cfg.execution.output_dir = str(tmp_path / "results")
106+
cfg.strategy.rules_path = str(tmp_path / "missing_rules.yaml")
107+
108+
with pytest.raises(ValueError, match="Failed to load strategy rules"):
109+
Orchestrator().run(cfg)
110+
111+
87112
def test_data_quality_reports_frequency_drift_and_missing_bars(tmp_path):
88113
base_dates = pd.date_range("2023-01-01", periods=40, freq="D")
89114
a_dates = base_dates.delete(10) # introduce one missing bar in daily sequence

sktime_quant/tests/test_strategy_engine.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def test_classifier_predict_signal_at():
4747
},
4848
index=idx,
4949
)
50-
sig, conf = predict_classifier_signal_at(
50+
sig, conf, status = predict_classifier_signal_at(
5151
feature_frame=features,
5252
close_series=close,
5353
cutoff=idx[100],
@@ -57,6 +57,7 @@ def test_classifier_predict_signal_at():
5757
)
5858
assert sig in {-1, 0, 1}
5959
assert 0.0 <= conf <= 1.0
60+
assert isinstance(status, str)
6061

6162

6263
def test_walkforward_strategy_modes_emit_signal_columns():
@@ -90,3 +91,4 @@ def test_walkforward_strategy_modes_emit_signal_columns():
9091
assert "signal_rule" in result.fold_predictions.columns
9192
assert "signal_classifier" in result.fold_predictions.columns
9293
assert "signal_blended" in result.fold_predictions.columns
94+
assert "classifier_status" in result.fold_predictions.columns

0 commit comments

Comments (0)