feat: support dynamic mode decomposition calibrator (#1053)

Archerkattri · web-flow · commit 6cd559c07be4 · 2026-06-14T23:40:28.000+08:00
* Add a Dynamic Mode Decomposition (Prony) exponential-basis calibrator (`calibrator_type="dmd"`)

* Address review: cache the DMD fit, decouple batch items, guard fp16 overflow

- Cache the horizon-free DMD eigendecomposition per snapshot window
  (DMDState._fit / _fit_key, invalidated when a new snapshot arrives). Skip
  steps now reuse one SVD/eig instead of recomputing it every step, which is
  what restores the intended cache speedup at large fresh intervals.
- Fit DMD independently per batch item (axis 0). Flattening folded the batch
  into one state, so a prompt's forecast depended on the other prompts in the
  batch; per-item fitting keeps them independent like the Taylor path.
- Move the finite check after the output-dtype cast: a finite float64 forecast
  can still overflow to inf in fp16, so the cast result is what gets guarded.
- yapf / docformatter clean (fixes the failing pre-commit CI check).

* Add 'dmd' to the example generate CLI (--dmd / --dmd-history)

Per review: enable the DMD calibrator from `python -m cache_dit.generate`
exactly like --taylorseer. --dmd selects DMDCalibratorConfig (history via
--dmd-history, default 6); --taylorseer is unchanged.

Verified end-to-end:
  python -m cache_dit.generate flux --cache --dmd --cpu-offload
generates with the DMD calibrator active (optimization tag ...DMDH6_S12, image saved).
diff --git a/src/cache_dit/__init__.py b/src/cache_dit/__init__.py
@@ -44,6 +44,7 @@
 from .caching import CalibratorConfig
 from .caching import TaylorSeerCalibratorConfig
 from .caching import FoCaCalibratorConfig
+from .caching import DMDCalibratorConfig
 from .caching import supported_pipelines
 from .caching import get_adapter
 from .caching import BlockAdapterRegister
diff --git a/src/cache_dit/_utils/utils.py b/src/cache_dit/_utils/utils.py
@@ -18,6 +18,7 @@
 from ..caching import (
   BlockAdapter,
   DBCacheConfig,
+  DMDCalibratorConfig,
   TaylorSeerCalibratorConfig,
   load_configs,
   load_parallelism_config,
@@ -309,6 +310,18 @@ def get_args(parse: bool = True, ) -> argparse.ArgumentParser | argparse.Namespa
     default=1,
     help="TaylorSeer order",
   )
+  parser.add_argument(
+    "--dmd",
+    action="store_true",
+    default=False,
+    help="Enable DMD (Dynamic Mode Decomposition / Prony) exponential-basis calibrator for CacheDiT",
+  )
+  parser.add_argument(
+    "--dmd-history",
+    type=int,
+    default=6,
+    help="DMD snapshot-history window length",
+  )
   parser.add_argument(
     "--steps-mask",
     action="store_true",
@@ -2282,8 +2295,9 @@ def _prepare_distributed_size():
           force_refresh_step_hint=kwargs.get("force_refresh_step_hint", None),
           force_refresh_step_policy=kwargs.get("force_refresh_step_policy", "once"),
         ) if cache_config is None and args.cache else cache_config),
-        calibrator_config=(TaylorSeerCalibratorConfig(taylorseer_order=args.taylorseer_order, )
-                           if args.taylorseer else None),
+        calibrator_config=(DMDCalibratorConfig(
+          dmd_history=args.dmd_history) if args.dmd else TaylorSeerCalibratorConfig(
+            taylorseer_order=args.taylorseer_order) if args.taylorseer else None),
         params_modifiers=kwargs.get("params_modifiers", None),
         parallelism_config=(ParallelismConfig(
           ulysses_size=ulysses_size,
diff --git a/src/cache_dit/caching/__init__.py b/src/cache_dit/caching/__init__.py
@@ -21,6 +21,7 @@
 from .cache_contexts import CalibratorConfig
 from .cache_contexts import TaylorSeerCalibratorConfig
 from .cache_contexts import FoCaCalibratorConfig
+from .cache_contexts import DMDCalibratorConfig
 
 from .cache_blocks import CachedBlocks
 from .cache_blocks import PrunedBlocks
diff --git a/src/cache_dit/caching/cache_contexts/__init__.py b/src/cache_dit/caching/cache_contexts/__init__.py
@@ -4,6 +4,7 @@
   CalibratorConfig,
   TaylorSeerCalibratorConfig,
   FoCaCalibratorConfig,
+  DMDCalibratorConfig,
 )
 from .cache_config import (
   BasicCacheConfig,
diff --git a/src/cache_dit/caching/cache_contexts/calibrators/__init__.py b/src/cache_dit/caching/cache_contexts/calibrators/__init__.py
@@ -1,6 +1,7 @@
 from .base import CalibratorBase
 from .taylorseer import TaylorSeerCalibrator
 from .foca import FoCaCalibrator
+from .dmd import DMDCalibrator
 
 import dataclasses
 from typing import Any, Dict
@@ -153,6 +154,70 @@ def to_kwargs(self) -> Dict:
     return kwargs
 
 
+@dataclasses.dataclass
+class DMDCalibratorConfig(CalibratorConfig):
+  """Config for the Dynamic Mode Decomposition (Prony) forecasting calibrator.
+
+  An EXPONENTIAL-basis alternative to TaylorSeer's polynomial forecast: DMD (Schmid 2010; the SVD-
+  regularised generalisation of Prony's method) identifies the linear propagator of the cached
+  feature stream from recent compute-step snapshots and extrapolates by eigenvalue powers — exact on
+  the (locally) exponential trajectories diffusion features follow, where a polynomial diverges with
+  the cache interval. NOT Distribution Matching Distillation.
+  """
+
+  # enable_calibrator (`bool`, *required*, defaults to True):
+  #     Whether to enable the calibrator. If True, the user wants to use DBCache
+  #     with a specific calibrator for hidden_states (or the hidden_states residual),
+  #     such as taylorseer, foca, dmd, and so on.
+  enable_calibrator: bool = True
+  # enable_encoder_calibrator (`bool`, *required*, defaults to True):
+  #     Whether to enable the calibrator. If True, the user wants to use DBCache
+  #     with a specific calibrator for encoder_hidden_states (or the
+  #     encoder_hidden_states residual), such as taylorseer, foca, dmd, and so on.
+  enable_encoder_calibrator: bool = True
+  # calibrator_type (`str`, *required*,  defaults to 'dmd'):
+  #    The specific type for calibrator, taylorseer, foca or dmd, etc.
+  calibrator_type: str = "dmd"
+  # dmd_history (`int`, *required*, defaults to 6):
+  #    Number of recent compute-step snapshots retained per stream. >= 4 uniformly
+  #    spaced snapshots are needed before the exponential fit engages (one complex
+  #    pole costs two real degrees of freedom); below the floor the calibrator
+  #    falls back to the Taylor expansion automatically. 5-6 is the sweet spot —
+  #    the feature dynamics drift across timesteps, so longer windows hurt.
+  dmd_history: int = 6
+  # dmd_rank (`int`, *optional*, defaults to 0):
+  #    SVD truncation rank of the snapshot matrix; 0 selects it from the spectrum
+  #    (drop modes below 1e-4 of the leading singular value). The truncation is
+  #    what rejects the noise subspace.
+  dmd_rank: int = 0
+  # dmd_ridge (`float`, *optional*, defaults to 1e-8):
+  #    Tikhonov term added to the inverted singular values.
+  dmd_ridge: float = 1e-8
+
+  def strify(self, **kwargs) -> str:
+    """Return a compact tag that includes the snapshot-history length.
+
+    :param kwargs: Optional flags. Only `details` is read: when truthy, the verbose
+      `DMD_H(<history>)` form is returned instead of the compact `DMDH<history>` one.
+    :returns: A compact DMD tag for logs, summaries, or filenames.
+    """
+
+    if kwargs.get("details", False):
+      return f"DMD_H({self.dmd_history})"
+    return f"DMDH{self.dmd_history}"
+
+  def to_kwargs(self) -> Dict:
+    """Translate config fields into `DMDCalibrator` init kwargs.
+
+    :returns: Keyword arguments expected by `DMDCalibrator`.
+    """
+
+    kwargs = self.calibrator_kwargs.copy()
+    kwargs["history"] = self.dmd_history
+    kwargs["rank"] = self.dmd_rank
+    kwargs["ridge"] = self.dmd_ridge
+    return kwargs
+
+
 @dataclasses.dataclass
 class FoCaCalibratorConfig(CalibratorConfig):
   """Config placeholder for the future FoCa calibrator backend."""
@@ -183,6 +248,7 @@ class Calibrator:
 
   _supported_calibrators = [
     "taylorseer",
+    "dmd",
     # TODO: FoCa
   ]
 
@@ -201,5 +267,7 @@ def __new__(
 
     if calibrator_config.calibrator_type.lower() == "taylorseer":
       return TaylorSeerCalibrator(**calibrator_config.to_kwargs())
+    elif calibrator_config.calibrator_type.lower() == "dmd":
+      return DMDCalibrator(**calibrator_config.to_kwargs())
     else:
       raise ValueError(f"Calibrator {calibrator_config.calibrator_type} is not supported now!")
diff --git a/src/cache_dit/caching/cache_contexts/calibrators/dmd.py b/src/cache_dit/caching/cache_contexts/calibrators/dmd.py

Original file line number	Diff line number	Diff line change
`@@ -4,6 +4,7 @@`
`4`	`4`	`CalibratorConfig,`
`5`	`5`	`TaylorSeerCalibratorConfig,`
`6`	`6`	`FoCaCalibratorConfig,`
	`7`	`+ DMDCalibratorConfig,`
`7`	`8`	`)`
`8`	`9`	`from .cache_config import (`
`9`	`10`	`BasicCacheConfig,`