Feat: Allow specifying a minimum number of intervals per model to include in a plan

erindru · erindru · commit 3a1a7c350bb5 · 2025-06-30T04:39:44.000Z
diff --git a/examples/multi/repo_1/linter/__init__.py b/examples/multi/repo_1/linter/__init__.py
diff --git a/sqlmesh/cli/main.py b/sqlmesh/cli/main.py
@@ -513,6 +513,11 @@ def diff(ctx: click.Context, environment: t.Optional[str] = None) -> None:
     help="Explain the plan instead of applying it.",
     default=None,
 )
+@click.option(
+    "--min-intervals",
+    default=0,
+    help="For every model, ensure at least this many intervals are covered by a missing intervals check regardless of the plan start date",
+)
 @opt.verbose
 @click.pass_context
 @error_handler
diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py
@@ -46,6 +46,7 @@
 from pathlib import Path
 from shutil import rmtree
 from types import MappingProxyType
+from datetime import datetime
 
 from sqlglot import Dialect, exp
 from sqlglot.helper import first
@@ -126,6 +127,8 @@
     format_tz_datetime,
     now_timestamp,
     now,
+    to_datetime,
+    make_exclusive,
 )
 from sqlmesh.utils.errors import (
     CircuitBreakerError,
@@ -1215,6 +1218,7 @@ def plan(
         diff_rendered: t.Optional[bool] = None,
         skip_linter: t.Optional[bool] = None,
         explain: t.Optional[bool] = None,
+        min_intervals: t.Optional[int] = None,
     ) -> Plan:
         """Interactively creates a plan.
 
@@ -1261,6 +1265,8 @@ def plan(
             diff_rendered: Whether the diff should compare raw vs rendered models
             skip_linter: Linter runs by default so this will skip it if enabled
             explain: Whether to explain the plan instead of applying it.
+            min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered
+                on every model when checking for missing intervals
 
         Returns:
             The populated Plan object.
@@ -1289,6 +1295,7 @@ def plan(
             diff_rendered=diff_rendered,
             skip_linter=skip_linter,
             explain=explain,
+            min_intervals=min_intervals,
         )
 
         plan = plan_builder.build()
@@ -1338,6 +1345,7 @@ def plan_builder(
         diff_rendered: t.Optional[bool] = None,
         skip_linter: t.Optional[bool] = None,
         explain: t.Optional[bool] = None,
+        min_intervals: t.Optional[int] = None,
     ) -> PlanBuilder:
         """Creates a plan builder.
 
@@ -1374,6 +1382,8 @@ def plan_builder(
             enable_preview: Indicates whether to enable preview for forward-only models in development environments.
             run: Whether to run latest intervals as part of the plan application.
             diff_rendered: Whether the diff should compare raw vs rendered models
+            min_intervals: Adjust the plan start date on a per-model basis in order to ensure at least this many intervals are covered
+                on every model when checking for missing intervals
 
         Returns:
             The plan builder.
@@ -1401,6 +1411,7 @@ def plan_builder(
             "run": run,
             "diff_rendered": diff_rendered,
             "skip_linter": skip_linter,
+            "min_intervals": min_intervals,
         }
         user_provided_flags: t.Dict[str, UserProvidedFlags] = {
             k: v for k, v in kwargs.items() if v is not None
@@ -1523,6 +1534,15 @@ def plan_builder(
             # Refresh snapshot intervals to ensure that they are up to date with values reflected in the max_interval_end_per_model.
             self.state_sync.refresh_snapshot_intervals(context_diff.snapshots.values())
 
+        start_override_per_model = self._calculate_start_override_per_model(
+            min_intervals,
+            start or default_start,
+            end or default_end,
+            execution_time or now(),
+            backfill_models,
+            snapshots,
+        )
+
         return self.PLAN_BUILDER_TYPE(
             context_diff=context_diff,
             start=start,
@@ -1553,6 +1573,7 @@ def plan_builder(
             ),
             end_bounded=not run,
             ensure_finalized_snapshots=self.config.plan.use_finalized_state,
+            start_override_per_model=start_override_per_model,
             interval_end_per_model=max_interval_end_per_model,
             console=self.console,
             user_provided_flags=user_provided_flags,
@@ -2864,6 +2885,58 @@ def _get_plan_default_start_end(
 
         return default_start, default_end
 
+    def _calculate_start_override_per_model(
+        self,
+        min_intervals: t.Optional[int],
+        plan_start: t.Optional[TimeLike],
+        plan_end: t.Optional[TimeLike],
+        plan_execution_time: TimeLike,
+        backfill_model_fqns: t.Optional[t.Set[str]],
+        snapshots_by_model_fqn: t.Dict[str, Snapshot],
+    ) -> t.Dict[str, datetime]:
+        """Compute per-model start overrides so that `min_intervals` intervals are covered."""
+        if not min_intervals or not backfill_model_fqns or not plan_start:
+            # Nothing to override when: no minimum number of intervals was requested, there are no models to backfill
+            # (so no intervals to consider), or the plan has no start date (so all intervals are already considered)
+            return {}
+
+        start_overrides = {}
+
+        plan_execution_time_dt = to_datetime(plan_execution_time)
+        plan_start_dt = to_datetime(plan_start, relative_base=plan_execution_time_dt)
+        plan_end_dt = to_datetime(
+            plan_end or plan_execution_time_dt, relative_base=plan_execution_time_dt
+        )
+
+        for model_fqn in backfill_model_fqns:
+            snapshot = snapshots_by_model_fqn.get(model_fqn)
+            if not snapshot:
+                continue
+
+            starting_point = plan_end_dt
+            if node_end := snapshot.node.end:
+                # Without this, a node end that is a date (as opposed to a timestamp)
+                # causes us to incorrectly wind back an extra day
+                node_end_dt = make_exclusive(node_end)
+
+                if node_end_dt < plan_end_dt:
+                    # If the model has an end date that has already elapsed, use that as the starting point for calculating
+                    # min_intervals instead of the plan end. Using the plan end would return invalid intervals in the future
+                    starting_point = node_end_dt
+
+            snapshot_start = snapshot.node.cron_floor(starting_point)
+
+            for _ in range(min_intervals):
+                # Wind back the starting point by `min_intervals` intervals to arrive at the minimum snapshot start date
+                snapshot_start = snapshot.node.cron_prev(snapshot_start)
+
+            # Only record an override if the wound-back start date is earlier than the plan start date;
+            # if it isn't, the plan already covers `min_intervals` intervals for this snapshot
+            if snapshot_start < plan_start_dt:
+                start_overrides[model_fqn] = snapshot_start
+
+        return start_overrides
+
     def _get_max_interval_end_per_model(
         self, snapshots: t.Dict[str, Snapshot], backfill_models: t.Optional[t.Set[str]]
     ) -> t.Dict[str, int]:
diff --git a/sqlmesh/core/node.py b/sqlmesh/core/node.py
@@ -31,6 +31,9 @@ class IntervalUnit(str, Enum):
     IntervalUnit can be one of 5 types, YEAR, MONTH, DAY, HOUR, MINUTE. The unit is inferred
     based on the cron schedule of a node. The minimum time delta between a sample set of dates
     is used to determine which unit a node's schedule is.
+
+    It is designed to align with common partitioning schemes, which is why there is no WEEK unit:
+    tables are generally not partitioned by week.
     """
 
     YEAR = "year"
diff --git a/sqlmesh/core/plan/builder.py b/sqlmesh/core/plan/builder.py
@@ -5,6 +5,7 @@
 import typing as t
 from collections import defaultdict
 from functools import cached_property
+from datetime import datetime
 
 
 from sqlmesh.core.console import PlanBuilderConsole, get_console
@@ -85,6 +86,7 @@ class PlanBuilder:
         ensure_finalized_snapshots: Whether to compare against snapshots from the latest finalized
             environment state, or to use whatever snapshots are in the current environment state even if
             the environment is not finalized.
+        start_override_per_model: The mapping from model FQNs to model-specific start dates.
         interval_end_per_model: The mapping from model FQNs to target end dates.
         explain: Whether to explain the plan instead of applying it.
     """
@@ -117,6 +119,7 @@ def __init__(
         end_bounded: bool = False,
         ensure_finalized_snapshots: bool = False,
         explain: bool = False,
+        start_override_per_model: t.Optional[t.Dict[str, datetime]] = None,
         interval_end_per_model: t.Optional[t.Dict[str, int]] = None,
         console: t.Optional[PlanBuilderConsole] = None,
         user_provided_flags: t.Optional[t.Dict[str, UserProvidedFlags]] = None,
@@ -133,6 +136,7 @@ def __init__(
         self._enable_preview = enable_preview
         self._end_bounded = end_bounded
         self._ensure_finalized_snapshots = ensure_finalized_snapshots
+        self._start_override_per_model = start_override_per_model
         self._interval_end_per_model = interval_end_per_model
         self._environment_ttl = environment_ttl
         self._categorizer_config = categorizer_config or CategorizerConfig()
@@ -280,7 +284,11 @@ def build(self) -> Plan:
         self._adjust_new_snapshot_intervals()
 
         deployability_index = (
-            DeployabilityIndex.create(self._context_diff.snapshots.values(), start=self._start)
+            DeployabilityIndex.create(
+                self._context_diff.snapshots.values(),
+                start=self._start,
+                start_override_per_model=self._start_override_per_model,
+            )
             if self._is_dev
             else DeployabilityIndex.all_deployable()
         )
@@ -322,6 +330,7 @@ def build(self) -> Plan:
             indirectly_modified=indirectly_modified,
             deployability_index=deployability_index,
             restatements=restatements,
+            start_override_per_model=self._start_override_per_model,
             interval_end_per_model=interval_end_per_model,
             selected_models_to_backfill=self._backfill_models,
             models_to_backfill=models_to_backfill,
diff --git a/sqlmesh/core/plan/definition.py b/sqlmesh/core/plan/definition.py
@@ -57,6 +57,7 @@ class Plan(PydanticModel, frozen=True):
 
     deployability_index: DeployabilityIndex
     restatements: t.Dict[SnapshotId, Interval]
+    start_override_per_model: t.Optional[t.Dict[str, datetime]]
     interval_end_per_model: t.Optional[t.Dict[str, int]]
 
     selected_models_to_backfill: t.Optional[t.Set[str]] = None
@@ -177,6 +178,7 @@ def missing_intervals(self) -> t.List[SnapshotIntervals]:
                 execution_time=self.execution_time,
                 restatements=self.restatements,
                 deployability_index=self.deployability_index,
+                start_override_per_model=self.start_override_per_model,
                 interval_end_per_model=self.interval_end_per_model,
                 end_bounded=self.end_bounded,
             ).items()
@@ -265,6 +267,7 @@ def to_evaluatable(self) -> EvaluatablePlan:
             removed_snapshots=sorted(self.context_diff.removed_snapshots),
             requires_backfill=self.requires_backfill,
             models_to_backfill=self.models_to_backfill,
+            start_override_per_model=self.start_override_per_model,
             interval_end_per_model=self.interval_end_per_model,
             execution_time=self.execution_time,
             disabled_restatement_models={
@@ -303,6 +306,7 @@ class EvaluatablePlan(PydanticModel):
     removed_snapshots: t.List[SnapshotId]
     requires_backfill: bool
     models_to_backfill: t.Optional[t.Set[str]] = None
+    start_override_per_model: t.Optional[t.Dict[str, datetime]] = None
     interval_end_per_model: t.Optional[t.Dict[str, int]] = None
     execution_time: t.Optional[TimeLike] = None
     disabled_restatement_models: t.Set[str]
diff --git a/sqlmesh/core/plan/stages.py b/sqlmesh/core/plan/stages.py
@@ -524,6 +524,7 @@ def _missing_intervals(
             },
             deployability_index=deployability_index,
             end_bounded=plan.end_bounded,
+            start_override_per_model=plan.start_override_per_model,
             interval_end_per_model=plan.interval_end_per_model,
         )
 
diff --git a/sqlmesh/core/scheduler.py b/sqlmesh/core/scheduler.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 import logging
 import typing as t
+from datetime import datetime
 from sqlglot import exp
 from sqlmesh.core import constants as c
 from sqlmesh.core.console import Console, get_console
@@ -704,6 +705,7 @@ def merged_missing_intervals(
     execution_time: t.Optional[TimeLike] = None,
     deployability_index: t.Optional[DeployabilityIndex] = None,
     restatements: t.Optional[t.Dict[SnapshotId, Interval]] = None,
+    start_override_per_model: t.Optional[t.Dict[str, datetime]] = None,
     interval_end_per_model: t.Optional[t.Dict[str, int]] = None,
     ignore_cron: bool = False,
     end_bounded: bool = False,
@@ -722,6 +724,8 @@ def merged_missing_intervals(
         execution_time: The date/time reference to use for execution time. Defaults to now.
         deployability_index: Determines snapshots that are deployable in the context of this evaluation.
         restatements: A set of snapshot names being restated.
+        start_override_per_model: A mapping of model FQNs to start dates, where the start date for calculating intervals
+            should be different from the plan start date
         interval_end_per_model: The mapping from model FQNs to target end dates.
         ignore_cron: Whether to ignore the node's cron schedule.
         end_bounded: If set to true, the returned intervals will be bounded by the target end date, disregarding lookback,
@@ -737,6 +741,7 @@ def merged_missing_intervals(
         deployability_index=deployability_index,
         execution_time=execution_time or now_timestamp(),
         restatements=restatements,
+        start_override_per_model=start_override_per_model,
         interval_end_per_model=interval_end_per_model,
         ignore_cron=ignore_cron,
         end_bounded=end_bounded,
@@ -751,6 +756,7 @@ def compute_interval_params(
     deployability_index: t.Optional[DeployabilityIndex] = None,
     execution_time: t.Optional[TimeLike] = None,
     restatements: t.Optional[t.Dict[SnapshotId, Interval]] = None,
+    start_override_per_model: t.Optional[t.Dict[str, datetime]] = None,
     interval_end_per_model: t.Optional[t.Dict[str, int]] = None,
     ignore_cron: bool = False,
     end_bounded: bool = False,
@@ -769,6 +775,8 @@ def compute_interval_params(
         deployability_index: Determines snapshots that are deployable in the context of this evaluation.
         execution_time: The date/time reference to use for execution time.
         restatements: A dict of snapshot names being restated and their intervals.
+        start_override_per_model: A mapping of model FQNs to start dates, where the start date for calculating intervals
+            should be different from the plan start date
         interval_end_per_model: The mapping from model FQNs to target end dates.
         ignore_cron: Whether to ignore the node's cron schedule.
         end_bounded: If set to true, the returned intervals will be bounded by the target end date, disregarding lookback,
@@ -786,6 +794,7 @@ def compute_interval_params(
         execution_time=execution_time,
         restatements=restatements,
         deployability_index=deployability_index,
+        start_override_per_model=start_override_per_model,
         interval_end_per_model=interval_end_per_model,
         ignore_cron=ignore_cron,
         end_bounded=end_bounded,
diff --git a/sqlmesh/core/snapshot/definition.py b/sqlmesh/core/snapshot/definition.py
@@ -1458,14 +1458,16 @@ def none_deployable(cls) -> DeployabilityIndex:
     def create(
         cls,
         snapshots: t.Dict[SnapshotId, Snapshot] | t.Collection[Snapshot],
-        start: t.Optional[TimeLike] = None,
+        start: t.Optional[TimeLike] = None,  # plan start
+        start_override_per_model: t.Optional[t.Dict[str, datetime]] = None,
     ) -> DeployabilityIndex:
         if not isinstance(snapshots, dict):
             snapshots = {s.snapshot_id: s for s in snapshots}
 
         deployability_mapping: t.Dict[SnapshotId, bool] = {}
         children_deployability_mapping: t.Dict[SnapshotId, bool] = {}
         representative_shared_version_ids: t.Set[SnapshotId] = set()
+        start_override_per_model = start_override_per_model or {}
 
         start_date_cache: t.Optional[t.Dict[str, datetime]] = {}
 
@@ -1488,12 +1490,12 @@ def create(
                     snapshot.is_model and snapshot.model.auto_restatement_cron is not None
                 )
 
+                snapshot_start = start_override_per_model.get(
+                    node.name, start_date(snapshot, snapshots.values(), cache=start_date_cache)
+                )
+
                 is_valid_start = (
-                    snapshot.is_valid_start(
-                        start, start_date(snapshot, snapshots.values(), start_date_cache)
-                    )
-                    if start is not None
-                    else True
+                    snapshot.is_valid_start(start, snapshot_start) if start is not None else True
                 )
 
                 if (
@@ -1789,6 +1791,7 @@ def missing_intervals(
     execution_time: t.Optional[TimeLike] = None,
     restatements: t.Optional[t.Dict[SnapshotId, Interval]] = None,
     deployability_index: t.Optional[DeployabilityIndex] = None,
+    start_override_per_model: t.Optional[t.Dict[str, datetime]] = None,
     interval_end_per_model: t.Optional[t.Dict[str, int]] = None,
     ignore_cron: bool = False,
     end_bounded: bool = False,
@@ -1806,13 +1809,16 @@ def missing_intervals(
         else earliest_start_date(snapshots, cache=cache, relative_to=end_date)
     )
     restatements = restatements or {}
+    start_override_per_model = start_override_per_model or {}
     interval_end_per_model = interval_end_per_model or {}
     deployability_index = deployability_index or DeployabilityIndex.all_deployable()
 
     for snapshot in snapshots.values():
         if not snapshot.evaluatable:
             continue
-        snapshot_start_date = start_dt
+
+        snapshot_start_override = start_override_per_model.get(snapshot.name, None)
+        snapshot_start_date = snapshot_start_override or start_dt
         snapshot_end_date: TimeLike = end_date
 
         restated_interval = restatements.get(snapshot.snapshot_id)
@@ -1891,7 +1897,7 @@ def compute_missing_intervals(
     Returns:
         A list of all timestamps in this range.
     """
-    if start_ts == end_ts:
+    if start_ts >= end_ts:
         return []
 
     timestamps = expand_range(start_ts, end_ts, interval_unit)
diff --git a/tests/core/test_context.py b/tests/core/test_context.py
diff --git a/tests/core/test_plan.py b/tests/core/test_plan.py
diff --git a/tests/core/test_snapshot.py b/tests/core/test_snapshot.py

Original file line number	Diff line number	Diff line change
`@@ -524,6 +524,7 @@ def _missing_intervals(`
`524`	`524`	`},`
`525`	`525`	`deployability_index=deployability_index,`
`526`	`526`	`end_bounded=plan.end_bounded,`
	`527`	`+ start_override_per_model=plan.start_override_per_model,`
`527`	`528`	`interval_end_per_model=plan.interval_end_per_model,`
`528`	`529`	`)`
`529`	`530`