Skip to content

Commit bce97bc

Browse files
jopemachineclaude
andcommitted
fix: Add RollingUpdateSpec validation and deploying_revision_id null check
- Add Pydantic model_validator to reject max_surge=0 + max_unavailable=0 (deadlock scenario where no progress is possible) - Add explicit ValueError when deploying_revision_id is None in evaluate_cycle to fix mypy arg-type error - Update test to expect ValueError for None deploying_revision_id Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4b40e96 commit bce97bc

3 files changed

Lines changed: 15 additions & 9 deletions

File tree

src/ai/backend/manager/models/deployment_policy/row.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import TYPE_CHECKING
77

88
import sqlalchemy as sa
9-
from pydantic import BaseModel
9+
from pydantic import BaseModel, model_validator
1010
from sqlalchemy.dialects import postgresql as pgsql
1111
from sqlalchemy.orm import Mapped, foreign, mapped_column, relationship
1212

@@ -38,6 +38,12 @@ class RollingUpdateSpec(BaseModel):
3838
max_surge: int = 1
3939
max_unavailable: int = 0
4040

41+
@model_validator(mode="after")
42+
def _check_not_both_zero(self) -> RollingUpdateSpec:
43+
if self.max_surge == 0 and self.max_unavailable == 0:
44+
raise ValueError("max_surge or max_unavailable must be positive")
45+
return self
46+
4147

4248
class BlueGreenSpec(BaseModel):
4349
"""Specification for blue-green deployment strategy."""

src/ai/backend/manager/sokovan/deployment/strategy/rolling_update.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ def evaluate_cycle(
6969
5. Compute allowed surge/unavailable, decide create/terminate -> PROGRESSING.
7070
"""
7171
desired = deployment.replica_spec.target_replica_count
72-
classified = self._classify_routes(routes, deployment.deploying_revision_id)
72+
deploying_revision_id = deployment.deploying_revision_id
73+
if deploying_revision_id is None:
74+
raise ValueError("deploying_revision_id must not be None for rolling update")
75+
classified = self._classify_routes(routes, deploying_revision_id)
7376

7477
if result := self._check_provisioning(deployment, classified):
7578
return result

tests/unit/manager/sokovan/deployment/strategy/test_rolling_update.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -724,17 +724,14 @@ def test_large_desired_surge_1_unavailable_0_creates_exactly_1(self) -> None:
724724
assert _count_rollout(result) == 1
725725
assert len(_drain_ids(result)) == 0
726726

727-
def test_deploying_rev_none_all_routes_classified_as_old(self) -> None:
728-
"""If deploying_revision_id is None, all routes are old (r.revision_id != None)."""
727+
def test_deploying_rev_none_rejected(self) -> None:
728+
"""If deploying_revision_id is None, evaluate_cycle raises ValueError."""
729729
deployment = make_deployment(desired=1, deploying_revision_id=None) # type: ignore[arg-type]
730730
spec = RollingUpdateSpec(max_surge=1, max_unavailable=0)
731731
routes = [make_route(revision_id=OLD_REV, status=RouteStatus.HEALTHY)]
732732

733-
result = RollingUpdateStrategy(spec).evaluate_cycle(deployment, routes)
734-
735-
# All classified as old, no new → PROGRESSING with create
736-
assert result.sub_step == DeploymentSubStep.PROGRESSING
737-
assert _count_rollout(result) == 1
733+
with pytest.raises(ValueError, match="deploying_revision_id must not be None"):
734+
RollingUpdateStrategy(spec).evaluate_cycle(deployment, routes)
738735

739736
def test_route_without_revision_classified_as_old(self) -> None:
740737
"""Routes with revision_id=None are classified as old."""

0 commit comments

Comments
 (0)