Skip to content

Commit 9968847

Browse files
jopemachine and claude authored
feat(BA-5278): Use deploying-revision image for new route session creation (#10271)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3e21c8e commit 9968847

13 files changed

Lines changed: 189 additions & 71 deletions

File tree

changes/10271.feature.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Use deploying-revision image for new route session creation

src/ai/backend/manager/data/deployment/types.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -322,6 +322,7 @@ class ExecutionSpec(ConfiguredModel):
322322

323323

324324
class ModelRevisionSpec(ConfiguredModel):
325+
revision_id: UUID | None = None
325326
image_identifier: ImageIdentifier
326327
resource_spec: ResourceSpec
327328
mounts: MountMetadata
@@ -379,10 +380,12 @@ class DeploymentInfo:
379380
deploying_revision_id: UUID | None = None
380381
sub_step: DeploymentSubStep | None = None
381382

382-
def target_revision(self) -> ModelRevisionSpec | None:
383-
if self.model_revisions:
384-
return self.model_revisions[0]
385-
return None
383+
def resolve_revision_spec(self, revision_id: UUID) -> ModelRevisionSpec | None:
384+
"""Find a ModelRevisionSpec by revision_id from model_revisions."""
385+
return next(
386+
(r for r in self.model_revisions if r.revision_id == revision_id),
387+
None,
388+
)
386389

387390

388391
@dataclass

src/ai/backend/manager/models/endpoint/row.py

Lines changed: 44 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
relationship,
3636
selectinload,
3737
)
38+
from sqlalchemy.orm.attributes import instance_state
3839

3940
from ai.backend.common.config import model_definition_iv
4041
from ai.backend.common.types import (
@@ -779,14 +780,16 @@ def to_deployment_info(self) -> DeploymentInfo:
779780
if self.deployment_policy is not None:
780781
policy_data = self.deployment_policy.to_data()
781782

782-
# Try to use current revision if available
783-
if self.current_revision and hasattr(self, "revisions") and self.revisions:
784-
current_rev = next(
785-
(r for r in self.revisions if r.id == self.current_revision),
786-
None,
787-
)
788-
if current_rev:
789-
info = self._to_deployment_info_from_revision(current_rev)
783+
# Build model_revisions list from loaded revision rows
784+
if "revisions" in instance_state(self).dict and self.revisions:
785+
model_revisions: list[ModelRevisionSpec] = []
786+
for rev_row in self.revisions:
787+
if rev_row.image_row is None:
788+
continue
789+
if rev_row.id == self.current_revision or rev_row.id == self.deploying_revision:
790+
model_revisions.append(self._build_revision_spec(rev_row))
791+
if model_revisions:
792+
info = self._to_deployment_info_with_revisions(model_revisions)
790793
info.policy = policy_data
791794
return info
792795

@@ -795,16 +798,44 @@ def to_deployment_info(self) -> DeploymentInfo:
795798
info.policy = policy_data
796799
return info
797800

798-
def _to_deployment_info_from_revision(
801+
def _build_revision_spec(
799802
self,
800803
revision: DeploymentRevisionRow,
801-
) -> DeploymentInfo:
802-
"""Build DeploymentInfo using revision data."""
803-
# Get image identifier from revision's image_row
804+
) -> ModelRevisionSpec:
805+
"""Build a ModelRevisionSpec from a revision row."""
804806
image_identifier = ImageIdentifier(
805807
canonical=revision.image_row.name,
806808
architecture=revision.image_row.architecture,
807809
)
810+
return ModelRevisionSpec(
811+
revision_id=revision.id,
812+
image_identifier=image_identifier,
813+
resource_spec=ResourceSpec(
814+
cluster_mode=ClusterMode(revision.cluster_mode),
815+
cluster_size=revision.cluster_size,
816+
resource_slots=revision.resource_slots,
817+
resource_opts=revision.resource_opts,
818+
),
819+
mounts=MountMetadata(
820+
model_vfolder_id=revision.model or uuid.UUID(int=0),
821+
model_definition_path=revision.model_definition_path,
822+
model_mount_destination=revision.model_mount_destination,
823+
extra_mounts=revision.extra_mounts or [],
824+
),
825+
execution=ExecutionSpec(
826+
startup_command=revision.startup_command,
827+
bootstrap_script=revision.bootstrap_script,
828+
environ=revision.environ,
829+
runtime_variant=revision.runtime_variant,
830+
callback_url=yarl.URL(revision.callback_url) if revision.callback_url else None,
831+
),
832+
)
833+
834+
def _to_deployment_info_with_revisions(
835+
self,
836+
model_revisions: Sequence[ModelRevisionSpec],
837+
) -> DeploymentInfo:
838+
"""Build DeploymentInfo with pre-built model_revisions dict."""
808839
return DeploymentInfo(
809840
id=self.id,
810841
metadata=DeploymentMetadata(
@@ -830,32 +861,7 @@ def _to_deployment_info_from_revision(
830861
open_to_public=self.open_to_public if self.open_to_public is not None else False,
831862
url=self.url,
832863
),
833-
model_revisions=[
834-
ModelRevisionSpec(
835-
image_identifier=image_identifier,
836-
resource_spec=ResourceSpec(
837-
cluster_mode=ClusterMode(revision.cluster_mode),
838-
cluster_size=revision.cluster_size,
839-
resource_slots=revision.resource_slots,
840-
resource_opts=revision.resource_opts,
841-
),
842-
mounts=MountMetadata(
843-
model_vfolder_id=revision.model or uuid.UUID(int=0),
844-
model_definition_path=revision.model_definition_path,
845-
model_mount_destination=revision.model_mount_destination,
846-
extra_mounts=revision.extra_mounts or [],
847-
),
848-
execution=ExecutionSpec(
849-
startup_command=revision.startup_command,
850-
bootstrap_script=revision.bootstrap_script,
851-
environ=revision.environ,
852-
runtime_variant=revision.runtime_variant,
853-
callback_url=yarl.URL(revision.callback_url)
854-
if revision.callback_url
855-
else None,
856-
),
857-
),
858-
],
864+
model_revisions=list(model_revisions),
859865
current_revision_id=self.current_revision,
860866
deploying_revision_id=self.deploying_revision,
861867
sub_step=self.sub_step,

src/ai/backend/manager/repositories/deployment/db_source/db_source.py

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -983,6 +983,7 @@ async def get_routes_by_endpoint(
983983
status=row.status,
984984
traffic_ratio=row.traffic_ratio,
985985
created_at=row.created_at,
986+
revision_id=row.revision,
986987
error_data=row.error_data or {},
987988
)
988989
for row in rows
@@ -1556,6 +1557,7 @@ async def get_routes_by_statuses(
15561557
status=row.status,
15571558
traffic_ratio=row.traffic_ratio,
15581559
created_at=row.created_at,
1560+
revision_id=row.revision,
15591561
error_data=row.error_data or {},
15601562
)
15611563
route_data_list.append(route_data)
@@ -1787,11 +1789,13 @@ async def delete_routes_by_route_ids(
17871789
async def fetch_deployment_context(
17881790
self,
17891791
deployment_info: DeploymentInfo,
1792+
revision_id: uuid.UUID,
17901793
) -> DeploymentContext:
17911794
"""Fetch all context data needed for session creation from deployment info.
17921795
17931796
Args:
17941797
deployment_info: Deployment information
1798+
revision_id: Revision to use for image resolution.
17951799
17961800
Returns:
17971801
DeploymentContext: Context data needed for session creation
@@ -1842,15 +1846,22 @@ async def fetch_deployment_context(
18421846
else None,
18431847
)
18441848

1845-
# Resolve image
1846-
target_revision = deployment_info.target_revision()
1847-
if not target_revision:
1848-
raise DeploymentHasNoTargetRevision("Deployment has no target revision")
1849-
1850-
image_row = await ImageRow.resolve(
1851-
db_sess,
1852-
[target_revision.image_identifier],
1849+
revision_query = (
1850+
sa.select(DeploymentRevisionRow)
1851+
.where(DeploymentRevisionRow.id == revision_id)
1852+
.options(selectinload(DeploymentRevisionRow.image_row))
1853+
)
1854+
revision_result = await db_sess.execute(revision_query)
1855+
revision_row = revision_result.scalar_one_or_none()
1856+
if revision_row is None or revision_row.image_row is None:
1857+
raise DeploymentHasNoTargetRevision(
1858+
f"Revision {revision_id} not found or has no image"
1859+
)
1860+
image_identifier = ImageIdentifier(
1861+
canonical=revision_row.image_row.name,
1862+
architecture=revision_row.image_row.architecture,
18531863
)
1864+
image_row = await ImageRow.resolve(db_sess, [image_identifier])
18541865

18551866
# Build DeploymentContext
18561867
return DeploymentContext(

src/ai/backend/manager/repositories/deployment/repository.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -707,16 +707,18 @@ async def delete_routes_by_route_ids(
707707
async def fetch_deployment_context(
708708
self,
709709
deployment_info: DeploymentInfo,
710+
revision_id: UUID,
710711
) -> DeploymentContext:
711712
"""Fetch all context data needed for session creation from deployment info.
712713
713714
Args:
714715
deployment_info: Deployment information
716+
revision_id: Revision to use for image resolution.
715717
716718
Returns:
717719
DeploymentContext: Context data needed for session creation
718720
"""
719-
return await self._db_source.fetch_deployment_context(deployment_info)
721+
return await self._db_source.fetch_deployment_context(deployment_info, revision_id)
720722

721723
# Auto-scaling operations
722724

src/ai/backend/manager/repositories/deployment/types/endpoint.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@ class RouteData:
5656
status: RouteStatus
5757
traffic_ratio: float
5858
created_at: datetime
59+
revision_id: uuid.UUID | None = None
5960
updated_at: datetime | None = None
6061
error_data: dict[str, Any] = field(default_factory=dict)
6162

src/ai/backend/manager/repositories/scheduler/types/session_creation.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -109,13 +109,17 @@ class SessionCreationSpec:
109109

110110
@classmethod
111111
def from_deployment_info(
112-
cls, deployment_info: DeploymentInfo, context: DeploymentContext, route_id: UUID
112+
cls,
113+
deployment_info: DeploymentInfo,
114+
context: DeploymentContext,
115+
route_id: UUID,
116+
revision_id: UUID,
113117
) -> Self:
114118
session_creation_id = secrets.token_urlsafe(16)
115-
target_revision = deployment_info.target_revision()
119+
target_revision = deployment_info.resolve_revision_spec(revision_id)
116120
if target_revision is None:
117121
raise DeploymentHasNoTargetRevision(
118-
"Deployment has no target revision for session creation"
122+
f"Revision {revision_id} not found in model_revisions"
119123
)
120124

121125
# Prepare mount spec

src/ai/backend/manager/sokovan/deployment/deployment_controller.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -147,11 +147,14 @@ async def update_deployment(
147147
modified_endpoint = await self._deployment_repository.get_modified_endpoint(
148148
endpoint_id=endpoint_id, updater=updater
149149
)
150-
target_revision = modified_endpoint.target_revision()
151-
if target_revision:
152-
await self._scheduling_controller.validate_session_spec(
153-
SessionValidationSpec.from_revision(model_revision=target_revision)
150+
if modified_endpoint.current_revision_id is not None:
151+
current_revision = modified_endpoint.resolve_revision_spec(
152+
modified_endpoint.current_revision_id
154153
)
154+
if current_revision:
155+
await self._scheduling_controller.validate_session_spec(
156+
SessionValidationSpec.from_revision(model_revision=current_revision)
157+
)
155158
res = await self._deployment_repository.update_endpoint_with_spec(updater)
156159
try:
157160
await self.mark_lifecycle_needed(DeploymentLifecycleType.CHECK_REPLICA)

src/ai/backend/manager/sokovan/deployment/executor.py

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -129,13 +129,20 @@ async def check_pending_deployments(
129129
valid_deployments: list[DeploymentWithHistory] = []
130130
for deployment in deployments:
131131
info = deployment.deployment_info
132-
target_revision = info.target_revision()
133-
if not target_revision:
132+
if info.current_revision_id is None:
134133
log.warning(
135-
"Deployment {} has no target revision, skipping",
134+
"Deployment {} has no current revision, skipping",
136135
info.id,
137136
)
138137
continue
138+
current_revision = info.resolve_revision_spec(info.current_revision_id)
139+
if not current_revision:
140+
log.warning(
141+
"Deployment {} current revision {} not found in model_revisions, skipping",
142+
info.id,
143+
info.current_revision_id,
144+
)
145+
continue
139146
targets = scaling_group_targets[info.metadata.resource_group]
140147
if not targets:
141148
log.warning(
@@ -445,16 +452,20 @@ async def _register_endpoint(
445452

446453
with recorder.phase("register_endpoint"):
447454
with recorder.step("check_target_revision"):
448-
target_revision = deployment.target_revision()
449-
if not target_revision:
455+
if deployment.current_revision_id is None:
456+
raise ModelDefinitionNotFound(
457+
f"No current revision for deployment {deployment.id}"
458+
)
459+
current_revision = deployment.resolve_revision_spec(deployment.current_revision_id)
460+
if not current_revision:
450461
raise ModelDefinitionNotFound(
451-
f"No target revision for deployment {deployment.id}"
462+
f"Current revision {deployment.current_revision_id} not found for deployment {deployment.id}"
452463
)
453464

454465
with recorder.step("generate_model_definition"):
455466
model_definition = (
456467
await self._model_definition_generator_registry.generate_model_definition(
457-
target_revision
468+
current_revision
458469
)
459470
)
460471
health_check_config = model_definition.health_check_config()
@@ -471,7 +482,7 @@ async def _register_endpoint(
471482
session_owner_id=deployment.metadata.session_owner,
472483
project_id=deployment.metadata.project,
473484
domain_name=deployment.metadata.domain,
474-
runtime_variant=target_revision.execution.runtime_variant,
485+
runtime_variant=current_revision.execution.runtime_variant,
475486
existing_url=deployment.network.url,
476487
open_to_public=deployment.network.open_to_public,
477488
health_check_config=health_check_config,

src/ai/backend/manager/sokovan/deployment/route/executor.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from ai.backend.manager.config.provider import ManagerConfigProvider
1919
from ai.backend.manager.data.deployment.types import DeploymentInfo, RouteStatus
2020
from ai.backend.manager.errors.deployment import (
21+
DeploymentHasNoTargetRevision,
2122
EndpointNotFound,
2223
RouteSessionNotFound,
2324
RouteSessionTerminated,
@@ -398,9 +399,20 @@ async def _provision_route(
398399
if deployment is None:
399400
raise EndpointNotFound(f"Deployment not found for endpoint {route.endpoint_id}")
400401

402+
target_revision_id = (
403+
route.revision_id
404+
or deployment.deploying_revision_id
405+
or deployment.current_revision_id
406+
)
407+
if target_revision_id is None:
408+
raise DeploymentHasNoTargetRevision(
409+
"Deployment has no revision for image resolution"
410+
)
411+
401412
# Fetch deployment context with all necessary data
402413
deployment_context = await self._deployment_repo.fetch_deployment_context(
403-
deployment
414+
deployment,
415+
revision_id=target_revision_id,
404416
)
405417

406418
# Create session with full context
@@ -409,6 +421,7 @@ async def _provision_route(
409421
deployment_info=deployment,
410422
context=deployment_context,
411423
route_id=route.route_id,
424+
revision_id=target_revision_id,
412425
)
413426
)
414427

0 commit comments

Comments
 (0)