Skip to content

Commit ea67cf9

Browse files
jopemachine and claude committed
refactor: align REST deployment surface with the v2 GQL scope-id-only shape
Drop nested revision / policy / id-list payloads from ``ModelDeploymentData`` so the projection only ever carries the endpoint row's own columns. The v2 GQL node already exposes only scope IDs and defers the revision spec / policy / replica list / auto-scaling rules / access tokens to dedicated DataLoader resolvers; the v1 REST surface now mirrors that — clients fetch the spec through the nested endpoints (``/deployments/{id}/revisions/{revision_id}``, ``/deployments/{id}/policy``, etc.).

**Breaking change for v1 REST**: ``DeploymentDTO.current_revision`` and ``DeploymentDTO.deployment_policy`` are removed; ``current_revision_id`` and ``deploying_revision_id`` are exposed instead.

With the projection no longer touching ``EndpointRow.current_revision_row`` / ``deploying_revision_row`` / ``deployment_policy``, the four search / get paths that consume ``to_model_deployment_data`` drop their ``selectinload`` chains — each was incurring a per-row dead eager load.

The v2 ``DeploymentNode.policy`` field is removed for the same reason (GQL never read it; the resolver always went through the policy DataLoader).

The v1 handler's revision-variant resolver is no longer needed on the deployment DTO path; ``_deployment_dto`` becomes synchronous.

Tests:
- ``test_endpoint_projection`` drops the now-obsolete revision-row scenarios (the BA-5963 list-order regression is structurally impossible once the projection only reads columns) and keeps the column-pass-through plus the lifecycle status mapping pins.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 380264e commit ea67cf9

9 files changed

Lines changed: 74 additions & 241 deletions

File tree

src/ai/backend/common/dto/manager/deployment/response.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -135,11 +135,19 @@ class DeploymentDTO(BackendAISchema):
135135
default_deployment_strategy: DeploymentStrategy = Field(
136136
description="Default deployment strategy"
137137
)
138-
current_revision: RevisionDTO | None = Field(
139-
default=None, description="Current active revision"
138+
current_revision_id: UUID | None = Field(
139+
default=None,
140+
description=(
141+
"ID of the current active revision. Fetch the revision spec via"
142+
" ``GET /deployments/{deployment_id}/revisions/{revision_id}``."
143+
),
140144
)
141-
deployment_policy: DeploymentPolicyDTO | None = Field(
142-
default=None, description="Deployment rollout policy"
145+
deploying_revision_id: UUID | None = Field(
146+
default=None,
147+
description=(
148+
"ID of the revision currently being deployed (in progress, not yet"
149+
" active). Fetch the spec via the nested revision endpoint."
150+
),
143151
)
144152
sub_step: str | None = Field(
145153
default=None, description="Current deployment sub-step (e.g. provisioning, rolling_back)"

src/ai/backend/common/dto/manager/v2/deployment/response.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@
2626
ClusterConfigInfoDTO,
2727
DeploymentMetadataInfoDTO,
2828
DeploymentNetworkAccessInfoDTO,
29-
DeploymentPolicyInfo,
3029
DeploymentStrategyInfoDTO,
3130
ExtraVFolderMountGQLDTO,
3231
ModelDefinitionInfoDTO,
@@ -176,9 +175,6 @@ class DeploymentNode(BaseResponseModel):
176175
default=None,
177176
description="ID of the revision currently being deployed (in progress, not yet active)",
178177
)
179-
policy: DeploymentPolicyInfo | None = Field(
180-
default=None, description="Deployment update policy"
181-
)
182178

183179

184180
class RouteNode(BaseResponseModel):

src/ai/backend/manager/api/adapters/deployment/adapter.py

Lines changed: 0 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -98,13 +98,11 @@
9898
UpsertDeploymentPolicyPayload,
9999
)
100100
from ai.backend.common.dto.manager.v2.deployment.types import (
101-
BlueGreenConfigInfo,
102101
BlueGreenStrategySpecInfo,
103102
ClusterConfigInfoDTO,
104103
DeploymentMetadataInfoDTO,
105104
DeploymentNetworkAccessInfoDTO,
106105
DeploymentOrderField,
107-
DeploymentPolicyInfo,
108106
DeploymentStrategyInfoDTO,
109107
EnvironmentVariableEntryInfoDTO,
110108
EnvironmentVariablesInfoDTO,
@@ -117,7 +115,6 @@
117115
ReplicaStateInfo,
118116
ResourceConfigInfoDTO,
119117
RevisionOrderField,
120-
RollingUpdateConfigInfo,
121118
RollingUpdateStrategySpecInfo,
122119
RouteOrderField,
123120
)
@@ -2229,26 +2226,6 @@ def _convert_replica_orders(orders: list[ReplicaOrder]) -> list[QueryOrder]:
22292226

22302227
@staticmethod
22312228
def _deployment_data_to_dto(data: ModelDeploymentData) -> DeploymentNode:
2232-
policy_info: DeploymentPolicyInfo | None = None
2233-
if data.policy is not None:
2234-
policy_spec = data.policy.strategy_spec
2235-
rolling: RollingUpdateConfigInfo | None = None
2236-
blue_green: BlueGreenConfigInfo | None = None
2237-
if isinstance(policy_spec, RollingUpdateSpec):
2238-
rolling = RollingUpdateConfigInfo(
2239-
max_surge=policy_spec.max_surge,
2240-
max_unavailable=policy_spec.max_unavailable,
2241-
)
2242-
elif isinstance(policy_spec, BlueGreenSpec):
2243-
blue_green = BlueGreenConfigInfo(
2244-
auto_promote=policy_spec.auto_promote,
2245-
promote_delay_seconds=policy_spec.promote_delay_seconds,
2246-
)
2247-
policy_info = DeploymentPolicyInfo(
2248-
strategy=data.policy.strategy,
2249-
rolling_update=rolling,
2250-
blue_green=blue_green,
2251-
)
22522229
return DeploymentNode(
22532230
id=data.id,
22542231
metadata=DeploymentMetadataInfoDTO(
@@ -2278,7 +2255,6 @@ def _deployment_data_to_dto(data: ModelDeploymentData) -> DeploymentNode:
22782255
scaling_state=data.scaling_state,
22792256
current_revision_id=data.current_revision_id,
22802257
deploying_revision_id=data.deploying_revision_id,
2281-
policy=policy_info,
22822258
)
22832259

22842260
@staticmethod

src/ai/backend/manager/api/rest/deployment/adapter.py

Lines changed: 11 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -106,45 +106,16 @@
106106
class DeploymentAdapter(BaseFilterAdapter):
107107
"""Adapter for converting deployment requests to repository queries."""
108108

109-
def __init__(
110-
self,
111-
*,
112-
revision_adapter: RevisionAdapter,
113-
policy_adapter: DeploymentPolicyAdapter,
114-
) -> None:
115-
"""Inject the sub-adapters this converter delegates to.
116-
117-
``RevisionAdapter`` / ``DeploymentPolicyAdapter`` are the single
118-
place that knows how to render a revision or policy surface into
119-
a DTO — recomputing them per call would duplicate that knowledge
120-
and lose the ability to swap implementations (e.g. testing).
109+
def convert_to_dto(self, data: ModelDeploymentData) -> DeploymentDTO:
110+
"""Convert ModelDeploymentData to the v1 response DTO.
111+
112+
Surfaces ``current_revision_id`` and ``deploying_revision_id`` as
113+
scope IDs only — mirrors the v2 GQL ``ModelDeployment`` node, which
114+
likewise exposes only the IDs and defers the revision / policy
115+
specs to dedicated DataLoader resolvers. Clients fetch the revision
116+
spec through ``GET /deployments/{deployment_id}/revisions/{id}`` and
117+
the policy through ``GET /deployments/{deployment_id}/policy``.
121118
"""
122-
self._revision_adapter = revision_adapter
123-
self._policy_adapter = policy_adapter
124-
125-
def convert_to_dto(
126-
self,
127-
data: ModelDeploymentData,
128-
runtime_variant_name: RuntimeVariant,
129-
) -> DeploymentDTO:
130-
"""Convert ModelDeploymentData to DTO.
131-
132-
``runtime_variant_name`` is resolved by the caller (REST handler)
133-
from ``data.revision.model_runtime_config.runtime_variant_id``
134-
via the RuntimeVariant resolver path — the legacy REST response
135-
preserves the historical name-based field so old clients keep
136-
seeing the same shape.
137-
"""
138-
current_revision = None
139-
if data.revision:
140-
current_revision = self._revision_adapter.convert_to_dto(
141-
data.revision, runtime_variant_name
142-
)
143-
144-
deployment_policy = None
145-
if data.policy:
146-
deployment_policy = self._policy_adapter.convert_to_dto(data.policy)
147-
148119
return DeploymentDTO(
149120
id=data.id,
150121
name=data.metadata.name,
@@ -165,8 +136,8 @@ def convert_to_dto(
165136
replica_ids=data.replica_state.replica_ids,
166137
),
167138
default_deployment_strategy=data.default_deployment_strategy,
168-
current_revision=current_revision,
169-
deployment_policy=deployment_policy,
139+
current_revision_id=data.current_revision_id,
140+
deploying_revision_id=data.deploying_revision_id,
170141
sub_step=data.sub_step,
171142
)
172143

src/ai/backend/manager/api/rest/deployment/handler.py

Lines changed: 14 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -128,10 +128,7 @@ def __init__(
128128
self._runtime_variant_adapter = runtime_variant_adapter
129129
self._revision_adapter = RevisionAdapter()
130130
self._policy_adapter = DeploymentPolicyAdapter()
131-
self._deployment_adapter = DeploymentAdapter(
132-
revision_adapter=self._revision_adapter,
133-
policy_adapter=self._policy_adapter,
134-
)
131+
self._deployment_adapter = DeploymentAdapter()
135132
self._route_adapter = RouteAdapter()
136133
self._create_deployment_adapter = CreateDeploymentAdapter()
137134
self._add_revision_adapter = AddRevisionAdapter()
@@ -145,29 +142,21 @@ async def _revision_dto(self, data: ModelRevisionData) -> RevisionDTO:
145142
resolving the runtime-variant id back to its name so the legacy
146143
shape stays stable.
147144
"""
148-
variant_name = await self._resolve_revision_variant_name(data)
149-
return self._revision_adapter.convert_to_dto(data, variant_name)
150-
151-
async def _resolve_revision_variant_name(self, data: ModelRevisionData) -> RuntimeVariant:
152-
"""Resolve the runtime-variant name from a revision's variant id."""
153145
variant_node = await self._runtime_variant_adapter.get(
154146
data.model_runtime_config.runtime_variant_id
155147
)
156-
return RuntimeVariant(variant_node.name)
148+
return self._revision_adapter.convert_to_dto(data, RuntimeVariant(variant_node.name))
157149

158-
async def _deployment_dto(self, data: ModelDeploymentData) -> DeploymentDTO:
159-
"""Render a deployment DTO with runtime-variant name pre-resolved.
150+
def _deployment_dto(self, data: ModelDeploymentData) -> DeploymentDTO:
151+
"""Render a deployment DTO.
160152
161-
``DeploymentAdapter.convert_to_dto`` expects the caller to provide
162-
the runtime-variant name; when no current revision exists the
163-
value is an empty ``RuntimeVariant`` sentinel since the adapter
164-
drops ``current_revision`` anyway.
153+
Synchronous now — the v1 response surface mirrors the v2 GQL node
154+
and only carries ``current_revision_id`` / ``deploying_revision_id``,
155+
so there is no nested revision to resolve here. Clients fetch the
156+
revision spec through ``GET /deployments/{deployment_id}/revisions/{id}``
157+
and the policy through ``GET /deployments/{deployment_id}/policy``.
165158
"""
166-
if data.revision is None:
167-
variant_name = RuntimeVariant("")
168-
else:
169-
variant_name = await self._resolve_revision_variant_name(data.revision)
170-
return self._deployment_adapter.convert_to_dto(data, variant_name)
159+
return self._deployment_adapter.convert_to_dto(data)
171160

172161
# Deployment Endpoints
173162

@@ -192,7 +181,7 @@ async def create_deployment(
192181
)
193182

194183
# Build response
195-
resp = CreateDeploymentResponse(deployment=await self._deployment_dto(action_result.data))
184+
resp = CreateDeploymentResponse(deployment=self._deployment_dto(action_result.data))
196185
return APIResponse.build(status_code=HTTPStatus.CREATED, response_model=resp)
197186

198187
async def search_deployments(
@@ -214,7 +203,7 @@ async def search_deployments(
214203
SearchProjectDeploymentsAction(scope=scope, querier=querier)
215204
)
216205

217-
deployment_dtos = [await self._deployment_dto(dep) for dep in action_result.data]
206+
deployment_dtos = [self._deployment_dto(dep) for dep in action_result.data]
218207
resp = ListDeploymentsResponse(
219208
deployments=deployment_dtos,
220209
pagination=PaginationInfo(
@@ -236,7 +225,7 @@ async def get_deployment(
236225
)
237226

238227
# Build response
239-
resp = GetDeploymentResponse(deployment=await self._deployment_dto(action_result.data))
228+
resp = GetDeploymentResponse(deployment=self._deployment_dto(action_result.data))
240229
return APIResponse.build(status_code=HTTPStatus.OK, response_model=resp)
241230

242231
async def update_deployment(
@@ -273,7 +262,7 @@ async def update_deployment(
273262
)
274263

275264
# Build response
276-
resp = UpdateDeploymentResponse(deployment=await self._deployment_dto(action_result.data))
265+
resp = UpdateDeploymentResponse(deployment=self._deployment_dto(action_result.data))
277266
return APIResponse.build(status_code=HTTPStatus.OK, response_model=resp)
278267

279268
async def destroy_deployment(

src/ai/backend/manager/data/deployment/types.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1027,14 +1027,21 @@ class ReplicaStateData:
10271027

10281028
@dataclass
10291029
class ModelDeploymentData:
1030+
"""API-shaped projection of an ``EndpointRow``.
1031+
1032+
Carries only the row's own columns (plus the metadata composite). Joined
1033+
children — current/deploying revision spec, deployment policy, replica
1034+
list, auto-scaling rules, access tokens — are NOT included; the v2 GQL
1035+
layer fetches each via its own DataLoader/resolver and v1 REST clients
1036+
follow up with the dedicated nested endpoints. The projection therefore
1037+
stays scope-id-only and requires no eager loads beyond the row itself.
1038+
"""
1039+
10301040
id: DeploymentID
10311041
metadata: ModelDeploymentMetadataInfo
10321042
network_access: DeploymentNetworkData
1033-
revision: ModelRevisionData | None
10341043
current_revision_id: DeploymentRevisionID | None
10351044
deploying_revision_id: DeploymentRevisionID | None
1036-
revision_history_ids: list[DeploymentRevisionID]
1037-
scaling_rule_ids: list[UUID]
10381045
replica_state: ReplicaStateData
10391046
default_deployment_strategy: DeploymentStrategy
10401047
created_user_id: UUID
@@ -1043,8 +1050,6 @@ class ModelDeploymentData:
10431050
# whether the endpoint is currently reconciling its replica count
10441051
# (``SCALING``) or holding at the desired count (``STABLE``).
10451052
scaling_state: ScalingState
1046-
policy: DeploymentPolicyData | None = None
1047-
access_token_ids: list[UUID] | None = None
10481053
sub_step: DeploymentLifecycleSubStep | None = None
10491054

10501055

src/ai/backend/manager/models/endpoint/row.py

Lines changed: 6 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -819,27 +819,16 @@ def _build_deployment_info(
819819
def to_model_deployment_data(self) -> ModelDeploymentData:
820820
"""Project the row to the API-shaped ``ModelDeploymentData``.
821821
822-
Eager-load requirements: ``current_revision_row`` (and
823-
``deploying_revision_row`` if the spec is needed) plus
824-
``deployment_policy``. Mirrors the relationship usage in
825-
``to_deployment_info`` so the projection follows the same
826-
column-direct lookup as the BA-6056 split — no list scan over
827-
``revisions``. ``current_revision_id`` / ``deploying_revision_id``
828-
surface directly from the row columns; the joined ``revision``
829-
spec is ``None`` when the row was not eager-loaded (callers can
830-
still act on the ID).
822+
Reads only the row's own columns — no relationship access — so the
823+
caller does not need to eager-load anything beyond the endpoint
824+
row itself. Joined children (revision spec, policy, replicas, etc.)
825+
are surfaced through their dedicated DataLoader/resolver paths
826+
(v2 GQL) or via the nested REST endpoints (v1 REST); the projection
827+
only carries scope IDs.
831828
"""
832-
revision: ModelRevisionData | None = None
833-
if self.current_revision_row is not None:
834-
revision = self.current_revision_row.to_data()
835-
836829
desired_count = (
837830
self.desired_replicas if self.desired_replicas is not None else self.replicas
838831
)
839-
policy_data = (
840-
self.deployment_policy.to_data() if self.deployment_policy is not None else None
841-
)
842-
843832
return ModelDeploymentData(
844833
id=self.id,
845834
metadata=ModelDeploymentMetadataInfo(
@@ -858,11 +847,8 @@ def to_model_deployment_data(self) -> ModelDeploymentData:
858847
url=self.url,
859848
preferred_domain_name=None,
860849
),
861-
revision_history_ids=[self.current_revision] if self.current_revision else [],
862-
revision=revision,
863850
current_revision_id=self.current_revision,
864851
deploying_revision_id=self.deploying_revision,
865-
scaling_rule_ids=[],
866852
replica_state=ReplicaStateData(
867853
desired_replica_count=desired_count,
868854
replica_ids=[],
@@ -871,7 +857,6 @@ def to_model_deployment_data(self) -> ModelDeploymentData:
871857
created_user_id=self.created_user,
872858
options=self.options,
873859
scaling_state=self.scaling_state,
874-
policy=policy_data,
875860
sub_step=self.sub_step,
876861
)
877862

src/ai/backend/manager/repositories/deployment/db_source/db_source.py

Lines changed: 4 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -413,15 +413,7 @@ async def get_deployment_data(
413413
EndpointNotFound: If the endpoint does not exist.
414414
"""
415415
async with self._begin_readonly_session_read_committed() as db_sess:
416-
query = (
417-
sa.select(EndpointRow)
418-
.where(EndpointRow.id == endpoint_id)
419-
.options(
420-
selectinload(EndpointRow.current_revision_row),
421-
selectinload(EndpointRow.deploying_revision_row),
422-
selectinload(EndpointRow.deployment_policy),
423-
)
424-
)
416+
query = sa.select(EndpointRow).where(EndpointRow.id == endpoint_id)
425417
result = await db_sess.execute(query)
426418
row: EndpointRow | None = result.scalar_one_or_none()
427419

@@ -1148,11 +1140,7 @@ async def admin_search_deployments(
11481140
the stack (db_source → repository → service).
11491141
"""
11501142
async with self._begin_readonly_session_read_committed() as db_sess:
1151-
query = sa.select(EndpointRow).options(
1152-
selectinload(EndpointRow.current_revision_row),
1153-
selectinload(EndpointRow.deploying_revision_row),
1154-
selectinload(EndpointRow.deployment_policy),
1155-
)
1143+
query = sa.select(EndpointRow)
11561144

11571145
result = await execute_batch_querier(
11581146
db_sess,
@@ -1181,11 +1169,7 @@ async def search_user_deployments(
11811169
``execute_batch_querier``'s ``scope`` argument.
11821170
"""
11831171
async with self._begin_readonly_session_read_committed() as db_sess:
1184-
query = sa.select(EndpointRow).options(
1185-
selectinload(EndpointRow.current_revision_row),
1186-
selectinload(EndpointRow.deploying_revision_row),
1187-
selectinload(EndpointRow.deployment_policy),
1188-
)
1172+
query = sa.select(EndpointRow)
11891173

11901174
result = await execute_batch_querier(
11911175
db_sess,
@@ -1215,11 +1199,7 @@ async def search_project_deployments(
12151199
admin list pages. Backs the v2 adapter's ``project_search`` path.
12161200
"""
12171201
async with self._begin_readonly_session_read_committed() as db_sess:
1218-
query = sa.select(EndpointRow).options(
1219-
selectinload(EndpointRow.current_revision_row),
1220-
selectinload(EndpointRow.deploying_revision_row),
1221-
selectinload(EndpointRow.deployment_policy),
1222-
)
1202+
query = sa.select(EndpointRow)
12231203

12241204
result = await execute_batch_querier(
12251205
db_sess,

0 commit comments

Comments
 (0)