Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
82080dd
refactor(BA-5650): add main_access_key resolver helpers
jopemachine Apr 14, 2026
e1547c1
docs: rename news fragment to assigned PR number 11041
jopemachine Apr 14, 2026
e66536b
fix(BA-5709): keep delegate_endpoint_ownership signature in slice A
jopemachine Apr 14, 2026
1be446d
refactor(BA-5709): simplify main_access_key filter helper
jopemachine Apr 14, 2026
a866afb
refactor(BA-5650): thread main_access_key through UserPermission
jopemachine Apr 14, 2026
711f136
docs: rename news fragment to assigned PR number 11043
jopemachine Apr 14, 2026
cfdf874
fix(BA-5710): restrict slice B to UserPermission-only test changes
jopemachine Apr 14, 2026
f597c42
chore(BA-5710): drop stale misc news fragment; slice is skip:changelog
jopemachine Apr 14, 2026
3e2a72f
refactor(BA-5650-C): rename SessionData user_uuid to owner_id
jopemachine Apr 14, 2026
d151254
docs: rename news fragment to 11045
jopemachine Apr 14, 2026
09cbd11
refactor(BA-5711): address slice C review feedback
jopemachine Apr 14, 2026
12ec797
docs(BA-5711): add enhance news fragment for slice C
jopemachine Apr 14, 2026
5d3575a
fix(BA-5711): update cascaded call sites in slice C
jopemachine Apr 14, 2026
830edea
fix(BA-5650-C): make slice C typecheck independently
jopemachine Apr 14, 2026
deab30e
refactor(BA-5650-C): rename SessionData user_uuid to owner_id
jopemachine Apr 14, 2026
c79c20d
refactor(BA-5650-D): switch session repository to owner_id
jopemachine Apr 14, 2026
e0069d0
docs: rename news fragment to 11046
jopemachine Apr 14, 2026
3a01145
docs(BA-5650): use enhance news fragment type for slice
jopemachine Apr 14, 2026
d832999
refactor(BA-5712): address slice D review feedback
jopemachine Apr 14, 2026
7e6e70e
refactor(BA-5712): clean up stale slice C fragment and comment
jopemachine Apr 14, 2026
82ea2d0
fix(BA-5650-D): align remaining slice D call sites with owner_id rename
jopemachine Apr 14, 2026
7c209f1
refactor(BA-5650-D): switch session repository to owner_id
jopemachine Apr 14, 2026
adec0d8
refactor(BA-5650-E): collapse scheduler signatures to owner_id
jopemachine Apr 14, 2026
f44bad9
docs: rename news fragment to 11047
jopemachine Apr 14, 2026
df67b48
docs(BA-5650): use enhance news fragment type for slice
jopemachine Apr 14, 2026
ca71661
refactor(BA-5713): address slice E review feedback
jopemachine Apr 14, 2026
603c3d0
refactor(BA-5713): address new slice E review feedback
jopemachine Apr 14, 2026
b3067da
fix(BA-5650-E): align remaining slice E call sites with renamed fields
jopemachine Apr 14, 2026
f23ed6c
refactor(BA-5650-D): switch session repository to owner_id
jopemachine Apr 14, 2026
3ad2159
refactor(BA-5650-E): collapse scheduler signatures to owner_id
jopemachine Apr 14, 2026
4df2e52
refactor(BA-5650-F): propagate owner_id rename into sokovan
jopemachine Apr 14, 2026
65a8938
docs: rename news fragment to 11048
jopemachine Apr 14, 2026
7adff9a
docs(BA-5650): use enhance news fragment type for slice
jopemachine Apr 14, 2026
a072335
refactor(BA-5714): drop stray deployment changes and restore dropped …
jopemachine Apr 14, 2026
707fd7b
refactor(BA-5714): resolve rebase conflicts and add resolve_main_acce…
jopemachine Apr 14, 2026
efa2a91
fix(BA-5650-F): align slice F call sites with renamed sokovan fields
jopemachine Apr 14, 2026
7f268e7
refactor(BA-5650-F): propagate owner_id rename into sokovan
jopemachine Apr 14, 2026
ebcb7b3
refactor(BA-5650-G): resolve owner_id via current_user() in services
jopemachine Apr 14, 2026
a06ad87
docs: rename news fragment to 11049
jopemachine Apr 14, 2026
8212794
docs(BA-5650): use enhance news fragment type for slice
jopemachine Apr 14, 2026
fa35fca
refactor(BA-5715): address slice G review feedback
jopemachine Apr 14, 2026
222d718
refactor(BA-5715): drop stale misc fragments from intermediate slices
jopemachine Apr 14, 2026
319ce94
fix(BA-5650-G): align scheduler db_source and tests with renamed fields
jopemachine Apr 14, 2026
d713fdd
fix(BA-5650-G): align remaining test fixtures and adapter call sites
jopemachine Apr 14, 2026
cd02a14
fix(BA-5650-G): align slice G call sites after rebasing on F
jopemachine Apr 14, 2026
d18853f
refactor(BA-5650-D): switch session repository to owner_id
jopemachine Apr 14, 2026
a789733
refactor(BA-5650-F): propagate owner_id rename into sokovan
jopemachine Apr 14, 2026
2678209
refactor(BA-5650-G): resolve owner_id via current_user() in services
jopemachine Apr 14, 2026
6641a51
refactor(BA-5715): drop stale misc fragments from intermediate slices
jopemachine Apr 14, 2026
140e363
fix(BA-5650-G): align remaining test fixtures and adapter call sites
jopemachine Apr 14, 2026
67951f7
refactor(BA-5650-E): collapse scheduler signatures to owner_id
jopemachine Apr 14, 2026
d6d92d4
breaking(BA-5650-H): drop owner_access_key from REST v1 session API
jopemachine Apr 14, 2026
57cd898
docs: rename news fragment to 11050
jopemachine Apr 14, 2026
269eafc
breaking(BA-5716): fix log format strings and stale test assertions
jopemachine Apr 14, 2026
5734e9a
test: remove trivial owner_access_key assertion test classes
jopemachine Apr 14, 2026
e3d6fdc
fix(BA-5650-H): align tests with renamed action/data fields
jopemachine Apr 14, 2026
2675af8
fix(BA-5650-H): align remaining test fixtures with renamed fields
jopemachine Apr 14, 2026
7077091
chore: drop accidentally committed local artifacts from BA-5650 stack
jopemachine Apr 14, 2026
df654e4
fix(BA-5716): restore LabelMatcher.exact() wrapping in MetricPreset l…
jopemachine Apr 14, 2026
6f78186
chore: consolidate news fragments for PR #11050
jopemachine Apr 15, 2026
b0d4397
fix(BA-5650): update test fixtures for mypy compliance
jopemachine Apr 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/11046.enhance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Collapse `SessionRepository` / `SessionDBSource` signatures to take `owner_id: UUID` instead of `owner_access_key: AccessKey`. No external behavior change.
1 change: 1 addition & 0 deletions changes/11048.enhance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Propagate the `owner_id` / `main_access_key` signature rename into the sokovan data classes, scheduler handlers, provisioner validators, launcher, scheduling controller, and sequencers.
1 change: 1 addition & 0 deletions changes/11050.breaking.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
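Remove `owner_access_key` from the REST v1 session API; the service layer now resolves `owner_id` via `current_user()`. The session-creation requests (`CreateFromTemplateRequest`, `CreateFromParamsRequest`, `CreateClusterRequest`) accept `owner_id` (user UUID) in place of `owner_access_key`, while `DestroySessionRequest`, `RestartSessionRequest`, `GetContainerLogsRequest`, and `GetStatusHistoryRequest` drop the field with no replacement. Clients must migrate to `owner_id` for delegation.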
15 changes: 3 additions & 12 deletions src/ai/backend/common/dto/manager/session/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class CreateFromTemplateRequest(BaseRequestModel):
default=None,
validation_alias=AliasChoices("callback_url", "callbackUrl", "callbackURL"),
)
owner_access_key: str | None = None
owner_id: UUID | None = None


class CreateFromParamsRequest(BaseRequestModel):
Expand Down Expand Up @@ -214,7 +214,7 @@ class CreateFromParamsRequest(BaseRequestModel):
default=None,
validation_alias=AliasChoices("callback_url", "callbackUrl", "callbackURL"),
)
owner_access_key: str | None = None
owner_id: UUID | None = None


class CreateClusterRequest(BaseRequestModel):
Expand Down Expand Up @@ -252,7 +252,7 @@ class CreateClusterRequest(BaseRequestModel):
ge=0,
validation_alias=AliasChoices("max_wait_seconds", "maxWaitSeconds"),
)
owner_access_key: str | None = None
owner_id: UUID | None = None


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -352,14 +352,11 @@ class DestroySessionRequest(BaseRequestModel):

forced: bool = False
recursive: bool = False
owner_access_key: str | None = None


class RestartSessionRequest(BaseRequestModel):
"""PATCH ``/{session_name}``"""

owner_access_key: str | None = None


class MatchSessionsRequest(BaseRequestModel):
"""GET ``/_/match``"""
Expand Down Expand Up @@ -419,10 +416,6 @@ class ListFilesRequest(BaseRequestModel):
class GetContainerLogsRequest(BaseRequestModel):
"""GET ``/{session_name}/logs``"""

owner_access_key: str | None = Field(
default=None,
validation_alias=AliasChoices("owner_access_key", "ownerAccessKey"),
)
kernel_id: UUID | None = Field(
default=None,
validation_alias=AliasChoices("kernel_id", "kernelId"),
Expand All @@ -441,5 +434,3 @@ class GetTaskLogsRequest(BaseRequestModel):

class GetStatusHistoryRequest(BaseRequestModel):
"""GET ``/{session_name}/status-history``"""

owner_access_key: str | None = None
18 changes: 6 additions & 12 deletions src/ai/backend/manager/api/adapters/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ async def enqueue(

When ``input.owner_id`` is set, the session is created on behalf of the
target user: their main access key, role, and domain are used in place
of the caller's. Resolution and authorization of the delegated user
are handled by the downstream session service, not by this adapter.
of the caller's. The target user must be loadable via the user
processor (RBAC enforced).
"""
batch_spec: SessionBatchSpec | None = None
if input.batch is not None:
Expand Down Expand Up @@ -849,12 +849,10 @@ async def shutdown_service(
self,
session_id: UUID,
input: ShutdownSessionServiceInput,
access_key: str,
) -> None:
"""Shut down a service in a session."""
action = ShutdownServiceAction(
session_name=str(session_id),
owner_access_key=AccessKey(access_key),
service_name=input.service,
)
await self._processors.session.shutdown_service.wait_for_complete(action)
Expand All @@ -866,13 +864,11 @@ async def shutdown_service(
async def get_logs(
self,
session_id: UUID,
access_key: str,
kernel_id: UUID | None = None,
) -> SessionLogsPayload:
"""Get container logs for a session."""
action = GetContainerLogsAction(
session_name=str(session_id),
owner_access_key=AccessKey(access_key),
kernel_id=KernelId(kernel_id) if kernel_id else None,
)
result = await self._processors.session.get_container_logs.wait_for_complete(action)
Expand All @@ -887,14 +883,12 @@ async def update(
self,
session_id: UUID,
input: UpdateSessionInput,
access_key: str,
) -> UpdateSessionPayload:
"""Update session fields (currently supports rename only)."""
if input.name is not None:
action = RenameSessionAction(
session_name=str(session_id),
new_name=input.name,
owner_access_key=AccessKey(access_key),
)
result = await self._processors.session.rename_session.wait_for_complete(action)
return UpdateSessionPayload(session=self._session_data_to_node(result.session_data))
Expand Down Expand Up @@ -933,13 +927,13 @@ def _session_data_to_node(data: SessionData) -> SessionNode:
return SessionNode(
id=data.id,
domain_name=data.domain_name,
user_id=data.user_uuid,
user_id=data.owner_id,
project_id=data.group_id,
metadata=SessionMetadataInfoGQLDTO(
creation_id=data.creation_id or "",
name=data.name or "",
session_type=data.session_type.value,
access_key=str(data.access_key) if data.access_key else "",
access_key="",
cluster_mode=data.cluster_mode.name,
cluster_size=data.cluster_size,
priority=data.priority,
Expand Down Expand Up @@ -1011,8 +1005,8 @@ def _kernel_info_to_node(info: KernelInfo) -> KernelNode:
session_type=info.session.session_type.value,
),
user_info=KernelUserInfoGQLDTO(
user_id=info.user_permission.user_uuid,
access_key=info.user_permission.access_key,
user_id=info.user_permission.owner_id,
access_key=info.user_permission.main_access_key,
domain_name=info.user_permission.domain_name,
group_id=info.user_permission.group_id,
),
Expand Down
37 changes: 28 additions & 9 deletions src/ai/backend/manager/api/gql_legacy/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

from ai.backend.common import validators as tx
from ai.backend.common.defs.session import SESSION_PRIORITY_MAX, SESSION_PRIORITY_MIN
from ai.backend.common.exception import SessionWithInvalidStateError
from ai.backend.common.types import (
ClusterMode,
KernelId,
Expand Down Expand Up @@ -395,19 +394,25 @@ def from_dataclass(
cls,
ctx: GraphQueryContext,
session_data: SessionData,
main_access_key: str | None,
*,
permissions: Iterable[ComputeSessionPermission] | None = None,
) -> Self:
"""Build a ``ComputeSessionNode`` from session data.

``main_access_key`` must be pre-resolved by the caller (typically
via ``UserRepository.get_main_access_key_by_id(session_data.owner_id)``
or by eagerly loading ``session_data.owner``). Keeping the helper
synchronous avoids a hidden per-session DB query and lets the
caller batch the lookup across nodes.
"""
status_history = session_data.status_history or {}
raw_scheduled_at = status_history.get(SessionStatus.SCHEDULED.name)
if not session_data.vfolder_mounts:
vfolder_mounts = []
else:
vfolder_mounts = [vf.vfid.folder_id for vf in session_data.vfolder_mounts]

if session_data.owner is None:
raise SessionWithInvalidStateError()

result = cls(
# identity
id=session_data.id, # auto-converted to Relay global ID
Expand All @@ -422,9 +427,9 @@ def from_dataclass(
# ownership
domain_name=session_data.domain_name,
project_id=session_data.group_id,
user_id=session_data.user_uuid,
access_key=session_data.access_key,
owner=UserNode.from_dataclass(ctx, session_data.owner),
user_id=session_data.owner_id,
access_key=main_access_key,
owner=UserNode.from_dataclass(ctx, session_data.owner) if session_data.owner else None,
# status
status=session_data.status.name,
# status_changed=row.status_changed, # FIXME: generated attribute
Expand Down Expand Up @@ -918,8 +923,14 @@ async def mutate_and_get_payload(
)
)

session_data = result.session_data
main_access_key = (
session_data.owner.main_access_key
if session_data.owner
else await graph_ctx.user_repository.get_main_access_key_by_id(session_data.owner_id)
)
return ModifyComputeSession(
ComputeSessionNode.from_dataclass(graph_ctx, result.session_data),
ComputeSessionNode.from_dataclass(graph_ctx, session_data, main_access_key),
input.get("client_mutation_id"),
)

Expand Down Expand Up @@ -969,8 +980,16 @@ async def mutate(
)
)
)
session_data = action_result.session_data
main_access_key = (
session_data.owner.main_access_key
if session_data.owner
else await graph_ctx.user_repository.get_main_access_key_by_id(
session_data.owner_id
)
)
session_nodes.append(
ComputeSessionNode.from_dataclass(graph_ctx, action_result.session_data)
ComputeSessionNode.from_dataclass(graph_ctx, session_data, main_access_key)
)

return CheckAndTransitStatus(session_nodes, input.get("client_mutation_id"))
Expand Down
Loading
Loading