Skip to content

Commit e35caec

Browse files
jopemachineclaude
andcommitted
refactor(BA-5650-I): drop stray non-BA-5650 changes from slice
This slice was carrying unrelated work that crept in during the original PR split (prometheus client, valkey route health, deployment auto_activate, web ssl_enabled, auth client_type_id, gql_legacy routing health_status). All such files are reverted to the slice-H base. Also fixes: - Use session.user.main_access_key (loaded via selectinload) instead of session.main_access_key in scheduler db_source — SessionRow has no main_access_key attribute. - UserService delegate path now resolves target_main_access_key via UserRepository.get_main_access_key_by_id and forwards it to delegate_endpoint_ownership. - Drop stale changes/BA-5650-H.breaking.md (slice H ships 11050.breaking.md). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ea5daef commit e35caec

40 files changed

Lines changed: 463 additions & 93 deletions

File tree

changes/BA-5650-H.breaking.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

dev

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,32 @@ cmd_restart() {
149149

150150
cmd_log() {
151151
local svc=$1
152+
local follow=${2:-}
152153
local winname
153154
winname=$(_tmux_window_name "$svc")
154155
local win
155156
win=$(tmux list-windows -t "$TMUX_SESSION" -F "#{window_name}" 2>/dev/null | grep "^${winname}$" | head -1) || true
156-
if [ -n "$win" ]; then
157-
tmux capture-pane -t "$TMUX_SESSION:$win" -p -S -50
158-
else
157+
if [ -z "$win" ]; then
159158
echo "$(_color red "No tmux window found for $svc")"
159+
return 1
160+
fi
161+
if [ "$follow" = "-f" ]; then
162+
local last_hash=""
163+
trap 'exit 0' INT
164+
while true; do
165+
local output
166+
output=$(tmux capture-pane -t "$TMUX_SESSION:$win" -p -S -50 2>/dev/null)
167+
local cur_hash
168+
cur_hash=$(echo "$output" | md5sum | cut -d' ' -f1)
169+
if [ "$cur_hash" != "$last_hash" ]; then
170+
clear
171+
echo "$output"
172+
last_hash=$cur_hash
173+
fi
174+
sleep 1
175+
done
176+
else
177+
tmux capture-pane -t "$TMUX_SESSION:$win" -p -S -50
160178
fi
161179
}
162180

@@ -170,7 +188,7 @@ Commands:
170188
start <service|all> Start a service
171189
stop <service|all> Stop a service
172190
restart <service|all> Restart a service
173-
log <service> Show recent log output
191+
log <service> [-f] Show recent log output (-f to follow)
174192
175193
Services:
176194
mgr, agent, storage, web, proxy-coordinator, proxy-worker, all
@@ -207,9 +225,9 @@ case "$1" in
207225
"cmd_$1" "$2"
208226
;;
209227
log)
210-
[ $# -lt 2 ] && { echo "$(_color red "Usage: ./dev log <service>")"; exit 1; }
228+
[ $# -lt 2 ] && { echo "$(_color red "Usage: ./dev log <service> [-f]")"; exit 1; }
211229
_validate_service "$2"
212-
cmd_log "$2"
230+
cmd_log "$2" "${3:-}"
213231
;;
214232
*) echo "$(_color red "Unknown command: $1")"; usage; exit 1 ;;
215233
esac

docs/manager/graphql-reference/schema.graphql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,6 +1836,9 @@ type Routing implements Item {
18361836
endpoint: String
18371837
session: UUID
18381838
status: String
1839+
1840+
"""Added in 26.4.1."""
1841+
health_status: String
18391842
traffic_ratio: Float
18401843
created_at: DateTime
18411844
error_data: JSONString

docs/manager/graphql-reference/supergraph.graphql

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,16 @@ input AddRevisionInput
166166
extraMounts: [ExtraVFolderMountInput!] = null
167167
}
168168

169+
"""Added in 26.4.1. Options for the add_model_revision mutation."""
170+
input AddRevisionOptions
171+
@join__type(graph: STRAWBERRY)
172+
{
173+
"""
174+
When true, automatically activate the newly added revision immediately after creation.
175+
"""
176+
autoActivate: Boolean! = false
177+
}
178+
169179
"""Added in 25.19.0. Payload for adding a revision."""
170180
type AddRevisionPayload
171181
@join__type(graph: STRAWBERRY)
@@ -4673,9 +4683,9 @@ input DeploymentOrderBy
46734683
enum DeploymentOrderField
46744684
@join__type(graph: STRAWBERRY)
46754685
{
4686+
NAME @join__enumValue(graph: STRAWBERRY)
46764687
CREATED_AT @join__enumValue(graph: STRAWBERRY)
46774688
UPDATED_AT @join__enumValue(graph: STRAWBERRY)
4678-
NAME @join__enumValue(graph: STRAWBERRY)
46794689
}
46804690

46814691
"""Added in 25.19.0. Deployment policy configuration."""
@@ -10009,7 +10019,7 @@ type Mutation
1000910019
syncReplicas(input: SyncReplicaInput!): SyncReplicaPayload! @join__field(graph: STRAWBERRY)
1001010020

1001110021
"""Added in 25.16.0. Add model revision."""
10012-
addModelRevision(input: AddRevisionInput!): AddRevisionPayload! @join__field(graph: STRAWBERRY)
10022+
addModelRevision(input: AddRevisionInput!, options: AddRevisionOptions = null): AddRevisionPayload! @join__field(graph: STRAWBERRY)
1001310023

1001410024
"""
1001510025
Added in 26.4.1. Create or update the deployment policy for a given deployment (upsert semantics). If the deployment already has a policy, it is replaced entirely with the new configuration
@@ -14923,6 +14933,9 @@ type Routing implements Item
1492314933
endpoint: String
1492414934
session: UUID
1492514935
status: String
14936+
14937+
"""Added in 26.4.1."""
14938+
health_status: String
1492614939
traffic_ratio: Float
1492714940
created_at: DateTime
1492814941
error_data: JSONString

docs/manager/graphql-reference/v2-schema.graphql

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ input AddRevisionInput {
126126
extraMounts: [ExtraVFolderMountInput!] = null
127127
}
128128

129+
"""Added in 26.4.1. Options for the add_model_revision mutation."""
130+
input AddRevisionOptions {
131+
"""
132+
When true, automatically activate the newly added revision immediately after creation.
133+
"""
134+
autoActivate: Boolean! = false
135+
}
136+
129137
"""Added in 25.19.0. Payload for adding a revision."""
130138
type AddRevisionPayload {
131139
"""Added revision"""
@@ -3004,9 +3012,9 @@ input DeploymentOrderBy {
30043012
}
30053013

30063014
enum DeploymentOrderField {
3015+
NAME
30073016
CREATED_AT
30083017
UPDATED_AT
3009-
NAME
30103018
}
30113019

30123020
"""Added in 25.19.0. Deployment policy configuration."""
@@ -6013,7 +6021,7 @@ type Mutation {
60136021
syncReplicas(input: SyncReplicaInput!): SyncReplicaPayload!
60146022

60156023
"""Added in 25.16.0. Add model revision."""
6016-
addModelRevision(input: AddRevisionInput!): AddRevisionPayload!
6024+
addModelRevision(input: AddRevisionInput!, options: AddRevisionOptions = null): AddRevisionPayload!
60176025

60186026
"""
60196027
Added in 26.4.1. Create or update the deployment policy for a given deployment (upsert semantics). If the deployment already has a policy, it is replaced entirely with the new configuration

src/ai/backend/common/clients/prometheus/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from .client import PrometheusClient
2-
from .preset import MetricPreset
2+
from .preset import LabelMatcher, LabelOperator, MetricPreset
33
from .querier import ContainerMetricQuerier, MetricQuerier
44
from .types import ValueType
55

66
__all__ = [
7+
"LabelMatcher",
8+
"LabelOperator",
79
"PrometheusClient",
810
"MetricPreset",
911
"MetricQuerier",

src/ai/backend/common/clients/prometheus/preset.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,30 @@
11
from collections.abc import Mapping, Set
22
from dataclasses import dataclass, field
3+
from enum import StrEnum
4+
from typing import Self
5+
6+
7+
class LabelOperator(StrEnum):
8+
EQUAL = "="
9+
NOT_EQUAL = "!="
10+
REGEX = "=~"
11+
NOT_REGEX = "!~"
12+
13+
14+
@dataclass(frozen=True)
15+
class LabelMatcher:
16+
"""PromQL label matcher with an explicit operator."""
17+
18+
value: str
19+
operator: LabelOperator = LabelOperator.EQUAL
20+
21+
@classmethod
22+
def exact(cls, value: str) -> Self:
23+
return cls(value=value, operator=LabelOperator.EQUAL)
24+
25+
@classmethod
26+
def regex(cls, value: str) -> Self:
27+
return cls(value=value, operator=LabelOperator.REGEX)
328

429

530
def _escape_label_value(value: str) -> str:
@@ -15,7 +40,7 @@ class MetricPreset:
1540
template: str
1641

1742
# Query labels (injected into {labels} placeholder)
18-
labels: Mapping[str, str] = field(default_factory=dict)
43+
labels: Mapping[str, LabelMatcher] = field(default_factory=dict)
1944

2045
# Group by labels (injected into {group_by} placeholder)
2146
group_by: Set[str] = field(default_factory=frozenset)
@@ -25,7 +50,10 @@ class MetricPreset:
2550

2651
def render(self) -> str:
2752
"""Render the PromQL query with all values injected."""
28-
label_str = ",".join(f'{k}="{_escape_label_value(v)}"' for k, v in self.labels.items())
53+
label_str = ",".join(
54+
f'{key}{value.operator}"{_escape_label_value(value.value)}"'
55+
for key, value in self.labels.items()
56+
)
2957
return self.template.format(
3058
labels=label_str,
3159
window=self.window,

src/ai/backend/common/clients/prometheus/querier.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from dataclasses import dataclass
44
from uuid import UUID
55

6+
from ai.backend.common.clients.prometheus.preset import LabelMatcher
67
from ai.backend.common.clients.prometheus.types import ValueType
78

89

@@ -14,7 +15,7 @@ class MetricQuerier(ABC):
1415
"""
1516

1617
@abstractmethod
17-
def labels(self) -> Mapping[str, str]:
18+
def labels(self) -> Mapping[str, LabelMatcher]:
1819
"""Return the labels to be used in the Prometheus query."""
1920
...
2021

@@ -34,22 +35,22 @@ class ContainerMetricQuerier(MetricQuerier):
3435
user_id: UUID | None = None
3536
project_id: UUID | None = None
3637

37-
def labels(self) -> Mapping[str, str]:
38+
def labels(self) -> Mapping[str, LabelMatcher]:
3839
"""Return the labels for the container metric query."""
39-
result: dict[str, str] = {
40-
"container_metric_name": self.metric_name,
41-
"value_type": self.value_type,
40+
result: dict[str, LabelMatcher] = {
41+
"container_metric_name": LabelMatcher.exact(self.metric_name),
42+
"value_type": LabelMatcher.exact(self.value_type),
4243
}
4344
if self.kernel_id is not None:
44-
result["kernel_id"] = str(self.kernel_id)
45+
result["kernel_id"] = LabelMatcher.exact(str(self.kernel_id))
4546
if self.session_id is not None:
46-
result["session_id"] = str(self.session_id)
47+
result["session_id"] = LabelMatcher.exact(str(self.session_id))
4748
if self.agent_id is not None:
48-
result["agent_id"] = self.agent_id
49+
result["agent_id"] = LabelMatcher.exact(self.agent_id)
4950
if self.user_id is not None:
50-
result["user_id"] = str(self.user_id)
51+
result["user_id"] = LabelMatcher.exact(str(self.user_id))
5152
if self.project_id is not None:
52-
result["project_id"] = str(self.project_id)
53+
result["project_id"] = LabelMatcher.exact(str(self.project_id))
5354
return result
5455

5556
def group_by_labels(self) -> frozenset[str]:

src/ai/backend/common/clients/valkey_client/valkey_schedule/client.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,14 @@ class RouteHealthRecord:
9595

9696
route_id: str
9797
created_at: int # Unix timestamp when route was created
98-
initial_delay_until: int # Unix timestamp = created_at + initial_delay
98+
initial_delay_until: int # Unix timestamp = running_at + initial_delay
9999
health_path: str # extracted from model_definition
100100
inference_port: int # extracted from kernel
101101
replica_host: str # extracted from kernel
102102

103+
# Timestamp when route entered RUNNING state (set by coordinator)
104+
running_at: int | None = None
105+
103106
# Agent check results
104107
agent_healthy: bool = False
105108
agent_last_check: int = 0 # Unix timestamp
@@ -128,7 +131,7 @@ def is_stale(self, current_time: int, staleness_sec: int = MAX_HEALTH_STALENESS_
128131

129132
def to_valkey_hash(self) -> Mapping[str, str]:
130133
"""Serialize to Valkey hash fields."""
131-
return {
134+
data: dict[str, str] = {
132135
"route_id": self.route_id,
133136
"created_at": str(self.created_at),
134137
"initial_delay_until": str(self.initial_delay_until),
@@ -140,6 +143,9 @@ def to_valkey_hash(self) -> Mapping[str, str]:
140143
"manager_healthy": "1" if self.manager_healthy else "0",
141144
"manager_last_check": str(self.manager_last_check),
142145
}
146+
if self.running_at is not None:
147+
data["running_at"] = str(self.running_at)
148+
return data
143149

144150
@classmethod
145151
def from_valkey_hash(cls, data: Mapping[str, str]) -> RouteHealthRecord:
@@ -151,6 +157,7 @@ def from_valkey_hash(cls, data: Mapping[str, str]) -> RouteHealthRecord:
151157
health_path=data["health_path"],
152158
inference_port=int(data["inference_port"]),
153159
replica_host=data["replica_host"],
160+
running_at=int(raw) if (raw := data.get("running_at")) and raw != "0" else None,
154161
agent_healthy=data.get("agent_healthy", "0") == "1",
155162
agent_last_check=int(data.get("agent_last_check", "0")),
156163
manager_healthy=data.get("manager_healthy", "0") == "1",
@@ -668,6 +675,52 @@ async def update_route_liveness(self, route_id: str, liveness: bool) -> None:
668675
async with self._client.client() as conn:
669676
await conn.exec(batch, raise_on_error=True)
670677

678+
@valkey_schedule_resilience.apply()
679+
async def mark_route_running_at(self, route_id: str) -> None:
680+
"""
681+
Record the RUNNING transition timestamp for a route.
682+
Called when a route transitions to RUNNING status.
683+
Uses Redis time for consistency with health check comparisons.
684+
685+
:param route_id: The route ID that entered RUNNING state
686+
"""
687+
key = self._get_route_health_key(route_id)
688+
current_time = str(await self._get_redis_time())
689+
async with self._client.client() as conn:
690+
await conn.hset(key, {"running_at": current_time})
691+
await conn.expire(key, ROUTE_HEALTH_TTL_SEC)
692+
693+
@valkey_schedule_resilience.apply()
694+
async def get_route_running_at_batch(self, route_ids: Sequence[str]) -> dict[str, int | None]:
695+
"""
696+
Batch read running_at field from route health hashes.
697+
Works even on partial hashes (before full RouteHealthRecord is initialized).
698+
699+
:param route_ids: Route IDs to look up
700+
:return: Mapping of route_id to running_at timestamp (None if not set)
701+
"""
702+
if not route_ids:
703+
return {}
704+
705+
batch = Batch(is_atomic=False)
706+
for route_id in route_ids:
707+
key = self._get_route_health_key(route_id)
708+
batch.hget(key, "running_at")
709+
710+
async with self._client.client() as conn:
711+
results = await conn.exec(batch, raise_on_error=False)
712+
if results is None:
713+
return dict.fromkeys(route_ids)
714+
715+
running_at_map: dict[str, int | None] = {}
716+
for i, route_id in enumerate(route_ids):
717+
raw = results[i] if len(results) > i else None
718+
if raw and raw != b"0":
719+
running_at_map[route_id] = int(raw)
720+
else:
721+
running_at_map[route_id] = None
722+
return running_at_map
723+
671724
@valkey_schedule_resilience.apply()
672725
async def refresh_route_health_ttl(self, route_id: str) -> None:
673726
"""
@@ -828,6 +881,8 @@ async def get_route_health_record(self, route_id: str) -> RouteHealthRecord | No
828881
return None
829882

830883
data = {k.decode(): v.decode() for k, v in result.items()}
884+
if "route_id" not in data:
885+
return None
831886
return RouteHealthRecord.from_valkey_hash(data)
832887

833888
@valkey_schedule_resilience.apply()
@@ -866,6 +921,10 @@ async def get_route_health_records_batch(
866921
continue
867922

868923
data = {k.decode(): v.decode() for k, v in raw.items()}
924+
if "route_id" not in data:
925+
# Partial hash (e.g., only running_at set by mark_route_running_at)
926+
records[route_id] = None
927+
continue
869928
records[route_id] = RouteHealthRecord.from_valkey_hash(data)
870929

871930
return records

src/ai/backend/common/dto/manager/auth/request.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,12 @@ class AuthorizeRequest(BaseRequestModel):
5151
default=None,
5252
description="One-time password for TOTP-based two-factor authentication",
5353
)
54-
client_type_id: UUID = Field(
54+
client_type_id: UUID | None = Field(
55+
default=None,
5556
description=(
5657
"Login client type UUID (must reference an existing login_client_types row). "
57-
"Concurrent session limits are enforced per client type."
58+
"Concurrent session limits are enforced per client type. "
59+
"When omitted, session tracking is not scoped by client type."
5860
),
5961
)
6062
force: bool = Field(

0 commit comments

Comments
 (0)