Skip to content

Commit 2caedcb

Browse files
authored
fix(BA-3313): Add missing lock IDs to handlers with short cycle (#7223)
1 parent 7c506a8 commit 2caedcb

9 files changed

Lines changed: 23 additions & 17 deletions

File tree

changes/7223.fix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add missing lock IDs to the short-cycle handlers to prevent race conditions caused by concurrent execution of the short and long cycles

src/ai/backend/manager/defs.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,14 @@ class LockID(enum.IntEnum):
105105
# Retry timers (only long cycle - 30 seconds)
106106
LOCKID_SOKOVAN_RETRY_PREPARING_TIMER = 220
107107
LOCKID_SOKOVAN_RETRY_CREATING_TIMER = 221
108-
# Deployment auto-scaler timer
109-
LOCKID_DEPLOYMENT_AUTO_SCALER = 222
110-
108+
# Deployment locks
109+
LOCKID_DEPLOYMENT_AUTO_SCALER = 222 # Lock for deployment auto-scaler
110+
LOCKID_DEPLOYMENT_PROVISIONING_ROUTES = 223 # Lock for provisioning routes
111+
LOCKID_DEPLOYMENT_HEALTH_CHECK_ROUTES = 224 # Lock for health check routes
112+
LOCKID_DEPLOYMENT_RUNNING_ROUTES = 225 # Lock for running routes
113+
LOCKID_DEPLOYMENT_CHECK_PENDING = 226 # For operations checking pending deployments
114+
LOCKID_DEPLOYMENT_CHECK_REPLICA = 227 # For operations checking deployment replicas
115+
LOCKID_DEPLOYMENT_DESTROYING = 228 # For operations destroying deployments
111116
# Sokovan target status locks (prevent concurrent operations on same status)
112117
LOCKID_SOKOVAN_TARGET_PENDING = 230 # For operations targeting PENDING sessions
113118
LOCKID_SOKOVAN_TARGET_PREPARING = 231 # For operations targeting PREPARING/PULLING sessions

src/ai/backend/manager/sokovan/deployment/handlers/destroying.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def name(cls) -> str:
3939

4040
@property
4141
def lock_id(self) -> Optional[LockID]:
42-
"""No lock needed for destroying deployments."""
43-
return None
42+
"""Lock for destroying deployments."""
43+
return LockID.LOCKID_DEPLOYMENT_DESTROYING
4444

4545
@classmethod
4646
def target_statuses(cls) -> list[EndpointLifecycle]:

src/ai/backend/manager/sokovan/deployment/handlers/pending.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def name(cls) -> str:
3838

3939
@property
4040
def lock_id(self) -> Optional[LockID]:
41-
"""No lock needed for checking pending deployments."""
42-
return None
41+
"""Lock for checking pending deployments."""
42+
return LockID.LOCKID_DEPLOYMENT_CHECK_PENDING
4343

4444
@classmethod
4545
def target_statuses(cls) -> list[EndpointLifecycle]:

src/ai/backend/manager/sokovan/deployment/handlers/replica.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def name(cls) -> str:
3838

3939
@property
4040
def lock_id(self) -> Optional[LockID]:
41-
"""No lock needed for checking replicas."""
42-
return None
41+
"""Lock for checking replicas."""
42+
return LockID.LOCKID_DEPLOYMENT_CHECK_REPLICA
4343

4444
@classmethod
4545
def target_statuses(cls) -> list[EndpointLifecycle]:

src/ai/backend/manager/sokovan/deployment/handlers/scaling.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def name(cls) -> str:
3939

4040
@property
4141
def lock_id(self) -> Optional[LockID]:
42-
"""No lock needed for scaling deployments."""
43-
return None
42+
"""Lock for scaling deployments."""
43+
return LockID.LOCKID_DEPLOYMENT_AUTO_SCALER
4444

4545
@classmethod
4646
def target_statuses(cls) -> list[EndpointLifecycle]:

src/ai/backend/manager/sokovan/deployment/route/handlers/health_check.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ def name(cls) -> str:
3535

3636
@property
3737
def lock_id(self) -> Optional[LockID]:
38-
"""No lock needed for health check."""
39-
return None
38+
"""Lock for health check routes."""
39+
return LockID.LOCKID_DEPLOYMENT_HEALTH_CHECK_ROUTES
4040

4141
@classmethod
4242
def target_statuses(cls) -> list[RouteStatus]:

src/ai/backend/manager/sokovan/deployment/route/handlers/provisioning.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ def name(cls) -> str:
3535

3636
@property
3737
def lock_id(self) -> Optional[LockID]:
38-
"""No lock needed for provisioning routes."""
39-
return None
38+
"""Lock for provisioning routes."""
39+
return LockID.LOCKID_DEPLOYMENT_PROVISIONING_ROUTES
4040

4141
@classmethod
4242
def target_statuses(cls) -> list[RouteStatus]:

src/ai/backend/manager/sokovan/deployment/route/handlers/running.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ def name(cls) -> str:
3535

3636
@property
3737
def lock_id(self) -> Optional[LockID]:
38-
"""No lock needed for checking running routes."""
39-
return None
38+
"""Lock for checking running routes."""
39+
return LockID.LOCKID_DEPLOYMENT_RUNNING_ROUTES
4040

4141
@classmethod
4242
def target_statuses(cls) -> list[RouteStatus]:

0 commit comments

Comments
 (0)