@@ -541,6 +541,7 @@ def cleanup(self):
541541 if container_model .state in (
542542 ContainerState .RELEASED ,
543543 ContainerState .RECYCLED ,
544+ ContainerState .REPLACED ,
544545 ):
545546 continue
546547
@@ -567,6 +568,7 @@ def cleanup(self):
567568 if container_model .state in (
568569 ContainerState .RELEASED ,
569570 ContainerState .RECYCLED ,
571+ ContainerState .REPLACED ,
570572 ):
571573 continue
572574
@@ -1123,17 +1125,62 @@ async def get_status_async(self, *args, **kwargs):
11231125 return await asyncio .to_thread (self .get_status , * args , ** kwargs )
11241126
11251127 @remote_wrapper ()
1126- def get_info (self , identity ):
1127- """Get container information by container_name or container_id."""
1128+ def get_info (self , identity , _redirect_depth : int = 0 ):
1129+ """
1130+ Get container information by container_name or container_id.
1131+
1132+ Automatically follows redirects if container is REPLACED.
1133+
1134+ Args:
1135+ identity: Container name or ID to look up
1136+ _redirect_depth: Internal parameter to prevent infinite redirect
1137+ loops
1138+
1139+ Returns:
1140+ Container information dict
1141+
1142+ Raises:
1143+ RuntimeError: If container not found or redirect loop detected
1144+ """
1145+ # Prevent infinite redirect loops
1146+ if _redirect_depth > 10 :
1147+ raise RuntimeError (
1148+ f"Redirect loop detected for container { identity } " ,
1149+ )
1150+
11281151 container_model = self .container_mapping .get (identity )
11291152 if container_model is None :
11301153 container_model = self .container_mapping .get (
11311154 self ._generate_container_key (identity ),
11321155 )
11331156 if container_model is None :
11341157 raise RuntimeError (f"No container found with id: { identity } ." )
1158+
1159+ # Parse container model
1160+ if isinstance (container_model , dict ):
1161+ cm_dict = container_model
1162+ elif hasattr (container_model , "model_dump" ):
1163+ cm_dict = container_model .model_dump ()
1164+ else :
1165+ cm_dict = container_model
1166+
1167+ # Check if this container has been replaced and needs redirect
1168+ cm = ContainerModel (** cm_dict )
1169+ if (
1170+ cm .state == ContainerState .REPLACED
1171+ and cm .redirect_to
1172+ and cm .redirect_to != identity
1173+ ):
1174+ logger .debug (
1175+ f"Container { identity } is REPLACED, redirecting to "
1176+ f"{ cm .redirect_to } " ,
1177+ )
1178+ # Follow the redirect recursively
1179+ return self .get_info (cm .redirect_to , _redirect_depth + 1 )
1180+
1181+ # Return the container model as dict/json
11351182 if hasattr (container_model , "model_dump_json" ):
1136- container_model = container_model .model_dump_json ()
1183+ return container_model .model_dump_json ()
11371184
11381185 return container_model
11391186
@@ -1447,14 +1494,32 @@ def restore_session(self, session_ctx_id: str) -> None:
14471494 # 3) heartbeat after restore (session-level)
14481495 self .update_heartbeat (session_ctx_id )
14491496
1450- # 4) archive old recycled records so needs_restore becomes False
1451- for old_name in recycled_old_names :
1497+ # 4) mark old recycled records as REPLACED with redirect info
1498+ # (keep them for client compatibility, don't delete)
1499+ for i , old_name in enumerate (recycled_old_names ):
14521500 try :
1453- self .container_mapping .delete (old_name )
1501+ # Get the raw container data without following redirects
1502+ old_data = self .container_mapping .get (old_name )
1503+ if not old_data :
1504+ logger .warning (
1505+ f"restore_session: old container { old_name } not "
1506+ f"found in mapping" ,
1507+ )
1508+ continue
1509+
1510+ old_cm = ContainerModel (** old_data )
1511+ old_cm .state = ContainerState .REPLACED
1512+ old_cm .redirect_to = new_container_names [i ]
1513+ old_cm .updated_at = time .time ()
1514+ self .container_mapping .set (old_name , old_cm .model_dump ())
1515+ logger .debug (
1516+ f"restore_session: marked { old_name } as REPLACED -> "
1517+ f"{ new_container_names [i ]} " ,
1518+ )
14541519 except Exception as e :
14551520 logger .warning (
1456- f"restore_session: failed to delete old model "
1457- f" { old_name } : { e } " ,
1521+ f"restore_session: failed to mark old container "
1522+ f"{ old_name } as REPLACED : { e } " ,
14581523 )
14591524
14601525 def scan_heartbeat_once (self ) -> dict :
@@ -1602,7 +1667,8 @@ def scan_pool_once(self) -> dict:
16021667
16031668 def scan_released_cleanup_once (self , max_delete : int = 200 ) -> dict :
16041669 """
1605- Delete container_mapping records whose state == RELEASED and expired.
1670+ Delete container_mapping records whose state is RELEASED or
1671+ REPLACED and expired.
16061672
16071673 TTL is config.released_key_ttl seconds. 0 disables cleanup.
16081674 """
@@ -1635,17 +1701,26 @@ def scan_released_cleanup_once(self, max_delete: int = 200) -> dict:
16351701
16361702 cm = ContainerModel (** container_json )
16371703
1638- if cm .state != ContainerState .RELEASED :
1704+ # Only cleanup RELEASED or REPLACED states
1705+ if cm .state not in (
1706+ ContainerState .RELEASED ,
1707+ ContainerState .REPLACED ,
1708+ ):
16391709 result ["skipped_not_released" ] += 1
16401710 continue
16411711
1642- released_at = cm .released_at or cm .updated_at or 0
1643- if released_at <= 0 :
1712+ # For RELEASED: use released_at; for REPLACED: use updated_at
1713+ cleanup_at = (
1714+ cm .released_at
1715+ if cm .state == ContainerState .RELEASED
1716+ else cm .updated_at
1717+ )
1718+ if not cleanup_at or cleanup_at <= 0 :
16441719 # no timestamp -> treat as not expired
16451720 result ["skipped_not_expired" ] += 1
16461721 continue
16471722
1648- if now - released_at <= ttl :
1723+ if now - cleanup_at <= ttl :
16491724 result ["skipped_not_expired" ] += 1
16501725 continue
16511726
0 commit comments