@@ -193,11 +193,6 @@ async def check_running_routes(self, routes: Sequence[RouteData]) -> RouteExecut
193193 with RouteRecorderContext .shared_step ("fetch_kernel_connection_info" ):
194194 await self ._populate_replica_info (routes_missing_replica )
195195
196- # Phase 4: Ensure RouteHealthRecords exist in Valkey for routes with replica info
197- routes_with_replica = [r for r in successes if r .replica_host and r .replica_port ]
198- if routes_with_replica :
199- await self ._ensure_health_records (routes_with_replica )
200-
201196 return RouteExecutionResult (
202197 successes = successes ,
203198 errors = errors ,
@@ -225,29 +220,6 @@ async def _populate_replica_info(self, routes: Sequence[RouteData]) -> None:
225220 if populated_routes :
226221 await self ._initialize_health_records (populated_routes , updates )
227222
228- async def _ensure_health_records (self , routes : Sequence [RouteData ]) -> None :
229- """Ensure RouteHealthRecords exist in Valkey for routes that already have replica info.
230-
231- Routes may already have replica_host/port in DB (set by a previous cycle or legacy code)
232- but lack a RouteHealthRecord in Valkey. This method checks and initializes missing records.
233- """
234- route_id_strs = [str (r .route_id ) for r in routes ]
235- existing = await self ._valkey_schedule .get_route_health_records_batch (route_id_strs )
236- missing = [r for r in routes if existing .get (str (r .route_id )) is None ]
237- if not missing :
238- return
239- log .warning (
240- "RouteHealthRecord missing in Valkey for {} routes, re-initializing: {}" ,
241- len (missing ),
242- [str (r .route_id )[:8 ] for r in missing ],
243- )
244- replica_info = {
245- r .route_id : (r .replica_host , r .replica_port )
246- for r in missing
247- if r .replica_host and r .replica_port
248- }
249- await self ._initialize_health_records (missing , replica_info )
250-
251223 async def _initialize_health_records (
252224 self ,
253225 routes : Sequence [RouteData ],
@@ -258,14 +230,6 @@ async def _initialize_health_records(
258230 health_configs = await self ._deployment_repo .fetch_health_check_configs_by_revision_ids (
259231 revision_ids
260232 )
261- redis_time = await self ._valkey_schedule .get_redis_time ()
262-
263- # Read existing running_at values that were set when routes transitioned to RUNNING
264- # These may be in partial hashes (only running_at field), so read raw field directly
265- running_at_map = await self ._valkey_schedule .get_route_running_at_batch ([
266- str (r .route_id ) for r in routes
267- ])
268-
269233 records : list [RouteHealthRecord ] = []
270234 for route in routes :
271235 host , port = replica_info [route .route_id ]
@@ -274,21 +238,16 @@ async def _initialize_health_records(
274238 health_path = health_config .path if health_config else "/"
275239 initial_delay = health_config .initial_delay if health_config else 60.0
276240 created_at = int (route .created_at .timestamp ())
277-
278- # Use running_at from Valkey (set at RUNNING transition), fallback to redis_time
279- route_id_str = str (route .route_id )
280- running_at = running_at_map .get (route_id_str ) or redis_time
281- initial_delay_until = running_at + int (initial_delay )
241+ initial_delay_until = created_at + int (initial_delay )
282242
283243 records .append (
284244 RouteHealthRecord (
285- route_id = route_id_str ,
245+ route_id = str ( route . route_id ) ,
286246 created_at = created_at ,
287247 initial_delay_until = initial_delay_until ,
288248 health_path = health_path ,
289249 inference_port = port ,
290250 replica_host = host ,
291- running_at = running_at ,
292251 )
293252 )
294253
0 commit comments