11# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22# SPDX-License-Identifier: Apache-2.0
33import asyncio
4+ import gc
45import time
56
67import orjson
2223from aiperf .common .factories import (
2324 ComposerFactory ,
2425 DatasetBackingStoreFactory ,
26+ DatasetClientStoreFactory ,
2527 EndpointFactory ,
2628 ServiceFactory ,
2729)
4547)
4648from aiperf .common .protocols import (
4749 DatasetBackingStoreProtocol ,
50+ DatasetClientStoreProtocol ,
4851 EndpointProtocol ,
4952 ServiceProtocol ,
5053)
@@ -94,6 +97,7 @@ def __init__(
9497 benchmark_id = user_config .benchmark_id ,
9598 )
9699 )
100+ self ._dataset_client : DatasetClientStoreProtocol | None = None
97101
98102 @on_command (CommandType .PROFILE_CONFIGURE )
99103 async def _profile_configure_command (
@@ -111,9 +115,33 @@ async def _profile_configure_command(
111115 begin = time .perf_counter ()
112116 await self ._configure_dataset ()
113117 await self ._generate_inputs_json_file ()
118+ await self ._configure_dataset_client_and_free_memory ()
119+
114120 duration = time .perf_counter () - begin
115121 self .info (lambda : f"Dataset configured in { duration :.2f} seconds" )
116122
async def _configure_dataset_client_and_free_memory(self) -> None:
    """Bring up the dataset client, then drop the in-memory dataset.

    Order matters here: the client is created and initialized first, the
    ``dataset_configured`` event is set only once the client is usable, and
    only afterwards are the in-memory containers released.
    """
    # Build the client from the metadata exposed by the backing store.
    client = DatasetClientStoreFactory.create_instance(
        client_metadata=self._backing_store.get_client_metadata(),
    )
    self._dataset_client = client
    await client.initialize()

    # The client can now answer fallback requests, so unblock any waiters.
    self.dataset_configured.set()

    # Remember how many conversations were held so the log line can report it.
    conversation_count = len(self.dataset)

    # Rebind to brand-new empty containers (rather than calling .clear()) so
    # the old containers and everything they reference become unreachable.
    self.dataset, self._conversation_ids_cache = {}, []

    # Two collection passes, so that circular references are cleaned up too.
    for _ in range(2):
        gc.collect()

    self.info(
        f"Dataset client initialized and freed {conversation_count} conversations from memory"
    )
144+
117145 async def _configure_tokenizer (self ) -> None :
118146 """Configure the tokenizer for the dataset manager."""
119147 tokenizer_name = self .user_config .tokenizer .name
@@ -304,7 +332,9 @@ async def _configure_dataset(self) -> None:
304332 f"unique conversations: { len (self .dataset_metadata .conversations )} , "
305333 f"unique turn count: { self .dataset_metadata .total_turn_count } "
306334 )
307- self .dataset_configured .set ()
# Note: the dataset_configured event is intentionally not set here. It is set in
# _configure_dataset_client_and_free_memory (called from _profile_configure_command)
# once the dataset client is initialized, to avoid a race condition where fallback
# requests arrive before the client is ready.
308338 await self .publish (
309339 DatasetConfiguredNotification (
310340 service_id = self .service_id ,
@@ -317,55 +347,58 @@ async def _configure_dataset(self) -> None:
async def _handle_conversation_request(
    self, message: ConversationRequestMessage
) -> ConversationResponseMessage:
    """Handle a conversation request using the dataset client.

    Waits for dataset configuration to finish, then looks the requested
    conversation up through ``self._dataset_client``.

    Args:
        message: Request carrying the ``conversation_id`` to look up and the
            ``request_id`` that is echoed back in the response.

    Returns:
        A ``ConversationResponseMessage`` holding the conversation that was
        found, tagged with this service's id and the request's id.

    Raises:
        The error built by ``self._service_error`` when the dataset client has
        not been created, or when the client lookup raises ``KeyError`` for
        the requested conversation id.
    """
    self.debug(lambda: f"Handling conversation request: {message}")

    await self._wait_for_dataset_configuration()

    client = self._dataset_client
    if client is None:
        raise self._service_error(
            "Dataset client is not initialized. Dataset must be configured before handling requests.",
        )

    try:
        conversation = await client.get_conversation(message.conversation_id)
    except KeyError as e:
        # Chain the original KeyError, as the turn-request handler does, so
        # the underlying lookup failure stays visible in tracebacks.
        raise self._service_error(
            f"Conversation {message.conversation_id} not found in dataset.",
        ) from e

    self.trace_or_debug(
        lambda: f"Sending conversation response: {conversation}",
        lambda: f"Sending conversation response with id: {conversation.session_id}",
    )
    return ConversationResponseMessage(
        service_id=self.service_id,
        request_id=message.request_id,
        conversation=conversation,
    )
355378
356379 @on_request (MessageType .CONVERSATION_TURN_REQUEST )
357380 async def _handle_conversation_turn_request (
358381 self , message : ConversationTurnRequestMessage
359382 ) -> ConversationTurnResponseMessage :
360- """Handle a turn request."""
383+ """Handle a turn request using the dataset client ."""
361384 self .debug (lambda : f"Handling turn request: { message } " )
362385
363- if message .conversation_id not in self .dataset :
386+ await self ._wait_for_dataset_configuration ()
387+
388+ if self ._dataset_client is None :
364389 raise self ._service_error (
365- f"Conversation { message .conversation_id } not found in dataset." ,
390+ "Dataset client is not initialized. Dataset must be configured before handling requests." ,
391+ )
392+
393+ try :
394+ conversation = await self ._dataset_client .get_conversation (
395+ message .conversation_id
366396 )
397+ except KeyError as e :
398+ raise self ._service_error (
399+ f"Conversation { message .conversation_id } not found in dataset." ,
400+ ) from e
367401
368- conversation = self .dataset [message .conversation_id ]
369402 if message .turn_index >= len (conversation .turns ):
370403 raise self ._service_error (
371404 f"Turn index { message .turn_index } is out of range for conversation { message .conversation_id } ." ,
@@ -395,8 +428,11 @@ async def _wait_for_dataset_configuration(self) -> None:
395428 )
396429
@on_stop
async def _cleanup(self) -> None:
    """Clean up the dataset client, the backing store, and associated mmap files.

    The dataset client is stopped first. The backing store is stopped in a
    ``finally`` clause so that a failure while stopping the client cannot
    leave the backing store un-stopped. An exception raised by either
    ``stop()`` call still propagates to the caller.
    """
    try:
        if self._dataset_client is not None:
            await self._dataset_client.stop()
            self.debug("Dataset client cleanup complete")
    finally:
        # Always attempt this, even if stopping the client raised above.
        if self._backing_store is not None:
            await self._backing_store.stop()
            self.debug("Backing store cleanup complete")
0 commit comments