55 import tensorrt_llm .tensorrt_llm_transfer_agent_binding as tab
66
77 HAS_TRANSFER_AGENT = True
8- # Check which backends are available
8+ # Check which backends are available (compile-time flags)
99 HAS_NIXL = getattr (tab , "NIXL_ENABLED" , False )
1010 HAS_MOONCAKE = getattr (tab , "MOONCAKE_ENABLED" , False )
1111except ImportError :
1212 HAS_TRANSFER_AGENT = False
1313 HAS_NIXL = False
1414 HAS_MOONCAKE = False
1515
16+
def _is_mooncake_runtime_available():
    """Report whether the Mooncake wrapper library can actually be loaded.

    ``HAS_MOONCAKE`` only says the binding was compiled with Mooncake
    support; it does not say the shared libraries are present at runtime.
    This probe tries to load the wrapper shared object the same way the
    native loader is described to look for it (LD_LIBRARY_PATH -> RUNPATH
    -> system paths): first by bare library name, which covers
    LD_LIBRARY_PATH and development builds, and then from the ``libs``
    directory that sits next to the binding module, which covers installed
    wheels.

    Returns:
        bool: ``True`` if either load attempt succeeds; ``False`` if
        Mooncake was not compiled in or neither attempt could load the
        library.
    """
    # Without compile-time support there is nothing worth probing.
    if not HAS_MOONCAKE:
        return False

    import ctypes
    import os

    lib_name = "libtensorrt_llm_mooncake_wrapper.so"

    # Attempt 1: bare name, resolved by the dynamic loader's search path.
    try:
        ctypes.CDLL(lib_name)
    except OSError:
        loadable = False
    else:
        loadable = True

    # Attempt 2: only when the bare name failed, look for the copy bundled
    # alongside the binding module.
    if not loadable:
        try:
            bundled_lib = os.path.join(os.path.dirname(tab.__file__), "libs", lib_name)
            ctypes.CDLL(bundled_lib)
        except (OSError, AttributeError, TypeError):
            # OSError: library missing or not loadable.
            # AttributeError / TypeError: the binding has no usable __file__.
            loadable = False
        else:
            loadable = True

    return loadable
50+
51+
52+ HAS_MOONCAKE_RUNTIME = _is_mooncake_runtime_available ()
53+
1654# Try to import torch for functional tests
1755try :
1856 import torch
@@ -277,41 +315,21 @@ def test_nixl_transfer_agent_has_required_methods(self):
277315
278316@pytest .mark .skipif (not HAS_MOONCAKE , reason = "Mooncake backend not available" )
279317class TestMooncakeTransferAgent :
280- """Test cases for MooncakeTransferAgent."""
281-
282- def test_mooncake_transfer_agent_class_exists (self ):
283- """Test that MooncakeTransferAgent class exists."""
284- assert hasattr (tab , "MooncakeTransferAgent" )
285-
286- def test_mooncake_transfer_status_class_exists (self ):
287- """Test that MooncakeTransferStatus class exists."""
288- assert hasattr (tab , "MooncakeTransferStatus" )
318+ """Test cases for Mooncake transfer agent via make_transfer_agent factory.
289319
290- def test_mooncake_transfer_agent_is_base_subclass (self ):
291- """Test that MooncakeTransferAgent is a subclass of BaseTransferAgent."""
292- assert issubclass (tab .MooncakeTransferAgent , tab .BaseTransferAgent )
320+ Note: MooncakeTransferAgent/MooncakeTransferStatus are not directly exposed
321+ as nanobind classes to avoid a hard load-time dependency on libtransfer_engine.so.
322+ Instead, agents are created via the make_transfer_agent("mooncake", ...) factory
323+ which uses dlopen for lazy loading.
324+ """
293325
294- def test_mooncake_transfer_status_is_base_subclass (self ):
295- """Test that MooncakeTransferStatus is a subclass of TransferStatus ."""
296- assert issubclass ( tab .MooncakeTransferStatus , tab . TransferStatus )
326+ def test_mooncake_enabled_flag (self ):
327+ """Test that MOONCAKE_ENABLED flag is set ."""
328+ assert tab .MOONCAKE_ENABLED is True
297329
298- def test_mooncake_transfer_agent_has_required_methods (self ):
299- """Test that MooncakeTransferAgent has all required methods."""
300- required_methods = [
301- "register_memory" ,
302- "deregister_memory" ,
303- "load_remote_agent" ,
304- "load_remote_agent_by_connection" ,
305- "get_local_agent_desc" ,
306- "get_local_connection_info" ,
307- "invalidate_remote_agent" ,
308- "submit_transfer_requests" ,
309- "notify_sync_message" ,
310- "get_notified_sync_messages" ,
311- "check_remote_descs" ,
312- ]
313- for method in required_methods :
314- assert hasattr (tab .MooncakeTransferAgent , method ), f"Missing method: { method } "
330+ def test_make_transfer_agent_factory_exists (self ):
331+ """Test that the make_transfer_agent factory function exists."""
332+ assert hasattr (tab , "make_transfer_agent" )
315333
316334
317335# =============================================================================
@@ -561,7 +579,10 @@ def test_nixl_wait_failure_on_invalidated_remote(self):
561579 not (HAS_TORCH and HAS_CUDA ),
562580 reason = "Torch with CUDA support required for functional tests" ,
563581)
564- @pytest .mark .skipif (not HAS_MOONCAKE , reason = "Mooncake backend not available" )
582+ @pytest .mark .skipif (
583+ not HAS_MOONCAKE_RUNTIME ,
584+ reason = "Mooncake runtime libraries not available (libtransfer_engine.so)" ,
585+ )
565586class TestMooncakeFunctionalTransfer :
566587 """Functional tests for Mooncake data transfer between two agents."""
567588
@@ -578,12 +599,11 @@ def test_mooncake_write_transfer_gpu_tensor(self):
578599 # Verify initial state
579600 assert not torch .equal (src_tensor , dst_tensor )
580601
581- # Create two agents
602+ # Create two agents via factory (uses dlopen for lazy loading)
582603 config_a = tab .BaseAgentConfig (name = "mooncake_agent_a" , use_prog_thread = True )
583604 config_b = tab .BaseAgentConfig (name = "mooncake_agent_b" , use_prog_thread = True )
584- agent_a = tab .MooncakeTransferAgent (config_a )
585-
586- agent_b = tab .MooncakeTransferAgent (config_b )
605+ agent_a = tab .make_transfer_agent ("mooncake" , config_a )
606+ agent_b = tab .make_transfer_agent ("mooncake" , config_b )
587607 # Register memory regions
588608 src_descs = _create_memory_descs_from_tensor (src_tensor , tab .MemoryType .VRAM )
589609 dst_descs = _create_memory_descs_from_tensor (dst_tensor , tab .MemoryType .VRAM )
@@ -633,12 +653,12 @@ def test_mooncake_write_transfer_multiple_chunks(self):
633653 # Create corresponding destination tensors
634654 dst_tensors = [torch .zeros (256 , dtype = torch .float32 , device = device ) for _ in range (4 )]
635655
636- # Create agents
656+ # Create agents via factory (uses dlopen for lazy loading)
637657 config_a = tab .BaseAgentConfig (name = "mooncake_agent_a" , use_prog_thread = True )
638658 config_b = tab .BaseAgentConfig (name = "mooncake_agent_b" , use_prog_thread = True )
639659
640- agent_a = tab .MooncakeTransferAgent ( config_a )
641- agent_b = tab .MooncakeTransferAgent ( config_b )
660+ agent_a = tab .make_transfer_agent ( "mooncake" , config_a )
661+ agent_b = tab .make_transfer_agent ( "mooncake" , config_b )
642662
643663 # Create memory descriptors for all chunks
644664 src_memory_descs = []
0 commit comments