Skip to content

Commit d309646

Browse files
authored
[None][fix] fix mooncake dynamic load in transfer_agent_binding (NVIDIA#12181)
Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com>
1 parent c80dc64 commit d309646

4 files changed

Lines changed: 79 additions & 88 deletions

File tree

cpp/tensorrt_llm/executor/cache_transmission/mooncake_utils/transferAgent.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ TransferState MooncakeTransferStatus::wait(int64_t timeout_ms) const
9797
freeBatchID(mEngine, mBatchId);
9898
mBatchFreed = true;
9999
TLLM_LOG_DEBUG("Batch ID %lu freed in wait()", mBatchId);
100-
syncSegmentCache(mEngine);
101100
std::this_thread::sleep_for(std::chrono::milliseconds(1));
102101
return TransferState::kSUCCESS;
103102
}

cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/CMakeLists.txt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,16 @@ if(NIXL_ENABLED OR MOONCAKE_ENABLED)
9696
message(STATUS "Transfer agent binding: NIXL support enabled")
9797
endif()
9898

99-
# Conditionally add Mooncake support
99+
# Conditionally add Mooncake support NOTE: We intentionally do NOT link
100+
# against tensorrt_llm_mooncake_wrapper here. The mooncake wrapper (and its
101+
# dependency libtransfer_engine.so) is loaded lazily via DynLibLoader::dlopen
102+
# in makeTransferAgent("mooncake", ...) at runtime. This allows the binding
103+
# module to be imported even when Mooncake is not installed.
100104
if(MOONCAKE_ENABLED)
101105
target_compile_definitions(${TRANSFER_AGENT_BINDING_TARGET}
102106
PRIVATE ENABLE_MOONCAKE)
103-
target_include_directories(${TRANSFER_AGENT_BINDING_TARGET}
104-
PRIVATE ${TRANSFER_ENGINE_INCLUDE_DIR})
105-
target_link_libraries(${TRANSFER_AGENT_BINDING_TARGET}
106-
PRIVATE tensorrt_llm_mooncake_wrapper)
107-
message(STATUS "Transfer agent binding: Mooncake support enabled")
107+
message(
108+
STATUS "Transfer agent binding: Mooncake support enabled (lazy loading)")
108109
endif()
109110

110111
# Common dependencies

cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/agentBindings.cpp

Lines changed: 11 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@
2121
#include "transferAgent.h"
2222
#endif
2323

24-
#ifdef ENABLE_MOONCAKE
25-
#include "../mooncake_utils/transferAgent.h"
26-
#endif
27-
2824
#include <nanobind/nanobind.h>
2925
#include <nanobind/ndarray.h>
3026
#include <nanobind/stl/optional.h>
@@ -173,10 +169,13 @@ NB_MODULE(tensorrt_llm_transfer_agent_binding, m)
173169
.def_prop_ro("remote_name", &kvc::TransferRequest::getRemoteName)
174170
.def_prop_ro("sync_message", &kvc::TransferRequest::getSyncMessage);
175171

176-
// TransferStatus base class
172+
// TransferStatus base class - release GIL for potentially blocking operations.
173+
// All concrete subclasses (Nixl, Mooncake) perform blocking waits, so releasing
174+
// the GIL here is safe and necessary for correct behavior when the concrete
175+
// subclass type is not directly registered (e.g., agents created via factory).
177176
nb::class_<kvc::TransferStatus>(m, "TransferStatus")
178-
.def("is_completed", &kvc::TransferStatus::isCompleted)
179-
.def("wait", &kvc::TransferStatus::wait, nb::arg("timeout_ms") = -1);
177+
.def("is_completed", &kvc::TransferStatus::isCompleted, nb::call_guard<nb::gil_scoped_release>())
178+
.def("wait", &kvc::TransferStatus::wait, nb::arg("timeout_ms") = -1, nb::call_guard<nb::gil_scoped_release>());
180179

181180
// BaseAgentConfig struct
182181
nb::class_<kvc::BaseAgentConfig>(m, "BaseAgentConfig")
@@ -257,39 +256,11 @@ NB_MODULE(tensorrt_llm_transfer_agent_binding, m)
257256
.def("check_remote_descs", &kvc::NixlTransferAgent::checkRemoteDescs, nb::arg("name"), nb::arg("memory_descs"));
258257
#endif
259258

260-
#ifdef ENABLE_MOONCAKE
261-
// MooncakeTransferStatus class - release GIL for blocking operations
262-
nb::class_<kvc::MooncakeTransferStatus, kvc::TransferStatus>(m, "MooncakeTransferStatus")
263-
.def("is_completed", &kvc::MooncakeTransferStatus::isCompleted, nb::call_guard<nb::gil_scoped_release>())
264-
.def("wait", &kvc::MooncakeTransferStatus::wait, nb::arg("timeout_ms") = -1,
265-
nb::call_guard<nb::gil_scoped_release>());
266-
267-
// MooncakeTransferAgent class
268-
nb::class_<kvc::MooncakeTransferAgent, kvc::BaseTransferAgent>(m, "MooncakeTransferAgent")
269-
.def(nb::init<kvc::BaseAgentConfig const&>(), nb::arg("config"))
270-
.def("register_memory", &kvc::MooncakeTransferAgent::registerMemory, nb::arg("descs"))
271-
.def("deregister_memory", &kvc::MooncakeTransferAgent::deregisterMemory, nb::arg("descs"))
272-
.def("load_remote_agent",
273-
nb::overload_cast<std::string const&, kvc::AgentDesc const&>(&kvc::MooncakeTransferAgent::loadRemoteAgent),
274-
nb::arg("name"), nb::arg("agent_desc"))
275-
.def("load_remote_agent_by_connection",
276-
nb::overload_cast<std::string const&, kvc::ConnectionInfoType const&>(
277-
&kvc::MooncakeTransferAgent::loadRemoteAgent),
278-
nb::arg("name"), nb::arg("connection_info"))
279-
.def("get_local_agent_desc", &kvc::MooncakeTransferAgent::getLocalAgentDesc)
280-
.def("get_local_connection_info", &kvc::MooncakeTransferAgent::getLocalConnectionInfo)
281-
.def("invalidate_remote_agent", &kvc::MooncakeTransferAgent::invalidateRemoteAgent, nb::arg("name"))
282-
.def(
283-
"submit_transfer_requests",
284-
[](kvc::MooncakeTransferAgent& self, kvc::TransferRequest const& request)
285-
{ return self.submitTransferRequests(request).release(); },
286-
nb::arg("request"), nb::rv_policy::take_ownership, nb::call_guard<nb::gil_scoped_release>())
287-
.def("notify_sync_message", &kvc::MooncakeTransferAgent::notifySyncMessage, nb::arg("name"),
288-
nb::arg("sync_message"))
289-
.def("get_notified_sync_messages", &kvc::MooncakeTransferAgent::getNotifiedSyncMessages)
290-
.def("check_remote_descs", &kvc::MooncakeTransferAgent::checkRemoteDescs, nb::arg("name"),
291-
nb::arg("memory_descs"));
292-
#endif
259+
// NOTE: MooncakeTransferAgent/MooncakeTransferStatus class bindings are intentionally
260+
// NOT registered here. Directly binding them would create a load-time dependency on
261+
// libtensorrt_llm_mooncake_wrapper.so (and transitively libtransfer_engine.so),
262+
// causing import to fail on machines without Mooncake installed.
263+
// Instead, use make_transfer_agent("mooncake", config) which loads the library lazily.
293264

294265
// Factory function to create transfer agent by backend name (uses dynamic loading)
295266
m.def(

tests/unittest/bindings/test_transfer_agent_bindings.py

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,52 @@
55
import tensorrt_llm.tensorrt_llm_transfer_agent_binding as tab
66

77
HAS_TRANSFER_AGENT = True
8-
# Check which backends are available
8+
# Check which backends are available (compile-time flags)
99
HAS_NIXL = getattr(tab, "NIXL_ENABLED", False)
1010
HAS_MOONCAKE = getattr(tab, "MOONCAKE_ENABLED", False)
1111
except ImportError:
1212
HAS_TRANSFER_AGENT = False
1313
HAS_NIXL = False
1414
HAS_MOONCAKE = False
1515

16+
17+
def _is_mooncake_runtime_available():
18+
"""Check if Mooncake runtime libraries are actually available.
19+
20+
HAS_MOONCAKE only indicates compile-time support. At runtime,
21+
DynLibLoader::dlopen searches: LD_LIBRARY_PATH -> RUNPATH -> system paths.
22+
We mirror this by trying the library name first (covers LD_LIBRARY_PATH
23+
and dev builds), then falling back to the bundled path (installed wheels).
24+
"""
25+
if not HAS_MOONCAKE:
26+
return False
27+
28+
import ctypes
29+
import os
30+
31+
wrapper_name = "libtensorrt_llm_mooncake_wrapper.so"
32+
33+
# 1) Try by name: finds via LD_LIBRARY_PATH / system paths (dev workflow)
34+
try:
35+
ctypes.CDLL(wrapper_name)
36+
return True
37+
except OSError:
38+
pass
39+
40+
# 2) Fallback: try bundled path (installed wheel)
41+
try:
42+
binding_dir = os.path.dirname(tab.__file__)
43+
wrapper_path = os.path.join(binding_dir, "libs", wrapper_name)
44+
ctypes.CDLL(wrapper_path)
45+
return True
46+
except (OSError, AttributeError, TypeError):
47+
pass
48+
49+
return False
50+
51+
52+
HAS_MOONCAKE_RUNTIME = _is_mooncake_runtime_available()
53+
1654
# Try to import torch for functional tests
1755
try:
1856
import torch
@@ -277,41 +315,21 @@ def test_nixl_transfer_agent_has_required_methods(self):
277315

278316
@pytest.mark.skipif(not HAS_MOONCAKE, reason="Mooncake backend not available")
279317
class TestMooncakeTransferAgent:
280-
"""Test cases for MooncakeTransferAgent."""
281-
282-
def test_mooncake_transfer_agent_class_exists(self):
283-
"""Test that MooncakeTransferAgent class exists."""
284-
assert hasattr(tab, "MooncakeTransferAgent")
285-
286-
def test_mooncake_transfer_status_class_exists(self):
287-
"""Test that MooncakeTransferStatus class exists."""
288-
assert hasattr(tab, "MooncakeTransferStatus")
318+
"""Test cases for Mooncake transfer agent via make_transfer_agent factory.
289319
290-
def test_mooncake_transfer_agent_is_base_subclass(self):
291-
"""Test that MooncakeTransferAgent is a subclass of BaseTransferAgent."""
292-
assert issubclass(tab.MooncakeTransferAgent, tab.BaseTransferAgent)
320+
Note: MooncakeTransferAgent/MooncakeTransferStatus are not directly exposed
321+
as nanobind classes to avoid a hard load-time dependency on libtransfer_engine.so.
322+
Instead, agents are created via the make_transfer_agent("mooncake", ...) factory
323+
which uses dlopen for lazy loading.
324+
"""
293325

294-
def test_mooncake_transfer_status_is_base_subclass(self):
295-
"""Test that MooncakeTransferStatus is a subclass of TransferStatus."""
296-
assert issubclass(tab.MooncakeTransferStatus, tab.TransferStatus)
326+
def test_mooncake_enabled_flag(self):
327+
"""Test that MOONCAKE_ENABLED flag is set."""
328+
assert tab.MOONCAKE_ENABLED is True
297329

298-
def test_mooncake_transfer_agent_has_required_methods(self):
299-
"""Test that MooncakeTransferAgent has all required methods."""
300-
required_methods = [
301-
"register_memory",
302-
"deregister_memory",
303-
"load_remote_agent",
304-
"load_remote_agent_by_connection",
305-
"get_local_agent_desc",
306-
"get_local_connection_info",
307-
"invalidate_remote_agent",
308-
"submit_transfer_requests",
309-
"notify_sync_message",
310-
"get_notified_sync_messages",
311-
"check_remote_descs",
312-
]
313-
for method in required_methods:
314-
assert hasattr(tab.MooncakeTransferAgent, method), f"Missing method: {method}"
330+
def test_make_transfer_agent_factory_exists(self):
331+
"""Test that the make_transfer_agent factory function exists."""
332+
assert hasattr(tab, "make_transfer_agent")
315333

316334

317335
# =============================================================================
@@ -561,7 +579,10 @@ def test_nixl_wait_failure_on_invalidated_remote(self):
561579
not (HAS_TORCH and HAS_CUDA),
562580
reason="Torch with CUDA support required for functional tests",
563581
)
564-
@pytest.mark.skipif(not HAS_MOONCAKE, reason="Mooncake backend not available")
582+
@pytest.mark.skipif(
583+
not HAS_MOONCAKE_RUNTIME,
584+
reason="Mooncake runtime libraries not available (libtransfer_engine.so)",
585+
)
565586
class TestMooncakeFunctionalTransfer:
566587
"""Functional tests for Mooncake data transfer between two agents."""
567588

@@ -578,12 +599,11 @@ def test_mooncake_write_transfer_gpu_tensor(self):
578599
# Verify initial state
579600
assert not torch.equal(src_tensor, dst_tensor)
580601

581-
# Create two agents
602+
# Create two agents via factory (uses dlopen for lazy loading)
582603
config_a = tab.BaseAgentConfig(name="mooncake_agent_a", use_prog_thread=True)
583604
config_b = tab.BaseAgentConfig(name="mooncake_agent_b", use_prog_thread=True)
584-
agent_a = tab.MooncakeTransferAgent(config_a)
585-
586-
agent_b = tab.MooncakeTransferAgent(config_b)
605+
agent_a = tab.make_transfer_agent("mooncake", config_a)
606+
agent_b = tab.make_transfer_agent("mooncake", config_b)
587607
# Register memory regions
588608
src_descs = _create_memory_descs_from_tensor(src_tensor, tab.MemoryType.VRAM)
589609
dst_descs = _create_memory_descs_from_tensor(dst_tensor, tab.MemoryType.VRAM)
@@ -633,12 +653,12 @@ def test_mooncake_write_transfer_multiple_chunks(self):
633653
# Create corresponding destination tensors
634654
dst_tensors = [torch.zeros(256, dtype=torch.float32, device=device) for _ in range(4)]
635655

636-
# Create agents
656+
# Create agents via factory (uses dlopen for lazy loading)
637657
config_a = tab.BaseAgentConfig(name="mooncake_agent_a", use_prog_thread=True)
638658
config_b = tab.BaseAgentConfig(name="mooncake_agent_b", use_prog_thread=True)
639659

640-
agent_a = tab.MooncakeTransferAgent(config_a)
641-
agent_b = tab.MooncakeTransferAgent(config_b)
660+
agent_a = tab.make_transfer_agent("mooncake", config_a)
661+
agent_b = tab.make_transfer_agent("mooncake", config_b)
642662

643663
# Create memory descriptors for all chunks
644664
src_memory_descs = []

0 commit comments

Comments
 (0)