deepseek 4卡启动报错

启动命令：CUDA_VISIBLE_DEVICES=0,1,2,3 grpst start server.mar --inference_conf=conf/inference_deepseek-r1-distill-qwen.yml --mpi_np=4

报错信息：
[TensorRT-LLM][ERROR] tensorrt_llm::common::TllmException: [TensorRT-LLM][ERROR] Assertion failed: len <= remaining_buffer_size (/workspace/tensorrt_llm/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/cubinObj.cpp:34)
1       0x7fdd6f903370 tensorrt_llm::common::throwRuntimeError(char const*, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 95
2       0x7fdd106eb24c tensorrt_llm::kernels::jit::CubinObj::CubinObj(void const*, unsigned long) + 412
3       0x7fdd107017d4 tensorrt_llm::kernels::jit::CubinObjRegistryTemplate<tensorrt_llm::kernels::XQAKernelFullHashKey, tensorrt_llm::kernels::XQAKernelFullHasher>::CubinObjRegistryTemplate(void const*, unsigned long) + 292
4       0x7fdd10701132 tensorrt_llm::kernels::DecoderXQARunner::Resource::Resource(void const*, unsigned long) + 50
5       0x7fdd0405e149 tensorrt_llm::plugins::GPTAttentionPluginCommon::GPTAttentionPluginCommon(void const*, unsigned long) + 1193
6       0x7fdd04095232 tensorrt_llm::plugins::GPTAttentionPlugin::GPTAttentionPlugin(void const*, unsigned long) + 18
7       0x7fdd040952b2 tensorrt_llm::plugins::GPTAttentionPluginCreator::deserializePlugin(char const*, void const*, unsigned long) + 50
8       0x7fdcf5aa6b5b /usr/local/tensorrt/lib/libnvinfer.so.10(+0x11deb5b) [0x7fdcf5aa6b5b]
9       0x7fdcf5aa345e /usr/local/tensorrt/lib/libnvinfer.so.10(+0x11db45e) [0x7fdcf5aa345e]
10      0x7fdcf59d62b7 /usr/local/tensorrt/lib/libnvinfer.so.10(+0x110e2b7) [0x7fdcf59d62b7]
11      0x7fdcf59d4e6a /usr/local/tensorrt/lib/libnvinfer.so.10(+0x110ce6a) [0x7fdcf59d4e6a]
12      0x7fdcf59eca77 /usr/local/tensorrt/lib/libnvinfer.so.10(+0x1124a77) [0x7fdcf59eca77]
13      0x7fdcf59f05b6 /usr/local/tensorrt/lib/libnvinfer.so.10(+0x11285b6) [0x7fdcf59f05b6]
14      0x7fdcf59f0b06 /usr/local/tensorrt/lib/libnvinfer.so.10(+0x1128b06) [0x7fdcf59f0b06]
15      0x7fdcf5a27fc7 /usr/local/tensorrt/lib/libnvinfer.so.10(+0x115ffc7) [0x7fdcf5a27fc7]
16      0x7fdcf5a28bd8 /usr/local/tensorrt/lib/libnvinfer.so.10(+0x1160bd8) [0x7fdcf5a28bd8]
17      0x7fdcf5a28cdb /usr/local/tensorrt/lib/libnvinfer.so.10(+0x1160cdb) [0x7fdcf5a28cdb]
18      0x7fdd1252f275 tensorrt_llm::runtime::TllmRuntime::TllmRuntime(tensorrt_llm::runtime::RawEngine const&, nvinfer1::ILogger*, float, bool) + 1413
19      0x7fdd1298d428 tensorrt_llm::batch_manager::TrtGptModelInflightBatching::TrtGptModelInflightBatching(std::shared_ptr<nvinfer1::ILogger>, tensorrt_llm::runtime::ModelConfig const&, tensorrt_llm::runtime::WorldConfig const&, tensorrt_llm::runtime::RawEngine const&, bool, tensorrt_llm::batch_manager::TrtGptModelOptionalParams const&) + 1304
20      0x7fdd1291151e tensorrt_llm::batch_manager::TrtGptModelFactory::create(tensorrt_llm::runtime::RawEngine const&, tensorrt_llm::runtime::ModelConfig const&, tensorrt_llm::runtime::WorldConfig const&, tensorrt_llm::batch_manager::TrtGptModelType, tensorrt_llm::batch_manager::TrtGptModelOptionalParams const&) + 526
21      0x7fdd12a28029 tensorrt_llm::executor::Executor::Impl::createModel(tensorrt_llm::runtime::RawEngine const&, tensorrt_llm::runtime::ModelConfig const&, tensorrt_llm::runtime::WorldConfig const&, tensorrt_llm::executor::ExecutorConfig const&) + 185
22      0x7fdd12a286bd tensorrt_llm::executor::Executor::Impl::loadModel(std::optional<std::filesystem::__cxx11::path> const&, std::optional<std::basic_string_view<unsigned char, std::char_traits<unsigned char> > > const&, tensorrt_llm::runtime::GptJsonConfig const&, tensorrt_llm::executor::ExecutorConfig const&, bool, std::optional<std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tensorrt_llm::executor::Tensor, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, tensorrt_llm::executor::Tensor> > > > const&) + 1229
23      0x7fdd12a2990a tensorrt_llm::executor::Executor::Impl::Impl(std::filesystem::__cxx11::path const&, std::optional<std::filesystem::__cxx11::path> const&, tensorrt_llm::executor::ModelType, tensorrt_llm::executor::ExecutorConfig const&) + 2474
24      0x7fdd12a0f757 tensorrt_llm::executor::Executor::Executor(std::filesystem::__cxx11::path const&, tensorrt_llm::executor::ModelType, tensorrt_llm::executor::ExecutorConfig const&) + 87
25      0x7fdd6f977bfa std::__detail::_MakeUniq<tensorrt_llm::executor::Executor>::__single_object std::make_unique<tensorrt_llm::executor::Executor, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, tensorrt_llm::executor::ModelType&, tensorrt_llm::executor::ExecutorConfig&>(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, tensorrt_llm::executor::ModelType&, tensorrt_llm::executor::ExecutorConfig&) + 154
26      0x7fdd6f974966 netease::grps::TrtLlmModelInstance::TrtLlmModelInstance(netease::grps::TrtLlmModelState*, netease::grps::LLMStyler*, netease::grps::MultiInstanceTokenizer*, netease::grps::VIT*) + 3542
27      0x7fdd73427eae netease::grps::TrtllmInferer::Load() + 4286
28      0x55b7cafefe2e ./bin/grps_server(+0xf2e2e) [0x55b7cafefe2e]
29      0x55b7caff1e2c ./bin/grps_server(+0xf4e2c) [0x55b7caff1e2c]
30      0x55b7caf82a7a ./bin/grps_server(+0x85a7a) [0x55b7caf82a7a]
31      0x7fdd6ff9a1ca /usr/lib/x86_64-linux-gnu/libc.so.6(+0x2a1ca) [0x7fdd6ff9a1ca]
32      0x7fdd6ff9a28b __libc_start_main + 139
33      0x55b7caf87865 ./bin/grps_server(+0x8a865) [0x55b7caf87865]


该模型已在 TensorRT-LLM 中单独测试过，运行没有问题，麻烦帮忙看一下。

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

deepseek 4卡启动报错 #6

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

deepseek 4卡启动报错 #6

Description

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions