-
Notifications
You must be signed in to change notification settings - Fork 53
Description
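Running Qwen3-VL-32B-Instruct with the SGLang server on Ascend NPUs fails during decoding with an operator compile error (StridedSliceD: "src tensor overflow").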
Docker Compose configuration used:
# Docker Compose definition of a single-node SGLang server container that
# serves the Qwen3-VL-32B-Instruct checkpoint on Ascend NPUs (--device npu)
# with a tensor-parallel size of 4.
services:
  sglang-server:
    image: quay.nju.edu.cn/ascend/sglang:main-910b
    container_name: sglang-server
    restart: unless-stopped
    environment:
      # Four device IDs are listed, the same count as --tp-size 4 below.
      - ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
      - TP_SIZE=4
    privileged: true
    network_mode: host
    ipc: host
    # NOTE(review): with `ipc: host` the container shares the host IPC
    # namespace, so this shm_size setting presumably has no effect - confirm.
    shm_size: 16g
    # Folded scalar: every line below must stay at the SAME indentation so the
    # line breaks fold into single spaces. Indenting the flags further would
    # keep the newlines and split the quoted bash command into separate
    # commands.
    # NOTE(review): `pip install -U transformers` pulls an unpinned version on
    # every container start; consider pinning it for reproducible runs.
    command: >
      bash -c "
      pip install -U transformers &&
      python -m sglang.launch_server
      --model-path /root/.cache/Qwen3-VL-32B-Instruct
      --host 0.0.0.0
      --port 8000
      --trust-remote-code
      --nnodes 1
      --node-rank 0
      --attention-backend ascend
      --mm-attention-backend triton_attn
      --device npu
      --max-running-requests 32
      --context-length 8192
      --disable-radix-cache
      --chunked-prefill-size 32768
      --max-prefill-tokens 28000
      --tp-size 4
      --mem-fraction-static 0.8
      --disable-cuda-graph"
    volumes:
      # Host Ascend driver/firmware and install metadata, mounted at the same
      # paths inside the container.
      - /usr/local/sbin:/usr/local/sbin
      - /usr/local/Ascend/driver:/usr/local/Ascend/driver
      - /usr/local/Ascend/firmware:/usr/local/Ascend/firmware
      - /etc/ascend_install.info:/etc/ascend_install.info
      - /var/queue_schedule:/var/queue_schedule
      # Host model directory; --model-path above resolves inside this mount.
      - /Model/:/root/.cache
    devices:
      # NOTE(review): eight davinci devices are mapped although only IDs 0-3
      # are listed in ASCEND_RT_VISIBLE_DEVICES - confirm this is intended.
      - /dev/davinci0
      - /dev/davinci1
      - /dev/davinci2
      - /dev/davinci3
      - /dev/davinci4
      - /dev/davinci5
      - /dev/davinci6
      - /dev/davinci7
      - /dev/davinci_manager
      - /dev/hisi_hdc
Error log (container output):
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_utils/error_code.py", line 43, in wapper
sglang-server | return func(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/core/_backend.py", line 127, in compile
sglang-server | return super(TorchNpuGraph, self).compile()
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | RuntimeError: [PID: 885] 2025-11-18-00:34:29.371.175 Inner_Error_Compile_Fail(EA0000): Compile operator failed, cause: src tensor overflow, src need 128 elements, but only 124
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | , Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | TraceBack (most recent call last):
sglang-server | Failed to compile Op [StridedSliceV2_2,[StridedSliceV2_2,StridedSliceV2_2]]. (oppath: [Compile /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py failed with errormsg/stack: File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/tik/tik_lib/tik_check_util.py", line 324, in print_error_msg
sglang-server | raise RuntimeError(dict_arg)
sglang-server | RuntimeError: {'errCode': 'EA0000', 'message': 'src tensor overflow, src need 128 elements, but only 124
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | ', 'traceback': 'Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)'}
sglang-server | ], optype: [StridedSliceD])
sglang-server | Compile op[StridedSliceV2_2,[StridedSliceV2_2,StridedSliceV2_2]] failed, oppath[/usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py], optype[StridedSliceD], taskID[209]. Please check op's compilation error message.[FUNC:ReportBuildErrMessage][FILE:fusion_manager.cc][LINE:368]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281431691489568] failed to recompile single op[StridedSliceV2_2][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281431691489568] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281432631013664] failed to recompile single op[StridedSliceV2_6][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][CompOpOnly] CompileOp failed.[FUNC:CompileOpOnly][FILE:op_compiler.cc][LINE:1174]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281432631013664] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | [GraphOpt][FusedGraph][RunCompile] Failed to compile graph with compiler Normal mode Op Compiler[FUNC:SubGraphCompile][FILE:fe_graph_optimizer.cc][LINE:1417]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank9_new_sub_graph31[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank4_new_sub_graph26[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | Compile operator failed, cause: src tensor overflow, src need 128 elements, but only 122
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | , Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | Failed to compile Op [StridedSliceV2,[StridedSliceV2,StridedSliceV2]]. (oppath: [Compile /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py failed with errormsg/stack: File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/tik/tik_lib/tik_check_util.py", line 324, in print_error_msg
sglang-server | raise RuntimeError(dict_arg)
sglang-server | RuntimeError: {'errCode': 'EA0000', 'message': 'src tensor overflow, src need 128 elements, but only 122
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | ', 'traceback': 'Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)'}
sglang-server | ], optype: [StridedSliceD])
sglang-server | Compile op[StridedSliceV2,[StridedSliceV2,StridedSliceV2]] failed, oppath[/usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py], optype[StridedSliceD], taskID[226]. Please check op's compilation error message.[FUNC:ReportBuildErrMessage][FILE:fusion_manager.cc][LINE:368]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281431892816160] failed to recompile single op[StridedSliceV2][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281431892816160] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank6_new_sub_graph28[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281433234993440] failed to recompile single op[StridedSliceV2_4][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281433234993440] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank1_new_sub_graph23[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | subgraph 3 optimize failed[FUNC:OptimizeSubGraphWithMultiThreads][FILE:graph_manager.cc][LINE:962]
sglang-server | [Call][PreRun] Failed, graph_id:5, session_id:0.[FUNC:CompileGraph][FILE:graph_manager.cc][LINE:4826]
sglang-server | [Compile][Graph]Compile graph failed, error code:1343225857, session_id:0, graph_id:5, isEnableSliceSchedule:0.[FUNC:CompileGraph][FILE:ge_api.cc][LINE:1308]
sglang-server |
sglang-server |
sglang-server | During handling of the above exception, another exception occurred:
sglang-server |
sglang-server | Traceback (most recent call last):
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 2845, in run_scheduler_process
sglang-server | scheduler.event_loop_overlap()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
sglang-server | return func(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 992, in event_loop_overlap
sglang-server | batch_result = self.run_batch(batch)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1974, in run_batch
sglang-server | batch_result = self.model_worker.forward_batch_generation(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 368, in forward_batch_generation
sglang-server | logits_output, can_run_cuda_graph = self.model_runner.forward(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 2066, in forward
sglang-server | output = self._forward_raw(
sglang-server | ^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 2111, in _forward_raw
sglang-server | ret = self.forward_decode(
sglang-server | ^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 1983, in forward_decode
sglang-server | return self.model.forward(
sglang-server | ^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3_vl.py", line 726, in forward
sglang-server | hidden_states = general_mm_embed_routine(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/mm_utils.py", line 703, in general_mm_embed_routine
sglang-server | hidden_states = language_model(
sglang-server | ^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3_vl.py", line 568, in forward
sglang-server | hidden_states, residual = layer(
sglang-server | ^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3.py", line 266, in forward
sglang-server | hidden_states = self.self_attn(
sglang-server | ^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3.py", line 176, in forward
sglang-server | q, k = self.rotary_emb(positions, q, k)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/layers/rotary_embedding.py", line 1444, in forward
sglang-server | return self._forward_native(positions, query, key)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 736, in compile_wrapper
sglang-server | return fn(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/layers/rotary_embedding.py", line 1369, in _forward_native
sglang-server | @torch.compile(dynamic=True, backend=get_compiler_backend())
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 929, in _fn
sglang-server | return fn(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/aot_autograd.py", line 1241, in forward
sglang-server | return compiled_fn(full_args)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 384, in runtime_wrapper
sglang-server | all_outs = call_func_at_runtime_with_args(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/utils.py", line 126, in call_func_at_runtime_with_args
sglang-server | out = normalize_as_list(f(args))
sglang-server | ^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 750, in inner_fn
sglang-server | outs = compiled_fn(args)
sglang-server | ^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 556, in wrapper
sglang-server | return compiled_fn(runtime_args)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/utils.py", line 100, in g
sglang-server | return f(*args)
sglang-server | ^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/npu_fx_compiler.py", line 355, in call
sglang-server | gm_result = self.runner(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_ge_concrete_graph/fx2ge_converter.py", line 684, in call
sglang-server | self.compile()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_ge_concrete_graph/fx2ge_converter.py", line 788, in compile
sglang-server | self.graph.compile()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/ge/_ge_graph.py", line 671, in compile
sglang-server | self._executor.compile()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_utils/error_code.py", line 46, in wapper
sglang-server | raise type(e)("\n".join(msg))
sglang-server | RuntimeError: [PID: 885] 2025-11-18-00:34:29.371.175 Inner_Error_Compile_Fail(EA0000): Compile operator failed, cause: src tensor overflow, src need 128 elements, but only 124
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | , Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | TraceBack (most recent call last):
sglang-server | Failed to compile Op [StridedSliceV2_2,[StridedSliceV2_2,StridedSliceV2_2]]. (oppath: [Compile /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py failed with errormsg/stack: File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/tik/tik_lib/tik_check_util.py", line 324, in print_error_msg
sglang-server | raise RuntimeError(dict_arg)