Skip to content

run Qwen3-VL-32B-Instruct error #191

@ponyioy

Description

@ponyioy

Running Qwen3-VL-32B-Instruct fails during decode with an operator compile error (`StridedSliceD`: "src tensor overflow").

Docker Compose config:
# Docker Compose definition of a single service, `sglang-server`, that launches
# `python -m sglang.launch_server` for the model stored at
# /root/.cache/Qwen3-VL-32B-Instruct inside the container.
services:
  sglang-server:
    image: quay.nju.edu.cn/ascend/sglang:main-910b
    container_name: sglang-server
    restart: unless-stopped

    environment:
      # Four device IDs are listed, matching `--tp-size 4` in the command.
      - ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
      # NOTE(review): the command below hard-codes `--tp-size 4` and never
      # references this variable — confirm whether the image reads it.
      - TP_SIZE=4

    privileged: true
    network_mode: host

    # NOTE(review): with the host IPC namespace, `shm_size` presumably has no
    # effect on the shared memory the container sees — confirm.
    ipc: host
    shm_size: 16g

    # Folded scalar: the lines below are joined with single spaces into one
    # `bash -c "..."` invocation. transformers is upgraded with pip on every
    # container start before the server is launched.
    command: >
      bash -c "
      pip install -U transformers &&
      python -m sglang.launch_server
      --model-path /root/.cache/Qwen3-VL-32B-Instruct
      --host 0.0.0.0
      --port 8000
      --trust-remote-code
      --nnodes 1
      --node-rank 0
      --attention-backend ascend
      --mm-attention-backend triton_attn
      --device npu
      --max-running-requests 32
      --context-length 8192
      --disable-radix-cache
      --chunked-prefill-size 32768
      --max-prefill-tokens 28000
      --tp-size 4
      --mem-fraction-static 0.8
      --disable-cuda-graph"
    volumes:
      - /usr/local/sbin:/usr/local/sbin
      - /usr/local/Ascend/driver:/usr/local/Ascend/driver
      - /usr/local/Ascend/firmware:/usr/local/Ascend/firmware
      - /etc/ascend_install.info:/etc/ascend_install.info
      - /var/queue_schedule:/var/queue_schedule
      # Host /Model/ is mounted at /root/.cache, the parent of --model-path.
      - /Model/:/root/.cache

    # All eight davinci devices are mapped, although
    # ASCEND_RT_VISIBLE_DEVICES above lists only 0-3.
    devices:
      - /dev/davinci0
      - /dev/davinci1
      - /dev/davinci2
      - /dev/davinci3
      - /dev/davinci4
      - /dev/davinci5
      - /dev/davinci6
      - /dev/davinci7
      - /dev/davinci_manager
      - /dev/hisi_hdc

The log output:

sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_utils/error_code.py", line 43, in wapper
sglang-server | return func(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/core/_backend.py", line 127, in compile
sglang-server | return super(TorchNpuGraph, self).compile()
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | RuntimeError: [PID: 885] 2025-11-18-00:34:29.371.175 Inner_Error_Compile_Fail(EA0000): Compile operator failed, cause: src tensor overflow, src need 128 elements, but only 124
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | , Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | TraceBack (most recent call last):
sglang-server | Failed to compile Op [StridedSliceV2_2,[StridedSliceV2_2,StridedSliceV2_2]]. (oppath: [Compile /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py failed with errormsg/stack: File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/tik/tik_lib/tik_check_util.py", line 324, in print_error_msg
sglang-server | raise RuntimeError(dict_arg)
sglang-server | RuntimeError: {'errCode': 'EA0000', 'message': 'src tensor overflow, src need 128 elements, but only 124
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | ', 'traceback': 'Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)'}
sglang-server | ], optype: [StridedSliceD])
sglang-server | Compile op[StridedSliceV2_2,[StridedSliceV2_2,StridedSliceV2_2]] failed, oppath[/usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py], optype[StridedSliceD], taskID[209]. Please check op's compilation error message.[FUNC:ReportBuildErrMessage][FILE:fusion_manager.cc][LINE:368]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281431691489568] failed to recompile single op[StridedSliceV2_2][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281431691489568] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281432631013664] failed to recompile single op[StridedSliceV2_6][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][CompOpOnly] CompileOp failed.[FUNC:CompileOpOnly][FILE:op_compiler.cc][LINE:1174]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281432631013664] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | [GraphOpt][FusedGraph][RunCompile] Failed to compile graph with compiler Normal mode Op Compiler[FUNC:SubGraphCompile][FILE:fe_graph_optimizer.cc][LINE:1417]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank9_new_sub_graph31[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank4_new_sub_graph26[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | Compile operator failed, cause: src tensor overflow, src need 128 elements, but only 122
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | , Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | Failed to compile Op [StridedSliceV2,[StridedSliceV2,StridedSliceV2]]. (oppath: [Compile /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py failed with errormsg/stack: File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/tik/tik_lib/tik_check_util.py", line 324, in print_error_msg
sglang-server | raise RuntimeError(dict_arg)
sglang-server | RuntimeError: {'errCode': 'EA0000', 'message': 'src tensor overflow, src need 128 elements, but only 122
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | ', 'traceback': 'Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)'}
sglang-server | ], optype: [StridedSliceD])
sglang-server | Compile op[StridedSliceV2,[StridedSliceV2,StridedSliceV2]] failed, oppath[/usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py], optype[StridedSliceD], taskID[226]. Please check op's compilation error message.[FUNC:ReportBuildErrMessage][FILE:fusion_manager.cc][LINE:368]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281431892816160] failed to recompile single op[StridedSliceV2][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281431892816160] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank6_new_sub_graph28[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | [SubGraphOpt][Compile][ProcFailedCompTask] Thread[281433234993440] failed to recompile single op[StridedSliceV2_4][FUNC:ProcessAllFailedCompileTasks][FILE:tbe_op_store_adapter.cc][LINE:1133]
sglang-server | [SubGraphOpt][Compile][ParalCompOp] Thread[281433234993440] failed when processing the task that had failed to compile.[FUNC:ParallelCompileOp][FILE:tbe_op_store_adapter.cc][LINE:1180]
sglang-server | Call OptimizeFusedGraph failed, ret:4294967295, engine_name:AIcoreEngine, graph_name:partition9_rank1_new_sub_graph23[FUNC:OptimizeSubGraph][FILE:graph_optimize.cc][LINE:125]
sglang-server | subgraph 3 optimize failed[FUNC:OptimizeSubGraphWithMultiThreads][FILE:graph_manager.cc][LINE:962]
sglang-server | [Call][PreRun] Failed, graph_id:5, session_id:0.[FUNC:CompileGraph][FILE:graph_manager.cc][LINE:4826]
sglang-server | [Compile][Graph]Compile graph failed, error code:1343225857, session_id:0, graph_id:5, isEnableSliceSchedule:0.[FUNC:CompileGraph][FILE:ge_api.cc][LINE:1308]
sglang-server |
sglang-server |
sglang-server | During handling of the above exception, another exception occurred:
sglang-server |
sglang-server | Traceback (most recent call last):
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 2845, in run_scheduler_process
sglang-server | scheduler.event_loop_overlap()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
sglang-server | return func(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 992, in event_loop_overlap
sglang-server | batch_result = self.run_batch(batch)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/scheduler.py", line 1974, in run_batch
sglang-server | batch_result = self.model_worker.forward_batch_generation(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 368, in forward_batch_generation
sglang-server | logits_output, can_run_cuda_graph = self.model_runner.forward(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 2066, in forward
sglang-server | output = self._forward_raw(
sglang-server | ^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 2111, in _forward_raw
sglang-server | ret = self.forward_decode(
sglang-server | ^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 1983, in forward_decode
sglang-server | return self.model.forward(
sglang-server | ^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3_vl.py", line 726, in forward
sglang-server | hidden_states = general_mm_embed_routine(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/managers/mm_utils.py", line 703, in general_mm_embed_routine
sglang-server | hidden_states = language_model(
sglang-server | ^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3_vl.py", line 568, in forward
sglang-server | hidden_states, residual = layer(
sglang-server | ^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3.py", line 266, in forward
sglang-server | hidden_states = self.self_attn(
sglang-server | ^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/models/qwen3.py", line 176, in forward
sglang-server | q, k = self.rotary_emb(positions, q, k)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
sglang-server | return self._call_impl(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
sglang-server | return forward_call(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/layers/rotary_embedding.py", line 1444, in forward
sglang-server | return self._forward_native(positions, query, key)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 736, in compile_wrapper
sglang-server | return fn(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/sglang/srt/layers/rotary_embedding.py", line 1369, in _forward_native
sglang-server | @torch.compile(dynamic=True, backend=get_compiler_backend())
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 929, in _fn
sglang-server | return fn(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/aot_autograd.py", line 1241, in forward
sglang-server | return compiled_fn(full_args)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 384, in runtime_wrapper
sglang-server | all_outs = call_func_at_runtime_with_args(
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/utils.py", line 126, in call_func_at_runtime_with_args
sglang-server | out = normalize_as_list(f(args))
sglang-server | ^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 750, in inner_fn
sglang-server | outs = compiled_fn(args)
sglang-server | ^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 556, in wrapper
sglang-server | return compiled_fn(runtime_args)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/utils.py", line 100, in g
sglang-server | return f(*args)
sglang-server | ^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/npu_fx_compiler.py", line 355, in call
sglang-server | gm_result = self.runner(*args, **kwargs)
sglang-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_ge_concrete_graph/fx2ge_converter.py", line 684, in call
sglang-server | self.compile()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_ge_concrete_graph/fx2ge_converter.py", line 788, in compile
sglang-server | self.graph.compile()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/ge/_ge_graph.py", line 671, in compile
sglang-server | self._executor.compile()
sglang-server | File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_utils/error_code.py", line 46, in wapper
sglang-server | raise type(e)("\n".join(msg))
sglang-server | RuntimeError: [PID: 885] 2025-11-18-00:34:29.371.175 Inner_Error_Compile_Fail(EA0000): Compile operator failed, cause: src tensor overflow, src need 128 elements, but only 124
sglang-server | File path: /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185
sglang-server | The context code cause the exception is:
sglang-server | 182 if self.support_dmp is True:
sglang-server | 183 data_ub_b8 = data_ub.reinterpret_cast_to("int8")
sglang-server | 184 data_gm_b8 = data_gm.reinterpret_cast_to("int8")
sglang-server | 185 -> inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | 186 else:
sglang-server | 187 inst.data_move(data_ub, data_gm, 0, 1, ceil_div(data_len, ele_per_block), 0, 0)
sglang-server | 188
sglang-server | , Traceback:
sglang-server | File /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_strides_larger_than_one.py, line 185, in _gm2ub
sglang-server | inst.data_move_pad(data_ub_b8, data_gm_b8, 1, data_len * dtype_size, 0, 0)
sglang-server | TraceBack (most recent call last):
sglang-server | Failed to compile Op [StridedSliceV2_2,[StridedSliceV2_2,StridedSliceV2_2]]. (oppath: [Compile /usr/local/Ascend/ascend-toolkit/8.3.RC1/opp/built-in/op_impl/ai_core/tbe/impl/strided_slice_d.py failed with errormsg/stack: File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/tik/tik_lib/tik_check_util.py", line 324, in print_error_msg
sglang-server | raise RuntimeError(dict_arg)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions