1 parent b6e4893 commit 0189f41
python/sglang/srt/server_args.py
@@ -2530,11 +2530,6 @@ def _handle_dllm_inference(self):
             )
             self.attention_backend = "triton"
         elif not self.disable_cuda_graph:
-            if self.cuda_graph_bs != [1]:
-                logger.warning(
-                    "Cuda graph bs is set to [1] because of using diffusion LLM inference"
-                )
-            self.cuda_graph_bs = [1]
             if self.attention_backend != "flashinfer":
                 logger.warning(
                     "Attention backend is set to flashinfer because of enabling cuda graph in diffusion LLM inference"