From d704b5e889740903e31aa2d7cab238fd6f8c36aa Mon Sep 17 00:00:00 2001
From: Zhenhua Wang <4936589+zhenhuaw-me@users.noreply.github.com>
Date: Mon, 9 Mar 2026 18:20:35 +0800
Subject: [PATCH 1/4] [None][chore] Remove visual_gen benchmark test from YAML
 (#12027)

Signed-off-by: Zhenhua Wang <4936589+zhenhuaw-me@users.noreply.github.com>
---
 tests/integration/test_lists/test-db/l0_dgx_b200.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integration/test_lists/test-db/l0_dgx_b200.yml b/tests/integration/test_lists/test-db/l0_dgx_b200.yml
index 409a3109199..094408ae1b4 100644
--- a/tests/integration/test_lists/test-db/l0_dgx_b200.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_b200.yml
@@ -271,7 +271,6 @@ l0_dgx_b200:
   - examples/test_visual_gen.py::test_vbench_dimension_score_wan
   - examples/test_visual_gen.py::test_vbench_dimension_score_wan22_a14b_fp8
   - examples/test_visual_gen.py::test_vbench_dimension_score_wan22_a14b_nvfp4
-  - visual_gen/test_visual_gen_benchmark.py
 # ------------- AutoDeploy Backend Stages ---------------
 - condition:
     ranges:

From a176d8347893cf2242abe7cc7617a50f3d7294bd Mon Sep 17 00:00:00 2001
From: tcherckez-nvidia <127761168+tcherckez-nvidia@users.noreply.github.com>
Date: Mon, 9 Mar 2026 14:19:36 +0200
Subject: [PATCH 2/4] [None][fix] Fix the model list as it had a dup model
 (#12029)

Signed-off-by: Tal Cherckez <127761168+tcherckez-nvidia@users.noreply.github.com>
---
 examples/auto_deploy/model_registry/models.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/auto_deploy/model_registry/models.yaml b/examples/auto_deploy/model_registry/models.yaml
index 6f6d630d16a..01887699b99 100644
--- a/examples/auto_deploy/model_registry/models.yaml
+++ b/examples/auto_deploy/model_registry/models.yaml
@@ -181,8 +181,6 @@ models:
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
 - name: deepseek-ai/DeepSeek-R1
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
-- name: deepseek-ai/DeepSeek-V3
-  yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
 - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
   yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
 - name: Qwen/Qwen3-VL-8B-Instruct

From 34a915377f6bf86bc87927d8eaca83e8c7110e68 Mon Sep 17 00:00:00 2001
From: Yiyun Lu <55233584+luyiyun1021@users.noreply.github.com>
Date: Mon, 9 Mar 2026 20:31:17 +0800
Subject: [PATCH 3/4] [https://nvbugs/5863806][fix] Fix Python string
 truthiness bug in FMHA cubin selection (#11909)

Signed-off-by: Yiyun Lu <55233584+luyiyun1021@users.noreply.github.com>
Co-authored-by: Jie Li <76780849+jieli-matrix@users.noreply.github.com>
---
 cpp/kernels/fmha_v2/setup.py            | 21 ++++++++++++++-------
 tests/integration/test_lists/waives.txt |  3 ---
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/cpp/kernels/fmha_v2/setup.py b/cpp/kernels/fmha_v2/setup.py
index 88cba8f793f..01dd06c7058 100644
--- a/cpp/kernels/fmha_v2/setup.py
+++ b/cpp/kernels/fmha_v2/setup.py
@@ -3333,11 +3333,14 @@ def use_cubin_header(sm,
                      head_size,
                      dtype,
                      output_dtype=None,
-                     enable_skip_softmax=False):
+                     enable_skip_softmax=False,
+                     attention_mask_type=None):
     if enable_skip_softmax:
         return False
     if 'e4m3' in dtype and output_dtype in ['bf16', 'fp16']:
         return False
+    if attention_mask_type == AttentionMaskType.BIDIRECTIONAL_SLIDING_WINDOW:
+        return False
     return (sm == 90 and head_size == 128) or (sm == 89 and 'e4m3' in dtype)
 
 
@@ -3349,9 +3352,11 @@ def get_cubin_header(kernel_traits, specs_names):
     cubin_lens_dict = {}
     launchers_dict = {}
     for kspec, fname, lname, kname in specs_names:
+        mask_type = AttentionMaskType.BIDIRECTIONAL_SLIDING_WINDOW \
+            if '_bidirectional_sliding_window' in kname else None
         if generate_cu_trtllm and not use_cubin_header(
                 kspec.sm, kspec.head_size, kspec.dtype, kspec.output_dtype,
-                kspec.enable_skip_softmax):
+                kspec.enable_skip_softmax, mask_type):
             continue
         name = fname.replace('.', '_')
         data = 'extern unsigned char cubin_{name}_cubin[];'.format(name=name)
@@ -3487,7 +3492,8 @@ def get_cubin_header(kernel_traits, specs_names):
         return_softmax_stats_flag = pythonBoolean2cpp[sm != '90' or (
             sm == '90' and '_softmax' in kname)]
 
-        enable_skip_softmax_flag = pythonBoolean2cpp['_skipSoftmax' in kname]
+        enable_skip_softmax = '_skipSoftmax' in kname
+        enable_skip_softmax_flag = pythonBoolean2cpp[enable_skip_softmax]
 
         # meta_unroll_step
         meta_unroll_step = unroll_step if ('_nl' in kname
@@ -3516,7 +3522,8 @@ def get_cubin_header(kernel_traits, specs_names):
                 def get_lname_from_kname(kname: str) -> str:
                     if use_cubin_header(int(sm), int(head_size), prec.lower(),
                                         output_prec.lower(),
-                                        enable_skip_softmax_flag):
+                                        enable_skip_softmax,
+                                        attention_mask_type):
                         return 'nullptr'
                     lname = kname.replace('_kernel', '')
                     mask_types = [
@@ -3537,9 +3544,9 @@ def get_lname_from_kname(kname: str) -> str:
 {cubin_name}_len, \"{kname}\", {smem}, {threads}, {meta_unroll_step}, {attention_mask_type_value}, \
 {attention_input_layout_value}, {is_il}, {is_flash_atten}, {is_warp_specialization}, {is_fp32_accu}, \
 {is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}, {enable_skip_softmax_flag}, {lname}}}\
-'''.format(**locals()) if use_cubin_header(int(sm), int(head_size),
-                                           prec.lower(), output_prec.lower(),
-                                           enable_skip_softmax_flag) else '''\
+'''.format(**locals()) if use_cubin_header(
+                    int(sm), int(head_size), prec.lower(), output_prec.lower(),
+                    enable_skip_softmax, attention_mask_type) else '''\
 {{ DATA_TYPE_{prec}, DATA_TYPE_{output_prec}, {seq_len}, {q_step}, {kv_step}, {head_size}, {head_size_v}, \
 {sage_block_sizes[0]}, {sage_block_sizes[1]}, {sage_block_sizes[2]}, kSM_{sm}, nullptr, \
 0, \"{kname}\", {smem}, {threads}, {meta_unroll_step}, {attention_mask_type_value}, \
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 791866c9e68..b87997f01a2 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -218,8 +218,6 @@ cpp/test_e2e.py::test_model[-mamba-86] SKIP (https://nvbugs/5781665)
 unittest/llmapi/test_llm_multi_gpu_pytorch.py::test_tinyllama_logits_processor_tp2pp2 SKIP (https://nvbugs/5781731)
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False] SKIP (https://nvbugs/5807902)
 unittest/_torch/ray_orchestrator/multi_gpu/test_multi_instance.py::test_multi_instance[tp2_2instances] SKIP (https://nvbugs/5784566)
-accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantized SKIP (https://nvbugs/5785465)
-accuracy/test_llm_api_pytorch.py::TestMinistral8BInstruct::test_fp8 SKIP (https://nvbugs/5785485)
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_chunked_prefill[tp8ep8-cuda_graph=False] SKIP (https://nvbugs/5795918)
 full:RTXPro6000D/accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5800672)
 examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] SKIP (https://nvbugs/5802248)
@@ -276,7 +274,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[disable_s
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[v1_kv_cache-tp4-cutlass-fp8] SKIP (https://nvbugs/5651865)
 accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a16_mxfp4[latency-TRITON] SKIP (https://nvbugs/5864263)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[v1_kv_cache-True-True-triton-auto] SKIP (https://nvbugs/5864187)
-accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8[latency-torch_compile=False] SKIP (https://nvbugs/5863806)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[v1_kv_cache-dp4-trtllm-auto] SKIP (https://nvbugs/5596343)
 test_e2e.py::test_trtllm_multimodal_benchmark_serving SKIP (https://nvbugs/5864769)
 unittest/auto_deploy/multigpu/transformations/library/test_bmm_sharding.py::test_sharding[1-1] SKIP (https://nvbugs/5875203)

From 9fe24dbee0bed41bf67c7556c60867233db18c24 Mon Sep 17 00:00:00 2001
From: sunnyqgg <159101675+sunnyqgg@users.noreply.github.com>
Date: Mon, 9 Mar 2026 21:00:00 +0800
Subject: [PATCH 4/4] [None][feat] Upgrade xgrammar from 0.1.25 to 0.1.32
 (#12016)

Signed-off-by: qgai
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
---
 3rdparty/CMakeLists.txt                       |  14 ++
 3rdparty/fetch_content.json                   |   5 +-
 3rdparty/patches/xgrammar_constexpr.patch     |  19 +++
 ATTRIBUTIONS-Python.md                        |   2 +-
 requirements.txt                              |   2 +-
 .../attribution/data/dependency_metadata.yml  |   2 +-
 .../attribution/data/files_to_dependency.yml  |   2 +-
 security_scanning/poetry.lock                 | 155 +++++-------------
 security_scanning/pyproject.toml              |   2 +-
 9 files changed, 79 insertions(+), 124 deletions(-)
 create mode 100644 3rdparty/patches/xgrammar_constexpr.patch

diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt
index 7360e13c80d..af2ccf9d693 100644
--- a/3rdparty/CMakeLists.txt
+++ b/3rdparty/CMakeLists.txt
@@ -30,6 +30,7 @@ foreach(DEP_IDX RANGE ${DEP_COUNT_MINUS_ONE})
   string(JSON DEP_SOURCE_SUBDIR          ERROR_VARIABLE _err GET "${DEP_OBJECT}" "source_subdir")
   string(JSON DEP_GIT_SUBMODULES_RECURSE ERROR_VARIABLE _err GET "${DEP_OBJECT}" "git_submodules_recurse")
   string(JSON DEP_USE_URL                ERROR_VARIABLE _err GET "${DEP_OBJECT}" "use_url")
+  string(JSON DEP_PATCH_FILE            ERROR_VARIABLE _err GET "${DEP_OBJECT}" "patch_file")
   # cmake-format: on
 
   # Build FetchContent_Declare arguments
@@ -53,6 +54,19 @@ foreach(DEP_IDX RANGE ${DEP_COUNT_MINUS_ONE})
     list(APPEND FETCH_ARGS SOURCE_SUBDIR "${DEP_SOURCE_SUBDIR}")
   endif()
 
+  if(DEP_PATCH_FILE AND NOT DEP_PATCH_FILE STREQUAL "")
+    list(
+      APPEND
+      FETCH_ARGS
+      PATCH_COMMAND
+      patch
+      -p1
+      --forward
+      --batch
+      -i
+      "${CMAKE_CURRENT_SOURCE_DIR}/${DEP_PATCH_FILE}")
+  endif()
+
   FetchContent_Declare(${FETCH_ARGS})
 
   # Special handling: Export deep_ep commit to global property
diff --git a/3rdparty/fetch_content.json b/3rdparty/fetch_content.json
index f28186b6235..c3d0e1dbcce 100644
--- a/3rdparty/fetch_content.json
+++ b/3rdparty/fetch_content.json
@@ -93,9 +93,10 @@
     {
       "name": "xgrammar",
       "git_repository": "https://github.com/mlc-ai/xgrammar",
-      "git_tag": "v0.1.25",
+      "git_tag": "v0.1.32",
       "git_shallow": true,
-      "source_subdir": "dont-add-this-project-with-add-subdirectory"
+      "source_subdir": "dont-add-this-project-with-add-subdirectory",
+      "patch_file": "patches/xgrammar_constexpr.patch"
     }
   ]
 }
diff --git a/3rdparty/patches/xgrammar_constexpr.patch b/3rdparty/patches/xgrammar_constexpr.patch
new file mode 100644
index 00000000000..f4e5be99a21
--- /dev/null
+++ b/3rdparty/patches/xgrammar_constexpr.patch
@@ -0,0 +1,19 @@
+--- a/cpp/grammar_functor.cc
++++ b/cpp/grammar_functor.cc
+@@ -1750,11 +1750,11 @@
+   void Apply(Grammar* grammar);
+   static std::optional<uint64_t> HashSequence(const Grammar& grammar, int32_t sequence_id);
+
+-  static const int16_t kNotEndStateFlag = -0x100;
+-  static const int16_t kEndStateFlag = -0x200;
+-  static const int16_t kSelfRecursionFlag = -0x300;
+-  static const int16_t kSimpleCycleFlag = -0x400;
+-  static const int16_t kUnKnownFlag = -0x500;
++  static constexpr int16_t kNotEndStateFlag = -0x100;
++  static constexpr int16_t kEndStateFlag = -0x200;
++  static constexpr int16_t kSelfRecursionFlag = -0x300;
++  static constexpr int16_t kSimpleCycleFlag = -0x400;
++  static constexpr int16_t kUnKnownFlag = -0x500;
+
+  private:
+   Grammar* grammar_;
diff --git a/ATTRIBUTIONS-Python.md b/ATTRIBUTIONS-Python.md
index c21c9063555..b9975173aaa 100644
--- a/ATTRIBUTIONS-Python.md
+++ b/ATTRIBUTIONS-Python.md
@@ -63471,7 +63471,7 @@ SOFTWARE.
   - `Homepage`: https://github.com/akshaynagpal/w2n
 
 
-## xgrammar (0.1.25)
+## xgrammar (0.1.32)
 
 ### Licenses
 License: `Apache 2.0`
diff --git a/requirements.txt b/requirements.txt
index 678d640864e..c41db79b650 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -56,7 +56,7 @@ patchelf
 einops
 flashinfer-python==0.6.4
 opencv-python-headless
-xgrammar==0.1.25
+xgrammar==0.1.32
 llguidance==0.7.29
 jsonschema
 backoff
diff --git a/scripts/attribution/data/dependency_metadata.yml b/scripts/attribution/data/dependency_metadata.yml
index 6d0b3380329..456d1446047 100644
--- a/scripts/attribution/data/dependency_metadata.yml
+++ b/scripts/attribution/data/dependency_metadata.yml
@@ -90,7 +90,7 @@ ucx/1.20:
 ucxx/16eaa57c8d98c8ef54d666a2d2b11e76cfa565f5:
   license: 759cb066f14805ef4068f633d9071e1d
   source: https://github.com/rapidsai/ucxx/tree/16eaa57c8d98c8ef54d666a2d2b11e76cfa565f5
-xgrammar/v0.1.25:
+xgrammar/v0.1.32:
   copyright: 989a9441d689f61fba9f797cc253e51b
   license: 8e1c96809a7467593130ecc62ae12be9
 zeromq/4.3.4-3.el8:
diff --git a/scripts/attribution/data/files_to_dependency.yml b/scripts/attribution/data/files_to_dependency.yml
index d8243f7689b..70555a26134 100644
--- a/scripts/attribution/data/files_to_dependency.yml
+++ b/scripts/attribution/data/files_to_dependency.yml
@@ -7507,7 +7507,7 @@ ucxx/16eaa57c8d98c8ef54d666a2d2b11e76cfa565f5:
 - cb11c17f716ae644b2d74652d3d3232b
 - e10a9fefe2ef09b9560cc204bd54f728
 - f75f9c7cefa54d6626032daa93ef2549
-xgrammar/v0.1.25:
+xgrammar/v0.1.32:
 - 0e2b512f384e122c3b8243ae00256e06
 - 1a6e20d89e227a29d674e12e18b5e9e7
 - 1acd98aa4050fd0b8cda58d5d4f6ef78
diff --git a/security_scanning/poetry.lock b/security_scanning/poetry.lock
index c245778e9f0..6edf13bfc00 100644
--- a/security_scanning/poetry.lock
+++ b/security_scanning/poetry.lock
@@ -2784,95 +2784,6 @@ numpy = [
 [package.extras]
 dev = ["absl-py", "pyink", "pylint (>=2.6.0)", "pytest", "pytest-xdist"]
 
-[[package]]
-name = "mlx"
-version = "0.31.0"
-description = "A framework for machine learning on Apple silicon."
-optional = false
-python-versions = ">=3.10"
-groups = ["main"]
-markers = "platform_system == \"Darwin\" and platform_machine == \"arm64\""
-files = [
-    {file = "mlx-0.31.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:38680838e0dd9a621ed4adc5a9ed8b94aeb6a4798142fbe215b821b8c6b8fc36"},
-    {file = "mlx-0.31.0-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:7aded590bcf6839307c3acc899e196936991f97b499ddbdd0cd3b228bf10792f"},
-    {file = "mlx-0.31.0-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:6e3ae83607b798b44cb3e44437095cfd26886fecc15f90f29f9eafd206d4d170"},
-    {file = "mlx-0.31.0-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:b25f785c94eb47d8104604a5de0e7d749b801e7a40073cbf457aa94c372e5593"},
-    {file = "mlx-0.31.0-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:6a4342027e6608ce69807a8f079c750a7c6161f543ebb49e55654edd03c178d6"},
-    {file = "mlx-0.31.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:285313eaeba425e58cbb3238c2d1a3894e6252d58f243ce56681d5419a568d6c"},
-    {file = "mlx-0.31.0-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:acf4f04ff33a80784a0f15c492166dc889e65659b41c410ca5a7c2d78bee2a3a"},
-    {file = "mlx-0.31.0-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:f624571e23a86654496c42a507b4bb42ded0edb91f33161fabafdbf6b81ba024"},
-    {file = "mlx-0.31.0-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:5b5306a0934b15c4e3a1088a10066bdde3966c21b95006c63ecc38ca8e3891e0"},
-    {file = "mlx-0.31.0-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:18078bc67dfb7ed602fca233d00ce93e23d590d9347da5009472455a92831066"},
-    {file = "mlx-0.31.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:de6c0a3e8aa0e7d1365d46634fdbb3f835c164fbdb6ba8a239e039a4efa07fe2"},
-    {file = "mlx-0.31.0-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:d6af01b15177da995336a6fd9878e7c5994720a9f1614d8f4d1dbe9293167c30"},
-    {file = "mlx-0.31.0-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:1ad14ddc3a15818f5bba0de35e88559ed8dcb93ccff2ef879ff604d02d663b25"},
-    {file = "mlx-0.31.0-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:a80754ecf64191f71da1946dc5de6cf903344cc90dd286c589792ee9d3fc62f9"},
-    {file = "mlx-0.31.0-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:363282eb094785f6aba27810ff89331c0f7829c6961f571cd0feaad09d2c809f"},
-    {file = "mlx-0.31.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4f33e9aafc6d3ad29e72743dfb786c4ce67397414f0a091469058626381fc1bc"},
-    {file = "mlx-0.31.0-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:242806b8ad6a4d3ce86cdff513f86520552de7592786712770b2e1ebd178816a"},
-    {file = "mlx-0.31.0-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:7f0bdbac084017820ce513a12318771a06c7ec10fad159839e27c998bc5dad89"},
-    {file = "mlx-0.31.0-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:8642dda2b23195d9921973749ae9bf764e2c7d70bfc0e60b23b6335e660cc610"},
-    {file = "mlx-0.31.0-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:c6daa671cfa3c194951d742aa09030c5008d9d9657034b2903389fa090b3ba92"},
-    {file = "mlx-0.31.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:cce3e15cf11c608c9e721502fe56e54f9f48b897e9b80f1204a48643d68710c0"},
-    {file = "mlx-0.31.0-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ba330fe40d73b202880bbb5cac62de0b639cf4c44a12853bcadb34a9e3ffe880"},
-    {file = "mlx-0.31.0-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:d2014d113070846c6cdee980653f561c92a4a663a449f64e70c15bbf74d637e1"},
-    {file = "mlx-0.31.0-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:994fab25ff521621e03001177a8f0f1a7bf8294ff340f89910ec074f9f681ed9"},
-    {file = "mlx-0.31.0-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:c3bb9961f40d098659326b0edb96e2a16adecfaf3c1f2518cad5a0b7e55a3a5d"},
-]
-
-[package.dependencies]
-mlx-metal = {version = "0.31.0", markers = "platform_system == \"Darwin\""}
-
-[package.extras]
-cpu = ["mlx-cpu (==0.31.0) ; platform_system == \"Linux\""]
-cuda = ["mlx-cuda-12 (==0.31.0) ; platform_system == \"Linux\""]
-cuda12 = ["mlx-cuda-12 (==0.31.0) ; platform_system == \"Linux\""]
-cuda13 = ["mlx-cuda-13 (==0.31.0) ; platform_system == \"Linux\""]
-dev = ["numpy (>=2)", "pre-commit", "psutil", "torch (>=2.9)", "typing_extensions"]
-
-[[package]]
-name = "mlx-lm"
-version = "0.29.1"
-description = "LLMs with MLX and the Hugging Face Hub"
-optional = false
-python-versions = ">=3.8"
-groups = ["main"]
-markers = "platform_system == \"Darwin\" and platform_machine == \"arm64\""
-files = [
-    {file = "mlx_lm-0.29.1-py3-none-any.whl", hash = "sha256:440941b3054c2a2216e97615de584cc90fa1ea874782e20699b9895721fad8dc"},
-    {file = "mlx_lm-0.29.1.tar.gz", hash = "sha256:b99180d8f33d33a077b814e550bfb2d8a59ae003d668fd1f4b3fff62a381d34b"},
-]
-
-[package.dependencies]
-jinja2 = "*"
-mlx = {version = ">=0.29.2", markers = "platform_system == \"Darwin\""}
-numpy = "*"
-protobuf = "*"
-pyyaml = "*"
-sentencepiece = "*"
-transformers = ">=4.39.3"
-
-[package.extras]
-cpu = ["mlx[cpu] (>=0.29.2)"]
-cuda = ["mlx[cuda] (>=0.29.2)"]
-evaluate = ["lm-eval", "tqdm"]
-test = ["datasets", "lm-eval"]
-train = ["datasets", "tqdm"]
-
-[[package]]
-name = "mlx-metal"
-version = "0.31.0"
-description = "A framework for machine learning on Apple silicon."
-optional = false
-python-versions = ">=3.10"
-groups = ["main"]
-markers = "platform_system == \"Darwin\" and platform_machine == \"arm64\""
-files = [
-    {file = "mlx_metal-0.31.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:1c572a6e3634a63060c103b0c38ac309e2d217be15519e3d8f0d6b452bb015f5"},
-    {file = "mlx_metal-0.31.0-py3-none-macosx_15_0_arm64.whl", hash = "sha256:554dc7cb29e0ea5fb6941df42f11a1de385b095848e6183c7a99d7c1f1a11f5d"},
-    {file = "mlx_metal-0.31.0-py3-none-macosx_26_0_arm64.whl", hash = "sha256:7fd412f55ddf9f1d90c2cd86ce281d19e8eb93d093c6dbd784a49f8bd7d0a22c"},
-]
-
 [[package]]
 name = "mpi4py"
 version = "4.1.1"
@@ -6635,41 +6546,50 @@ standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.6.3)
 
 [[package]]
 name = "xgrammar"
-version = "0.1.25"
+version = "0.1.32"
 description = "Efficient, Flexible and Portable Structured Generation"
 optional = false
 python-versions = "<4,>=3.8"
 groups = ["main"]
 files = [
-    {file = "xgrammar-0.1.25-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:d12d1078ee2b5c1531610489b433b77694a7786210ceb2c0c1c1eb058e9053c7"},
-    {file = "xgrammar-0.1.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2e940541b7cddf3ef55a70f20d4c872af7f0d900bc0ed36f434bf7212e2e729"},
-    {file = "xgrammar-0.1.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2063e1c72f0c00f47ac8ce7ce0fcbff6fa77f79012e063369683844e2570c266"},
-    {file = "xgrammar-0.1.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9785eafa251c996ebaa441f3b8a6c037538930104e265a64a013da0e6fd2ad86"},
-    {file = "xgrammar-0.1.25-cp310-cp310-win_amd64.whl", hash = "sha256:42ecefd020038b3919a473fe5b9bb9d8d809717b8689a736b81617dec4acc59b"},
-    {file = "xgrammar-0.1.25-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:27d7ac4be05cf9aa258c109a8647092ae47cb1e28df7d27caced6ab44b72b799"},
-    {file = "xgrammar-0.1.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:151c1636188bc8c5cdf318cefc5ba23221c9c8cc07cb392317fb3f7635428150"},
-    {file = "xgrammar-0.1.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35fc135650aa204bf84db7fe9c0c0f480b6b11419fe47d89f4bd21602ac33be9"},
-    {file = "xgrammar-0.1.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc19d6d7e8e51b6c9a266e949ac7fb3d2992447efeec7df32cca109149afac18"},
-    {file = "xgrammar-0.1.25-cp311-cp311-win_amd64.whl", hash = "sha256:8fcb24f5a7acd5876165c50bd51ce4bf8e6ff897344a5086be92d1fe6695f7fe"},
-    {file = "xgrammar-0.1.25-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:96500d7578c46e8551253b9211b02e02f54e147bc290479a64717d80dcf4f7e3"},
-    {file = "xgrammar-0.1.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ba9031e359447af53ce89dfb0775e7b9f4b358d513bcc28a6b4deace661dd5"},
-    {file = "xgrammar-0.1.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c519518ebc65f75053123baaf23776a21bda58f64101a64c2fc4aa467c9cd480"},
-    {file = "xgrammar-0.1.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47fdbfc6007df47de2142613220292023e88e4a570546b39591f053e4d9ec33f"},
-    {file = "xgrammar-0.1.25-cp312-cp312-win_amd64.whl", hash = "sha256:c9b3defb6b45272e896da401f43b513f5ac12104ec3101bbe4d3a7d02bcf4a27"},
-    {file = "xgrammar-0.1.25-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2d80d4bfe65d1a3334536c804b6471f32e6759f1972c9abe0ae49d5e21462c0b"},
-    {file = "xgrammar-0.1.25-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a1a6a638167d704a22a0c9670e2176104c38e38c351286a07a77143e22f9053"},
-    {file = "xgrammar-0.1.25-cp313-cp313-win_amd64.whl", hash = "sha256:ffadeba0b704667a7eb6202d409533e9d1e80af15a10add107684e0cde45b8e4"},
-    {file = "xgrammar-0.1.25-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:a62dea5d73147a254e71e07ceae4a48c0f5a294cce2fa3e028159f48da19a39d"},
-    {file = "xgrammar-0.1.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b309807ad837c1cbe2f833830b665a15309e11692b53795363c59041c65074f"},
-    {file = "xgrammar-0.1.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:073829d8a53ef482e6b51469316f6e505962460bb576ae4d4a606769c4c37678"},
-    {file = "xgrammar-0.1.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:241529d6104d97738b3e29c573bffa6d0fa89a8d0299b2c685358cc13858205c"},
-    {file = "xgrammar-0.1.25-cp39-cp39-win_amd64.whl", hash = "sha256:f5d46e1749d9324684d2462e428bc63652096addc1e2c21db2ae66ca88e76a1c"},
-    {file = "xgrammar-0.1.25.tar.gz", hash = "sha256:70ce16b27e8082f20808ed759b0733304316facc421656f0f30cfce514b5b77a"},
+    {file = "xgrammar-0.1.32-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:1d179511a1258b410d1660ed00032da24256072b44d6743f5bd3f49743d90801"},
+    {file = "xgrammar-0.1.32-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b33d2e8f02ca31b93f3cbfea0ed1df7e706a2bea52d7706793884f92a8c1523a"},
+    {file = "xgrammar-0.1.32-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7d4f0f2041cdc97bacc096b4c2d1c841e91794b6e7a54e7a4853fc0907956dc"},
+    {file = "xgrammar-0.1.32-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d4f3d18da40cad18e87881925f82d0aed1418cc2fb05886d516e3c9d548db57f"},
+    {file = "xgrammar-0.1.32-cp310-cp310-win_amd64.whl", hash = "sha256:e7baf71bba03a5e734df435b6378da4406b1c15f5511ab4f5d4af9f72775c756"},
+    {file = "xgrammar-0.1.32-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:51b41c47785aa198d19f8d056b394f75b4421deab88c415568f9c588b1f7e238"},
+    {file = "xgrammar-0.1.32-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7030192cb1d8579699f1f72fd14d31347a402611aab98a2da6a04c3de07e917"},
+    {file = "xgrammar-0.1.32-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a332c0364f665b410a6cfc2ada155c3a6ede430e385ac431015e31735a64fec3"},
+    {file = "xgrammar-0.1.32-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b8ad132d0fcf3a51dc054ecb0dc9808566b302122de6edaac7b4aca460adbec"},
+    {file = "xgrammar-0.1.32-cp311-cp311-win_amd64.whl", hash = "sha256:b8b1ca6d3f3c2842660458660e494aaf0a6745f1b07ae74e4c2230ab4ff70c11"},
+    {file = "xgrammar-0.1.32-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:9b78d32265f096e5567ab52c72b681855cf473481a48a1e7e6d97d414ba30b82"},
+    {file = "xgrammar-0.1.32-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23eacaf826c3aeebca0d91fc271417d9d96e157af2bacf6f14277297af7917ef"},
+    {file = "xgrammar-0.1.32-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9a637d4e0c541149e0d409c24f4ec79cd74d87508ee6a17a7e64a9b9c0cf56f"},
+    {file = "xgrammar-0.1.32-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f96c7a4fcbd68e18b13cb3b6ed5d24b5326b256933f476bdaf2cc8e609c228db"},
+    {file = "xgrammar-0.1.32-cp312-cp312-win_amd64.whl", hash = "sha256:ba6e08c385cce53eda8e9b3bbfba63f100ba3dcb76fa0692a65921a36b20ad0a"},
+    {file = "xgrammar-0.1.32-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4addb8f5d5699e7df7fca6d299a91b3ef1ad799811c0ab7050d6f96d754c9c21"},
+    {file = "xgrammar-0.1.32-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:028f8d6a105d06549faee0afbebfaada90aa1941c081dcc88f3d5ef373dad934"},
+    {file = "xgrammar-0.1.32-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0c0150c50eb3a56a35d6f0c0af0bce0f113ec5f84f7918bfd46b49e25ecf7fb5"},
+    {file = "xgrammar-0.1.32-cp313-cp313-win_amd64.whl", hash = "sha256:e1072d764705c8e87df6136ce3419f96ab3fd423d85f58c2d81c13a647b78894"},
+    {file = "xgrammar-0.1.32-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:4e6015ad2b941a292562f68b9a2ee1ddae8e28df840dc39232dcc7007fc6f606"},
+    {file = "xgrammar-0.1.32-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8e8da3e7fc194e098b760bacb2b60ad2227cac70d7be5d2e4f7025b1c360c43d"},
+    {file = "xgrammar-0.1.32-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6588cfd9754f2c46846276a2e8284a46582a74886d7aaea02cf6ce63ccc397ce"},
+    {file = "xgrammar-0.1.32-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7f740ba83b69abb423167a5d5b13a9fcde89747220e191f6a004fae4a834311f"},
+    {file = "xgrammar-0.1.32-cp314-cp314-win_amd64.whl", hash = "sha256:9c0769c3468bd67495c28a03dc5ce3948d83cddaf0a59c6d992b12fc683a1c3e"},
+    {file = "xgrammar-0.1.32-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:da8339b38e96d105868c14b2cb2df4b7c83d7a49f8539c74fd7470d61043e5b1"},
+    {file = "xgrammar-0.1.32-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b938a9096bccc06c30abb5304b2b39c272a924ca002e19421cce5e6ee9670f4f"},
+    {file = "xgrammar-0.1.32-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe2ee94080d77b84e38cb6643b75a6ca29cf814a3e5d5da8e1176eae4034d662"},
+    {file = "xgrammar-0.1.32-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70ddbf7216e1e7ec96134a2474a6b84d2b14439a6f6379e079b7c557131be41d"},
+    {file = "xgrammar-0.1.32-cp314-cp314t-win_amd64.whl", hash = "sha256:4f68e591a6e9e121d5f03821ab2c44a7af092dc8bf7c9cde1a776871c6bd4dc5"},
+    {file = "xgrammar-0.1.32-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:88f3a00623b197c6b76dea3c0c7f326702209c1066eb9993b4723929a106c8ba"},
+    {file = "xgrammar-0.1.32-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ddf7ca871c7ec8c9ce8c34b8547344fce70ef3faa33b6e29f6826d098bc7714"},
+    {file = "xgrammar-0.1.32-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6925258c8e188fbddcf2962c6a0819f71cf664263d2346545feadee92994685e"},
+    {file = "xgrammar-0.1.32-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2691be7077e2eae34cdefce2ef3f88e2f018158ccff0eb4b94bb8725a98e2f27"},
+    {file = "xgrammar-0.1.32-cp39-cp39-win_amd64.whl", hash = "sha256:82b5c3d6b3a6fe01a1957d311473808aee90e946d38bd942c7161f1c4aedce11"},
+    {file = "xgrammar-0.1.32.tar.gz", hash = "sha256:5d424d52779ca2d3ccaf72f2289d6519efe308e933d0d3fc3c292c780825bb12"},
 ]
 
 [package.dependencies]
-mlx-lm = {version = "*", markers = "platform_system == \"Darwin\" and platform_machine == \"arm64\""}
-ninja = "*"
 numpy = "*"
 pydantic = "*"
 torch = ">=1.10.0"
@@ -6678,6 +6598,7 @@ triton = {version = "*", markers = "platform_system == \"Linux\" and platform_ma
 typing-extensions = ">=4.9.0"
 
 [package.extras]
+metal = ["mlx-lm ; platform_system == \"Darwin\" and platform_machine == \"arm64\""]
 test = ["huggingface-hub[cli]", "protobuf", "pytest", "sentencepiece", "tiktoken", "transformers (<4.50.0) ; platform_system == \"Darwin\""]
 
 [[package]]
@@ -6996,4 +6917,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.13"
-content-hash = "2a21a66f0b0512caff14b587ff5a19a8d42d3ae9b5624172c4e68f8d07044c48"
+content-hash = "593920d3d4ce64e19ce8ffc9e66e2f4e0337a23b36ae8a85ed99c9c87b427877"
diff --git a/security_scanning/pyproject.toml b/security_scanning/pyproject.toml
index ccb19badfc0..84c9d57426d 100644
--- a/security_scanning/pyproject.toml
+++ b/security_scanning/pyproject.toml
@@ -57,7 +57,7 @@ dependencies = [
     "patchelf (>=0.17.2.4,<0.18.0.0)",
     "einops (>=0.8.2,<0.9.0)",
     "flashinfer-python (==0.6.4)",
-    "xgrammar (==0.1.25)",
+    "xgrammar (==0.1.32)",
     "llguidance (==0.7.29)",
     "jsonschema (>=4.26.0,<5.0.0)",
     "backoff (>=2.2.1,<3.0.0)",