Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions 3rdparty/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ foreach(DEP_IDX RANGE ${DEP_COUNT_MINUS_ONE})
string(JSON DEP_SOURCE_SUBDIR ERROR_VARIABLE _err GET "${DEP_OBJECT}" "source_subdir")
string(JSON DEP_GIT_SUBMODULES_RECURSE ERROR_VARIABLE _err GET "${DEP_OBJECT}" "git_submodules_recurse")
string(JSON DEP_USE_URL ERROR_VARIABLE _err GET "${DEP_OBJECT}" "use_url")
string(JSON DEP_PATCH_FILE ERROR_VARIABLE _err GET "${DEP_OBJECT}" "patch_file")
# cmake-format: on

# Build FetchContent_Declare arguments
Expand All @@ -53,6 +54,19 @@ foreach(DEP_IDX RANGE ${DEP_COUNT_MINUS_ONE})
list(APPEND FETCH_ARGS SOURCE_SUBDIR "${DEP_SOURCE_SUBDIR}")
endif()

if(DEP_PATCH_FILE AND NOT DEP_PATCH_FILE STREQUAL "")
list(
APPEND
FETCH_ARGS
PATCH_COMMAND
patch
-p1
--forward
--batch
-i
"${CMAKE_CURRENT_SOURCE_DIR}/${DEP_PATCH_FILE}")
endif()

FetchContent_Declare(${FETCH_ARGS})

# Special handling: Export deep_ep commit to global property
Expand Down
5 changes: 3 additions & 2 deletions 3rdparty/fetch_content.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@
{
"name": "xgrammar",
"git_repository": "https://github.com/mlc-ai/xgrammar",
"git_tag": "v0.1.25",
"git_tag": "v0.1.32",
"git_shallow": true,
"source_subdir": "dont-add-this-project-with-add-subdirectory"
"source_subdir": "dont-add-this-project-with-add-subdirectory",
"patch_file": "patches/xgrammar_constexpr.patch"
}
]
}
19 changes: 19 additions & 0 deletions 3rdparty/patches/xgrammar_constexpr.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--- a/cpp/grammar_functor.cc
+++ b/cpp/grammar_functor.cc
@@ -1750,11 +1750,11 @@
void Apply(Grammar* grammar);
static std::optional<uint64_t> HashSequence(const Grammar& grammar, int32_t sequence_id);

- static const int16_t kNotEndStateFlag = -0x100;
- static const int16_t kEndStateFlag = -0x200;
- static const int16_t kSelfRecursionFlag = -0x300;
- static const int16_t kSimpleCycleFlag = -0x400;
- static const int16_t kUnKnownFlag = -0x500;
+ static constexpr int16_t kNotEndStateFlag = -0x100;
+ static constexpr int16_t kEndStateFlag = -0x200;
+ static constexpr int16_t kSelfRecursionFlag = -0x300;
+ static constexpr int16_t kSimpleCycleFlag = -0x400;
+ static constexpr int16_t kUnKnownFlag = -0x500;

private:
Grammar* grammar_;
2 changes: 1 addition & 1 deletion ATTRIBUTIONS-Python.md
Original file line number Diff line number Diff line change
Expand Up @@ -63471,7 +63471,7 @@ SOFTWARE.
- `Homepage`: https://github.com/akshaynagpal/w2n


## xgrammar (0.1.25)
## xgrammar (0.1.32)

### Licenses
License: `Apache 2.0`
Expand Down
21 changes: 14 additions & 7 deletions cpp/kernels/fmha_v2/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3333,11 +3333,14 @@ def use_cubin_header(sm,
head_size,
dtype,
output_dtype=None,
enable_skip_softmax=False):
enable_skip_softmax=False,
attention_mask_type=None):
if enable_skip_softmax:
return False
if 'e4m3' in dtype and output_dtype in ['bf16', 'fp16']:
return False
if attention_mask_type == AttentionMaskType.BIDIRECTIONAL_SLIDING_WINDOW:
return False
return (sm == 90 and head_size == 128) or (sm == 89 and 'e4m3' in dtype)


Expand All @@ -3349,9 +3352,11 @@ def get_cubin_header(kernel_traits, specs_names):
cubin_lens_dict = {}
launchers_dict = {}
for kspec, fname, lname, kname in specs_names:
mask_type = AttentionMaskType.BIDIRECTIONAL_SLIDING_WINDOW \
if '_bidirectional_sliding_window' in kname else None
if generate_cu_trtllm and not use_cubin_header(
kspec.sm, kspec.head_size, kspec.dtype, kspec.output_dtype,
kspec.enable_skip_softmax):
kspec.enable_skip_softmax, mask_type):
continue
name = fname.replace('.', '_')
data = 'extern unsigned char cubin_{name}_cubin[];'.format(name=name)
Expand Down Expand Up @@ -3487,7 +3492,8 @@ def get_cubin_header(kernel_traits, specs_names):
return_softmax_stats_flag = pythonBoolean2cpp[sm != '90' or (
sm == '90' and '_softmax' in kname)]

enable_skip_softmax_flag = pythonBoolean2cpp['_skipSoftmax' in kname]
enable_skip_softmax = '_skipSoftmax' in kname
enable_skip_softmax_flag = pythonBoolean2cpp[enable_skip_softmax]

# meta_unroll_step
meta_unroll_step = unroll_step if ('_nl' in kname
Expand Down Expand Up @@ -3516,7 +3522,8 @@ def get_cubin_header(kernel_traits, specs_names):
def get_lname_from_kname(kname: str) -> str:
if use_cubin_header(int(sm), int(head_size), prec.lower(),
output_prec.lower(),
enable_skip_softmax_flag):
enable_skip_softmax,
attention_mask_type):
return 'nullptr'
lname = kname.replace('_kernel', '')
mask_types = [
Expand All @@ -3537,9 +3544,9 @@ def get_lname_from_kname(kname: str) -> str:
{cubin_name}_len, \"{kname}\", {smem}, {threads}, {meta_unroll_step}, {attention_mask_type_value}, \
{attention_input_layout_value}, {is_il}, {is_flash_atten}, {is_warp_specialization}, {is_fp32_accu}, \
{is_alibi_supported}, {is_tiled}, {has_softcapping_scale}, {return_softmax_stats_flag}, {enable_skip_softmax_flag}, {lname}}}\
'''.format(**locals()) if use_cubin_header(int(sm), int(head_size),
prec.lower(), output_prec.lower(),
enable_skip_softmax_flag) else '''\
'''.format(**locals()) if use_cubin_header(
int(sm), int(head_size), prec.lower(), output_prec.lower(),
enable_skip_softmax, attention_mask_type) else '''\
{{ DATA_TYPE_{prec}, DATA_TYPE_{output_prec}, {seq_len}, {q_step}, {kv_step}, {head_size}, {head_size_v}, \
{sage_block_sizes[0]}, {sage_block_sizes[1]}, {sage_block_sizes[2]}, kSM_{sm}, nullptr, \
0, \"{kname}\", {smem}, {threads}, {meta_unroll_step}, {attention_mask_type_value}, \
Expand Down
2 changes: 0 additions & 2 deletions examples/auto_deploy/model_registry/models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,6 @@ models:
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
- name: deepseek-ai/DeepSeek-R1
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
- name: deepseek-ai/DeepSeek-V3
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
- name: deepseek-ai/DeepSeek-Coder-V2-Instruct
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
- name: Qwen/Qwen3-VL-8B-Instruct
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ patchelf
einops
flashinfer-python==0.6.4
opencv-python-headless
xgrammar==0.1.25
xgrammar==0.1.32
llguidance==0.7.29
jsonschema
backoff
Expand Down
2 changes: 1 addition & 1 deletion scripts/attribution/data/dependency_metadata.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ ucx/1.20:
ucxx/16eaa57c8d98c8ef54d666a2d2b11e76cfa565f5:
license: 759cb066f14805ef4068f633d9071e1d
source: https://github.com/rapidsai/ucxx/tree/16eaa57c8d98c8ef54d666a2d2b11e76cfa565f5
xgrammar/v0.1.25:
xgrammar/v0.1.32:
copyright: 989a9441d689f61fba9f797cc253e51b
license: 8e1c96809a7467593130ecc62ae12be9
zeromq/4.3.4-3.el8:
Expand Down
2 changes: 1 addition & 1 deletion scripts/attribution/data/files_to_dependency.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7507,7 +7507,7 @@ ucxx/16eaa57c8d98c8ef54d666a2d2b11e76cfa565f5:
- cb11c17f716ae644b2d74652d3d3232b
- e10a9fefe2ef09b9560cc204bd54f728
- f75f9c7cefa54d6626032daa93ef2549
xgrammar/v0.1.25:
xgrammar/v0.1.32:
- 0e2b512f384e122c3b8243ae00256e06
- 1a6e20d89e227a29d674e12e18b5e9e7
- 1acd98aa4050fd0b8cda58d5d4f6ef78
Expand Down
Loading
Loading