Skip to content

Commit ed77def

Browse files
committed
bugfix
1 parent bfe6991 commit ed77def

2 files changed

Lines changed: 7 additions & 0 deletions

File tree

csrc/flat/prefill/prefill_kernel_delta_rule_sm90_extern.inc

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,10 @@
1818

1919
#pragma once
2020

21+
#include <cuda_bf16.h>
22+
#include <cuda_fp16.h>
23+
#include "cutlass/arch/arch.h"
24+
2125
namespace flat {
2226

2327
// clang-format off

flashinfer/aot.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -443,6 +443,9 @@ def gen_all_modules(
443443
add_misc: bool,
444444
add_xqa: bool,
445445
) -> List[JitSpec]:
446+
# TEMPORARY: Only compile gdn_prefill_sm90 for testing
447+
return [gen_gdn_prefill_sm90_module()]
448+
446449
jit_specs: List[JitSpec] = []
447450
jit_specs.append(gen_spdlog_module())
448451
has_sm90 = sm_capabilities.get("sm90", False)

0 commit comments

Comments
 (0)