
Commit 71a579a

fix build

1 parent 4a03086

File tree

2 files changed (+4 lines, −9 lines)


onnxruntime/contrib_ops/cuda/bert/xqa/utils.cuh

Lines changed: 2 additions & 2 deletions

@@ -660,11 +660,11 @@ __device__ inline mha::tuple<uint32_t, uint32_t, decltype(bounds)..., uint32_t>
   return mha::tuple_cat(mha::tuple<uint32_t>(i0 % bound0), carryLE<bound1, bounds...>(i1 + i0 / bound0, i..., iLast));
 }
 
-__device__ __host__ inline void assertClose(float a, float b, float threshold = 0.01f) {
+__device__ __host__ inline void assertClose([[maybe_unused]] float a, [[maybe_unused]] float b, [[maybe_unused]] float threshold = 0.01f) {
   assert(abs(a - b) < threshold);
 }
 
-__device__ __host__ inline void assertClose(half a, half b, float threshold = 0.01f) {
+__device__ __host__ inline void assertClose([[maybe_unused]] half a, [[maybe_unused]] half b, [[maybe_unused]] float threshold = 0.01f) {
   assertClose(__half2float(a), __half2float(b), threshold);
 }

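Why this change would fix a build: when NDEBUG is defined (release builds), assert(...) expands to nothing, leaving the assertClose parameters unreferenced; with unused-parameter warnings promoted to errors (e.g. -Werror), that breaks compilation, and the C++17 [[maybe_unused]] attribute suppresses the warning. That diagnosis is an inference from the diff, not stated in the commit; below is a minimal host-only sketch of the pattern (hypothetical file name and compiler flags, not from the repository):

// Hypothetical standalone repro, not from the repository; assumes C++17.
// Release build: g++ -std=c++17 -DNDEBUG -Wall -Wextra -Werror close.cpp
#include <cassert>
#include <cmath>

// Under -DNDEBUG, assert(...) compiles to nothing, so without
// [[maybe_unused]] the parameters a, b, and threshold would be
// unreferenced and -Werror=unused-parameter would fail the build.
inline void assertClose([[maybe_unused]] float a, [[maybe_unused]] float b,
                        [[maybe_unused]] float threshold = 0.01f) {
  assert(std::abs(a - b) < threshold);
}

int main() {
  assertClose(1.0f, 1.005f);  // no-op in release, checked in debug builds
  return 0;
}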
onnxruntime/test/python/transformers/test_gqa.py

Lines changed: 2 additions & 7 deletions

The temporary enable_quantized_kv_tests flag is removed; both quantized-KV test classes are now gated solely on Flash Attention availability.
@@ -60,8 +60,6 @@
 enable_debug_print = False
 
 enable_deterministic_check = True
-
-enable_quantized_kv_tests = True
 # #################################################################################################
 # Configuration and Helper Classes
 # #################################################################################################
@@ -2010,10 +2008,7 @@ def test_gqa_past_flash_attention_bf16(self, name, config):
         )
 
 
-@unittest.skipIf(
-    not has_flash_attention() or not enable_quantized_kv_tests,
-    "Flash Attention is not available, skipping tests.",
-)
+@unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.")
 class TestFlashGQABF16QuantizedKV(unittest.TestCase):
     def manual_seed(self):
         # Reset random seeds before each test to ensure test isolation
@@ -2245,7 +2240,7 @@ def gqa_xqa_test_cases():
     yield name, config, torch_type, ort_type
 
 
-@unittest.skipIf(not enable_quantized_kv_tests, "Quantized KV is not enabled, skipping tests.")
+@unittest.skipIf(not has_flash_attention(), "Flash Attention is not available, skipping tests.")
 class TestXQAQuantizedParity(unittest.TestCase):
     """Tests that verify fused kernels produce the same results as unfused kernels."""
 