Skip to content

Commit 8b4f25c

Browse files
committed
Validate incompatible cache_modifier/eviction_policy combinations in NVIDIA backend
When tl.load/tl.store is called with a PTX-illegal combination of cache_modifier and eviction_policy, Triton previously emitted PTX containing both modifiers and let ptxas fail with an opaque assembler error:

    ptxas error: Modifier '.evict_first' cannot be combined with modifier '.cs'

Users saw a low-level message with no indication of which Python arguments caused it.

Add validation in LoadStoreOpToLLVM.cpp (NVIDIA-specific PTX lowering) that emits a clear compilation error before any PTX is generated. Placing the check in the NVIDIA backend, not in backend-agnostic semantic.py, keeps the frontend neutral to PTX ISA constraints.

PTX-illegal combinations covered:

| op    | cache_modifier | eviction_policy         |
|-------|----------------|-------------------------|
| store | .cs            | evict_first, evict_last |
| store | .cg            | evict_first             |
| load  | .ca            | evict_first, evict_last |
| load  | .cg            | evict_first             |
1 parent f7c1d69 commit 8b4f25c

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

test/Conversion/tritongpu_to_llvm.mlir

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: triton-opt %s -split-input-file --allocate-shared-memory-nv --convert-triton-gpu-to-llvm -reconcile-unrealized-casts 2>/dev/null | FileCheck %s --dump-input-context 20
2+
// RUN: triton-opt %s -split-input-file --allocate-shared-memory-nv --convert-triton-gpu-to-llvm -reconcile-unrealized-casts --verify-diagnostics 2>&1 | FileCheck %s --dump-input-context 20
23

34
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
45
// CHECK: llvm.func @test_empty_kernel(%arg0: i32, %arg1: !llvm.ptr<1> {tt.pointee_type = f16}, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>)
@@ -127,6 +128,80 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
127128

128129
// -----
129130

131+
// PTX-illegal combinations of cache_modifier and eviction_policy.
132+
// Before this check, ptxas would fail with an opaque assembler error
133+
// (e.g. "Modifier '.evict_first' cannot be combined with modifier '.cs'").
134+
// The NVIDIA backend now emits a clear op error before PTX generation.
135+
136+
#blocked0 = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
137+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
138+
tt.func @store_cs_evict_first(%ptrs : tensor<256x!tt.ptr<f32>, #blocked0>,
139+
%vals : tensor<256xf32, #blocked0>) {
140+
// expected-error @+1 {{cache_modifier '.cs' is incompatible with eviction_policy 'evict_first'/'evict_last'}}
141+
tt.store %ptrs, %vals evictionPolicy = evict_first cacheModifier = cs : tensor<256x!tt.ptr<f32>, #blocked0>
142+
tt.return
143+
}
144+
}
145+
146+
// -----
147+
148+
#blocked0 = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
149+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
150+
tt.func @store_cs_evict_last(%ptrs : tensor<256x!tt.ptr<f32>, #blocked0>,
151+
%vals : tensor<256xf32, #blocked0>) {
152+
// expected-error @+1 {{cache_modifier '.cs' is incompatible with eviction_policy 'evict_first'/'evict_last'}}
153+
tt.store %ptrs, %vals evictionPolicy = evict_last cacheModifier = cs : tensor<256x!tt.ptr<f32>, #blocked0>
154+
tt.return
155+
}
156+
}
157+
158+
// -----
159+
160+
#blocked0 = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
161+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
162+
tt.func @store_cg_evict_first(%ptrs : tensor<256x!tt.ptr<f32>, #blocked0>,
163+
%vals : tensor<256xf32, #blocked0>) {
164+
// expected-error @+1 {{cache_modifier '.cg' is incompatible with eviction_policy 'evict_first'}}
165+
tt.store %ptrs, %vals evictionPolicy = evict_first cacheModifier = cg : tensor<256x!tt.ptr<f32>, #blocked0>
166+
tt.return
167+
}
168+
}
169+
170+
// -----
171+
172+
#blocked0 = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
173+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
174+
tt.func @load_ca_evict_first(%ptrs : tensor<256x!tt.ptr<f32>, #blocked0>) {
175+
// expected-error @+1 {{cache_modifier '.ca' is incompatible with eviction_policy 'evict_first'/'evict_last'}}
176+
%0 = tt.load %ptrs evictionPolicy = evict_first cacheModifier = ca : tensor<256x!tt.ptr<f32>, #blocked0>
177+
tt.return
178+
}
179+
}
180+
181+
// -----
182+
183+
#blocked0 = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
184+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
185+
tt.func @load_ca_evict_last(%ptrs : tensor<256x!tt.ptr<f32>, #blocked0>) {
186+
// expected-error @+1 {{cache_modifier '.ca' is incompatible with eviction_policy 'evict_first'/'evict_last'}}
187+
%0 = tt.load %ptrs evictionPolicy = evict_last cacheModifier = ca : tensor<256x!tt.ptr<f32>, #blocked0>
188+
tt.return
189+
}
190+
}
191+
192+
// -----
193+
194+
#blocked0 = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
195+
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
196+
tt.func @load_cg_evict_first(%ptrs : tensor<256x!tt.ptr<f32>, #blocked0>) {
197+
// expected-error @+1 {{cache_modifier '.cg' is incompatible with eviction_policy 'evict_first'}}
198+
%0 = tt.load %ptrs evictionPolicy = evict_first cacheModifier = cg : tensor<256x!tt.ptr<f32>, #blocked0>
199+
tt.return
200+
}
201+
}
202+
203+
// -----
204+
130205
#blocked0 = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [2], order = [0]}>
131206
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 2 : i32} {
132207
// CHECK-LABEL: global_load_store_no_vec

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,23 @@ struct LoadOpConversion : public ConvertOpToLLVMPattern<triton::LoadOp>,
188188
otherElems = unpackLLElements(loc, llOther, rewriter);
189189
}
190190

191+
// Validate cache_modifier + eviction_policy combinations that the PTX ISA
192+
// forbids. This check belongs here (PTX codegen) rather than in the
193+
// frontend semantic layer, which is backend-agnostic.
194+
auto cache = op.getCache();
195+
auto evict = op.getEvict();
196+
if ((evict == triton::EvictionPolicy::EVICT_FIRST ||
197+
evict == triton::EvictionPolicy::EVICT_LAST) &&
198+
cache == triton::CacheModifier::CA)
199+
return op.emitOpError(
200+
"cache_modifier '.ca' is incompatible with eviction_policy "
201+
"'evict_first'/'evict_last': .ca overrides L1 eviction policy");
202+
if (evict == triton::EvictionPolicy::EVICT_FIRST &&
203+
cache == triton::CacheModifier::CG)
204+
return op.emitOpError(
205+
"cache_modifier '.cg' is incompatible with eviction_policy "
206+
"'evict_first': .cg bypasses L1 cache");
207+
191208
// vectorized iteration through all the pointer/mask/other elements
192209
const int valueElemNBits =
193210
std::max(8u, valueElemTy.getIntOrFloatBitWidth());
@@ -399,6 +416,23 @@ struct StoreOpConversion : public ConvertOpToLLVMPattern<triton::StoreOp>,
399416
<< mask << "\n";
400417
}
401418

419+
// Validate cache_modifier + eviction_policy combinations that the PTX ISA
420+
// forbids. This check belongs here (PTX codegen) rather than in the
421+
// frontend semantic layer, which is backend-agnostic.
422+
auto cache = op.getCache();
423+
auto evict = op.getEvict();
424+
if ((evict == triton::EvictionPolicy::EVICT_FIRST ||
425+
evict == triton::EvictionPolicy::EVICT_LAST) &&
426+
cache == triton::CacheModifier::CS)
427+
return op.emitOpError(
428+
"cache_modifier '.cs' is incompatible with eviction_policy "
429+
"'evict_first'/'evict_last': .cs bypasses L1 cache");
430+
if (evict == triton::EvictionPolicy::EVICT_FIRST &&
431+
cache == triton::CacheModifier::CG)
432+
return op.emitOpError(
433+
"cache_modifier '.cg' is incompatible with eviction_policy "
434+
"'evict_first': .cg bypasses L1 cache");
435+
402436
const size_t dtsize =
403437
std::max<int>(1, valueElemTy.getIntOrFloatBitWidth() / 8);
404438
const size_t valueElemNBits = dtsize * 8;

0 commit comments

Comments
 (0)