-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[MLIR][NVVM] Add pmevent #134999
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[MLIR][NVVM] Add pmevent #134999
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copilot reviewed 1 out of 4 changed files in this pull request and generated no comments.
Files not reviewed (3)
- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td: Language not supported
- mlir/test/Target/LLVMIR/nvvmir-invalid.mlir: Language not supported
- mlir/test/Target/LLVMIR/nvvmir.mlir: Language not supported
Comments suppressed due to low confidence (1)
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp:1377
- The intrinsic call contains an empty argument in the parameter list. Please review and supply the correct parameter or remove the errant comma.
return TCGEN05_CP_2CTA(shape_mc, , is_2cta);
@llvm/pr-subscribers-mlir Author: Guray Ozen (grypp) ChangesAdd For more information, see PTX ISA Full diff: https://github.com/llvm/llvm-project/pull/134999.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0a6e66919f021..003b393fa5cd5 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -301,6 +301,37 @@ def NVVM_ReduxOp :
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM Performance Monitor events
+//===----------------------------------------------------------------------===//
+
+def NVVM_PMEventOp : NVVM_Op<"pmevent">,
+ Arguments<(ins OptionalAttr<I16Attr>:$maskedEventId,
+ OptionalAttr<I32Attr>:$eventId)> {
+ let summary = "Trigger one or more Performance Monitor events.";
+
+ let description = [{
+ Triggers one or more of a fixed number of performance monitor events, with
+ event index or mask specified by immediate operand.
+
+ Without `mask` it triggers a single performance monitor event indexed by
+ immediate operand a, in the range 0..15.
+
+ With `mask` it triggers one or more of the performance monitor events. Each
+ bit in the 16-bit immediate operand a controls an event.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent)
+ }];
+
+ let assemblyFormat = "attr-dict (`id` `=` $eventId^)? (`mask` `=` $maskedEventId^)?";
+
+ string llvmBuilder = [{
+ // TODO
+ }];
+
+ let hasVerifier = 1;
+}
+
//===----------------------------------------------------------------------===//
// NVVM Split arrive/wait barrier
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 09bff6101edd3..d926f388429f1 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -243,7 +244,8 @@ void MmaOp::print(OpAsmPrinter &p) {
p.printOptionalAttrDict(this->getOperation()->getAttrs(), ignoreAttrNames);
// Print the types of the operands and result.
- p << " : " << "(";
+ p << " : "
+ << "(";
llvm::interleaveComma(SmallVector<Type, 3>{frags[0].regs[0].getType(),
frags[1].regs[0].getType(),
frags[2].regs[0].getType()},
@@ -992,7 +994,9 @@ std::string NVVM::WgmmaMmaAsyncOp::getPtx() {
ss << "},";
// Need to map read/write registers correctly.
regCnt = (regCnt * 2);
- ss << " $" << (regCnt) << "," << " $" << (regCnt + 1) << "," << " p";
+ ss << " $" << (regCnt) << ","
+ << " $" << (regCnt + 1) << ","
+ << " p";
if (getTypeD() != WGMMATypes::s32) {
ss << ", $" << (regCnt + 3) << ", $" << (regCnt + 4);
}
@@ -1219,7 +1223,7 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
: CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, tile)
#define GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) \
- [&]() -> auto { \
+ [&]() -> auto{ \
switch (dims) { \
case 1: \
return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 1, tile); \
@@ -1234,7 +1238,8 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
default: \
llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \
} \
- }()
+ } \
+ ()
llvm::Intrinsic::ID CpAsyncBulkTensorReduceOp::getIntrinsicID(
int tensorDims, NVVM::TMAReduxKind kind, bool isIm2Col) {
@@ -1364,13 +1369,14 @@ Tcgen05CommitOp::getIntrinsicIDAndArgs(Operation &op,
: TCGEN05_CP_IMPL(shape_mc, src_fmt, _cg1)
#define GET_TCGEN05_CP_ID(shape_mc, src_fmt, is_2cta) \
- [&]() -> auto { \
+ [&]() -> auto{ \
if (src_fmt == Tcgen05CpSrcFormat::B6x16_P32) \
return TCGEN05_CP_2CTA(shape_mc, _b6x16_p32, is_2cta); \
if (src_fmt == Tcgen05CpSrcFormat::B4x16_P64) \
return TCGEN05_CP_2CTA(shape_mc, _b4x16_p64, is_2cta); \
return TCGEN05_CP_2CTA(shape_mc, , is_2cta); \
- }()
+ } \
+ ()
llvm::Intrinsic::ID Tcgen05CpOp::getIntrinsicID(Operation &op) {
auto curOp = cast<NVVM::Tcgen05CpOp>(op);
@@ -1536,6 +1542,24 @@ LogicalResult NVVMDialect::verifyRegionArgAttribute(Operation *op,
return success();
}
+LogicalResult PMEventOp::verify() {
+ if (!getMaskedEventId() && !getEventId()) {
+ return emitOpError() << "either `id` or `mask` must be set";
+ }
+
+ if (getMaskedEventId() && getEventId()) {
+ return emitOpError() << "`id` and `mask` cannot be set at the same time";
+ }
+
+ if (getEventId()) {
+ if (getEventId() < 0 || getEventId() > 15) {
+ return emitOpError() << "`id` must be between 0 and 15";
+ }
+ }
+
+ return llvm::success();
+}
+
//===----------------------------------------------------------------------===//
// NVVM target attribute.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index f87f11daeef54..4a343be3fb934 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -1,4 +1,23 @@
-// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+// RUN: mlir-opt -verify-diagnostics -split-input-file %s
+
+llvm.func @pmevent_no_id() {
+ // expected-error @below {{either `id` or `mask` must be set}}
+ nvvm.pmevent
+}
+
+// -----
+
+llvm.func @pmevent_bigger15() {
+ // expected-error @below {{`id` must be between 0 and 15}}
+ nvvm.pmevent id = 141
+}
+
+// -----
+
+llvm.func @pmevent_many_ids() {
+ // expected-error @below {{`id` and `mask` cannot be set at the same time}}
+ nvvm.pmevent id = 1 mask = 1
+}
// -----
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 3a0713f2feee8..36cfb6be18da7 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -832,6 +832,7 @@ llvm.func @nvvm_match_sync(%mask: i32, %val32: i32, %val64: i64) {
}
// -----
+
// CHECK-LABEL: @nvvm_st_bulk
llvm.func @nvvm_st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size: i64) {
// CHECK: call void @llvm.nvvm.st.bulk(ptr %{{.*}}, i64 %{{.*}}, i64 0)
@@ -844,3 +845,19 @@ llvm.func @nvvm_st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size:
nvvm.st.bulk %addr_shared, size = %size, init = 0: !llvm.ptr<3>
llvm.return
}
+
+
+// -----
+
+// CHECK-LABEL: @nvvm_pmevent
+llvm.func @nvvm_pmevent() {
+ nvvm.pmevent id = 0
+
+ nvvm.pmevent id = 15
+
+ nvvm.pmevent mask = 15000
+
+ nvvm.pmevent mask = 10
+
+ llvm.return
+}
|
@llvm/pr-subscribers-mlir-llvm Author: Guray Ozen (grypp) ChangesAdd For more information, see PTX ISA Full diff: https://github.com/llvm/llvm-project/pull/134999.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0a6e66919f021..003b393fa5cd5 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -301,6 +301,37 @@ def NVVM_ReduxOp :
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM Performance Monitor events
+//===----------------------------------------------------------------------===//
+
+def NVVM_PMEventOp : NVVM_Op<"pmevent">,
+ Arguments<(ins OptionalAttr<I16Attr>:$maskedEventId,
+ OptionalAttr<I32Attr>:$eventId)> {
+ let summary = "Trigger one or more Performance Monitor events.";
+
+ let description = [{
+ Triggers one or more of a fixed number of performance monitor events, with
+ event index or mask specified by immediate operand.
+
+ Without `mask` it triggers a single performance monitor event indexed by
+ immediate operand a, in the range 0..15.
+
+ With `mask` it triggers one or more of the performance monitor events. Each
+ bit in the 16-bit immediate operand a controls an event.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent)
+ }];
+
+ let assemblyFormat = "attr-dict (`id` `=` $eventId^)? (`mask` `=` $maskedEventId^)?";
+
+ string llvmBuilder = [{
+ // TODO
+ }];
+
+ let hasVerifier = 1;
+}
+
//===----------------------------------------------------------------------===//
// NVVM Split arrive/wait barrier
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 09bff6101edd3..d926f388429f1 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -243,7 +244,8 @@ void MmaOp::print(OpAsmPrinter &p) {
p.printOptionalAttrDict(this->getOperation()->getAttrs(), ignoreAttrNames);
// Print the types of the operands and result.
- p << " : " << "(";
+ p << " : "
+ << "(";
llvm::interleaveComma(SmallVector<Type, 3>{frags[0].regs[0].getType(),
frags[1].regs[0].getType(),
frags[2].regs[0].getType()},
@@ -992,7 +994,9 @@ std::string NVVM::WgmmaMmaAsyncOp::getPtx() {
ss << "},";
// Need to map read/write registers correctly.
regCnt = (regCnt * 2);
- ss << " $" << (regCnt) << "," << " $" << (regCnt + 1) << "," << " p";
+ ss << " $" << (regCnt) << ","
+ << " $" << (regCnt + 1) << ","
+ << " p";
if (getTypeD() != WGMMATypes::s32) {
ss << ", $" << (regCnt + 3) << ", $" << (regCnt + 4);
}
@@ -1219,7 +1223,7 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
: CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, tile)
#define GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) \
- [&]() -> auto { \
+ [&]() -> auto{ \
switch (dims) { \
case 1: \
return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 1, tile); \
@@ -1234,7 +1238,8 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
default: \
llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \
} \
- }()
+ } \
+ ()
llvm::Intrinsic::ID CpAsyncBulkTensorReduceOp::getIntrinsicID(
int tensorDims, NVVM::TMAReduxKind kind, bool isIm2Col) {
@@ -1364,13 +1369,14 @@ Tcgen05CommitOp::getIntrinsicIDAndArgs(Operation &op,
: TCGEN05_CP_IMPL(shape_mc, src_fmt, _cg1)
#define GET_TCGEN05_CP_ID(shape_mc, src_fmt, is_2cta) \
- [&]() -> auto { \
+ [&]() -> auto{ \
if (src_fmt == Tcgen05CpSrcFormat::B6x16_P32) \
return TCGEN05_CP_2CTA(shape_mc, _b6x16_p32, is_2cta); \
if (src_fmt == Tcgen05CpSrcFormat::B4x16_P64) \
return TCGEN05_CP_2CTA(shape_mc, _b4x16_p64, is_2cta); \
return TCGEN05_CP_2CTA(shape_mc, , is_2cta); \
- }()
+ } \
+ ()
llvm::Intrinsic::ID Tcgen05CpOp::getIntrinsicID(Operation &op) {
auto curOp = cast<NVVM::Tcgen05CpOp>(op);
@@ -1536,6 +1542,24 @@ LogicalResult NVVMDialect::verifyRegionArgAttribute(Operation *op,
return success();
}
+LogicalResult PMEventOp::verify() {
+ if (!getMaskedEventId() && !getEventId()) {
+ return emitOpError() << "either `id` or `mask` must be set";
+ }
+
+ if (getMaskedEventId() && getEventId()) {
+ return emitOpError() << "`id` and `mask` cannot be set at the same time";
+ }
+
+ if (getEventId()) {
+ if (getEventId() < 0 || getEventId() > 15) {
+ return emitOpError() << "`id` must be between 0 and 15";
+ }
+ }
+
+ return llvm::success();
+}
+
//===----------------------------------------------------------------------===//
// NVVM target attribute.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index f87f11daeef54..4a343be3fb934 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -1,4 +1,23 @@
-// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+// RUN: mlir-opt -verify-diagnostics -split-input-file %s
+
+llvm.func @pmevent_no_id() {
+ // expected-error @below {{either `id` or `mask` must be set}}
+ nvvm.pmevent
+}
+
+// -----
+
+llvm.func @pmevent_bigger15() {
+ // expected-error @below {{`id` must be between 0 and 15}}
+ nvvm.pmevent id = 141
+}
+
+// -----
+
+llvm.func @pmevent_many_ids() {
+ // expected-error @below {{`id` and `mask` cannot be set at the same time}}
+ nvvm.pmevent id = 1 mask = 1
+}
// -----
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 3a0713f2feee8..36cfb6be18da7 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -832,6 +832,7 @@ llvm.func @nvvm_match_sync(%mask: i32, %val32: i32, %val64: i64) {
}
// -----
+
// CHECK-LABEL: @nvvm_st_bulk
llvm.func @nvvm_st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size: i64) {
// CHECK: call void @llvm.nvvm.st.bulk(ptr %{{.*}}, i64 %{{.*}}, i64 0)
@@ -844,3 +845,19 @@ llvm.func @nvvm_st_bulk(%addr_gen: !llvm.ptr, %addr_shared: !llvm.ptr<3>, %size:
nvvm.st.bulk %addr_shared, size = %size, init = 0: !llvm.ptr<3>
llvm.return
}
+
+
+// -----
+
+// CHECK-LABEL: @nvvm_pmevent
+llvm.func @nvvm_pmevent() {
+ nvvm.pmevent id = 0
+
+ nvvm.pmevent id = 15
+
+ nvvm.pmevent mask = 15000
+
+ nvvm.pmevent mask = 10
+
+ llvm.return
+}
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions cpp -- mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp View the diff from clang-format here.diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index d926f3884..160f0793e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1223,7 +1223,7 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
: CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, dim, tile)
#define GET_CP_ASYNC_BULK_TENSOR_ID(op, dims, is_im2col) \
- [&]() -> auto{ \
+ [&]() -> auto { \
switch (dims) { \
case 1: \
return CP_ASYNC_BULK_TENSOR_REDUCE_MODE(op, 1, tile); \
@@ -1238,8 +1238,7 @@ llvm::Intrinsic::ID CpAsyncBulkTensorPrefetchOp::getIntrinsicID(int tensorDims,
default: \
llvm_unreachable("Invalid TensorDim in CpAsyncBulkTensorReduceOp."); \
} \
- } \
- ()
+ }()
llvm::Intrinsic::ID CpAsyncBulkTensorReduceOp::getIntrinsicID(
int tensorDims, NVVM::TMAReduxKind kind, bool isIm2Col) {
@@ -1369,14 +1368,13 @@ Tcgen05CommitOp::getIntrinsicIDAndArgs(Operation &op,
: TCGEN05_CP_IMPL(shape_mc, src_fmt, _cg1)
#define GET_TCGEN05_CP_ID(shape_mc, src_fmt, is_2cta) \
- [&]() -> auto{ \
+ [&]() -> auto { \
if (src_fmt == Tcgen05CpSrcFormat::B6x16_P32) \
return TCGEN05_CP_2CTA(shape_mc, _b6x16_p32, is_2cta); \
if (src_fmt == Tcgen05CpSrcFormat::B4x16_P64) \
return TCGEN05_CP_2CTA(shape_mc, _b4x16_p64, is_2cta); \
return TCGEN05_CP_2CTA(shape_mc, , is_2cta); \
- } \
- ()
+ }()
llvm::Intrinsic::ID Tcgen05CpOp::getIntrinsicID(Operation &op) {
auto curOp = cast<NVVM::Tcgen05CpOp>(op);
|
Add
nvvm.pmevent
Op that Triggers one or more of a fixed number of performance monitor events, with event index or mask specified by immediate operand.For more information, see PTX ISA