-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[mlir][NVVM] Add `globaltimer_wait` op #129065
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-mlir Author: Matthias Springer (matthias-springer) Changes: A helper op to wait for a certain number of nanoseconds in a busy loop according to `%globaltimer`. This operation can be used for debugging. Full diff: https://github.com/llvm/llvm-project/pull/129065.diff 3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 944cb481b025b..d23145fdb6590 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2583,6 +2583,10 @@ def NVVM_MapaOp: NVVM_Op<"mapa",
let assemblyFormat = "$a`,` $b attr-dict `:` type($a) `->` type($res)";
}
+//===----------------------------------------------------------------------===//
+// Special control flow ops
+//===----------------------------------------------------------------------===//
+
def NVVM_Exit : NVVM_Op<"exit"> {
let summary = "Exit Op";
let description = [{
@@ -2596,11 +2600,6 @@ def NVVM_Exit : NVVM_Op<"exit"> {
let assemblyFormat = "attr-dict";
}
-
-//===----------------------------------------------------------------------===//
-// NVVM breakpoint Op
-//===----------------------------------------------------------------------===//
-
def NVVM_Breakpoint : NVVM_Op<"breakpoint"> {
let summary = "Breakpoint Op";
let description = [{
@@ -2614,6 +2613,31 @@ def NVVM_Breakpoint : NVVM_Op<"breakpoint"> {
let assemblyFormat = "attr-dict";
}
+def NVVM_GlobalTimerWaitOp : NVVM_PTXBuilder_Op<"globaltimer_wait">,
+ Arguments<(ins I64:$time)> {
+ let summary = "globaltimer busy loop";
+ let description = [{
+ Wait in busy loop for certain number of nanoseconds.
+ [For more information, see PTX ISA.](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer)
+ }];
+ let assemblyFormat = "$time attr-dict";
+
+ let extraClassDefinition = [{
+ std::string $cppClass::getPtx() {
+ return R"({
+ .reg .pred continueloop;
+ .reg .u64 rd<3>;
+ mov.u64 rd0, %globaltimer;
+ add.s64 rd1, rd0, %0;
+ L_busy:
+ mov.u64 rd2, %globaltimer;
+ setp.lt.s64 continueloop, rd2, rd1;
+ @continueloop bra L_busy;
+})";
+ }
+ }];
+}
+
//===----------------------------------------------------------------------===//
// NVVM TCGEN05 Ops
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp
index b109f00c3da13..e82eb528c06c6 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp
@@ -13,6 +13,8 @@
#include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h"
+#include <regex>
+
#define DEBUG_TYPE "ptx-builder"
#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
#define DBGSNL() (llvm::dbgs() << "\n")
@@ -129,9 +131,9 @@ LLVM::InlineAsmOp PtxBuilder::build() {
}
// Tablegen doesn't accept $, so we use %, but inline assembly uses $.
- // Replace all % with $
- std::replace(ptxInstruction.begin(), ptxInstruction.end(), '%', '$');
-
+ // Replace all % with $, but only if they are followed by a digit.
+ ptxInstruction = std::regex_replace(ptxInstruction, std::regex("(%)(\\d+)"),
+ "$\\2", std::regex_constants::format_sed);
return rewriter.create<LLVM::InlineAsmOp>(
interfaceOp->getLoc(),
/*result types=*/resultTypes,
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index c7a6eca158276..55946afd74052 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -4,6 +4,23 @@
// and the generic `convert-to-llvm` pass.
// RUN: mlir-opt --convert-to-llvm --split-input-file %s | FileCheck %s
+// CHECK: @global_timer_wait
+// CHECK: llvm.inline_asm
+// CHECK-SAME: .reg .pred continueloop;
+// CHECK-SAME: .reg .u64 rd<3>;
+// CHECK-SAME: mov.u64 rd0, %globaltimer;
+// CHECK-SAME: add.s64 rd1, rd0, $0;
+// CHECK-SAME: L_busy:
+// CHECK-SAME: mov.u64 rd2, %globaltimer;
+// CHECK-SAME: setp.lt.s64 continueloop, rd2, rd1;
+// CHECK-SAME: @continueloop bra L_busy;
+llvm.func @global_timer_wait(%time: i64) {
+ nvvm.globaltimer_wait %time
+ llvm.return
+}
+
+// -----
+
// CHECK-LABEL: @init_mbarrier
llvm.func @init_mbarrier(%barrier_gen : !llvm.ptr, %barrier : !llvm.ptr<3>, %count : i32, %pred : i1) {
//CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.shared.b64 [$0], $1;", "r,r,b"
|
@llvm/pr-subscribers-mlir-llvm Author: Matthias Springer (matthias-springer) Changes: A helper op to wait for a certain number of nanoseconds in a busy loop according to `%globaltimer`. This operation can be used for debugging. Full diff: https://github.com/llvm/llvm-project/pull/129065.diff 3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 944cb481b025b..d23145fdb6590 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2583,6 +2583,10 @@ def NVVM_MapaOp: NVVM_Op<"mapa",
let assemblyFormat = "$a`,` $b attr-dict `:` type($a) `->` type($res)";
}
+//===----------------------------------------------------------------------===//
+// Special control flow ops
+//===----------------------------------------------------------------------===//
+
def NVVM_Exit : NVVM_Op<"exit"> {
let summary = "Exit Op";
let description = [{
@@ -2596,11 +2600,6 @@ def NVVM_Exit : NVVM_Op<"exit"> {
let assemblyFormat = "attr-dict";
}
-
-//===----------------------------------------------------------------------===//
-// NVVM breakpoint Op
-//===----------------------------------------------------------------------===//
-
def NVVM_Breakpoint : NVVM_Op<"breakpoint"> {
let summary = "Breakpoint Op";
let description = [{
@@ -2614,6 +2613,31 @@ def NVVM_Breakpoint : NVVM_Op<"breakpoint"> {
let assemblyFormat = "attr-dict";
}
+def NVVM_GlobalTimerWaitOp : NVVM_PTXBuilder_Op<"globaltimer_wait">,
+ Arguments<(ins I64:$time)> {
+ let summary = "globaltimer busy loop";
+ let description = [{
+ Wait in busy loop for certain number of nanoseconds.
+ [For more information, see PTX ISA.](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer)
+ }];
+ let assemblyFormat = "$time attr-dict";
+
+ let extraClassDefinition = [{
+ std::string $cppClass::getPtx() {
+ return R"({
+ .reg .pred continueloop;
+ .reg .u64 rd<3>;
+ mov.u64 rd0, %globaltimer;
+ add.s64 rd1, rd0, %0;
+ L_busy:
+ mov.u64 rd2, %globaltimer;
+ setp.lt.s64 continueloop, rd2, rd1;
+ @continueloop bra L_busy;
+})";
+ }
+ }];
+}
+
//===----------------------------------------------------------------------===//
// NVVM TCGEN05 Ops
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp
index b109f00c3da13..e82eb528c06c6 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp
@@ -13,6 +13,8 @@
#include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.h"
+#include <regex>
+
#define DEBUG_TYPE "ptx-builder"
#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
#define DBGSNL() (llvm::dbgs() << "\n")
@@ -129,9 +131,9 @@ LLVM::InlineAsmOp PtxBuilder::build() {
}
// Tablegen doesn't accept $, so we use %, but inline assembly uses $.
- // Replace all % with $
- std::replace(ptxInstruction.begin(), ptxInstruction.end(), '%', '$');
-
+ // Replace all % with $, but only if they are followed by a digit.
+ ptxInstruction = std::regex_replace(ptxInstruction, std::regex("(%)(\\d+)"),
+ "$\\2", std::regex_constants::format_sed);
return rewriter.create<LLVM::InlineAsmOp>(
interfaceOp->getLoc(),
/*result types=*/resultTypes,
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index c7a6eca158276..55946afd74052 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -4,6 +4,23 @@
// and the generic `convert-to-llvm` pass.
// RUN: mlir-opt --convert-to-llvm --split-input-file %s | FileCheck %s
+// CHECK: @global_timer_wait
+// CHECK: llvm.inline_asm
+// CHECK-SAME: .reg .pred continueloop;
+// CHECK-SAME: .reg .u64 rd<3>;
+// CHECK-SAME: mov.u64 rd0, %globaltimer;
+// CHECK-SAME: add.s64 rd1, rd0, $0;
+// CHECK-SAME: L_busy:
+// CHECK-SAME: mov.u64 rd2, %globaltimer;
+// CHECK-SAME: setp.lt.s64 continueloop, rd2, rd1;
+// CHECK-SAME: @continueloop bra L_busy;
+llvm.func @global_timer_wait(%time: i64) {
+ nvvm.globaltimer_wait %time
+ llvm.return
+}
+
+// -----
+
// CHECK-LABEL: @init_mbarrier
llvm.func @init_mbarrier(%barrier_gen : !llvm.ptr, %barrier : !llvm.ptr<3>, %count : i32, %pred : i1) {
//CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.shared.b64 [$0], $1;", "r,r,b"
|
@@ -2614,6 +2613,31 @@ def NVVM_Breakpoint : NVVM_Op<"breakpoint"> { | |||
let assemblyFormat = "attr-dict"; | |||
} | |||
|
|||
def NVVM_GlobalTimerWaitOp : NVVM_PTXBuilder_Op<"globaltimer_wait">, | |||
Arguments<(ins I64:$time)> { | |||
let summary = "globaltimer busy loop"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am wondering if we can leverage the nanosleep instruction for this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess @matthias-springer wanted to use `%globaltimer` specifically. But yes, we can also implement a variant of it with nanosleep.
let summary = "globaltimer busy loop"; | ||
let description = [{ | ||
Wait in busy loop for certain number of nanoseconds. | ||
[For more information, see PTX ISA.](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a helper op with a busy loop. We don't need to refer to anything.
@@ -2614,6 +2613,31 @@ def NVVM_Breakpoint : NVVM_Op<"breakpoint"> { | |||
let assemblyFormat = "attr-dict"; | |||
} | |||
|
|||
def NVVM_GlobalTimerWaitOp : NVVM_PTXBuilder_Op<"globaltimer_wait">, | |||
Arguments<(ins I64:$time)> { | |||
let summary = "globaltimer busy loop"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess @matthias-springer wanted to use `%globaltimer` specifically. But yes, we can also implement a variant of it with nanosleep.
A helper op to wait for a certain number of nanoseconds in a busy loop according to `%globaltimer`. This operation can be used for debugging.