Skip to content

Commit

Permalink
[MLIR][NVVM] Add support for aligned variants of cluster barriers (ll…
Browse files Browse the repository at this point in the history
…vm#78142)

This patch adds:
* Support for the 'aligned' variants of the cluster barrier Ops, by
extending the existing Op with an 'aligned' attribute.
* Docs for these Ops.
* Test cases to verify the lowering to the corresponding intrinsics.

Signed-off-by: Durgadoss R <[email protected]>
  • Loading branch information
durga4github authored Jan 15, 2024
1 parent 74cb287 commit dc01b59
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 3 deletions.
58 changes: 55 additions & 3 deletions mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -378,22 +378,74 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
}

// Cluster barrier "arrive" op. Carries an optional `aligned` unit attribute
// that selects which NVVM intrinsic the op is translated to.
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
  let arguments = (ins OptionalAttr<UnitAttr>:$aligned);

  let summary = "Cluster Barrier Arrive Op";
  let description = [{
    The `cluster.arrive` can be used by the threads within the cluster for synchronization and
    communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
    without causing the executing thread to wait for other participating threads.

    The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  }];

  // Emit exactly one intrinsic call: the `.aligned` variant when the
  // attribute is set, the plain variant otherwise. There must be no
  // unconditional call ahead of this branch, or the barrier arrival would be
  // emitted twice.
  string llvmBuilder = [{
      if ($aligned)
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_aligned);
      else
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive);
  }];
  let assemblyFormat = "attr-dict";
}

// Cluster barrier "arrive.relaxed" op. Carries an optional `aligned` unit
// attribute that selects which NVVM intrinsic the op is translated to.
def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
  let arguments = (ins OptionalAttr<UnitAttr>:$aligned);

  let summary = "Cluster Barrier Relaxed Arrive Op";
  let description = [{
    The `cluster.arrive` can be used by the threads within the cluster for synchronization and
    communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
    without causing the executing thread to wait for other participating threads.

    The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
    The .relaxed qualifier on `cluster.arrive` specifies that there are no memory
    ordering and visibility guarantees provided for the memory accesses performed prior to
    `cluster.arrive`.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  }];

  // Emit exactly one intrinsic call: the `.relaxed.aligned` variant when the
  // attribute is set, the plain `.relaxed` variant otherwise. There must be no
  // unconditional call ahead of this branch, or the barrier arrival would be
  // emitted twice.
  string llvmBuilder = [{
      if ($aligned)
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed_aligned);
      else
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed);
  }];
  let assemblyFormat = "attr-dict";
}

// Cluster barrier "wait" op. Carries an optional `aligned` unit attribute
// that selects which NVVM intrinsic the op is translated to.
def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait"> {
  let arguments = (ins OptionalAttr<UnitAttr>:$aligned);

  let summary = "Cluster Barrier Wait Op";
  let description = [{
    The `cluster.wait` causes the executing thread to wait for all non-exited threads
    of the cluster to perform `cluster.arrive`. The `aligned` attribute, when provided,
    generates the .aligned version of the PTX instruction.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  }];

  // Emit exactly one intrinsic call: the `.aligned` variant when the
  // attribute is set, the plain variant otherwise. There must be no
  // unconditional call ahead of this branch, or the wait would be emitted
  // twice.
  string llvmBuilder = [{
      if ($aligned)
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait_aligned);
      else
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait);
  }];
  let assemblyFormat = "attr-dict";
}
Expand Down
6 changes: 6 additions & 0 deletions mlir/test/Dialect/LLVMIR/nvvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,26 @@ func.func @llvm_nvvm_barrier0() {
// Dialect-level test for `nvvm.cluster.arrive`: the op is written once without
// and once with the `aligned` unit attribute, and each FileCheck directive
// expects the same textual form to appear in the tool output, in that order.
// NOTE(review): the RUN line is outside this excerpt; presumably this is an
// mlir-opt parse/print round-trip. Confirm against the top of the file.
func.func @llvm_nvvm_cluster_arrive() {
// CHECK: nvvm.cluster.arrive
nvvm.cluster.arrive
// CHECK: nvvm.cluster.arrive {aligned}
nvvm.cluster.arrive {aligned}
llvm.return
}

// Dialect-level test for `nvvm.cluster.arrive.relaxed`: the op is written once
// without and once with the `aligned` unit attribute, and each FileCheck
// directive expects the same textual form to appear in the tool output.
// NOTE(review): the RUN line is outside this excerpt; presumably this is an
// mlir-opt parse/print round-trip. Confirm against the top of the file.
// CHECK-LABEL: @llvm_nvvm_cluster_arrive_relaxed
func.func @llvm_nvvm_cluster_arrive_relaxed() {
// CHECK: nvvm.cluster.arrive.relaxed
nvvm.cluster.arrive.relaxed
// CHECK: nvvm.cluster.arrive.relaxed {aligned}
nvvm.cluster.arrive.relaxed {aligned}
llvm.return
}

// Dialect-level test for `nvvm.cluster.wait`: the op is written once without
// and once with the `aligned` unit attribute, and each FileCheck directive
// expects the same textual form to appear in the tool output.
// NOTE(review): the RUN line is outside this excerpt; presumably this is an
// mlir-opt parse/print round-trip. Confirm against the top of the file.
// CHECK-LABEL: @llvm_nvvm_cluster_wait
func.func @llvm_nvvm_cluster_wait() {
// CHECK: nvvm.cluster.wait
nvvm.cluster.wait
// CHECK: nvvm.cluster.wait {aligned}
nvvm.cluster.wait {aligned}
llvm.return
}

Expand Down
27 changes: 27 additions & 0 deletions mlir/test/Target/LLVMIR/nvvmir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,33 @@ llvm.func @llvm_nvvm_barrier0() {
llvm.return
}

// Translation test for `nvvm.cluster.arrive`: the plain op is expected to
// produce a call to `@llvm.nvvm.barrier.cluster.arrive()`, and the op carrying
// the `aligned` attribute a call to `@llvm.nvvm.barrier.cluster.arrive.aligned()`.
// The trailing `()` in the first pattern keeps it from matching the
// `.aligned` call.
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
llvm.func @llvm_nvvm_cluster_arrive() {
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
nvvm.cluster.arrive
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive.aligned()
nvvm.cluster.arrive {aligned}
llvm.return
}

// Translation test for `nvvm.cluster.arrive.relaxed`: the plain op is expected
// to produce a call to `@llvm.nvvm.barrier.cluster.arrive.relaxed()`, and the
// op carrying the `aligned` attribute a call to
// `@llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()`.
// CHECK-LABEL: @llvm_nvvm_cluster_arrive_relaxed
llvm.func @llvm_nvvm_cluster_arrive_relaxed() {
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed()
nvvm.cluster.arrive.relaxed
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
nvvm.cluster.arrive.relaxed {aligned}
llvm.return
}

// Translation test for `nvvm.cluster.wait`: the plain op is expected to
// produce a call to `@llvm.nvvm.barrier.cluster.wait()`, and the op carrying
// the `aligned` attribute a call to `@llvm.nvvm.barrier.cluster.wait.aligned()`.
// CHECK-LABEL: @llvm_nvvm_cluster_wait
llvm.func @llvm_nvvm_cluster_wait() {
// CHECK: call void @llvm.nvvm.barrier.cluster.wait()
nvvm.cluster.wait
// CHECK: call void @llvm.nvvm.barrier.cluster.wait.aligned()
nvvm.cluster.wait {aligned}
llvm.return
}

// CHECK-LABEL: @nvvm_shfl
llvm.func @nvvm_shfl(
%0 : i32, %1 : i32, %2 : i32,
Expand Down

0 comments on commit dc01b59

Please sign in to comment.