Skip to content

Commit

Permalink
[MLIR] Add the convergent attribute to the barrier and shuffle ops (#…
Browse files Browse the repository at this point in the history
…97807)

When lowering from the gpu dialect to the llvm dialect for spirv, the
barrier op and shuffle ops need a convergent attribute for correctness.
  • Loading branch information
FMarno authored Jul 9, 2024
1 parent 19cc461 commit 3670e7f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 14 deletions.
13 changes: 8 additions & 5 deletions mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ namespace mlir {
static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
StringRef name,
ArrayRef<Type> paramTypes,
Type resultType) {
Type resultType,
bool isConvergent = false) {
auto func = dyn_cast_or_null<LLVM::LLVMFuncOp>(
SymbolTable::lookupSymbolIn(symbolTable, name));
if (!func) {
Expand All @@ -52,6 +53,7 @@ static LLVM::LLVMFuncOp lookupOrCreateSPIRVFn(Operation *symbolTable,
symbolTable->getLoc(), name,
LLVM::LLVMFunctionType::get(resultType, paramTypes));
func.setCConv(LLVM::cconv::CConv::SPIR_FUNC);
func.setConvergent(isConvergent);
}
return func;
}
Expand Down Expand Up @@ -89,8 +91,8 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
assert(moduleOp && "Expecting module");
Type flagTy = rewriter.getI32Type();
Type voidTy = rewriter.getType<LLVM::LLVMVoidType>();
LLVM::LLVMFuncOp func =
lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy);
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
moduleOp, funcName, flagTy, voidTy, /*isConvergent=*/true);

// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
// See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
Expand Down Expand Up @@ -266,8 +268,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Type valueType = adaptor.getValue().getType();
Type offsetType = adaptor.getOffset().getType();
Type resultType = valueType;
LLVM::LLVMFuncOp func = lookupOrCreateSPIRVFn(
moduleOp, funcName, {valueType, offsetType}, resultType);
LLVM::LLVMFuncOp func =
lookupOrCreateSPIRVFn(moduleOp, funcName, {valueType, offsetType},
resultType, /*isConvergent=*/true);

Location loc = op->getLoc();
std::array<Value, 2> args{adaptor.getValue(), adaptor.getOffset()};
Expand Down
18 changes: 9 additions & 9 deletions mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ gpu.module @builtins {
// -----

gpu.module @barriers {
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32)
// CHECK: llvm.func spir_funccc @_Z7barrierj(i32) attributes {convergent}

// CHECK-LABEL: gpu_barrier
func.func @gpu_barrier() {
Expand All @@ -120,10 +120,10 @@ gpu.module @barriers {
// Check `gpu.shuffle` conversion with default subgroup size.

gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}

// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
Expand Down Expand Up @@ -155,10 +155,10 @@ gpu.module @shuffles {
gpu.module @shuffles attributes {
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
} {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {convergent}
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {convergent}

// CHECK-LABEL: gpu_shuffles
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
Expand Down

0 comments on commit 3670e7f

Please sign in to comment.