Skip to content

Commit

Permalink
Add address space modifier to barrier
Browse files Browse the repository at this point in the history
  • Loading branch information
FMarno committed Sep 30, 2024
1 parent 4ae0c50 commit 874dd36
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 11 deletions.
2 changes: 2 additions & 0 deletions mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ def GPU_AddressSpaceEnum : GPU_I32Enum<
// Attribute form of the GPU address-space enum (global/workgroup/private).
def GPU_AddressSpaceAttr :
GPU_I32EnumAttr<"address_space", GPU_AddressSpaceEnum>;

// Array of address-space attributes; used e.g. by gpu.barrier's optional
// `address_spaces` argument to list the spaces a fence must make visible.
def GPU_AddressSpaceAttrArray : TypedArrayAttrBase<GPU_AddressSpaceAttr, "GPU Address Space array">;

//===----------------------------------------------------------------------===//
// GPU Types.
//===----------------------------------------------------------------------===//
Expand Down
19 changes: 17 additions & 2 deletions mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1355,7 +1355,8 @@ def GPU_ShuffleOp : GPU_Op<
];
}

def GPU_BarrierOp : GPU_Op<"barrier"> {
def GPU_BarrierOp : GPU_Op<"barrier">,
Arguments<(ins OptionalAttr<GPU_AddressSpaceAttrArray> :$address_spaces)> {
let summary = "Synchronizes all work items of a workgroup.";
let description = [{
The "barrier" op synchronizes all work items of a workgroup. It is used
Expand All @@ -1371,11 +1372,25 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
accessing the same memory can be avoided by synchronizing work items
in-between these accesses.

The address space of visible memory accesses can be modified by adding a
list of address spaces required to be visible. By default all address spaces
are included.

```mlir
// only workgroup address spaces accesses required to be visible
gpu.barrier memfence [#gpu.address_space<workgroup>]
// no memory accesses required to be visible
gpu.barrier memfence []
// all memory accesses required to be visible
gpu.barrier
```

Either none or all work items of a workgroup need to execute this op
in convergence.
}];
let assemblyFormat = "attr-dict";
let assemblyFormat = "(`memfence` $address_spaces^)? attr-dict";
let hasCanonicalizer = 1;
let builders = [OpBuilder<(ins)>];
}

def GPU_GPUModuleOp : GPU_Op<"module", [
Expand Down
29 changes: 24 additions & 5 deletions mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,31 @@ struct GPUBarrierConversion final : ConvertOpToLLVMPattern<gpu::BarrierOp> {
lookupOrCreateSPIRVFn(moduleOp, funcName, flagTy, voidTy,
/*isMemNone=*/false, /*isConvergent=*/true);

// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE`.
// See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
constexpr int64_t localMemFenceFlag = 1;
// Value used by SPIR-V backend to represent `CLK_LOCAL_MEM_FENCE` and
// `CLK_GLOBAL_MEM_FENCE`. See `llvm/lib/Target/SPIRV/SPIRVBuiltins.td`.
constexpr int32_t localMemFenceFlag = 1;
constexpr int32_t globalMemFenceFlag = 2;
int32_t memFenceFlag = 0;
std::optional<ArrayAttr> addressSpaces = adaptor.getAddressSpaces();
if (addressSpaces) {
for (Attribute attr : addressSpaces.value()) {
auto addressSpace = cast<gpu::AddressSpaceAttr>(attr).getValue();
switch (addressSpace) {
case gpu::AddressSpace::Global:
memFenceFlag = memFenceFlag | globalMemFenceFlag;
break;
case gpu::AddressSpace::Workgroup:
memFenceFlag = memFenceFlag | localMemFenceFlag;
break;
case gpu::AddressSpace::Private:
break;
}
}
} else {
memFenceFlag = localMemFenceFlag | globalMemFenceFlag;
}
Location loc = op->getLoc();
Value flag =
rewriter.create<LLVM::ConstantOp>(loc, flagTy, localMemFenceFlag);
Value flag = rewriter.create<LLVM::ConstantOp>(loc, flagTy, memFenceFlag);
rewriter.replaceOp(op, createSPIRVBuiltinCall(loc, rewriter, func, flag));
return success();
}
Expand Down
2 changes: 1 addition & 1 deletion mlir/lib/Conversion/GPUToNVVM/GPUToNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ include "mlir/IR/PatternBase.td"
include "mlir/Dialect/GPU/IR/GPUOps.td"
include "mlir/Dialect/LLVMIR/NVVMOps.td"

def : Pat<(GPU_BarrierOp), (NVVM_Barrier0Op)>;
// Lower gpu.barrier to NVVM barrier0. The op's optional address-space list
// (bound positionally here as $memory_fence) is intentionally dropped.
// NOTE(review): presumably barrier0 fences all memory, so ignoring a
// narrower fence request is conservatively correct — confirm against the
// NVVM dialect/PTX bar.sync semantics.
def : Pat<(GPU_BarrierOp : $op $memory_fence), (NVVM_Barrier0Op)>;

#endif // MLIR_CONVERSION_GPUTONVVM_TD
2 changes: 1 addition & 1 deletion mlir/lib/Conversion/GPUToROCDL/GPUToROCDL.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ include "mlir/IR/PatternBase.td"
include "mlir/Dialect/GPU/IR/GPUOps.td"
include "mlir/Dialect/LLVMIR/ROCDLOps.td"

def : Pat<(GPU_BarrierOp), (ROCDL_BarrierOp)>;
// Lower gpu.barrier to the ROCDL barrier. The op's optional address-space
// list (bound positionally here as $memory_fence) is intentionally dropped.
// NOTE(review): presumably the ROCDL barrier fences all memory, so ignoring
// a narrower fence request is conservatively correct — confirm against the
// ROCDL dialect semantics.
def : Pat<(GPU_BarrierOp : $op $memory_fence), (ROCDL_BarrierOp)>;

#endif // MLIR_CONVERSION_GPUTOROCDL_TD
3 changes: 3 additions & 0 deletions mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1351,6 +1351,9 @@ void BarrierOp::getCanonicalizationPatterns(RewritePatternSet &results,
results.add(eraseRedundantGpuBarrierOps);
}

// Convenience builder for a plain `gpu.barrier` with no `address_spaces`
// attribute. Leaving the optional attribute unset means all address spaces
// are required to be visible (the op's documented default), so there is
// nothing to populate on `odsState`.
void BarrierOp::build(mlir::OpBuilder &odsBuilder,
                      mlir::OperationState &odsState) {}

//===----------------------------------------------------------------------===//
// GPUFuncOp
//===----------------------------------------------------------------------===//
Expand Down
19 changes: 17 additions & 2 deletions mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -213,14 +213,29 @@ gpu.module @barriers {

// CHECK-LABEL: gpu_barrier
// Verifies the GPU-to-LLVM-SPV lowering of gpu.barrier: each barrier becomes
// a call to the OpenCL `barrier(cl_mem_fence_flags)` builtin, with the flag
// computed from the optional `memfence` address-space list
// (workgroup -> 1 = CLK_LOCAL_MEM_FENCE, global -> 2 = CLK_GLOBAL_MEM_FENCE,
// private -> no flag; no list -> both flags, i.e. 3).
// NOTE(review): `CHECK-SAME-DAG` is not a directive FileCheck recognizes
// (only `CHECK-SAME` and `CHECK-DAG` exist), so those three lines below are
// inert — confirm and switch to a recognized directive if the call
// attributes should actually be verified.
func.func @gpu_barrier() {
// CHECK: [[FLAGS:%.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[FLAGS]]) {
// CHECK: [[GLOBAL_AND_LOCAL_FLAG:%.*]] = llvm.mlir.constant(3 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[GLOBAL_AND_LOCAL_FLAG]]) {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
// CHECK-SAME-DAG: will_return
// CHECK-NOT: memory_effects = #llvm.memory_effects
// CHECK-SAME: } : (i32) -> ()
// Default barrier: no list, all address spaces visible -> flag 3 (1|2).
gpu.barrier
// CHECK: [[GLOBAL_AND_LOCAL_FLAG2:%.*]] = llvm.mlir.constant(3 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[GLOBAL_AND_LOCAL_FLAG2]])
// Explicit global+workgroup list is equivalent to the default.
gpu.barrier memfence [#gpu.address_space<global>, #gpu.address_space<workgroup>]
// CHECK: [[LOCAL_FLAG:%.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[LOCAL_FLAG]])
// Workgroup only -> flag 1.
gpu.barrier memfence [#gpu.address_space<workgroup>]
// CHECK: [[GLOBAL_FLAG:%.*]] = llvm.mlir.constant(2 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[GLOBAL_FLAG]])
// Global only -> flag 2.
gpu.barrier memfence [#gpu.address_space<global>]
// CHECK: [[NONE_FLAG:%.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG]])
// Empty list -> no fence flags (0).
gpu.barrier memfence []
// CHECK: [[NONE_FLAG2:%.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: llvm.call spir_funccc @_Z7barrierj([[NONE_FLAG2]])
// Private address space contributes no fence flag -> 0.
gpu.barrier memfence [#gpu.address_space<private>]
return
}
}
Expand Down
6 changes: 6 additions & 0 deletions mlir/test/Dialect/GPU/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ module attributes {gpu.container_module} {
%shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32

"gpu.barrier"() : () -> ()
gpu.barrier
gpu.barrier memfence [#gpu.address_space<workgroup>]
gpu.barrier memfence [#gpu.address_space<global>]
gpu.barrier memfence [#gpu.address_space<global>, #gpu.address_space<workgroup>]
gpu.barrier memfence [#gpu.address_space<private>]
gpu.barrier memfence []

"some_op"(%bIdX, %tIdX) : (index, index) -> ()
%42 = memref.load %arg1[%bIdX] : memref<?xf32, 1>
Expand Down

0 comments on commit 874dd36

Please sign in to comment.