Skip to content

Commit

Permalink
[mlir][ROCDL] Plumb through AMDGPU memory access metadata (#110916)
Browse files Browse the repository at this point in the history
The LLVM backend has moved from function-wide attributes for making
assurances about potentially unsafe atomic operations (like
"unsafe-fp-atomics") to metadata on individual atomic operations.

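This commit adds support for generating this metadata from MLIR: the discardable unit attributes `rocdl.no_remote_memory`, `rocdl.no_fine_grained_memory`, and `rocdl.ignore_denormal_mode` are translated to the `!amdgpu.no.remote.memory`, `!amdgpu.no.fine.grained.memory`, and `!amdgpu.ignore.denormal.mode` metadata on the instructions generated for the annotated operation. It also adds `rocdl.last_use`, which is translated to `!amdgpu.last.use` (exercised on a load in the tests).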

---------

Co-authored-by: Quinn Dawkins <[email protected]>
  • Loading branch information
krzysz00 and qedawkins authored Oct 9, 2024
1 parent d905a3c commit 774893d
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 3 deletions.
1 change: 1 addition & 0 deletions mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,7 @@ def LLVM_ConstantRangeAttr : LLVM_Attr<"ConstantRange", "constant_range"> {
Syntax:
```
`<` `i`(width($lower)) $lower `,` $upper `>`
```
}];

let builders = [
Expand Down
9 changes: 7 additions & 2 deletions mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,12 @@ def ROCDL_Dialect : Dialect {
"::mlir::StringAttr":$flat_work_group_size,
"::mlir::IntegerAttr":$max_flat_work_group_size,
"::mlir::IntegerAttr":$waves_per_eu,
"::mlir::BoolAttr":$unsafe_fp_atomics
"::mlir::BoolAttr":$unsafe_fp_atomics,
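// Each attribute below corresponds to the LLVM instruction metadata of the
// same name, prefixed with `amdgpu.` and with `_` replaced by `.`
// (e.g. `last_use` is emitted as `!amdgpu.last.use`).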
"::mlir::UnitAttr":$last_use,
"::mlir::UnitAttr":$no_remote_memory,
"::mlir::UnitAttr":$no_fine_grained_memory,
"::mlir::UnitAttr":$ignore_denormal_mode
);

let useDefaultAttributePrinterParser = 1;
Expand Down Expand Up @@ -88,7 +93,7 @@ class ROCDL_IntrPure1Op<string mnemonic> :

class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
list<int> overloadedOperands, list<Trait> traits, int numResults,
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
list<string> immArgAttrNames = []> :
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class ROCDLDialectLLVMIRTranslationInterface
NamedAttribute attribute,
LLVM::ModuleTranslation &moduleTranslation) const final {
auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
if (!func)
Expand Down Expand Up @@ -198,7 +199,6 @@ class ROCDLDialectLLVMIRTranslationInterface
if (!value)
return op->emitOpError(Twine(attribute.getName()) +
" must be a dense i32 array attribute");
llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
SmallVector<llvm::Metadata *, 3> metadata;
llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
for (int32_t i : value.asArrayRef()) {
Expand All @@ -210,6 +210,31 @@ class ROCDLDialectLLVMIRTranslationInterface
llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
llvmFunc->setMetadata("reqd_work_group_size", node);
}

// Atomic and nontemporal metadata
if (dialect->getLastUseAttrHelper().getName() == attribute.getName()) {
for (llvm::Instruction *i : instructions)
i->setMetadata("amdgpu.last.use", llvm::MDNode::get(llvmContext, {}));
}
if (dialect->getNoRemoteMemoryAttrHelper().getName() ==
attribute.getName()) {
for (llvm::Instruction *i : instructions)
i->setMetadata("amdgpu.no.remote.memory",
llvm::MDNode::get(llvmContext, {}));
}
if (dialect->getNoFineGrainedMemoryAttrHelper().getName() ==
attribute.getName()) {
for (llvm::Instruction *i : instructions)
i->setMetadata("amdgpu.no.fine.grained.memory",
llvm::MDNode::get(llvmContext, {}));
}
if (dialect->getIgnoreDenormalModeAttrHelper().getName() ==
attribute.getName()) {
for (llvm::Instruction *i : instructions)
i->setMetadata("amdgpu.ignore.denormal.mode",
llvm::MDNode::get(llvmContext, {}));
}

return success();
}
};
Expand Down
23 changes: 23 additions & 0 deletions mlir/test/Target/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -564,11 +564,34 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
}

// Translation test for `rocdl.cvt.pkrtz`: the op takes two f32 operands and
// yields a vector<2xf16>; the translated LLVM IR is expected to contain a call
// to the `llvm.amdgcn.cvt.pkrtz` intrinsic taking two floats and returning
// <2 x half>.
llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> {
// CHECK-LABEL: @rocdl_16bit_packed_floats
// CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}})
%source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16>
llvm.return %source : vector<2xf16>
}

// Translation test for the ROCDL atomic-related unit attributes: an
// `llvm.atomicrmw fadd` on an address-space-1 pointer carries
// `rocdl.ignore_denormal_mode`, `rocdl.no_fine_grained_memory`, and
// `rocdl.no_remote_memory`, and the emitted `atomicrmw` instruction is expected
// to carry the matching `!amdgpu.*` metadata.
llvm.func @rocdl_atomic_attrs(%ptr: !llvm.ptr<1>, %data: f32) {
// CHECK-LABEL: @rocdl_atomic_attrs
// The directives below must all match on the single `atomicrmw` line, in the
// order written here.
// CHECK: atomicrmw
// CHECK-SAME: !amdgpu.ignore.denormal.mode
// CHECK-SAME: !amdgpu.no.fine.grained.memory
// CHECK-SAME: !amdgpu.no.remote.memory
llvm.atomicrmw fadd %ptr, %data monotonic {
rocdl.ignore_denormal_mode,
rocdl.no_fine_grained_memory,
rocdl.no_remote_memory} : !llvm.ptr<1>, f32
llvm.return
}

// Translation test for `rocdl.last_use`: an `llvm.load` from an
// address-space-1 pointer annotated with the unit attribute is expected to
// produce a `load` instruction carrying `!amdgpu.last.use` metadata, whose
// result is then returned.
llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
// CHECK-LABEL: @rocdl_last_use
// Capture the loaded value's name so the `ret` check can confirm that the same
// value is returned.
// CHECK: %[[ret:.+]] = load
// CHECK-SAME: !amdgpu.last.use
// CHECK: ret i32 %[[ret]]
%ret = llvm.load %ptr {rocdl.last_use} : !llvm.ptr<1> -> i32
llvm.return %ret : i32
}

// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
Expand Down

0 comments on commit 774893d

Please sign in to comment.