Skip to content

Commit

Permalink
[AMDGPU] Enable atomic optimizer for 64 bit divergent values
Browse files Browse the repository at this point in the history
  • Loading branch information
vikramRH committed Jun 27, 2024
1 parent 5bc37d0 commit cabb5d5
Show file tree
Hide file tree
Showing 11 changed files with 14,422 additions and 3,946 deletions.
22 changes: 17 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,20 @@ bool AMDGPUAtomicOptimizerImpl::run(Function &F) {
return Changed;
}

/// Returns true if \p Ty is a type the callers in this file accept for the
/// divergent-value path: float, double, or an integer that is exactly 32 or
/// 64 bits wide. Every other type yields false.
static bool shouldOptimize(Type *Ty) {
  switch (Ty->getTypeID()) {
  case Type::FloatTyID:
  case Type::DoubleTyID:
    return true;
  case Type::IntegerTyID: {
    // Only 32- and 64-bit integers qualify; return the check directly rather
    // than falling through into the default label.
    const unsigned BitWidth = Ty->getIntegerBitWidth();
    return BitWidth == 32 || BitWidth == 64;
  }
  default:
    return false;
  }
}

void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
// Early exit for unhandled address space atomic instructions.
switch (I.getPointerAddressSpace()) {
Expand Down Expand Up @@ -230,8 +244,7 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
// value to the atomic calculation. We can only optimize divergent values if
// we have DPP available on our subtarget, and the atomic operation is on a
// float, double, or 32- or 64-bit integer value.
if (ValDivergent &&
(!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
if (ValDivergent && (!ST->hasDPP() || !shouldOptimize(I.getType()))) {
return;
}

Expand Down Expand Up @@ -313,8 +326,7 @@ void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) {
// value to the atomic calculation. We can only optimize divergent values if
// we have DPP available on our subtarget, and the atomic operation is on a
// float, double, or 32- or 64-bit integer value.
if (ValDivergent &&
(!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) {
if (ValDivergent && (!ST->hasDPP() || !shouldOptimize(I.getType()))) {
return;
}

Expand Down Expand Up @@ -745,7 +757,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
// of each active lane in the wavefront. This will be our new value
// which we will provide to the atomic operation.
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
assert(TyBitWidth == 32);
assert(TyBitWidth == 32 || TyBitWidth == 64);
NewV = B.CreateIntrinsic(Ty, Intrinsic::amdgcn_readlane,
{NewV, LastLaneIdx});
}
Expand Down
1,346 changes: 1,158 additions & 188 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll

Large diffs are not rendered by default.

1,038 changes: 872 additions & 166 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Large diffs are not rendered by default.

638 changes: 564 additions & 74 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Large diffs are not rendered by default.

1,332 changes: 1,138 additions & 194 deletions llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll

Large diffs are not rendered by default.

504 changes: 486 additions & 18 deletions llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll

Large diffs are not rendered by default.

432 changes: 414 additions & 18 deletions llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll

Large diffs are not rendered by default.

4,054 changes: 2,992 additions & 1,062 deletions llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll

Large diffs are not rendered by default.

2,473 changes: 1,894 additions & 579 deletions llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll

Large diffs are not rendered by default.

2,473 changes: 1,894 additions & 579 deletions llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll

Large diffs are not rendered by default.

4,056 changes: 2,993 additions & 1,063 deletions llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll

Large diffs are not rendered by default.

0 comments on commit cabb5d5

Please sign in to comment.