From f3d6712ee3e0a82d429069e35503d6ceed476541 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 17 Jul 2024 15:19:31 +0100 Subject: [PATCH] [RISCV] Support constant hoisting of immediate store values (#96073) Previously getIntImmInstCost only calculated the cost of materialising the argument of a store if it was the address. This means ConstantHoisting's transformation wouldn't kick in for cases like storing two values that require multiple instructions to materialise but where one can be cheaply generated from the other (e.g. by an addition). Two key changes were needed to avoid regressions when enabling this: * Allowing constant materialisation cost to be calculated assuming zeroes are free (as might happen if you had a 2*XLEN constant and one half is zero). * Avoiding constant hoisting if we have a misaligned store that's going to be a legalised to a sequence of narrower stores. I'm seeing cases where hoisting the constant ends up with worse codegen in that case. Out of caution and so as not to unexpectedly degrade other existing hoisting logic, FreeZeroes is used only for the new cost calculations for the load instruction. It would likely make sense to revisit this later. --- .../Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 6 +- .../Target/RISCV/MCTargetDesc/RISCVMatInt.h | 7 ++- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 39 ++++++++++--- .../ConstantHoisting/RISCV/immediates.ll | 58 ++++++++++++++++++- 4 files changed, 95 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index 0a857eb96935ea..26725cf7decbee 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -499,7 +499,7 @@ InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, } int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, - bool CompressionCost) { + bool CompressionCost, bool FreeZeroes) { bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit); bool HasRVC = CompressionCost && (STI.hasFeature(RISCV::FeatureStdExtC) || STI.hasFeature(RISCV::FeatureStdExtZca)); @@ -510,10 +510,12 @@ int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) { APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize); + if (FreeZeroes && Chunk.getSExtValue() == 0) + continue; InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), STI); Cost += getInstSeqCost(MatSeq, HasRVC); } - return std::max(1, Cost); + return std::max(FreeZeroes ? 0 : 1, Cost); } OpndKind Inst::getOpndKind() const { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h index e87e0f32564709..ae94f3778b2175 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h @@ -71,8 +71,13 @@ InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, // If CompressionCost is true it will use a different cost calculation if RVC is // enabled. This should be used to compare two different sequences to determine // which is more compressible. +// +// If FreeZeroes is true, it will be assumed free to materialize any +// XLen-sized chunks that are 0. This is appropriate to use in instances when +// the zero register can be used, e.g. when estimating the cost of +// materializing a value used by a particular operation. int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, - bool CompressionCost = false); + bool CompressionCost = false, bool FreeZeroes = false); } // namespace RISCVMatInt } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index d603138773de4b..f9eef60f77b7ac 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -109,8 +109,11 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef OpCodes, MVT VT, return Cost; } -InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { +static InstructionCost getIntImmCostImpl(const DataLayout &DL, + const RISCVSubtarget *ST, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + bool FreeZeroes) { assert(Ty->isIntegerTy() && "getIntImmCost can only estimate cost of materialising integers"); @@ -119,8 +122,13 @@ InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, return TTI::TCC_Free; // Otherwise, we check how many instructions it will take to materialise. - const DataLayout &DL = getDataLayout(); - return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), *getST()); + return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), *ST, + /*CompressionCost=*/false, FreeZeroes); +} + +InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { + return getIntImmCostImpl(getDataLayout(), getST(), Imm, Ty, CostKind, false); } // Look for patterns of shift followed by AND that can be turned into a pair of @@ -172,11 +180,24 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, // split up large offsets in GEP into better parts than ConstantHoisting // can. return TTI::TCC_Free; - case Instruction::Store: - // If the address is a constant, use the materialization cost. - if (Idx == 1) - return getIntImmCost(Imm, Ty, CostKind); - return TTI::TCC_Free; + case Instruction::Store: { + // Use the materialization cost regardless of if it's the address or the + // value that is constant, except for if the store is misaligned and + // misaligned accesses are not legal (experience shows constant hoisting + // can sometimes be harmful in such cases). + if (Idx == 1 || !Inst) + return getIntImmCostImpl(getDataLayout(), getST(), Imm, Ty, CostKind, + /*FreeZeroes=*/true); + + StoreInst *ST = cast(Inst); + if (!getTLI()->allowsMemoryAccessForAlignment( + Ty->getContext(), DL, getTLI()->getValueType(DL, Ty), + ST->getPointerAddressSpace(), ST->getAlign())) + return TTI::TCC_Free; + + return getIntImmCostImpl(getDataLayout(), getST(), Imm, Ty, CostKind, + /*FreeZeroes=*/true); + } case Instruction::Load: // If the address is a constant, use the materialization cost. return getIntImmCost(Imm, Ty, CostKind); diff --git a/llvm/test/Transforms/ConstantHoisting/RISCV/immediates.ll b/llvm/test/Transforms/ConstantHoisting/RISCV/immediates.ll index 8f57df6edb2c0a..329281e7dc301a 100644 --- a/llvm/test/Transforms/ConstantHoisting/RISCV/immediates.ll +++ b/llvm/test/Transforms/ConstantHoisting/RISCV/immediates.ll @@ -211,12 +211,64 @@ exit: ; Check that we use a common base for immediates needed by a store if the ; constants require more than 1 instruction. -; TODO: This doesn't trigger currently. define void @test20(ptr %p1, ptr %p2) { ; CHECK-LABEL: test20 -; CHECK: store i32 15111111, ptr %p1 -; CHECK: store i32 15111112, ptr %p2 +; CHECK: %const = bitcast i32 15111111 to i32 +; CHECK: store i32 %const, ptr %p1, align 4 +; CHECK: %const_mat = add i32 %const, 1 +; CHECK: store i32 %const_mat, ptr %p2, align 4 store i32 15111111, ptr %p1, align 4 store i32 15111112, ptr %p2, align 4 ret void } + +define void @test21(ptr %p1, ptr %p2) { +; CHECK-LABEL: define void @test21( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: store i32 15111111, ptr [[P1]], align 1 +; CHECK-NEXT: store i32 15111112, ptr [[P2]], align 1 +; CHECK-NEXT: ret void +; + store i32 15111111, ptr %p1, align 1 + store i32 15111112, ptr %p2, align 1 + ret void +} + +; 0 immediates shouldn't be hoisted. +define void @test22(ptr %p1, ptr %p2) { +; CHECK-LABEL: define void @test22( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: store i64 0, ptr [[P1]], align 8 +; CHECK-NEXT: store i64 -1, ptr [[P2]], align 8 +; CHECK-NEXT: ret void +; + store i64 0, ptr %p1, align 8 + store i64 -1, ptr %p2, align 8 + ret void +} + +; 0 immediates shouldn't be hoisted. +define void @test23(ptr %p1, ptr %p2) { +; CHECK-LABEL: define void @test23( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: store i127 0, ptr [[P1]], align 8 +; CHECK-NEXT: store i127 -1, ptr [[P2]], align 8 +; CHECK-NEXT: ret void +; + store i127 0, ptr %p1, align 8 + store i127 -1, ptr %p2, align 8 + ret void +} + +; Hoisting doesn't happen for types that aren't legal. +define void @test24(ptr %p1, ptr %p2) { +; CHECK-LABEL: define void @test24( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: store i128 15111111, ptr [[P1]], align 4 +; CHECK-NEXT: store i128 15111112, ptr [[P2]], align 4 +; CHECK-NEXT: ret void +; + store i128 15111111, ptr %p1, align 4 + store i128 15111112, ptr %p2, align 4 + ret void +}