-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Transforms][Utils][PromoteMem2Reg] Propagate nnan flag on par with the nsz flag #114271
base: main
Are you sure you want to change the base?
Conversation
…he nsz flag Following the change introduced by PR llvm#83381, this patch extends it with the same treatment of the nnan fast-math flag. This addresses the performance drop caused by PR #83200, which prevented a vital InstCombine transformation due to the lack of relevant fast-math flags. The PromoteMem2Reg utility is used by the SROA pass, where PHI nodes are created. The proposed change allows the nnan flag to be propagated down to these PHI nodes.
@llvm/pr-subscribers-llvm-transforms Author: Paul Osmialowski (pawosm-arm) Changes: Following the change introduced by PR #83381, this patch extends it with the same treatment of the nnan fast-math flag. This addresses the performance drop caused by PR #83200, which prevented a vital InstCombine transformation due to the lack of relevant fast-math flags. The PromoteMem2Reg utility is used by the SROA pass, where PHI nodes are created. The proposed change allows the nnan flag to be propagated down to these PHI nodes. Full diff: https://github.com/llvm/llvm-project/pull/114271.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 656bb1ebd1161e..8a42bdddb08119 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -394,6 +394,9 @@ struct PromoteMem2Reg {
/// Whether the function has the no-signed-zeros-fp-math attribute set.
bool NoSignedZeros = false;
+ /// Whether the function has the no-nans-fp-math attribute set.
+ bool NoNaNs = false;
+
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
AssumptionCache *AC)
@@ -752,6 +755,7 @@ void PromoteMem2Reg::run() {
ForwardIDFCalculator IDF(DT);
NoSignedZeros = F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool();
+ NoNaNs = F.getFnAttribute("no-nans-fp-math").getValueAsBool();
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
@@ -1140,6 +1144,11 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
if (isa<FPMathOperator>(APN) && NoSignedZeros)
APN->setHasNoSignedZeros(true);
+ // This allows select instruction folding relevant to floating point
+ // reductions whose operand is a PHI.
+ if (isa<FPMathOperator>(APN) && NoNaNs)
+ APN->setHasNoNaNs(true);
+
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
diff --git a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
index 2cc26363daf9c5..4eda5108b7aba4 100644
--- a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
+++ b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
@@ -77,3 +77,81 @@ return: ; preds = %entry,%if.then
%retval = load double, ptr %x.addr
ret double %retval
}
+
+define double @phi_with_nnan(double %x) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define double @phi_with_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nnan double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret double [[X_ADDR_0]]
+entry:
+ %x.addr = alloca double
+ %cmp = fcmp olt double %x, 0.0
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %fneg = fneg double %x
+ store double %fneg, ptr %x.addr
+ br label %return
+
+return: ; preds = %entry,%if.then
+ %retval = load double, ptr %x.addr
+ ret double %retval
+}
+
+define <2 x double> @vector_phi_with_nnan(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-nans-fp-math"="true" {
+; CHECK-LABEL: define <2 x double> @vector_phi_with_nnan(
+; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nnan <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]]
+entry:
+ %x.addr = alloca <2 x double>
+ store <2 x double> %a, ptr %x.addr
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ store <2 x double> %b, ptr %x.addr
+ br label %return
+
+return: ; preds = %entry,%if.then
+ %retval = load <2 x double>, ptr %x.addr
+ ret <2 x double> %retval
+}
+
+define double @phi_without_nnan(double %x) "no-nans-fp-math"="false" {
+; CHECK-LABEL: define double @phi_without_nnan(
+; CHECK-SAME: double [[X:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret double [[X_ADDR_0]]
+entry:
+ %x.addr = alloca double
+ %cmp = fcmp olt double %x, 0.0
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %fneg = fneg double %x
+ store double %fneg, ptr %x.addr
+ br label %return
+
+return: ; preds = %entry,%if.then
+ %retval = load double, ptr %x.addr
+ ret double %retval
+}
|
Following the change introduced by PR #83381, this patch extends it with the same treatment of the nnan fast-math flag. This addresses the performance drop caused by PR #83200, which prevented a vital InstCombine transformation due to the lack of relevant fast-math flags.
The PromoteMem2Reg utility is used by the SROA pass, where PHI nodes are created. The proposed change allows the nnan flag to be propagated down to these PHI nodes.