-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Expand mul X, C where C=2^N*(3,5,9)*(3,5,9) #108100
Conversation
This is a three deep expression which is deeper than we've otherwise gone for multiple expansions, but I think it's reasonable to do so. This covers mul by 50, 100, and 200 which are reasonable common naturally arising numbers.
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) ChangesThis is a three deep expression which is deeper than we've otherwise gone for multiple expansions, but I think it's reasonable to do so. This covers mul by 50, 100, and 200 which are reasonably common naturally arising numbers. Full diff: https://github.com/llvm/llvm-project/pull/108100.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 23f2b0e96495e9..32641962ed0a16 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14131,6 +14131,33 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
}
+ if (HasShlAdd) {
+ for (uint64_t Divisor : {3, 5, 9}) {
+ if (MulAmt % Divisor != 0)
+ continue;
+ uint64_t MulAmt2 = MulAmt / Divisor;
+ // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
+ // of 25 which happen to be quite common.
+ for (uint64_t Divisor2 : {3, 5, 9}) {
+ if (MulAmt2 % Divisor2 != 0)
+ continue;
+ uint64_t MulAmt3 = MulAmt2 / Divisor;
+ if (isPowerOf2_64(MulAmt3)) {
+ SDLoc DL(N);
+ SDValue Mul359A =
+ DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
+ SDValue Mul359B =
+ DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A,
+ DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT),
+ Mul359A);
+ return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
+ DAG.getConstant(Log2_64(MulAmt3), DL, VT));
+ }
+ }
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 19d7324eeff4a1..f1a01f0a2a3a09 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -570,122 +570,200 @@ define i64 @addmul72(i64 %a, i64 %b) {
}
define i64 @mul50(i64 %a) {
-; CHECK-LABEL: mul50:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 50
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul50:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 50
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul50:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 50
ret i64 %c
}
define i64 @addmul50(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul50:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 50
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul50:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 50
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul50:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 50
%d = add i64 %c, %b
ret i64 %d
}
define i64 @mul100(i64 %a) {
-; CHECK-LABEL: mul100:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 100
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul100:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 100
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul100:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 2
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 100
ret i64 %c
}
define i64 @addmul100(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul100:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 100
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul100:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 100
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul100:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 100
%d = add i64 %c, %b
ret i64 %d
}
define i64 @mul162(i64 %a) {
-; CHECK-LABEL: mul162:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 162
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul162:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 162
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul162:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a0, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 162
ret i64 %c
}
define i64 @addmul162(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul162:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 162
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul162:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 162
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul162:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a0, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 162
%d = add i64 %c, %b
ret i64 %d
}
define i64 @mul180(i64 %a) {
-; CHECK-LABEL: mul180:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 180
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul180:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 180
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul180:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 180
ret i64 %c
}
define i64 @addmul180(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul180:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 180
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul180:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 180
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul180:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 180
%d = add i64 %c, %b
ret i64 %d
}
define i64 @add255mul180(i64 %a) {
-; CHECK-LABEL: add255mul180:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 180
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: addi a0, a0, 255
-; CHECK-NEXT: ret
+; RV64I-LABEL: add255mul180:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 180
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: addi a0, a0, 255
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: add255mul180:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 1
+; RV64ZBA-NEXT: addi a0, a0, 255
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 180
%d = add i64 %c, 255
ret i64 %d
}
define i64 @mul200(i64 %a) {
-; CHECK-LABEL: mul200:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 200
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul200:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 200
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul200:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 3
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 200
ret i64 %c
}
define i64 @addmul200(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul200:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 200
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul200:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 200
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul200:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh3add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 200
%d = add i64 %c, %b
ret i64 %d
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Miscompilation:
; bin/llc -mtriple=riscv64 -mattr=+m,+zba test.ll -o -
define i64 @mul108(i64 %x) nounwind {
%mul = mul i64 %x, 108
ret i64 %mul
}
define i64 @mul135(i64 %x) nounwind {
%mul = mul i64 %x, 135
ret i64 %mul
}
.text
.attribute 4, 16
.attribute 5, "rv64i2p1_m2p0_zmmul1p0_zba1p0"
.file "test.ll"
.globl mul108 # -- Begin function mul108
.p2align 2
.type mul108,@function
mul108: # @mul108
# %bb.0:
sh3add a0, a0, a0
sh1add a0, a0, a0
ret
.Lfunc_end0:
.size mul108, .Lfunc_end0-mul108
# -- End function
.globl mul135 # -- Begin function mul135
.p2align 2
.type mul135,@function
mul135: # @mul135
# %bb.0:
sh3add a0, a0, a0
sh1add a0, a0, a0
ret
.Lfunc_end1:
.size mul135, .Lfunc_end1-mul135
# -- End function
.section ".note.GNU-stack","",@progbits
for (uint64_t Divisor2 : {3, 5, 9}) { | ||
if (MulAmt2 % Divisor2 != 0) | ||
continue; | ||
uint64_t MulAmt3 = MulAmt2 / Divisor; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
uint64_t MulAmt3 = MulAmt2 / Divisor; | |
uint64_t MulAmt3 = MulAmt2 / Divisor2; |
Oops, yep, you're completely correct. Thank you. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/134/builds/5073 Here is the relevant piece of the build log for the reference
|
This is a three deep expression which is deeper than we've otherwise gone for multiple expansions, but I think it's reasonable to do so. This covers mul by 50, 100, and 200 which are reasonably common naturally arising numbers.
This is a three deep expression which is deeper than we've otherwise gone for multiple expansions, but I think it's reasonable to do so. This covers mul by 50, 100, and 200 which are reasonably common naturally arising numbers.