Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Expand mul X, C where C=2^N*(3,5,9)*(3,5,9) #108100

Merged
merged 3 commits into from
Sep 11, 2024

Conversation

preames
Copy link
Collaborator

@preames preames commented Sep 10, 2024

This is a three deep expression which is deeper than we've otherwise gone for multiple expansions, but I think it's reasonable to do so. This covers mul by 50, 100, and 200 which are reasonably common naturally arising numbers.

This is a three deep expression which is deeper than we've otherwise
gone for multiple expansions, but I think it's reasonable to do so.
This covers mul by 50, 100, and 200 which are reasonable common
naturally arising numbers.
@llvmbot
Copy link
Collaborator

llvmbot commented Sep 10, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

This is a three deep expression which is deeper than we've otherwise gone for multiple expansions, but I think it's reasonable to do so. This covers mul by 50, 100, and 200 which are reasonably common naturally arising numbers.


Full diff: https://github.com/llvm/llvm-project/pull/108100.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+27)
  • (modified) llvm/test/CodeGen/RISCV/rv64zba.ll (+139-61)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 23f2b0e96495e9..32641962ed0a16 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14131,6 +14131,33 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
   }
 
+  if (HasShlAdd) {
+    for (uint64_t Divisor : {3, 5, 9}) {
+      if (MulAmt % Divisor != 0)
+        continue;
+      uint64_t MulAmt2 = MulAmt / Divisor;
+      // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
+      // of 25 which happen to be quite common.
+      for (uint64_t Divisor2 : {3, 5, 9}) {
+        if (MulAmt2 % Divisor2 != 0)
+          continue;
+        uint64_t MulAmt3 = MulAmt2 / Divisor;
+        if (isPowerOf2_64(MulAmt3)) {
+          SDLoc DL(N);
+          SDValue Mul359A =
+              DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+                          DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
+          SDValue Mul359B =
+              DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A,
+                          DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT),
+                          Mul359A);
+          return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
+                           DAG.getConstant(Log2_64(MulAmt3), DL, VT));
+        }
+      }
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 19d7324eeff4a1..f1a01f0a2a3a09 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -570,122 +570,200 @@ define i64 @addmul72(i64 %a, i64 %b) {
 }
 
 define i64 @mul50(i64 %a) {
-; CHECK-LABEL: mul50:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 50
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul50:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 50
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul50:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 50
   ret i64 %c
 }
 
 define i64 @addmul50(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul50:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 50
-; CHECK-NEXT:    mul a0, a0, a2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: addmul50:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 50
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul50:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 50
   %d = add i64 %c, %b
   ret i64 %d
 }
 
 define i64 @mul100(i64 %a) {
-; CHECK-LABEL: mul100:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 100
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul100:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 100
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul100:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 2
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 100
   ret i64 %c
 }
 
 define i64 @addmul100(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul100:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 100
-; CHECK-NEXT:    mul a0, a0, a2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: addmul100:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 100
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul100:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 100
   %d = add i64 %c, %b
   ret i64 %d
 }
 
 define i64 @mul162(i64 %a) {
-; CHECK-LABEL: mul162:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 162
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul162:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 162
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul162:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 162
   ret i64 %c
 }
 
 define i64 @addmul162(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul162:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 162
-; CHECK-NEXT:    mul a0, a0, a2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: addmul162:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 162
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul162:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 162
   %d = add i64 %c, %b
   ret i64 %d
 }
 
 define i64 @mul180(i64 %a) {
-; CHECK-LABEL: mul180:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 180
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul180:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 180
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul180:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 180
   ret i64 %c
 }
 
 define i64 @addmul180(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul180:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 180
-; CHECK-NEXT:    mul a0, a0, a2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: addmul180:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 180
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul180:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 180
   %d = add i64 %c, %b
   ret i64 %d
 }
 
 define i64 @add255mul180(i64 %a) {
-; CHECK-LABEL: add255mul180:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 180
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    addi a0, a0, 255
-; CHECK-NEXT:    ret
+; RV64I-LABEL: add255mul180:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 180
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    addi a0, a0, 255
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: add255mul180:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 1
+; RV64ZBA-NEXT:    addi a0, a0, 255
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 180
   %d = add i64 %c, 255
   ret i64 %d
 }
 
 define i64 @mul200(i64 %a) {
-; CHECK-LABEL: mul200:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 200
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul200:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 200
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul200:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    slli a0, a0, 3
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 200
   ret i64 %c
 }
 
 define i64 @addmul200(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul200:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a2, 200
-; CHECK-NEXT:    mul a0, a0, a2
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: addmul200:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a2, 200
+; RV64I-NEXT:    mul a0, a0, a2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: addmul200:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh2add a0, a0, a0
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 200
   %d = add i64 %c, %b
   ret i64 %d

Copy link

github-actions bot commented Sep 10, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@dtcxzyw dtcxzyw requested review from wangpc-pp and removed request for pcwang-thead September 11, 2024 00:15
dtcxzyw added a commit to dtcxzyw/llvm-codegen-benchmark that referenced this pull request Sep 11, 2024
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Miscompilation:

; bin/llc -mtriple=riscv64 -mattr=+m,+zba test.ll -o -
define i64 @mul108(i64 %x) nounwind {
  %mul = mul i64 %x, 108
  ret i64 %mul
}

define i64 @mul135(i64 %x) nounwind {
  %mul = mul i64 %x, 135
  ret i64 %mul
}
        .text
        .attribute      4, 16
        .attribute      5, "rv64i2p1_m2p0_zmmul1p0_zba1p0"
        .file   "test.ll"
        .globl  mul108                          # -- Begin function mul108
        .p2align        2
        .type   mul108,@function
mul108:                                 # @mul108
# %bb.0:
        sh3add  a0, a0, a0
        sh1add  a0, a0, a0
        ret
.Lfunc_end0:
        .size   mul108, .Lfunc_end0-mul108
                                        # -- End function
        .globl  mul135                          # -- Begin function mul135
        .p2align        2
        .type   mul135,@function
mul135:                                 # @mul135
# %bb.0:
        sh3add  a0, a0, a0
        sh1add  a0, a0, a0
        ret
.Lfunc_end1:
        .size   mul135, .Lfunc_end1-mul135
                                        # -- End function
        .section        ".note.GNU-stack","",@progbits

for (uint64_t Divisor2 : {3, 5, 9}) {
if (MulAmt2 % Divisor2 != 0)
continue;
uint64_t MulAmt3 = MulAmt2 / Divisor;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
uint64_t MulAmt3 = MulAmt2 / Divisor;
uint64_t MulAmt3 = MulAmt2 / Divisor2;

@preames
Copy link
Collaborator Author

preames commented Sep 11, 2024

Miscompilation:

Oops, yep, you're completely correct. Thank you.

dtcxzyw added a commit to dtcxzyw/llvm-codegen-benchmark that referenced this pull request Sep 11, 2024
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you!

@preames preames merged commit 65e0574 into llvm:main Sep 11, 2024
8 checks passed
@preames preames deleted the pr-riscv-mul-359-359-pow2 branch September 11, 2024 14:33
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 11, 2024

LLVM Buildbot has detected a new failure on builder clangd-ubuntu-tsan running on clangd-ubuntu-clang while building llvm at step 6 "test-build-clangd-clangd-index-server-clangd-indexer-check-clangd".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/134/builds/5073

Here is the relevant piece of the build log for the reference
Step 6 (test-build-clangd-clangd-index-server-clangd-indexer-check-clangd) failure: test (failure)
******************** TEST 'Clangd :: target_info.test' FAILED ********************
Exit Code: 66

Command Output (stderr):
--
RUN: at line 5: rm -rf /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir && mkdir -p /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir
+ rm -rf /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir
+ mkdir -p /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir
RUN: at line 7: echo '[{"directory": "/vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir", "command": "/vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir/armv7-clang -x c++ the-file.cpp -v", "file": "the-file.cpp"}]' > /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir/compile_commands.json
+ echo '[{"directory": "/vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir", "command": "/vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir/armv7-clang -x c++ the-file.cpp -v", "file": "the-file.cpp"}]'
RUN: at line 9: sed -e "s|INPUT_DIR|/vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir|g" /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/llvm-project/clang-tools-extra/clangd/test/target_info.test > /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.test.1
+ sed -e 's|INPUT_DIR|/vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.dir|g' /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/llvm-project/clang-tools-extra/clangd/test/target_info.test
RUN: at line 12: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.test.1 > /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.test
+ sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.test.1
RUN: at line 14: clangd -lit-test < /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.test 2>&1 | /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/bin/FileCheck -strict-whitespace /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.test
+ clangd -lit-test
+ /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/bin/FileCheck -strict-whitespace /vol/worker/clangd-ubuntu-clang/clangd-ubuntu-tsan/build/tools/clang/tools/extra/clangd/test/Output/target_info.test.tmp.test

--

********************


VitaNuo pushed a commit to VitaNuo/llvm-project that referenced this pull request Sep 12, 2024
This is a three deep expression which is deeper than we've otherwise
gone for multiple expansions, but I think it's reasonable to do so. This
covers mul by 50, 100, and 200 which are reasonably common naturally
arising numbers.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants