[AArch64] Replace AND with LSL#2 for LDR target (llvm#34101) (llvm#89531)

Currently, `DAGCombiner` folds a pair of constant shifts (`LSR`/`LSL`) into a
single `AND` mask.

However, in certain cases the `AND` generated by this fold can itself be
removed.

Consider the following case:
```
        lsr x8, x8, #56
        and x8, x8, #0xfc
        ldr w0, [x2, x8]
        ret
```

In this case, we can remove the `AND` by changing the `LDR` addressing mode
to `[X2, X8, LSL #2]` and increasing the right-shift amount from 56 to 58
(the byte offset `(x >> 56) & 0xfc` is exactly `(x >> 58) << 2`).

After the change:
```
        lsr x8, x8, #58
        ldr w0, [x2, x8, lsl #2]
        ret
```

This patch checks whether such a shift + `AND` sequence feeding a load
address can instead be absorbed into the load's scaled addressing mode, and
skips the shift-pair-to-mask fold when it can.
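
For reference, a minimal IR sketch of the pattern this targets (the function name `load32_shr58` is hypothetical, modeled on the functions in the new test file below): the `getelementptr` scales the shifted index by 4, which previously lowered to the `lsr`/`and`/`ldr` sequence above and should now lower to the scaled-addressing form.
```
define i32 @load32_shr58(i64 %a, ptr %table) {
entry:
  ; Byte offset is (%a >> 58) * 4, i.e. (%a >> 56) & 0xfc.
  %shr = lshr i64 %a, 58
  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
  %0 = load i32, ptr %arrayidx, align 4
  ret i32 %0
}
```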
ParkHanbum authored Aug 24, 2024
1 parent 43b8885 commit 77fccb3
Showing 2 changed files with 155 additions and 0 deletions.
17 changes: 17 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18023,6 +18023,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
    return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
  }

  // We do not need to fold the shift pair into a mask when it is used in the
  // following load pattern, where the shift can be absorbed into the load's
  // scaled addressing mode instead:
  // (ldr x, (add x, (shl (srl x, c1) 2)))
  if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
    if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned ShlAmt = C2->getZExtValue();
      if (auto ShouldADD = *N->use_begin();
          ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
        if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
          unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
          // The shift amount must match the load's byte width so it can
          // become the LDR scale.
          if ((1ULL << ShlAmt) == ByteVT &&
              isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
            return false;
        }
      }
    }
  }

  return true;
}

138 changes: 138 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
;

define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #63
; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 63
%arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
%0 = load i16, ptr %arrayidx, align 2
ret i16 %0
}

define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #2
; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 2
%arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
%0 = load i16, ptr %arrayidx, align 2
ret i16 %0
}

define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 1
%arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
%0 = load i16, ptr %arrayidx, align 2
ret i16 %0
}

define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #63
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 63
%arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
%0 = load i32, ptr %arrayidx, align 4
ret i32 %0
}

define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #2
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 2
%arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
%0 = load i32, ptr %arrayidx, align 4
ret i32 %0
}

define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 1
%arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
%0 = load i32, ptr %arrayidx, align 4
ret i32 %0
}

define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #63
; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 63
%arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
%0 = load i64, ptr %arrayidx, align 8
ret i64 %0
}

define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #2
; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 2
%arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
%0 = load i64, ptr %arrayidx, align 8
ret i64 %0
}

define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
%shr = lshr i64 %mul, 1
%arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
%0 = load i64, ptr %arrayidx, align 8
ret i64 %0
}
