-
Notifications
You must be signed in to change notification settings - Fork 11.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SelectionDAG] Expand [US]CMP using arithmetic on boolean values instead of selects (#98774)

The previous expansion of [US]CMP was done using two selects and two compares. It produced decent code, but on many platforms it is better to implement [US]CMP nodes by performing the following operation:

```
[us]cmp(x, y) = (x [us]> y) - (x [us]< y)
```

This patch adds this new expansion, as well as a hook in TargetLowering to allow some targets to still use the select-based approach. AArch64 and SystemZ are currently the only targets to prefer the select-based approach, but other targets may also start to use it if it provides for better codegen.
- Loading branch information
1 parent
bb604ae
commit e094abd
Showing
20 changed files
with
4,391 additions
and
2,853 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s | ||
|
||
; Signed three-way compare of two sign-extended i8 arguments, returned as i8.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lt" and "gt", and a final sub that yields (greater) - (less).
define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind { | ||
; CHECK-LABEL: scmp_8_8: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlt r0, #1 | ||
; CHECK-NEXT: movwgt r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i8 @llvm.scmp(i8 %x, i8 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Signed three-way compare of two sign-extended i16 arguments, returned as i8.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lt" and "gt", and a final sub that yields (greater) - (less).
define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind { | ||
; CHECK-LABEL: scmp_8_16: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlt r0, #1 | ||
; CHECK-NEXT: movwgt r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i8 @llvm.scmp(i16 %x, i16 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Signed three-way compare of two i32 arguments, returned as i8.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lt" and "gt", and a final sub that yields (greater) - (less).
define i8 @scmp_8_32(i32 %x, i32 %y) nounwind { | ||
; CHECK-LABEL: scmp_8_32: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlt r0, #1 | ||
; CHECK-NEXT: movwgt r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i8 @llvm.scmp(i32 %x, i32 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Signed three-way compare of two i64 arguments, returned as i8.
; The assertions below expect two subs/sbcs pairs with the operands swapped
; between them; each pair feeds a conditional move on "lt" that produces a 0/1
; flag, and the final sub subtracts the first flag from the second.
; lr is used as a scratch register, and {r11, lr} are pushed and popped.
define i8 @scmp_8_64(i64 %x, i64 %y) nounwind { | ||
; CHECK-LABEL: scmp_8_64: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r11, lr} | ||
; CHECK-NEXT: push {r11, lr} | ||
; CHECK-NEXT: subs lr, r0, r2 | ||
; CHECK-NEXT: mov r12, #0 | ||
; CHECK-NEXT: sbcs lr, r1, r3 | ||
; CHECK-NEXT: mov lr, #0 | ||
; CHECK-NEXT: movwlt lr, #1 | ||
; CHECK-NEXT: subs r0, r2, r0 | ||
; CHECK-NEXT: sbcs r0, r3, r1 | ||
; CHECK-NEXT: movwlt r12, #1 | ||
; CHECK-NEXT: sub r0, r12, lr | ||
; CHECK-NEXT: pop {r11, pc} | ||
%1 = call i8 @llvm.scmp(i64 %x, i64 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Signed three-way compare of two i128 arguments, returned as i8.
; The assertions below expect four words to be loaded from the stack
; (presumably the second operand, with the first in r0-r3; inferred from the
; register usage), followed by two four-step subs/sbcs chains with the operands
; swapped between them. Each chain feeds a conditional move on "lt" producing a
; 0/1 flag, and the final sub subtracts the first flag from the second.
define i8 @scmp_8_128(i128 %x, i128 %y) nounwind { | ||
; CHECK-LABEL: scmp_8_128: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} | ||
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} | ||
; CHECK-NEXT: ldr r4, [sp, #24] | ||
; CHECK-NEXT: mov r5, #0 | ||
; CHECK-NEXT: ldr r6, [sp, #28] | ||
; CHECK-NEXT: subs r7, r0, r4 | ||
; CHECK-NEXT: ldr r12, [sp, #32] | ||
; CHECK-NEXT: sbcs r7, r1, r6 | ||
; CHECK-NEXT: ldr lr, [sp, #36] | ||
; CHECK-NEXT: sbcs r7, r2, r12 | ||
; CHECK-NEXT: sbcs r7, r3, lr | ||
; CHECK-NEXT: mov r7, #0 | ||
; CHECK-NEXT: movwlt r7, #1 | ||
; CHECK-NEXT: subs r0, r4, r0 | ||
; CHECK-NEXT: sbcs r0, r6, r1 | ||
; CHECK-NEXT: sbcs r0, r12, r2 | ||
; CHECK-NEXT: sbcs r0, lr, r3 | ||
; CHECK-NEXT: movwlt r5, #1 | ||
; CHECK-NEXT: sub r0, r5, r7 | ||
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} | ||
%1 = call i8 @llvm.scmp(i128 %x, i128 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Signed three-way compare of two i32 arguments, returned as i32.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lt" and "gt", and a final sub that yields (greater) - (less).
define i32 @scmp_32_32(i32 %x, i32 %y) nounwind { | ||
; CHECK-LABEL: scmp_32_32: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlt r0, #1 | ||
; CHECK-NEXT: movwgt r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i32 @llvm.scmp(i32 %x, i32 %y) | ||
ret i32 %1 | ||
} | ||
|
||
; Signed three-way compare of two i64 arguments, returned as i32.
; The assertions below expect two subs/sbcs pairs with the operands swapped
; between them; each pair feeds a conditional move on "lt" that produces a 0/1
; flag, and the final sub subtracts the first flag from the second.
; lr is used as a scratch register, and {r11, lr} are pushed and popped.
define i32 @scmp_32_64(i64 %x, i64 %y) nounwind { | ||
; CHECK-LABEL: scmp_32_64: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r11, lr} | ||
; CHECK-NEXT: push {r11, lr} | ||
; CHECK-NEXT: subs lr, r0, r2 | ||
; CHECK-NEXT: mov r12, #0 | ||
; CHECK-NEXT: sbcs lr, r1, r3 | ||
; CHECK-NEXT: mov lr, #0 | ||
; CHECK-NEXT: movwlt lr, #1 | ||
; CHECK-NEXT: subs r0, r2, r0 | ||
; CHECK-NEXT: sbcs r0, r3, r1 | ||
; CHECK-NEXT: movwlt r12, #1 | ||
; CHECK-NEXT: sub r0, r12, lr | ||
; CHECK-NEXT: pop {r11, pc} | ||
%1 = call i32 @llvm.scmp(i64 %x, i64 %y) | ||
ret i32 %1 | ||
} | ||
|
||
; Signed three-way compare of two i64 arguments, returned as i64.
; The assertions below expect two subs/sbcs pairs with the operands swapped
; between them; each pair feeds a conditional move on "lt" that produces a 0/1
; flag, and the final sub subtracts the first flag from the second into r0.
; r1 is then filled with an arithmetic shift right of r0 by 31, i.e. the sign
; of the 32-bit result (presumably the high half of the i64 return value).
define i64 @scmp_64_64(i64 %x, i64 %y) nounwind { | ||
; CHECK-LABEL: scmp_64_64: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r11, lr} | ||
; CHECK-NEXT: push {r11, lr} | ||
; CHECK-NEXT: subs lr, r0, r2 | ||
; CHECK-NEXT: mov r12, #0 | ||
; CHECK-NEXT: sbcs lr, r1, r3 | ||
; CHECK-NEXT: mov lr, #0 | ||
; CHECK-NEXT: movwlt lr, #1 | ||
; CHECK-NEXT: subs r0, r2, r0 | ||
; CHECK-NEXT: sbcs r0, r3, r1 | ||
; CHECK-NEXT: movwlt r12, #1 | ||
; CHECK-NEXT: sub r0, r12, lr | ||
; CHECK-NEXT: asr r1, r0, #31 | ||
; CHECK-NEXT: pop {r11, pc} | ||
%1 = call i64 @llvm.scmp(i64 %x, i64 %y) | ||
ret i64 %1 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=armv7-unknown-eabi %s -o - | FileCheck %s | ||
|
||
; Unsigned three-way compare of two zero-extended i8 arguments, returned as i8.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lo" and "hi", and a final sub that yields (higher) - (lower).
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { | ||
; CHECK-LABEL: ucmp_8_8: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlo r0, #1 | ||
; CHECK-NEXT: movwhi r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Unsigned three-way compare of two zero-extended i16 arguments, returned as i8.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lo" and "hi", and a final sub that yields (higher) - (lower).
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { | ||
; CHECK-LABEL: ucmp_8_16: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlo r0, #1 | ||
; CHECK-NEXT: movwhi r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Unsigned three-way compare of two i32 arguments, returned as i8.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lo" and "hi", and a final sub that yields (higher) - (lower).
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { | ||
; CHECK-LABEL: ucmp_8_32: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlo r0, #1 | ||
; CHECK-NEXT: movwhi r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Unsigned three-way compare of two i64 arguments, returned as i8.
; The assertions below expect two subs/sbcs pairs with the operands swapped
; between them; each pair feeds a conditional move on "lo" that produces a 0/1
; flag, and the final sub subtracts the first flag from the second.
; lr is used as a scratch register, and {r11, lr} are pushed and popped.
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { | ||
; CHECK-LABEL: ucmp_8_64: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r11, lr} | ||
; CHECK-NEXT: push {r11, lr} | ||
; CHECK-NEXT: subs lr, r0, r2 | ||
; CHECK-NEXT: mov r12, #0 | ||
; CHECK-NEXT: sbcs lr, r1, r3 | ||
; CHECK-NEXT: mov lr, #0 | ||
; CHECK-NEXT: movwlo lr, #1 | ||
; CHECK-NEXT: subs r0, r2, r0 | ||
; CHECK-NEXT: sbcs r0, r3, r1 | ||
; CHECK-NEXT: movwlo r12, #1 | ||
; CHECK-NEXT: sub r0, r12, lr | ||
; CHECK-NEXT: pop {r11, pc} | ||
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Unsigned three-way compare of two i128 arguments, returned as i8.
; The assertions below expect four words to be loaded from the stack
; (presumably the second operand, with the first in r0-r3; inferred from the
; register usage), followed by two four-step subs/sbcs chains with the operands
; swapped between them. Each chain feeds a conditional move on "lo" producing a
; 0/1 flag, and the final sub subtracts the first flag from the second.
define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { | ||
; CHECK-LABEL: ucmp_8_128: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} | ||
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} | ||
; CHECK-NEXT: ldr r4, [sp, #24] | ||
; CHECK-NEXT: mov r5, #0 | ||
; CHECK-NEXT: ldr r6, [sp, #28] | ||
; CHECK-NEXT: subs r7, r0, r4 | ||
; CHECK-NEXT: ldr r12, [sp, #32] | ||
; CHECK-NEXT: sbcs r7, r1, r6 | ||
; CHECK-NEXT: ldr lr, [sp, #36] | ||
; CHECK-NEXT: sbcs r7, r2, r12 | ||
; CHECK-NEXT: sbcs r7, r3, lr | ||
; CHECK-NEXT: mov r7, #0 | ||
; CHECK-NEXT: movwlo r7, #1 | ||
; CHECK-NEXT: subs r0, r4, r0 | ||
; CHECK-NEXT: sbcs r0, r6, r1 | ||
; CHECK-NEXT: sbcs r0, r12, r2 | ||
; CHECK-NEXT: sbcs r0, lr, r3 | ||
; CHECK-NEXT: movwlo r5, #1 | ||
; CHECK-NEXT: sub r0, r5, r7 | ||
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} | ||
%1 = call i8 @llvm.ucmp(i128 %x, i128 %y) | ||
ret i8 %1 | ||
} | ||
|
||
; Unsigned three-way compare of two i32 arguments, returned as i32.
; The assertions below expect one cmp, two 0/1 values set by conditional moves
; on "lo" and "hi", and a final sub that yields (higher) - (lower).
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { | ||
; CHECK-LABEL: ucmp_32_32: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: cmp r0, r1 | ||
; CHECK-NEXT: mov r0, #0 | ||
; CHECK-NEXT: mov r2, #0 | ||
; CHECK-NEXT: movwlo r0, #1 | ||
; CHECK-NEXT: movwhi r2, #1 | ||
; CHECK-NEXT: sub r0, r2, r0 | ||
; CHECK-NEXT: bx lr | ||
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y) | ||
ret i32 %1 | ||
} | ||
|
||
; Unsigned three-way compare of two i64 arguments, returned as i32.
; The assertions below expect two subs/sbcs pairs with the operands swapped
; between them; each pair feeds a conditional move on "lo" that produces a 0/1
; flag, and the final sub subtracts the first flag from the second.
; lr is used as a scratch register, and {r11, lr} are pushed and popped.
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { | ||
; CHECK-LABEL: ucmp_32_64: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r11, lr} | ||
; CHECK-NEXT: push {r11, lr} | ||
; CHECK-NEXT: subs lr, r0, r2 | ||
; CHECK-NEXT: mov r12, #0 | ||
; CHECK-NEXT: sbcs lr, r1, r3 | ||
; CHECK-NEXT: mov lr, #0 | ||
; CHECK-NEXT: movwlo lr, #1 | ||
; CHECK-NEXT: subs r0, r2, r0 | ||
; CHECK-NEXT: sbcs r0, r3, r1 | ||
; CHECK-NEXT: movwlo r12, #1 | ||
; CHECK-NEXT: sub r0, r12, lr | ||
; CHECK-NEXT: pop {r11, pc} | ||
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y) | ||
ret i32 %1 | ||
} | ||
|
||
; Unsigned three-way compare of two i64 arguments, returned as i64.
; The assertions below expect two subs/sbcs pairs with the operands swapped
; between them; each pair feeds a conditional move on "lo" that produces a 0/1
; flag, and the final sub subtracts the first flag from the second into r0.
; r1 is then filled with an arithmetic shift right of r0 by 31, i.e. the sign
; of the 32-bit result (presumably the high half of the i64 return value).
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { | ||
; CHECK-LABEL: ucmp_64_64: | ||
; CHECK: @ %bb.0: | ||
; CHECK-NEXT: .save {r11, lr} | ||
; CHECK-NEXT: push {r11, lr} | ||
; CHECK-NEXT: subs lr, r0, r2 | ||
; CHECK-NEXT: mov r12, #0 | ||
; CHECK-NEXT: sbcs lr, r1, r3 | ||
; CHECK-NEXT: mov lr, #0 | ||
; CHECK-NEXT: movwlo lr, #1 | ||
; CHECK-NEXT: subs r0, r2, r0 | ||
; CHECK-NEXT: sbcs r0, r3, r1 | ||
; CHECK-NEXT: movwlo r12, #1 | ||
; CHECK-NEXT: sub r0, r12, lr | ||
; CHECK-NEXT: asr r1, r0, #31 | ||
; CHECK-NEXT: pop {r11, pc} | ||
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y) | ||
ret i64 %1 | ||
} |
Oops, something went wrong.