Skip to content

Commit

Permalink
[GlobalIsel][AArch64] Select G_SCMP and G_UCMP
Browse files Browse the repository at this point in the history
  • Loading branch information
tschuett committed Jul 21, 2024
1 parent 233cca1 commit 2baded3
Show file tree
Hide file tree
Showing 7 changed files with 322 additions and 60 deletions.
19 changes: 19 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,25 @@ class GShl : public GenericMachineInstr {
};
};

/// Generic three-way comparison instruction. Wraps both the signed (G_SCMP)
/// and the unsigned (G_UCMP) opcode.
class GSUCmp : public GenericMachineInstr {
public:
  /// Register of the left-hand operand (operand index 1).
  Register getLHSReg() const { return getOperand(1).getReg(); }

  /// Register of the right-hand operand (operand index 2).
  Register getRHSReg() const { return getOperand(2).getReg(); }

  /// True when this instruction is G_SCMP, false otherwise.
  bool isSigned() const { return TargetOpcode::G_SCMP == getOpcode(); }

  /// Type-query hook: accepts exactly the G_SCMP and G_UCMP opcodes.
  static bool classof(const MachineInstr *MI) {
    const unsigned Opc = MI->getOpcode();
    return Opc == TargetOpcode::G_SCMP || Opc == TargetOpcode::G_UCMP;
  }
};

} // namespace llvm

#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ class LegalizerHelper {

LegalizeResult lowerISFPCLASS(MachineInstr &MI);

LegalizeResult lowerThreewayCompare(MachineInstr &MI);
LegalizeResult lowerMinMax(MachineInstr &MI);
LegalizeResult lowerFCopySign(MachineInstr &MI);
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
Expand Down
33 changes: 33 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4006,6 +4006,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_UMIN:
case G_UMAX:
return lowerMinMax(MI);
case G_SCMP:
case G_UCMP:
return lowerThreewayCompare(MI);
case G_FCOPYSIGN:
return lowerFCopySign(MI);
case G_FMINNUM:
Expand Down Expand Up @@ -7269,6 +7272,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
return Legalized;
}

/// Lower G_SCMP / G_UCMP to two integer compares feeding two selects:
///
///   Dst = select(LHS < RHS, -1, select(LHS > RHS, 1, 0))
///
/// The compares use signed predicates for G_SCMP and unsigned predicates for
/// G_UCMP. The original instruction is erased once the replacement sequence
/// has been built.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
  auto &ThreeWay = cast<GSUCmp>(MI);

  const Register Result = ThreeWay.getReg(0);
  const Register LHS = ThreeWay.getLHSReg();
  const Register RHS = ThreeWay.getRHSReg();

  const LLT ResultTy = MRI.getType(Result);
  // Type of the intermediate compare results: the destination type with its
  // element size changed to 1 bit.
  const LLT BoolTy = ResultTy.changeElementSize(1);

  // Pick the predicate pair once, based on the signedness of the opcode.
  const bool Signed = ThreeWay.isSigned();
  const CmpInst::Predicate Less =
      Signed ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
  const CmpInst::Predicate Greater =
      Signed ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT;

  // The three possible results, materialized in the destination type.
  auto PosOne = MIRBuilder.buildConstant(ResultTy, 1);
  auto Nil = MIRBuilder.buildConstant(ResultTy, 0);
  auto NegOne = MIRBuilder.buildConstant(ResultTy, -1);

  auto LessThan = MIRBuilder.buildICmp(Less, BoolTy, LHS, RHS);
  auto GreaterThan = MIRBuilder.buildICmp(Greater, BoolTy, LHS, RHS);

  // Inner select resolves "greater" vs. "equal"; the outer one, which writes
  // the original destination register, lets "less" take priority.
  auto GreaterOrEqual =
      MIRBuilder.buildSelect(ResultTy, GreaterThan, PosOne, Nil);
  MIRBuilder.buildSelect(Result, LessThan, NegOne, GreaterOrEqual);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1288,6 +1288,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

getActionDefinitionsBuilder(G_PREFETCH).custom();

getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();

getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
Expand Down
109 changes: 109 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
---
# Scalar signed three-way compare: an s2 G_SCMP of two s64 values. The checks
# expect slt and sgt G_ICMPs combined through two G_SELECTs on s32; the value
# selected when the slt compare is true appears as G_CONSTANT i32 3.
# NOTE(review): both compare operands are copied from $x0 - confirm intended.
name: test_scmp
body: |
bb.0.entry:
; CHECK-LABEL: name: test_scmp
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: $w0 = COPY [[SELECT1]](s32)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x0
%4:_(s2) = G_SCMP %0(s64), %1
%14:_(s32) = G_ANYEXT %4(s2)
$w0 = COPY %14(s32)
...
---
# Scalar unsigned three-way compare: an s2 G_UCMP of two s64 values. The checks
# expect ult and ugt G_ICMPs combined through two G_SELECTs on s32; the value
# selected when the ult compare is true appears as G_CONSTANT i32 3.
# NOTE(review): both compare operands are copied from $x0 - confirm intended.
name: test_ucmp
body: |
bb.0.entry:
; CHECK-LABEL: name: test_ucmp
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C2]], [[SELECT]]
; CHECK-NEXT: $w0 = COPY [[SELECT1]](s32)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x0
%4:_(s2) = G_UCMP %0(s64), %1
%14:_(s32) = G_ANYEXT %4(s2)
$w0 = COPY %14(s32)
...
---
# Vector unsigned three-way compare: a <4 x s2> G_UCMP of two <4 x s32>
# vectors. The checks expect ult and ugt G_ICMPs on <4 x s32> followed by
# G_XOR / G_AND / G_OR sequences on <4 x s16>; no G_SELECT appears in the
# expected output.
# NOTE(review): %9 is built from %0-%3, so %5-%8 are unused and both compare
# operands are built from the same scalars - confirm this is intended.
name: test_ucmp_vector
body: |
bb.0.entry:
; CHECK-LABEL: name: test_ucmp_vector
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[UV]]
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV2]], [[TRUNC1]]
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV4]], [[XOR]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>)
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>)
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC2]], [[UV6]]
; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR6]](<8 x s8>)
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT4]](<8 x s16>)
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV8]], [[TRUNC3]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[XOR1]]
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND2]], [[AND3]]
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR1]](<4 x s16>)
; CHECK-NEXT: $q0 = COPY [[ANYEXT5]](<4 x s32>)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s32) = COPY $w2
%3:_(s32) = COPY $w3
%4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%5:_(s32) = COPY $w0
%6:_(s32) = COPY $w1
%7:_(s32) = COPY $w2
%8:_(s32) = COPY $w3
%9:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
%10:_(<4 x s2>) = G_UCMP %4(<4 x s32>), %9
%11:_(<4 x s32>) = G_ANYEXT %10(<4 x s2>)
$q0 = COPY %11(<4 x s32>)
86 changes: 62 additions & 24 deletions llvm/test/CodeGen/AArch64/scmp.ll
Original file line number Diff line number Diff line change
@@ -1,26 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scmp.8.8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w1, sxtb
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: csinv w0, w8, wzr, ge
; CHECK-NEXT: ret
; CHECK-SD-LABEL: scmp.8.8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sxtb w8, w0
; CHECK-SD-NEXT: cmp w8, w1, sxtb
; CHECK-SD-NEXT: cset w8, gt
; CHECK-SD-NEXT: csinv w0, w8, wzr, ge
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: scmp.8.8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: sxtb w9, w1
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: csinv w0, w8, wzr, ge
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
}

define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: scmp.8.16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w1, sxth
; CHECK-NEXT: cset w8, gt
; CHECK-NEXT: csinv w0, w8, wzr, ge
; CHECK-NEXT: ret
; CHECK-SD-LABEL: scmp.8.16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sxth w8, w0
; CHECK-SD-NEXT: cmp w8, w1, sxth
; CHECK-SD-NEXT: cset w8, gt
; CHECK-SD-NEXT: csinv w0, w8, wzr, ge
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: scmp.8.16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sxth w8, w0
; CHECK-GI-NEXT: sxth w9, w1
; CHECK-GI-NEXT: cmp w8, w9
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: csinv w0, w8, wzr, ge
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
}
Expand Down Expand Up @@ -48,15 +67,34 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
}

define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; CHECK-LABEL: scmp.8.128:
; CHECK: // %bb.0:
; CHECK-NEXT: cmp x2, x0
; CHECK-NEXT: sbcs xzr, x3, x1
; CHECK-NEXT: cset w8, lt
; CHECK-NEXT: cmp x0, x2
; CHECK-NEXT: sbcs xzr, x1, x3
; CHECK-NEXT: csinv w0, w8, wzr, ge
; CHECK-NEXT: ret
; CHECK-SD-LABEL: scmp.8.128:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: cmp x2, x0
; CHECK-SD-NEXT: sbcs xzr, x3, x1
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: cmp x0, x2
; CHECK-SD-NEXT: sbcs xzr, x1, x3
; CHECK-SD-NEXT: csinv w0, w8, wzr, ge
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: scmp.8.128:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: cset w8, lt
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w9, lo
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w8, w9, w8, eq
; CHECK-GI-NEXT: cset w9, gt
; CHECK-GI-NEXT: cmp x0, x2
; CHECK-GI-NEXT: cset w10, hi
; CHECK-GI-NEXT: cmp x1, x3
; CHECK-GI-NEXT: csel w9, w10, w9, eq
; CHECK-GI-NEXT: tst w9, #0x1
; CHECK-GI-NEXT: cset w9, ne
; CHECK-GI-NEXT: tst w8, #0x1
; CHECK-GI-NEXT: csinv w0, w9, wzr, eq
; CHECK-GI-NEXT: ret
%1 = call i8 @llvm.scmp(i128 %x, i128 %y)
ret i8 %1
}
Expand Down
Loading

0 comments on commit 2baded3

Please sign in to comment.