-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GlobalISel] Legalize fp128 types as libcalls for G_FCMP #98452
Changes from 1 commit
a294242
9c702f6
30bf051
50c5854
58a1bb9
f328a58
ce436c7
da0b110
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -723,8 +723,7 @@ static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) { | |||||||||||
if (MemType.isVector()) | ||||||||||||
return RTLIB::UNKNOWN_LIBCALL; | ||||||||||||
|
||||||||||||
#define LCALLS(A, B) \ | ||||||||||||
{ A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } | ||||||||||||
#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL} | ||||||||||||
#define LCALL5(A) \ | ||||||||||||
LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16) | ||||||||||||
switch (Opc) { | ||||||||||||
|
@@ -980,6 +979,150 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, | |||||||||||
LocObserver, nullptr); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// Returns the corresponding libcall for the given Pred and | ||||||||||||
/// the ICMP predicate that should be generated to compare with #0 | ||||||||||||
/// after the libcall. | ||||||||||||
static std::pair<RTLIB::Libcall, CmpInst::Predicate> | ||||||||||||
getFCMPLibcallDesc(const CmpInst::Predicate Pred) { | ||||||||||||
|
||||||||||||
switch (Pred) { | ||||||||||||
case CmpInst::FCMP_OEQ: | ||||||||||||
return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ}; | ||||||||||||
case CmpInst::FCMP_UNE: | ||||||||||||
return {RTLIB::UNE_F128, CmpInst::ICMP_NE}; | ||||||||||||
case CmpInst::FCMP_OGE: | ||||||||||||
return {RTLIB::OGE_F128, CmpInst::ICMP_SGE}; | ||||||||||||
case CmpInst::FCMP_OLT: | ||||||||||||
return {RTLIB::OLT_F128, CmpInst::ICMP_SLT}; | ||||||||||||
case CmpInst::FCMP_OLE: | ||||||||||||
return {RTLIB::OLE_F128, CmpInst::ICMP_SLE}; | ||||||||||||
case CmpInst::FCMP_OGT: | ||||||||||||
return {RTLIB::OGT_F128, CmpInst::ICMP_SGT}; | ||||||||||||
case CmpInst::FCMP_UNO: | ||||||||||||
return {RTLIB::UO_F128, CmpInst::ICMP_NE}; | ||||||||||||
default: | ||||||||||||
return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE}; | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
/// Legalizes a G_FCMP on 128-bit scalar operands by replacing it with one or
/// two comparison libcalls.
///
/// Each libcall returns an i32 which is compared against zero with a G_ICMP
/// to obtain the boolean result. Predicates that getFCMPLibcallDesc maps
/// directly need a single call. FCMP_UEQ and FCMP_ONE are assembled from the
/// OEQ and UNO routines. The remaining unordered predicates and FCMP_ORD reuse
/// the routine of their inverse predicate with the integer comparison
/// inverted.
///
/// \param MIRBuilder builder used to emit the replacement instructions.
/// \param MI the G_FCMP to legalize (operand 0: result, operand 1: predicate,
///        operands 2 and 3: the compared values).
/// \param LocObserver forwarded to createLibcall.
/// \returns Legalized once DstReg has been given a new definition;
///          UnableToLegalize when the operand types or the predicate are not
///          supported, or when a libcall could not be created.
///
/// This function does not erase MI.
LegalizerHelper::LegalizeResult
LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
                                   MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  auto &MF = MIRBuilder.getMF();
  auto &Ctx = MF.getFunction().getContext();

  const Register DstReg = MI.getOperand(0).getReg();
  const Register Op1 = MI.getOperand(2).getReg();
  const Register Op2 = MI.getOperand(3).getReg();
  const auto Pred =
      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  // Only 128-bit scalars are handled, and both operands must agree.
  const LLT OpLLT = MRI.getType(Op1);
  if (OpLLT != LLT::scalar(128) || OpLLT != MRI.getType(Op2))
    return UnableToLegalize;

  Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);

  // The comparison libcalls always return an i32.
  constexpr LLT I32LLT = LLT::scalar(32);
  // Every boolean is produced in the destination's own type so that it can be
  // combined and written to DstReg without a COPY between differently sized
  // scalars.
  const LLT DstTy = MRI.getType(DstReg);

  // Emits the libcall followed by `G_ICMP ICmpPred, <call result>, 0` that
  // defines Res. Returns Res on success and an invalid Register when the
  // libcall could not be created.
  const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
                                const CmpInst::Predicate ICmpPred,
                                const Register Res) -> Register {
    // Generate the libcall, holding its i32 result in Temp.
    const Register Temp = MRI.createGenericVirtualRegister(I32LLT);
    const auto Status =
        createLibcall(MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
                      {{Op1, OpType, 0}, {Op2, OpType, 1}}, LocObserver, &MI);
    // Compare against Legalized explicitly instead of relying on the numeric
    // value of the result enum.
    if (Status != Legalized)
      return Register();

    // Compare the returned i32 with #0 to get the final result.
    MIRBuilder.buildICmp(ICmpPred, Res, Temp,
                         MIRBuilder.buildConstant(I32LLT, 0));
    return Res;
  };

  // Simple case: the predicate maps directly onto a single libcall.
  if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Pred);
      Libcall != RTLIB::UNKNOWN_LIBCALL &&
      ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
    if (BuildLibcall(Libcall, ICmpPred, DstReg))
      return Legalized;
    return UnableToLegalize;
  }

  // No direct mapping found; build the result from a combination of libcalls.
  switch (Pred) {
  case CmpInst::FCMP_UEQ: {
    // FCMP_UEQ: unordered or equal.
    // Convert into (FCMP_OEQ || FCMP_UNO).
    const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
    const Register Oeq = BuildLibcall(OeqLibcall, OeqPred,
                                      MRI.createGenericVirtualRegister(DstTy));
    if (!Oeq)
      return UnableToLegalize;

    const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
    const Register Uno = BuildLibcall(UnoLibcall, UnoPred,
                                      MRI.createGenericVirtualRegister(DstTy));
    if (!Uno)
      return UnableToLegalize;

    MIRBuilder.buildOr(DstReg, Oeq, Uno);
    break;
  }
  case CmpInst::FCMP_ONE: {
    // FCMP_ONE: ordered and operands are unequal.
    // Convert into (!FCMP_OEQ && !FCMP_UNO).
    //
    // The integer predicates are inverted instead of generating a NOT, to
    // save one instruction. On AArch64 isel can even select the two compares
    // into a single ccmp.
    const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
    const Register NotOeq =
        BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred),
                     MRI.createGenericVirtualRegister(DstTy));
    if (!NotOeq)
      return UnableToLegalize;

    const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
    const Register NotUno =
        BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred),
                     MRI.createGenericVirtualRegister(DstTy));
    if (!NotUno)
      return UnableToLegalize;

    MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
    break;
  }
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_ORD: {
    // Convert into: !(inverse(Pred))
    // E.g. FCMP_ULT becomes !FCMP_OGE
    // This is equivalent to the following, but saves some instructions.
    //   MIRBuilder.buildNot(
    //       PredTy,
    //       MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
    //                            Op1, Op2));
    const auto [InversedLibcall, InversedPred] =
        getFCMPLibcallDesc(CmpInst::getInversePredicate(Pred));
    if (!BuildLibcall(InversedLibcall,
                      CmpInst::getInversePredicate(InversedPred), DstReg))
      return UnableToLegalize;
    break;
  }
  default:
    return UnableToLegalize;
  }

  return Legalized;
}
|
||||||||||||
// The function is used to legalize operations that set default environment | ||||||||||||
// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that. | ||||||||||||
// On most targets supported in glibc FE_DFL_MODE is defined as | ||||||||||||
|
@@ -1120,6 +1263,12 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { | |||||||||||
return Status; | ||||||||||||
break; | ||||||||||||
} | ||||||||||||
case TargetOpcode::G_FCMP: { | ||||||||||||
LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver); | ||||||||||||
if (Status != Legalized) | ||||||||||||
return Status; | ||||||||||||
break; | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Directly handle everything and return here. |
||||||||||||
} | ||||||||||||
case TargetOpcode::G_FPTOSI: | ||||||||||||
case TargetOpcode::G_FPTOUI: { | ||||||||||||
// FIXME: Support other types | ||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -560,7 +560,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) | |
}) | ||
.widenScalarOrEltToNextPow2(1) | ||
.clampScalar(0, s32, s32) | ||
.clampScalarOrElt(1, MinFPScalar, s64) | ||
.clampScalarOrElt(1, MinFPScalar, s128) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this needed? If so, could it use minScalarOrElt? Is it worth scalarizing vectors in this same patch? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Replaced with
Yes, added
|
||
.minScalarEltSameAsIf( | ||
[=](const LegalityQuery &Query) { | ||
const LLT &Ty = Query.Types[0]; | ||
|
@@ -572,7 +572,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) | |
.clampNumElements(1, v4s16, v8s16) | ||
.clampNumElements(1, v2s32, v4s32) | ||
.clampMaxNumElements(1, s64, 2) | ||
.moreElementsToNextPow2(1); | ||
.moreElementsToNextPow2(1) | ||
.libcallFor({{s32, s128}}); | ||
|
||
// Extensions | ||
auto ExtLegalFunc = [=](const LegalityQuery &Query) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp | FileCheck %s --check-prefixes=CHECK,SDISEL | ||
; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,GISEL | ||
; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel -global-isel-abort=1 | FileCheck %s --check-prefixes=CHECK,GISEL | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think -global-isel-abort=1 is the default if -global-isel is specified. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed. |
||
target triple = "arm64-apple-ios" | ||
|
||
define i32 @single_same(i32 %a, i32 %b) nounwind ssp { | ||
|
@@ -950,29 +950,51 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32 | |
; Also verify that we don't try to generate f128 FCCMPs, using RT calls instead. | ||
|
||
define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) #0 { | ||
; CHECK-LABEL: f128_select_and_olt_oge: | ||
; CHECK: ; %bb.0: | ||
; CHECK-NEXT: sub sp, sp, #80 | ||
; CHECK-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill | ||
; CHECK-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill | ||
; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill | ||
; CHECK-NEXT: mov x19, x1 | ||
; CHECK-NEXT: mov x20, x0 | ||
; CHECK-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill | ||
; CHECK-NEXT: bl ___lttf2 | ||
; CHECK-NEXT: cmp w0, #0 | ||
; CHECK-NEXT: cset w21, lt | ||
; CHECK-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload | ||
; CHECK-NEXT: bl ___getf2 | ||
; CHECK-NEXT: cmp w0, #0 | ||
; CHECK-NEXT: cset w8, ge | ||
; CHECK-NEXT: tst w8, w21 | ||
; CHECK-NEXT: csel w0, w20, w19, ne | ||
; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload | ||
; CHECK-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload | ||
; CHECK-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload | ||
; CHECK-NEXT: add sp, sp, #80 | ||
; CHECK-NEXT: ret | ||
; SDISEL-LABEL: f128_select_and_olt_oge: | ||
; SDISEL: ; %bb.0: | ||
; SDISEL-NEXT: sub sp, sp, #80 | ||
; SDISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill | ||
; SDISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill | ||
; SDISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill | ||
; SDISEL-NEXT: mov x19, x1 | ||
; SDISEL-NEXT: mov x20, x0 | ||
; SDISEL-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill | ||
; SDISEL-NEXT: bl ___lttf2 | ||
; SDISEL-NEXT: cmp w0, #0 | ||
; SDISEL-NEXT: cset w21, lt | ||
; SDISEL-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload | ||
; SDISEL-NEXT: bl ___getf2 | ||
; SDISEL-NEXT: cmp w0, #0 | ||
; SDISEL-NEXT: cset w8, ge | ||
; SDISEL-NEXT: tst w8, w21 | ||
; SDISEL-NEXT: csel w0, w20, w19, ne | ||
; SDISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload | ||
; SDISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload | ||
; SDISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload | ||
; SDISEL-NEXT: add sp, sp, #80 | ||
; SDISEL-NEXT: ret | ||
; | ||
; GISEL-LABEL: f128_select_and_olt_oge: | ||
; GISEL: ; %bb.0: | ||
; GISEL-NEXT: sub sp, sp, #80 | ||
; GISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill | ||
; GISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill | ||
; GISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill | ||
; GISEL-NEXT: stp q3, q2, [sp] ; 32-byte Folded Spill | ||
; GISEL-NEXT: mov x19, x0 | ||
; GISEL-NEXT: mov x20, x1 | ||
; GISEL-NEXT: bl ___lttf2 | ||
; GISEL-NEXT: mov x21, x0 | ||
; GISEL-NEXT: ldp q1, q0, [sp] ; 32-byte Folded Reload | ||
; GISEL-NEXT: bl ___getf2 | ||
; GISEL-NEXT: cmp w21, #0 | ||
; GISEL-NEXT: ccmp w0, #0, #8, lt | ||
; GISEL-NEXT: csel w0, w19, w20, ge | ||
; GISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload | ||
; GISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload | ||
; GISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload | ||
; GISEL-NEXT: add sp, sp, #80 | ||
; GISEL-NEXT: ret | ||
%c0 = fcmp olt fp128 %v0, %v1 | ||
%c1 = fcmp oge fp128 %v2, %v3 | ||
%cr = and i1 %c1, %c0 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to get access to the parameters.