From a77d3ea310c61cf59c1146895b2d51fe014eb0a9 Mon Sep 17 00:00:00 2001 From: Malay Sanghi Date: Tue, 9 Jul 2024 16:54:25 +0800 Subject: [PATCH] [X86][GlobalISel] Add instruction selection support for x87 ld/st (#97016) Add x87 G_LOAD/G_STORE selection support to existing C++ lowering. --- .../X86/GISel/X86InstructionSelector.cpp | 28 ++- .../CodeGen/X86/GlobalISel/x86_64-fallback.ll | 9 - llvm/test/CodeGen/X86/isel-x87.ll | 225 ++++++++++++++++++ 3 files changed, 248 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/X86/isel-x87.ll diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 303783ea3fd223..d73873812eeb6c 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -195,6 +195,15 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { return &X86::VR512RegClass; } + if (RB.getID() == X86::PSRRegBankID) { + if (Ty.getSizeInBits() == 80) + return &X86::RFP80RegClass; + if (Ty.getSizeInBits() == 64) + return &X86::RFP64RegClass; + if (Ty.getSizeInBits() == 32) + return &X86::RFP32RegClass; + } + llvm_unreachable("Unknown RegBank!"); } @@ -462,6 +471,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, : (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); + if (X86::PSRRegBankID == RB.getID()) + return Isload ? X86::LD_Fp32m : X86::ST_Fp32m; } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? X86::MOV64rm : X86::MOV64mr; @@ -472,6 +483,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, : (HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr); + if (X86::PSRRegBankID == RB.getID()) + return Isload ? X86::LD_Fp64m : X86::ST_Fp64m; + } else if (Ty == LLT::scalar(80)) { + return Isload ? X86::LD_Fp80m : X86::ST_FpP80m; } else if (Ty.isVector() && Ty.getSizeInBits() == 128) { if (Alignment >= Align(16)) return Isload ? (HasVLX ? X86::VMOVAPSZ128rm @@ -611,7 +626,9 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, I.removeOperand(0); addFullAddress(MIB, AM).addUse(DefReg); } - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI); + I.addImplicitDefUseOperands(MF); + return Constrained; } static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { @@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, const Register DstReg = I.getOperand(0).getReg(); const LLT DstTy = MRI.getType(DstReg); const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); - Align Alignment = Align(DstTy.getSizeInBytes()); + // Create the load from the constant pool. + const ConstantFP *CFP = I.getOperand(1).getFPImm(); + const auto &DL = MF.getDataLayout(); + Align Alignment = DL.getPrefTypeAlign(CFP->getType()); const DebugLoc &DbgLoc = I.getDebugLoc(); unsigned Opc = getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment); - // Create the load from the constant pool. 
- const ConstantFP *CFP = I.getOperand(1).getFPImm(); unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment); MachineInstr *LoadInst = nullptr; unsigned char OpFlag = STI.classifyLocalReference(nullptr); @@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment); + LLT::pointer(0, DL.getPointerSizeInBits()), Alignment); LoadInst = addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll index 39302734dde787..bb0f0ae14f3043 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll @@ -7,15 +7,6 @@ ; When we cannot produce a test case anymore, that means we can remove ; the fallback path. -; Check that we fallback on invoke translation failures. -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump -; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump: -define void @test_x86_fp80_dump(ptr %ptr){ - store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16 - ret void -} - ; Check that we fallback on byVal argument ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2 diff --git a/llvm/test/CodeGen/X86/isel-x87.ll b/llvm/test/CodeGen/X86/isel-x87.ll new file mode 100644 index 00000000000000..690c1f6ea968cb --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-x87.ll @@ -0,0 +1,225 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86 +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86 +; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86,FAST_X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64,FAST_X64 + +define x86_fp80 @f0(x86_fp80 noundef %a) nounwind { +; GISEL_X86-LABEL: f0: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: pushl %ebp +; GISEL_X86-NEXT: movl %esp, %ebp +; GISEL_X86-NEXT: andl $-16, %esp +; GISEL_X86-NEXT: subl $48, %esp +; GISEL_X86-NEXT: fldt 8(%ebp) +; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}} +; GISEL_X86-NEXT: fxch %st(1) +; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp) +; GISEL_X86-NEXT: fstpt (%esp) +; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp) +; GISEL_X86-NEXT: fldt (%esp) +; GISEL_X86-NEXT: faddp %st, %st(1) +; GISEL_X86-NEXT: movl %ebp, %esp +; GISEL_X86-NEXT: popl %ebp +; GISEL_X86-NEXT: retl +; +; 
SDAG_X86-LABEL: f0: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: pushl %ebp +; SDAG_X86-NEXT: movl %esp, %ebp +; SDAG_X86-NEXT: andl $-16, %esp +; SDAG_X86-NEXT: subl $48, %esp +; SDAG_X86-NEXT: fldt 8(%ebp) +; SDAG_X86-NEXT: fld %st(0) +; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp) +; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; SDAG_X86-NEXT: fld %st(0) +; SDAG_X86-NEXT: fstpt (%esp) +; SDAG_X86-NEXT: faddp %st, %st(1) +; SDAG_X86-NEXT: movl %ebp, %esp +; SDAG_X86-NEXT: popl %ebp +; SDAG_X86-NEXT: retl +; +; GISEL_X64-LABEL: f0: +; GISEL_X64: # %bb.0: +; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; GISEL_X64-NEXT: fxch %st(1) +; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp) +; GISEL_X64-NEXT: faddp %st, %st(1) +; GISEL_X64-NEXT: retq +; +; SDAG_X64-LABEL: f0: +; SDAG_X64: # %bb.0: +; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp) +; SDAG_X64-NEXT: fld %st(0) +; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SDAG_X64-NEXT: fld %st(0) +; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp) +; SDAG_X64-NEXT: faddp %st, %st(1) +; SDAG_X64-NEXT: retq + %a.addr = alloca x86_fp80, align 16 + %x = alloca x86_fp80, align 16 + store x86_fp80 %a, ptr %a.addr, align 16 + store x86_fp80 0xK400A8000000000000000, ptr %x, align 16 + %load1 = load x86_fp80, ptr %a.addr, align 16 + %load2 = load x86_fp80, ptr %x, align 16 + %add = fadd x86_fp80 %load1, %load2 + ret x86_fp80 %add +} + + +define void @f1(ptr %a, ptr %b) nounwind { +; GISEL_X86-LABEL: f1: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL_X86-NEXT: fldt (%eax) +; GISEL_X86-NEXT: fldt (%ecx) +; GISEL_X86-NEXT: fsubrp %st, %st(1) +; GISEL_X86-NEXT: fstpt (%eax) +; GISEL_X86-NEXT: retl +; +; SDAG_X86-LABEL: f1: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: fldt (%ecx) +; SDAG_X86-NEXT: fldt (%eax) +; SDAG_X86-NEXT: fsubrp %st, %st(1) +; SDAG_X86-NEXT: fstpt (%ecx) +; SDAG_X86-NEXT: retl +; +; CHECK-64-LABEL: f1: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fsubrp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq + %load1 = load x86_fp80, ptr %a, align 4 + %load2 = load x86_fp80, ptr %b, align 4 + %sub = fsub x86_fp80 %load1, %load2 + store x86_fp80 %sub, ptr %a, align 4 + ret void +} + +define void @f2(ptr %a, ptr %b) nounwind { +; GISEL_X86-LABEL: f2: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL_X86-NEXT: fldt (%eax) +; GISEL_X86-NEXT: fldt (%ecx) +; GISEL_X86-NEXT: fmulp %st, %st(1) +; GISEL_X86-NEXT: fstpt (%eax) +; GISEL_X86-NEXT: retl +; +; SDAG_X86-LABEL: f2: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: fldt (%ecx) +; SDAG_X86-NEXT: fldt (%eax) +; SDAG_X86-NEXT: fmulp %st, %st(1) +; SDAG_X86-NEXT: fstpt (%ecx) +; SDAG_X86-NEXT: retl +; +; CHECK-64-LABEL: f2: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fmulp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq + %load1 = load x86_fp80, ptr %a, align 16 + %load2 = load x86_fp80, ptr %b, align 16 + %mul = fmul x86_fp80 %load1, %load2 + store 
x86_fp80 %mul, ptr %a, align 16 + ret void +} + +define void @f3(ptr %a, ptr %b) nounwind { +; GISEL_X86-LABEL: f3: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL_X86-NEXT: fldt (%eax) +; GISEL_X86-NEXT: fldt (%ecx) +; GISEL_X86-NEXT: fdivrp %st, %st(1) +; GISEL_X86-NEXT: fstpt (%eax) +; GISEL_X86-NEXT: retl +; +; SDAG_X86-LABEL: f3: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: fldt (%ecx) +; SDAG_X86-NEXT: fldt (%eax) +; SDAG_X86-NEXT: fdivrp %st, %st(1) +; SDAG_X86-NEXT: fstpt (%ecx) +; SDAG_X86-NEXT: retl +; +; CHECK-64-LABEL: f3: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: fldt (%rdi) +; CHECK-64-NEXT: fldt (%rsi) +; CHECK-64-NEXT: fdivrp %st, %st(1) +; CHECK-64-NEXT: fstpt (%rdi) +; CHECK-64-NEXT: retq + %load1 = load x86_fp80, ptr %a, align 4 + %load2 = load x86_fp80, ptr %b, align 4 + %div = fdiv x86_fp80 %load1, %load2 + store x86_fp80 %div, ptr %a, align 4 + ret void +} + +define void @f6(ptr %a, ptr %b) nounwind { +; GISEL_X86-LABEL: f6: +; GISEL_X86: # %bb.0: +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; GISEL_X86-NEXT: flds (%eax) +; GISEL_X86-NEXT: faddp %st, %st(1) +; GISEL_X86-NEXT: fstps (%ecx) +; GISEL_X86-NEXT: retl +; +; SDAG_X86-LABEL: f6: +; SDAG_X86: # %bb.0: +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SDAG_X86-NEXT: flds (%ecx) +; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}} +; SDAG_X86-NEXT: fstps (%eax) +; SDAG_X86-NEXT: retl +; +; GISEL_X64-LABEL: f6: +; GISEL_X64: # %bb.0: +; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; GISEL_X64-NEXT: flds (%rdi) +; GISEL_X64-NEXT: faddp %st, %st(1) +; GISEL_X64-NEXT: fstps (%rsi) +; GISEL_X64-NEXT: retq +; +; SDAG_X64-LABEL: f6: +; SDAG_X64: # %bb.0: +; SDAG_X64-NEXT: flds (%rdi) +; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SDAG_X64-NEXT: fstps (%rsi) +; SDAG_X64-NEXT: retq + %load1 = load float, ptr %a + %add = fadd float %load1, 20.0 + store float %add, ptr %b + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-32: {{.*}} +; FAST_X64: {{.*}} +; FAST_X86: {{.*}}
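
Note for reviewers (not part of the patch): the test_x86_fp80_dump case removed from x86_64-fallback.ll above is exactly the kind of input that previously hit the GlobalISel fallback and should now select through the new PSR reg-bank mapping. A minimal standalone reproducer, written to match the RUN-line style of isel-x87.ll, might look like the following; the CHECK lines are expectations based on the new LD_Fp80m/ST_FpP80m handling and the existing GISEL_X64 patterns, not autogenerated output.

; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel -global-isel-abort=1 | FileCheck %s

define void @store_fp80(ptr %ptr) nounwind {
; The s80 G_FCONSTANT is materialized as a constant-pool load (fldt) and the
; s80 G_STORE now selects to ST_FpP80m instead of triggering the fallback path.
; CHECK-LABEL: store_fp80:
; CHECK: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; CHECK: fstpt (%rdi)
  store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
  ret void
}

Running utils/update_llc_test_checks.py on such a file would produce the authoritative check lines; with -global-isel-abort=1 the test also asserts that no fallback to SelectionDAG occurs.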