[X86][GlobalISel] Add instruction selection support for x87 ld/st (#97016)

Add x87 G_LOAD/G_STORE selection support to existing C++ lowering.
MalaySanghi authored Jul 9, 2024
1 parent 765e2f9 commit a77d3ea
Showing 3 changed files with 248 additions and 14 deletions.
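For context, a minimal sketch of the IR pattern this commit lets GlobalISel select directly instead of falling back to SelectionDAG; the function name and alignment here are illustrative, and the new f1 test below exercises the same shape:

define void @copy_fp80(ptr %src, ptr %dst) {
  ; Hypothetical example: an 80-bit x87 load/store, now selected through the
  ; PSR register-bank handling to fldt/fstpt instead of hitting the GlobalISel
  ; fallback path (see the check removed from x86_64-fallback.ll below).
  %v = load x86_fp80, ptr %src, align 16
  store x86_fp80 %v, ptr %dst, align 16
  ret void
}

Per the RUN lines in the new isel-x87.ll test, such IR can be exercised with llc -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel.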
28 changes: 23 additions & 5 deletions llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -195,6 +195,15 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
return &X86::VR512RegClass;
}

if (RB.getID() == X86::PSRRegBankID) {
if (Ty.getSizeInBits() == 80)
return &X86::RFP80RegClass;
if (Ty.getSizeInBits() == 64)
return &X86::RFP64RegClass;
if (Ty.getSizeInBits() == 32)
return &X86::RFP32RegClass;
}

llvm_unreachable("Unknown RegBank!");
}

@@ -462,6 +471,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSSZmr :
HasAVX ? X86::VMOVSSmr :
X86::MOVSSmr);
if (X86::PSRRegBankID == RB.getID())
return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV64rm : X86::MOV64mr;
@@ -472,6 +483,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSDZmr :
HasAVX ? X86::VMOVSDmr :
X86::MOVSDmr);
if (X86::PSRRegBankID == RB.getID())
return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
} else if (Ty == LLT::scalar(80)) {
return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
if (Alignment >= Align(16))
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -611,7 +626,9 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
I.removeOperand(0);
addFullAddress(MIB, AM).addUse(DefReg);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
I.addImplicitDefUseOperands(MF);
return Constrained;
}

static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
@@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
const Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
Align Alignment = Align(DstTy.getSizeInBytes());
// Create the load from the constant pool.
const ConstantFP *CFP = I.getOperand(1).getFPImm();
const auto &DL = MF.getDataLayout();
Align Alignment = DL.getPrefTypeAlign(CFP->getType());
const DebugLoc &DbgLoc = I.getDebugLoc();

unsigned Opc =
getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);

// Create the load from the constant pool.
const ConstantFP *CFP = I.getOperand(1).getFPImm();
unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
MachineInstr *LoadInst = nullptr;
unsigned char OpFlag = STI.classifyLocalReference(nullptr);
@@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,

MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);

LoadInst =
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
9 changes: 0 additions & 9 deletions llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
@@ -7,15 +7,6 @@
; When we cannot produce a test case anymore, that means we can remove
; the fallback path.

; Check that we fallback on invoke translation failures.
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
define void @test_x86_fp80_dump(ptr %ptr){
store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
ret void
}

; Check that we fallback on byVal argument
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2
225 changes: 225 additions & 0 deletions llvm/test/CodeGen/X86/isel-x87.ll
@@ -0,0 +1,225 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86,FAST_X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64,FAST_X64

define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
; GISEL_X86-LABEL: f0:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: pushl %ebp
; GISEL_X86-NEXT: movl %esp, %ebp
; GISEL_X86-NEXT: andl $-16, %esp
; GISEL_X86-NEXT: subl $48, %esp
; GISEL_X86-NEXT: fldt 8(%ebp)
; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
; GISEL_X86-NEXT: fxch %st(1)
; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp)
; GISEL_X86-NEXT: fstpt (%esp)
; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp)
; GISEL_X86-NEXT: fldt (%esp)
; GISEL_X86-NEXT: faddp %st, %st(1)
; GISEL_X86-NEXT: movl %ebp, %esp
; GISEL_X86-NEXT: popl %ebp
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f0:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: pushl %ebp
; SDAG_X86-NEXT: movl %esp, %ebp
; SDAG_X86-NEXT: andl $-16, %esp
; SDAG_X86-NEXT: subl $48, %esp
; SDAG_X86-NEXT: fldt 8(%ebp)
; SDAG_X86-NEXT: fld %st(0)
; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp)
; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; SDAG_X86-NEXT: fld %st(0)
; SDAG_X86-NEXT: fstpt (%esp)
; SDAG_X86-NEXT: faddp %st, %st(1)
; SDAG_X86-NEXT: movl %ebp, %esp
; SDAG_X86-NEXT: popl %ebp
; SDAG_X86-NEXT: retl
;
; GISEL_X64-LABEL: f0:
; GISEL_X64: # %bb.0:
; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; GISEL_X64-NEXT: fxch %st(1)
; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: faddp %st, %st(1)
; GISEL_X64-NEXT: retq
;
; SDAG_X64-LABEL: f0:
; SDAG_X64: # %bb.0:
; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp)
; SDAG_X64-NEXT: fld %st(0)
; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; SDAG_X64-NEXT: fld %st(0)
; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; SDAG_X64-NEXT: faddp %st, %st(1)
; SDAG_X64-NEXT: retq
%a.addr = alloca x86_fp80, align 16
%x = alloca x86_fp80, align 16
store x86_fp80 %a, ptr %a.addr, align 16
store x86_fp80 0xK400A8000000000000000, ptr %x, align 16
%load1 = load x86_fp80, ptr %a.addr, align 16
%load2 = load x86_fp80, ptr %x, align 16
%add = fadd x86_fp80 %load1, %load2
ret x86_fp80 %add
}


define void @f1(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f1:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: fldt (%eax)
; GISEL_X86-NEXT: fldt (%ecx)
; GISEL_X86-NEXT: fsubrp %st, %st(1)
; GISEL_X86-NEXT: fstpt (%eax)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f1:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: fldt (%ecx)
; SDAG_X86-NEXT: fldt (%eax)
; SDAG_X86-NEXT: fsubrp %st, %st(1)
; SDAG_X86-NEXT: fstpt (%ecx)
; SDAG_X86-NEXT: retl
;
; CHECK-64-LABEL: f1:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: fldt (%rdi)
; CHECK-64-NEXT: fldt (%rsi)
; CHECK-64-NEXT: fsubrp %st, %st(1)
; CHECK-64-NEXT: fstpt (%rdi)
; CHECK-64-NEXT: retq
%load1 = load x86_fp80, ptr %a, align 4
%load2 = load x86_fp80, ptr %b, align 4
%sub = fsub x86_fp80 %load1, %load2
store x86_fp80 %sub, ptr %a, align 4
ret void
}

define void @f2(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f2:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: fldt (%eax)
; GISEL_X86-NEXT: fldt (%ecx)
; GISEL_X86-NEXT: fmulp %st, %st(1)
; GISEL_X86-NEXT: fstpt (%eax)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f2:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: fldt (%ecx)
; SDAG_X86-NEXT: fldt (%eax)
; SDAG_X86-NEXT: fmulp %st, %st(1)
; SDAG_X86-NEXT: fstpt (%ecx)
; SDAG_X86-NEXT: retl
;
; CHECK-64-LABEL: f2:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: fldt (%rdi)
; CHECK-64-NEXT: fldt (%rsi)
; CHECK-64-NEXT: fmulp %st, %st(1)
; CHECK-64-NEXT: fstpt (%rdi)
; CHECK-64-NEXT: retq
%load1 = load x86_fp80, ptr %a, align 16
%load2 = load x86_fp80, ptr %b, align 16
%mul = fmul x86_fp80 %load1, %load2
store x86_fp80 %mul, ptr %a, align 16
ret void
}

define void @f3(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f3:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: fldt (%eax)
; GISEL_X86-NEXT: fldt (%ecx)
; GISEL_X86-NEXT: fdivrp %st, %st(1)
; GISEL_X86-NEXT: fstpt (%eax)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f3:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: fldt (%ecx)
; SDAG_X86-NEXT: fldt (%eax)
; SDAG_X86-NEXT: fdivrp %st, %st(1)
; SDAG_X86-NEXT: fstpt (%ecx)
; SDAG_X86-NEXT: retl
;
; CHECK-64-LABEL: f3:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: fldt (%rdi)
; CHECK-64-NEXT: fldt (%rsi)
; CHECK-64-NEXT: fdivrp %st, %st(1)
; CHECK-64-NEXT: fstpt (%rdi)
; CHECK-64-NEXT: retq
%load1 = load x86_fp80, ptr %a, align 4
%load2 = load x86_fp80, ptr %b, align 4
%div = fdiv x86_fp80 %load1, %load2
store x86_fp80 %div, ptr %a, align 4
ret void
}

define void @f6(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f6:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; GISEL_X86-NEXT: flds (%eax)
; GISEL_X86-NEXT: faddp %st, %st(1)
; GISEL_X86-NEXT: fstps (%ecx)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f6:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: flds (%ecx)
; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}
; SDAG_X86-NEXT: fstps (%eax)
; SDAG_X86-NEXT: retl
;
; GISEL_X64-LABEL: f6:
; GISEL_X64: # %bb.0:
; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; GISEL_X64-NEXT: flds (%rdi)
; GISEL_X64-NEXT: faddp %st, %st(1)
; GISEL_X64-NEXT: fstps (%rsi)
; GISEL_X64-NEXT: retq
;
; SDAG_X64-LABEL: f6:
; SDAG_X64: # %bb.0:
; SDAG_X64-NEXT: flds (%rdi)
; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; SDAG_X64-NEXT: fstps (%rsi)
; SDAG_X64-NEXT: retq
%load1 = load float, ptr %a
%add = fadd float %load1, 20.0
store float %add, ptr %b
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-32: {{.*}}
; FAST_X64: {{.*}}
; FAST_X86: {{.*}}
