diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8b794656d5c219..fd03eeba911490 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -435,13 +435,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   } else {
     setOperationAction(ISD::FMA  , MVT::f64, Legal);
     setOperationAction(ISD::FMA  , MVT::f32, Legal);
+    setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   }
 
   if (Subtarget.hasSPE())
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
 
-  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
-
   // If we're enabling GP optimizations, use hardware square root
   if (!Subtarget.hasFSQRT() &&
       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
@@ -9060,6 +9060,113 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
   return FP;
 }
 
+/// Lower ISD::SET_ROUNDING by updating the two-bit rounding-mode field of the
+/// FPSCR.
+///
+/// Operand 0 is the incoming chain and operand 1 is the requested LLVM rounding
+/// mode. The returned value is the chain result of the node that writes FPSCR.
+///  - Constant mode: emits one mffscrni on ISA 3.0, otherwise an mtfsb0/mtfsb1
+///    pair on FPSCR bits 30 and 31.
+///  - Variable mode: converts the value with x ^ (~(x >> 1) & 1), then emits
+///    mffscrn on ISA 3.0, otherwise merges it into the FPSCR image read by mffs
+///    and writes that image back with mtfsf.
+SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  SDLoc Dl(Op);
+  MachineFunction &MF = DAG.getMachineFunction();
+  EVT PtrVT = getPointerTy(MF.getDataLayout());
+  SDValue Chain = Op.getOperand(0);
+
+  // If requested mode is constant, just use simpler mtfsb/mffscrni
+  if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    uint64_t Mode = CVal->getZExtValue();
+    assert(Mode < 4 && "Unsupported rounding mode!");
+    unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
+    if (Subtarget.isISA3_0())
+      return SDValue(
+          DAG.getMachineNode(
+              PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
+              {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
+          1);
+    SDNode *SetHi = DAG.getMachineNode(
+        (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+        {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
+    SDNode *SetLo = DAG.getMachineNode(
+        (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+        {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
+    return SDValue(SetLo, 0);
+  }
+
+  // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
+  SDValue One = DAG.getConstant(1, Dl, MVT::i32);
+  SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
+                                DAG.getConstant(3, Dl, MVT::i32));
+  SDValue DstFlag = DAG.getNode(
+      ISD::XOR, Dl, MVT::i32, SrcFlag,
+      DAG.getNode(ISD::AND, Dl, MVT::i32,
+                  DAG.getNOT(Dl,
+                             DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
+                             MVT::i32),
+                  One));
+  // For Power9, there's faster mffscrn, and we don't need to read FPSCR
+  SDValue MFFS;
+  if (!Subtarget.isISA3_0()) {
+    MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
+    Chain = MFFS.getValue(1);
+  }
+  SDValue NewFPSCR;
+  if (Subtarget.isPPC64()) {
+    if (Subtarget.isISA3_0()) {
+      NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
+    } else {
+      // Set the last two bits (rounding mode) of bitcasted FPSCR.
+      SDNode *InsertRN = DAG.getMachineNode(
+          PPC::RLDIMI, Dl, MVT::i64,
+          {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
+           DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
+           DAG.getTargetConstant(0, Dl, MVT::i32),
+           DAG.getTargetConstant(62, Dl, MVT::i32)});
+      NewFPSCR = SDValue(InsertRN, 0);
+    }
+    NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
+  } else {
+    // In 32-bit mode, store f64, load and update the lower half.
+    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
+    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+    SDValue Addr = Subtarget.isLittleEndian()
+                       ? StackSlot
+                       : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
+                                     DAG.getConstant(4, Dl, PtrVT));
+    if (Subtarget.isISA3_0()) {
+      Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
+    } else {
+      Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
+      SDValue Tmp =
+          DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
+      Chain = Tmp.getValue(1);
+      Tmp = SDValue(DAG.getMachineNode(
+                        PPC::RLWIMI, Dl, MVT::i32,
+                        {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
+                         DAG.getTargetConstant(30, Dl, MVT::i32),
+                         DAG.getTargetConstant(31, Dl, MVT::i32)}),
+                    0);
+      Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
+    }
+    NewFPSCR =
+        DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
+    Chain = NewFPSCR.getValue(1);
+  }
+  if (Subtarget.isISA3_0())
+    return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
+                                      {NewFPSCR, Chain}),
+                   1);
+  SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
+  SDNode *MTFSF = DAG.getMachineNode(
+      PPC::MTFSF, Dl, MVT::Other,
+      {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
+  return SDValue(MTFSF, 0);
+}
+
 SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                              SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -11921,6 +12028,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::UINT_TO_FP:
   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
   case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
+  case ISD::SET_ROUNDING:
+    return LowerSET_ROUNDING(Op, DAG);
 
   // Lower 64-bit shifts.
   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 0bdfdcd15441f4..8907c3c5a81c3c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1296,6 +1296,7 @@ namespace llvm {
                            const SDLoc &dl) const;
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll
index c1f7181b30f3f6..cd2d7813af3962 100644
--- a/llvm/test/CodeGen/PowerPC/frounds.ll
+++ b/llvm/test/CodeGen/PowerPC/frounds.ll
@@ -5,14 +5,17 @@
 ; RUN:   -check-prefix=PPC64
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le -mattr=-direct-move \
 ; RUN:   | FileCheck %s -check-prefix=PPC64LE
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=pwr9 \
+; RUN:   | FileCheck %s -check-prefix=P9_32
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le -mcpu=pwr9 \
+; RUN:   | FileCheck %s -check-prefix=P9
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le | FileCheck %s \
 ; RUN:   -check-prefix=DM
 
-define i32 @foo() {
+define i32 @foo() #0 {
 ; PPC32-LABEL: foo:
 ; PPC32:       # %bb.0: # %entry
 ; PPC32-NEXT:    stwu 1, -32(1)
-; PPC32-NEXT:    .cfi_def_cfa_offset 32
 ; PPC32-NEXT:    mffs 0
 ; PPC32-NEXT:    stfd 0, 16(1)
 ; PPC32-NEXT:    lwz 3, 20(1)
@@ -51,6 +54,33 @@ define i32 @foo() {
 ; PPC64LE-NEXT:    stw 3, -4(1)
 ; PPC64LE-NEXT:    blr
 ;
+; P9_32-LABEL: foo:
+; P9_32:       # %bb.0: # %entry
+; P9_32-NEXT:    stwu 1, -32(1)
+; P9_32-NEXT:    mffs 0
+; P9_32-NEXT:    stfd 0, 16(1)
+; P9_32-NEXT:    lwz 3, 20(1)
+; P9_32-NEXT:    clrlwi 4, 3, 30
+; P9_32-NEXT:    not 3, 3
+; P9_32-NEXT:    rlwinm 3, 3, 31, 31, 31
+; P9_32-NEXT:    xor 3, 4, 3
+; P9_32-NEXT:    stw 3, 24(1)
+; P9_32-NEXT:    stw 3, 28(1)
+; P9_32-NEXT:    addi 1, 1, 32
+; P9_32-NEXT:    blr
+;
+; P9-LABEL: foo:
+; P9:       # %bb.0: # %entry
+; P9-NEXT:    mffs 0
+; P9-NEXT:    mffprd 3, 0
+; P9-NEXT:    clrlwi 4, 3, 30
+; P9-NEXT:    not 3, 3
+; P9-NEXT:    rlwinm 3, 3, 31, 31, 31
+; P9-NEXT:    xor 3, 4, 3
+; P9-NEXT:    stw 3, -8(1)
+; P9-NEXT:    stw 3, -4(1)
+; P9-NEXT:    blr
+;
 ; DM-LABEL: foo:
 ; DM:       # %bb.0: # %entry
 ; DM-NEXT:    mffs 0
@@ -77,4 +107,254 @@ return: ; preds = %entry
   ret i32 %retval3
 }
 
-declare i32 @llvm.get.rounding() nounwind
+define void @setrnd_tozero() #0 {
+; PPC32-LABEL: setrnd_tozero:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb0 30
+; PPC32-NEXT:    mtfsb1 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_tozero:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb0 30
+; PPC64-NEXT:    mtfsb1 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_tozero:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb0 30
+; PPC64LE-NEXT:    mtfsb1 31
+; PPC64LE-NEXT:    blr
+;
+; P9_32-LABEL: setrnd_tozero:
+; P9_32:       # %bb.0: # %entry
+; P9_32-NEXT:    mffscrni 0, 1
+; P9_32-NEXT:    blr
+;
+; P9-LABEL: setrnd_tozero:
+; P9:       # %bb.0: # %entry
+; P9-NEXT:    mffscrni 0, 1
+; P9-NEXT:    blr
+;
+; DM-LABEL: setrnd_tozero:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb0 30
+; DM-NEXT:    mtfsb1 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 0)
+  ret void
+}
+
+define void @setrnd_tonearest_tieeven() #0 {
+; PPC32-LABEL: setrnd_tonearest_tieeven:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb0 30
+; PPC32-NEXT:    mtfsb0 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_tonearest_tieeven:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb0 30
+; PPC64-NEXT:    mtfsb0 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_tonearest_tieeven:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb0 30
+; PPC64LE-NEXT:    mtfsb0 31
+; PPC64LE-NEXT:    blr
+;
+; P9_32-LABEL: setrnd_tonearest_tieeven:
+; P9_32:       # %bb.0: # %entry
+; P9_32-NEXT:    mffscrni 0, 0
+; P9_32-NEXT:    blr
+;
+; P9-LABEL: setrnd_tonearest_tieeven:
+; P9:       # %bb.0: # %entry
+; P9-NEXT:    mffscrni 0, 0
+; P9-NEXT:    blr
+;
+; DM-LABEL: setrnd_tonearest_tieeven:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb0 30
+; DM-NEXT:    mtfsb0 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 1)
+  ret void
+}
+
+define void @setrnd_toposinf() #0 {
+; PPC32-LABEL: setrnd_toposinf:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb1 30
+; PPC32-NEXT:    mtfsb0 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_toposinf:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb1 30
+; PPC64-NEXT:    mtfsb0 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_toposinf:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb1 30
+; PPC64LE-NEXT:    mtfsb0 31
+; PPC64LE-NEXT:    blr
+;
+; P9_32-LABEL: setrnd_toposinf:
+; P9_32:       # %bb.0: # %entry
+; P9_32-NEXT:    mffscrni 0, 2
+; P9_32-NEXT:    blr
+;
+; P9-LABEL: setrnd_toposinf:
+; P9:       # %bb.0: # %entry
+; P9-NEXT:    mffscrni 0, 2
+; P9-NEXT:    blr
+;
+; DM-LABEL: setrnd_toposinf:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb1 30
+; DM-NEXT:    mtfsb0 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 2)
+  ret void
+}
+
+define void @setrnd_toneginf() #0 {
+; PPC32-LABEL: setrnd_toneginf:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    mtfsb1 30
+; PPC32-NEXT:    mtfsb1 31
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_toneginf:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mtfsb1 30
+; PPC64-NEXT:    mtfsb1 31
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_toneginf:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mtfsb1 30
+; PPC64LE-NEXT:    mtfsb1 31
+; PPC64LE-NEXT:    blr
+;
+; P9_32-LABEL: setrnd_toneginf:
+; P9_32:       # %bb.0: # %entry
+; P9_32-NEXT:    mffscrni 0, 3
+; P9_32-NEXT:    blr
+;
+; P9-LABEL: setrnd_toneginf:
+; P9:       # %bb.0: # %entry
+; P9-NEXT:    mffscrni 0, 3
+; P9-NEXT:    blr
+;
+; DM-LABEL: setrnd_toneginf:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    mtfsb1 30
+; DM-NEXT:    mtfsb1 31
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 3)
+  ret void
+}
+
+define void @setrnd_var(i32 %x) #0 {
+; PPC32-LABEL: setrnd_var:
+; PPC32:       # %bb.0: # %entry
+; PPC32-NEXT:    stwu 1, -16(1)
+; PPC32-NEXT:    mffs 0
+; PPC32-NEXT:    stfd 0, 8(1)
+; PPC32-NEXT:    clrlwi 4, 3, 30
+; PPC32-NEXT:    lwz 5, 12(1)
+; PPC32-NEXT:    rlwinm 3, 3, 31, 31, 31
+; PPC32-NEXT:    xor 3, 3, 4
+; PPC32-NEXT:    xori 3, 3, 1
+; PPC32-NEXT:    rlwimi 5, 3, 0, 30, 31
+; PPC32-NEXT:    stw 5, 12(1)
+; PPC32-NEXT:    lfd 0, 8(1)
+; PPC32-NEXT:    mtfsf 255, 0
+; PPC32-NEXT:    addi 1, 1, 16
+; PPC32-NEXT:    blr
+;
+; PPC64-LABEL: setrnd_var:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    mffs 0
+; PPC64-NEXT:    stfd 0, -16(1)
+; PPC64-NEXT:    clrlwi 4, 3, 30
+; PPC64-NEXT:    rlwinm 3, 3, 31, 31, 31
+; PPC64-NEXT:    ld 5, -16(1)
+; PPC64-NEXT:    xor 3, 3, 4
+; PPC64-NEXT:    xori 3, 3, 1
+; PPC64-NEXT:    clrldi 3, 3, 32
+; PPC64-NEXT:    rldimi 5, 3, 0, 62
+; PPC64-NEXT:    std 5, -8(1)
+; PPC64-NEXT:    lfd 0, -8(1)
+; PPC64-NEXT:    mtfsf 255, 0
+; PPC64-NEXT:    blr
+;
+; PPC64LE-LABEL: setrnd_var:
+; PPC64LE:       # %bb.0: # %entry
+; PPC64LE-NEXT:    mffs 0
+; PPC64LE-NEXT:    clrlwi 4, 3, 30
+; PPC64LE-NEXT:    rlwinm 3, 3, 31, 31, 31
+; PPC64LE-NEXT:    stfd 0, -16(1)
+; PPC64LE-NEXT:    xor 3, 3, 4
+; PPC64LE-NEXT:    ld 4, -16(1)
+; PPC64LE-NEXT:    xori 3, 3, 1
+; PPC64LE-NEXT:    clrldi 3, 3, 32
+; PPC64LE-NEXT:    rldimi 4, 3, 0, 62
+; PPC64LE-NEXT:    std 4, -8(1)
+; PPC64LE-NEXT:    lfd 0, -8(1)
+; PPC64LE-NEXT:    mtfsf 255, 0
+; PPC64LE-NEXT:    blr
+;
+; P9_32-LABEL: setrnd_var:
+; P9_32:       # %bb.0: # %entry
+; P9_32-NEXT:    stwu 1, -16(1)
+; P9_32-NEXT:    clrlwi 4, 3, 30
+; P9_32-NEXT:    rlwinm 3, 3, 31, 31, 31
+; P9_32-NEXT:    xor 3, 3, 4
+; P9_32-NEXT:    xori 3, 3, 1
+; P9_32-NEXT:    stw 3, 12(1)
+; P9_32-NEXT:    lfd 0, 8(1)
+; P9_32-NEXT:    mffscrn 0, 0
+; P9_32-NEXT:    addi 1, 1, 16
+; P9_32-NEXT:    blr
+;
+; P9-LABEL: setrnd_var:
+; P9:       # %bb.0: # %entry
+; P9-NEXT:    clrlwi 4, 3, 30
+; P9-NEXT:    rlwinm 3, 3, 31, 31, 31
+; P9-NEXT:    xor 3, 3, 4
+; P9-NEXT:    xori 3, 3, 1
+; P9-NEXT:    mtfprd 0, 3
+; P9-NEXT:    mffscrn 0, 0
+; P9-NEXT:    blr
+;
+; DM-LABEL: setrnd_var:
+; DM:       # %bb.0: # %entry
+; DM-NEXT:    clrlwi 4, 3, 30
+; DM-NEXT:    rlwinm 3, 3, 31, 31, 31
+; DM-NEXT:    xor 3, 3, 4
+; DM-NEXT:    xori 3, 3, 1
+; DM-NEXT:    clrldi 3, 3, 32
+; DM-NEXT:    mffs 0
+; DM-NEXT:    mffprd 4, 0
+; DM-NEXT:    rldimi 4, 3, 0, 62
+; DM-NEXT:    mtfprd 0, 4
+; DM-NEXT:    mtfsf 255, 0
+; DM-NEXT:    blr
+entry:
+  call void @llvm.set.rounding(i32 %x)
+  ret void
+}
+
+declare i32 @llvm.get.rounding() #0
+declare void @llvm.set.rounding(i32) #0
+
+attributes #0 = { nounwind }