Skip to content

Commit

Permalink
[LoongArch] [CodeGen] Add options for Clang to generate LoongArch-spe…
Browse files Browse the repository at this point in the history
…cific frecipe & frsqrte instructions (llvm#109917)

Two options: `-mfrecipe` & `-mno-frecipe`.
Enable or disable generation of the frecipe.{s/d} and frsqrte.{s/d} instructions.
The default is `-mno-frecipe`.
  • Loading branch information
tangaac authored and EricWF committed Oct 22, 2024
1 parent 7ad45c8 commit d4649c7
Show file tree
Hide file tree
Showing 15 changed files with 1,411 additions and 0 deletions.
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5387,6 +5387,10 @@ def mno_lasx : Flag<["-"], "mno-lasx">, Group<m_loongarch_Features_Group>,
let Flags = [TargetSpecific] in {
def msimd_EQ : Joined<["-"], "msimd=">, Group<m_loongarch_Features_Group>,
HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">;
// -mfrecipe / -mno-frecipe: paired driver flags in the LoongArch feature group
// that enable or disable the frecipe.{s/d} and frsqrte.{s/d} instructions.
def mfrecipe : Flag<["-"], "mfrecipe">, Group<m_loongarch_Features_Group>,
HelpText<"Enable frecipe.{s/d} and frsqrte.{s/d}">;
def mno_frecipe : Flag<["-"], "mno-frecipe">, Group<m_loongarch_Features_Group>,
HelpText<"Disable frecipe.{s/d} and frsqrte.{s/d}">;
def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>,
HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">;
def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>,
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
} else /*-mno-lasx*/
Features.push_back("-lasx");
}

// Honour whichever of -mfrecipe / -mno-frecipe appears last on the command
// line. When neither is present, no frecipe feature string is appended here.
if (const Arg *FrecipeArg =
        Args.getLastArg(options::OPT_mfrecipe, options::OPT_mno_frecipe)) {
  const bool EnableFrecipe =
      FrecipeArg->getOption().matches(options::OPT_mfrecipe);
  Features.push_back(EnableFrecipe ? "+frecipe" : "-frecipe");
}
}

std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
Expand Down
30 changes: 30 additions & 0 deletions clang/test/Driver/loongarch-mfrecipe.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/// Test -m[no-]frecipe options.

// RUN: %clang --target=loongarch64 -mfrecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -mfrecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-FRECIPE
// RUN: %clang --target=loongarch64 -mfrecipe -mno-frecipe -fsyntax-only %s -### 2>&1 | \
// RUN: FileCheck %s --check-prefix=CC1-NO-FRECIPE

// RUN: %clang --target=loongarch64 -mfrecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-FRECIPE
// RUN: %clang --target=loongarch64 -mno-frecipe -mfrecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-FRECIPE
// RUN: %clang --target=loongarch64 -mfrecipe -mno-frecipe -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-NO-FRECIPE


// CC1-FRECIPE: "-target-feature" "+frecipe"
// CC1-NO-FRECIPE: "-target-feature" "-frecipe"

// IR-FRECIPE: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+frecipe{{(,.*)?}}"
// IR-NO-FRECIPE: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-frecipe{{(,.*)?}}"

// Minimal function definition for the driver and IR runs above to compile.
int foo(void) {
  const int Answer = 42;
  return Answer;
}
6 changes: 6 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ def SDT_LoongArchMOVGR2FR_W_LA64
def SDT_LoongArchMOVFR2GR_S_LA64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
// Type profiles for the approximate reciprocal / reciprocal-square-root nodes:
// one result and one operand, each constrained to a floating-point type.
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;

def loongarch_movgr2fr_w_la64
: SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>;
def loongarch_movfr2gr_s_la64
: SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>;
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
// DAG node definitions bound to the LoongArchISD::FRECIPE and
// LoongArchISD::FRSQRTE opcodes, using the one-operand FP profiles.
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;

//===----------------------------------------------------------------------===//
// Instructions
Expand Down Expand Up @@ -286,6 +290,8 @@ let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
def : Pat<(int_loongarch_frecipe_s FPR32:$src), (FRECIPE_S FPR32:$src)>;
def : Pat<(int_loongarch_frsqrte_s FPR32:$src), (FRSQRTE_S FPR32:$src)>;
// Select the target FRECIPE / FRSQRTE nodes on an FPR32 operand to the
// single-precision instructions (same instructions as the intrinsics above).
def : Pat<(loongarch_frecipe FPR32:$src), (FRECIPE_S FPR32:$src)>;
def : Pat<(loongarch_frsqrte FPR32:$src), (FRSQRTE_S FPR32:$src)>;
}

// fmadd.s: fj * fk + fa
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ let Predicates = [HasFrecipe] in {
// FP approximate reciprocal operation
def : Pat<(int_loongarch_frecipe_d FPR64:$src), (FRECIPE_D FPR64:$src)>;
def : Pat<(int_loongarch_frsqrte_d FPR64:$src), (FRSQRTE_D FPR64:$src)>;
// Select the target FRECIPE / FRSQRTE nodes on an FPR64 operand to the
// double-precision instructions (same instructions as the intrinsics above).
def : Pat<(loongarch_frecipe FPR64:$src), (FRECIPE_D FPR64:$src)>;
def : Pat<(loongarch_frsqrte FPR64:$src), (FRSQRTE_D FPR64:$src)>;
}

// fmadd.d: fj * fk + fa
Expand Down
67 changes: 67 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4697,6 +4697,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_ZERO)
NODE_NAME_CASE(VALL_NONZERO)
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
}
#undef NODE_NAME_CASE
return nullptr;
Expand Down Expand Up @@ -5900,6 +5902,71 @@ Register LoongArchTargetLowering::getExceptionSelectorRegister(
return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

/// Default number of refinement steps to apply to an FRECIPE-family estimate
/// of type \p VT when the caller left the count unspecified.
///
/// The FRECIPE feature instructions have a relative accuracy of 2^-14. IEEE
/// float has 23 fraction digits and double has 52, so two steps are used when
/// the scalar element type is f64 and one step otherwise.
///
/// \p Subtarget is accepted but not consulted.
static int getEstimateRefinementSteps(EVT VT,
                                      const LoongArchSubtarget &Subtarget) {
  const bool IsF64Element = VT.getScalarType() == MVT::f64;
  return IsF64Element ? 2 : 1;
}

/// Build the initial estimate used to expand a square root (or reciprocal
/// square root) of \p Operand with the FRSQRTE instruction.
///
/// \param Operand          value whose (reciprocal) square root is wanted.
/// \param DAG              DAG in which the new nodes are created.
/// \param Enabled          not consulted by this implementation.
/// \param RefinementSteps  in/out: if ReciprocalEstimate::Unspecified on
///                         entry, it is set to the target default for the
///                         operand type; otherwise it is left unchanged.
/// \param UseOneConstNR    left untouched by this implementation.
/// \param Reciprocal       true when the caller wants 1/sqrt(x), false when it
///                         wants sqrt(x).
/// \returns the estimate node, or an empty SDValue when the subtarget lacks
///          the frecipe feature or the operand type is not supported.
SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
                                                 SelectionDAG &DAG, int Enabled,
                                                 int &RefinementSteps,
                                                 bool &UseOneConstNR,
                                                 bool Reciprocal) const {
  if (!Subtarget.hasFrecipe())
    return SDValue();

  SDLoc DL(Operand);
  EVT VT = Operand.getValueType();

  // Scalar f32 is always accepted; f64 needs the basic D extension, the
  // 128-bit vector types need LSX and the 256-bit vector types need LASX.
  const bool IsSupportedType =
      VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
      (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
      (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
      (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
      (VT == MVT::v4f64 && Subtarget.hasExtLASX());
  if (!IsSupportedType)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

  // FRSQRTE yields an approximation of 1/sqrt(x). The generic Newton-Raphson
  // expansion expects exactly that as its seed, for both the reciprocal and
  // the non-reciprocal request, and folds the final multiply by x into its
  // last iteration when sqrt(x) is wanted. The estimate must therefore only
  // be turned into sqrt(x) = x * (1/sqrt(x)) here when no refinement will run
  // and the caller asked for the non-reciprocal result.
  SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
  if (RefinementSteps == 0 && !Reciprocal)
    Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);

  return Estimate;
}

/// Build a reciprocal estimate of \p Operand with the FRECIPE instruction.
///
/// \param Operand          value whose reciprocal is wanted.
/// \param DAG              DAG in which the new node is created.
/// \param Enabled          not consulted by this implementation.
/// \param RefinementSteps  in/out: if ReciprocalEstimate::Unspecified on
///                         entry, it is set to the target default for the
///                         operand type; otherwise it is left unchanged.
/// \returns the FRECIPE node, or an empty SDValue when the subtarget lacks the
///          frecipe feature or the operand type is not supported.
SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
                                                  SelectionDAG &DAG,
                                                  int Enabled,
                                                  int &RefinementSteps) const {
  // No estimate instruction is available without the frecipe feature.
  if (!Subtarget.hasFrecipe())
    return SDValue();

  EVT OpVT = Operand.getValueType();

  // Scalar f32 is always accepted; f64 needs the basic D extension, the
  // 128-bit vector types need LSX and the 256-bit vector types need LASX.
  const bool IsSupportedType =
      OpVT == MVT::f32 || (OpVT == MVT::f64 && Subtarget.hasBasicD()) ||
      (OpVT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
      (OpVT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
      (OpVT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
      (OpVT == MVT::v4f64 && Subtarget.hasExtLASX());
  if (!IsSupportedType)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(OpVT, Subtarget);

  SDLoc Loc(Operand);
  return DAG.getNode(LoongArchISD::FRECIPE, Loc, OpVT, Operand);
}

//===----------------------------------------------------------------------===//
// LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ enum NodeType : unsigned {
VALL_NONZERO,
VANY_NONZERO,

// Floating point approximate reciprocal operation
FRECIPE,
FRSQRTE

// Intrinsic operations end =============================================
};
} // end namespace LoongArchISD
Expand Down Expand Up @@ -216,6 +220,17 @@ class LoongArchTargetLowering : public TargetLowering {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

// Unconditionally reports a floating-point square root as cheap, regardless
// of the operand.
// NOTE(review): presumably this keeps the generic combiner from rewriting
// sqrt(x) as x * rsqrt(x) -- confirm against TargetLowering::isFsqrtCheap.
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}

// Override of the TargetLowering square-root estimate hook (defined out of
// line). RefinementSteps and UseOneConstNR are passed by reference; Reciprocal
// distinguishes the reciprocal square-root request from the plain one.
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;

// Override of the TargetLowering reciprocal estimate hook (defined out of
// line). RefinementSteps is passed by reference so the target can fill in a
// default step count.
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;

ISD::NodeType getExtendForAtomicOps() const override {
return ISD::SIGN_EXTEND;
}
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

// Target nodes.
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;

def lasxsplati8
Expand Down Expand Up @@ -2094,6 +2095,15 @@ foreach Inst = ["XVFRECIPE_S", "XVFRSQRTE_S"] in
foreach Inst = ["XVFRECIPE_D", "XVFRSQRTE_D"] in
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)),
(!cast<LAInst>(Inst) LASX256:$xj)>;

// Select the vector FRECIPE / FRSQRTE target nodes on the LASX types:
// v8f32 maps to the *_S instructions and v4f64 to the *_D instructions.
def : Pat<(loongarch_vfrecipe v8f32:$src),
(XVFRECIPE_S v8f32:$src)>;
def : Pat<(loongarch_vfrecipe v4f64:$src),
(XVFRECIPE_D v4f64:$src)>;
def : Pat<(loongarch_vfrsqrte v8f32:$src),
(XVFRSQRTE_S v8f32:$src)>;
def : Pat<(loongarch_vfrsqrte v4f64:$src),
(XVFRSQRTE_D v4f64:$src)>;
}

def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
// Type profiles for the vector forms of the approximate reciprocal nodes: one
// result and one operand, where the result is a floating-point vector and the
// operand has the same type as the result.
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;

// Target nodes.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
Expand Down Expand Up @@ -50,6 +52,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;

def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
// Vector-typed views of the LoongArchISD::FRECIPE / LoongArchISD::FRSQRTE
// opcodes, using the vector type profiles.
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;

def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
Expand Down Expand Up @@ -2238,6 +2242,15 @@ foreach Inst = ["VFRECIPE_S", "VFRSQRTE_S"] in
foreach Inst = ["VFRECIPE_D", "VFRSQRTE_D"] in
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2f64 LSX128:$vj)),
(!cast<LAInst>(Inst) LSX128:$vj)>;

// Select the vector FRECIPE / FRSQRTE target nodes on the LSX types:
// v4f32 maps to the *_S instructions and v2f64 to the *_D instructions.
def : Pat<(loongarch_vfrecipe v4f32:$src),
(VFRECIPE_S v4f32:$src)>;
def : Pat<(loongarch_vfrecipe v2f64:$src),
(VFRECIPE_D v2f64:$src)>;
def : Pat<(loongarch_vfrsqrte v4f32:$src),
(VFRSQRTE_S v4f32:$src)>;
def : Pat<(loongarch_vfrsqrte v2f64:$src),
(VFRSQRTE_D v2f64:$src)>;
}

// load
Expand Down
80 changes: 80 additions & 0 deletions llvm/test/CodeGen/LoongArch/fdiv-reciprocal-estimate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,-frecipe < %s | FileCheck %s --check-prefix=LA32F
; RUN: llc --mtriple=loongarch32 --mattr=+f,-d,+frecipe < %s | FileCheck %s --check-prefix=LA32F-FRECIPE
; RUN: llc --mtriple=loongarch64 --mattr=+d,-frecipe < %s | FileCheck %s --check-prefix=LA64D
; RUN: llc --mtriple=loongarch64 --mattr=+d,+frecipe < %s | FileCheck %s --check-prefix=LA64D-FRECIPE

;; Exercise the 'fdiv' LLVM IR: https://llvm.org/docs/LangRef.html#fdiv-instruction

;; Single-precision fast division. Without frecipe the checks expect a plain
;; fdiv.s; with frecipe they expect frecipe.s followed by an
;; fmul.s / fnmsub.s / fmadd.s sequence, on both the 32-bit and 64-bit runs.
define float @fdiv_s(float %x, float %y) {
; LA32F-LABEL: fdiv_s:
; LA32F: # %bb.0:
; LA32F-NEXT: fdiv.s $fa0, $fa0, $fa1
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: fdiv_s:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: frecipe.s $fa2, $fa1
; LA32F-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
; LA32F-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
; LA32F-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: fdiv_s:
; LA64D: # %bb.0:
; LA64D-NEXT: fdiv.s $fa0, $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: fdiv_s:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: frecipe.s $fa2, $fa1
; LA64D-FRECIPE-NEXT: fmul.s $fa3, $fa0, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.s $fa0, $fa1, $fa3, $fa0
; LA64D-FRECIPE-NEXT: fmadd.s $fa0, $fa2, $fa0, $fa3
; LA64D-FRECIPE-NEXT: ret
%div = fdiv fast float %x, %y
ret float %div
}

;; Double-precision fast division. The 32-bit runs disable the D extension and
;; are expected to call __divdf3 whether or not frecipe is enabled. The 64-bit
;; runs expect fdiv.d without frecipe, and frecipe.d plus an
;; fmadd.d / fnmsub.d / fmul.d sequence with it.
define double @fdiv_d(double %x, double %y) {
; LA32F-LABEL: fdiv_d:
; LA32F: # %bb.0:
; LA32F-NEXT: addi.w $sp, $sp, -16
; LA32F-NEXT: .cfi_def_cfa_offset 16
; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32F-NEXT: .cfi_offset 1, -4
; LA32F-NEXT: bl %plt(__divdf3)
; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32F-NEXT: addi.w $sp, $sp, 16
; LA32F-NEXT: ret
;
; LA32F-FRECIPE-LABEL: fdiv_d:
; LA32F-FRECIPE: # %bb.0:
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, -16
; LA32F-FRECIPE-NEXT: .cfi_def_cfa_offset 16
; LA32F-FRECIPE-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32F-FRECIPE-NEXT: .cfi_offset 1, -4
; LA32F-FRECIPE-NEXT: bl %plt(__divdf3)
; LA32F-FRECIPE-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32F-FRECIPE-NEXT: addi.w $sp, $sp, 16
; LA32F-FRECIPE-NEXT: ret
;
; LA64D-LABEL: fdiv_d:
; LA64D: # %bb.0:
; LA64D-NEXT: fdiv.d $fa0, $fa0, $fa1
; LA64D-NEXT: ret
;
; LA64D-FRECIPE-LABEL: fdiv_d:
; LA64D-FRECIPE: # %bb.0:
; LA64D-FRECIPE-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
; LA64D-FRECIPE-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI1_0)
; LA64D-FRECIPE-NEXT: frecipe.d $fa3, $fa1
; LA64D-FRECIPE-NEXT: fmadd.d $fa2, $fa1, $fa3, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.d $fa2, $fa2, $fa3, $fa3
; LA64D-FRECIPE-NEXT: fmul.d $fa3, $fa0, $fa2
; LA64D-FRECIPE-NEXT: fnmsub.d $fa0, $fa1, $fa3, $fa0
; LA64D-FRECIPE-NEXT: fmadd.d $fa0, $fa2, $fa0, $fa3
; LA64D-FRECIPE-NEXT: ret
%div = fdiv fast double %x, %y
ret double %div
}
Loading

0 comments on commit d4649c7

Please sign in to comment.