Skip to content

Commit

Permalink
[RISCV][SLEEF]: Support SLEEF vector library for RISC-V target.
Browse files Browse the repository at this point in the history
The SLEEF vector math library now supports the RISC-V target.
Commit: shibatch/sleef#477

This patch enables the use of auto-vectorization with
subsequent replacement by the corresponding SLEEF function.
  • Loading branch information
mga-sc committed Oct 29, 2024
1 parent 59085e9 commit 529fde0
Show file tree
Hide file tree
Showing 7 changed files with 3,244 additions and 21 deletions.
149 changes: 149 additions & 0 deletions llvm/include/llvm/Analysis/VecFuncs.def
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,155 @@ TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGV
TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED, "_ZGVsMxv")

#elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV)

// SLEEF mappings for RISC-V scalable vectors.
//
// Each row is (scalar name, SLEEF vector symbol, VF, mask kind, VFABI prefix).
// In this table the double ("d") symbols are paired with SCALABLE(2) and the
// float ("f") symbols with SCALABLE(4). Every row is NOMASK, and the prefix
// ends with one 'v' per vector operand (e.g. "_ZGVvNxvv" for two operands).
// NOTE(review): the "_uNN" part of a symbol is presumably SLEEF's accuracy
// tag; confirm each tag against the symbols the SLEEF RVV build exports.

TLI_DEFINE_VECFUNC("acos", "Sleef_acosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("acosf", "Sleef_acosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("acosh", "Sleef_acoshdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("acoshf", "Sleef_acoshfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("asin", "Sleef_asindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("asinf", "Sleef_asinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("asinh", "Sleef_asinhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("asinhf", "Sleef_asinhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("atan", "Sleef_atandx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("atanf", "Sleef_atanfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("atan2", "Sleef_atan2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("atan2f", "Sleef_atan2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

TLI_DEFINE_VECFUNC("atanh", "Sleef_atanhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("atanhf", "Sleef_atanhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("cbrt", "Sleef_cbrtdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("cbrtf", "Sleef_cbrtfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

// copysign maps to a SLEEF symbol without a "_uNN" tag.
TLI_DEFINE_VECFUNC("copysign", "Sleef_copysigndx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("copysignf", "Sleef_copysignfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

// Both the libm name and the matching llvm.* intrinsic map to the same symbol.
TLI_DEFINE_VECFUNC("cos", "Sleef_cosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("cosf", "Sleef_cosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.cos.f64", "Sleef_cosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "Sleef_cosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("cosh", "Sleef_coshdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("coshf", "Sleef_coshfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("cospi", "Sleef_cospidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("cospif", "Sleef_cospifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("erf", "Sleef_erfdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("erff", "Sleef_erffx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("erfc", "Sleef_erfcdx_u15rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("erfcf", "Sleef_erfcfx_u15rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
// exp: the libm names and the llvm.exp.* intrinsics share one SLEEF symbol per
// element type. The f64 rows use SCALABLE(2) like every other double mapping
// in this table; the f32 rows use SCALABLE(4).
TLI_DEFINE_VECFUNC("exp", "Sleef_expdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.exp.f64", "Sleef_expdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("expf", "Sleef_expfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "Sleef_expfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

// exp10 / exp2: libm names plus the matching llvm.* intrinsics.
TLI_DEFINE_VECFUNC("exp10", "Sleef_exp10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "Sleef_exp10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("exp10f", "Sleef_exp10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "Sleef_exp10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("exp2", "Sleef_exp2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("exp2f", "Sleef_exp2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "Sleef_exp2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "Sleef_exp2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("expm1", "Sleef_expm1dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("expm1f", "Sleef_expm1fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

// fdim, fma and fmax map to SLEEF symbols without a "_uNN" tag. The prefix
// carries one 'v' per operand: two for fdim/fmax, three for fma.
TLI_DEFINE_VECFUNC("fdim", "Sleef_fdimdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("fdimf", "Sleef_fdimfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

TLI_DEFINE_VECFUNC("fma", "Sleef_fmadx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvvv")
TLI_DEFINE_VECFUNC("fmaf", "Sleef_fmafx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvvv")

TLI_DEFINE_VECFUNC("fmax", "Sleef_fmaxdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("fmaxf", "Sleef_fmaxfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

// fmin uses the untagged SLEEF symbol, matching the fmax/fdim/fmod rows in
// this table; SLEEF does not export a "_u10" variant of fmin.
TLI_DEFINE_VECFUNC("fmin", "Sleef_fmindx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("fminf", "Sleef_fminfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

// fmod maps to an untagged SLEEF symbol; hypot uses the "_u05" variant.
TLI_DEFINE_VECFUNC("fmod", "Sleef_fmoddx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("fmodf", "Sleef_fmodfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

TLI_DEFINE_VECFUNC("hypot", "Sleef_hypotdx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("hypotf", "Sleef_hypotfx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

// NOTE(review): ilogb returns an int and ldexp takes an int exponent in C;
// confirm the integer element type of the SLEEF RVV variants matches what the
// vectorizer produces for these VFs.
TLI_DEFINE_VECFUNC("ilogb", "Sleef_ilogbdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("ilogbf", "Sleef_ilogbfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("ldexp", "Sleef_ldexpdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("ldexpf", "Sleef_ldexpfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

TLI_DEFINE_VECFUNC("lgamma", "Sleef_lgammadx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("lgammaf", "Sleef_lgammafx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

// log: the libm names and the llvm.log.* intrinsics share one SLEEF symbol per
// element type. All rows use the unmasked RVV prefix "_ZGVvNxv" (the 's'/'M'
// spelling is the SVE masked form and does not belong in this table), and the
// f64 rows use SCALABLE(2) like every other double mapping here.
TLI_DEFINE_VECFUNC("log", "Sleef_logdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("logf", "Sleef_logfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.log.f64", "Sleef_logdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.log.f32", "Sleef_logfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

// log10 / log2: libm names plus the matching llvm.* intrinsics.
TLI_DEFINE_VECFUNC("log10", "Sleef_log10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.log10.f64", "Sleef_log10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("log10f", "Sleef_log10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.log10.f32", "Sleef_log10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("log1p", "Sleef_log1pdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("log1pf", "Sleef_log1pfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("log2", "Sleef_log2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("log2f", "Sleef_log2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.log2.f64", "Sleef_log2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.log2.f32", "Sleef_log2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

// NOTE(review): the "l8"/"l4" suffix presumably marks the pointer
// out-parameter as linear with a stride equal to the element size (8 for
// double, 4 for float); confirm against the VFABI demangler.
TLI_DEFINE_VECFUNC("modf", "Sleef_modfdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8")
TLI_DEFINE_VECFUNC("modff", "Sleef_modffx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4")

TLI_DEFINE_VECFUNC("nextafter", "Sleef_nextafterdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("nextafterf", "Sleef_nextafterfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

// pow / sin: libm names plus the matching llvm.* intrinsics.
TLI_DEFINE_VECFUNC("pow", "Sleef_powdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("powf", "Sleef_powfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("llvm.pow.f64", "Sleef_powdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
TLI_DEFINE_VECFUNC("llvm.pow.f32", "Sleef_powfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")

TLI_DEFINE_VECFUNC("sin", "Sleef_sindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("sinf", "Sleef_sinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.sin.f64", "Sleef_sindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("llvm.sin.f32", "Sleef_sinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

// sincos has one vector input and two "l<N>" parameters in its prefix
// (presumably the sin and cos output pointers; see the modf note above).
TLI_DEFINE_VECFUNC("sincos", "Sleef_sincosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8l8")
TLI_DEFINE_VECFUNC("sincosf", "Sleef_sincosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4l4")

// sincospi uses the "_u05" SLEEF variant, matching the sinpi/cospi rows in
// this table; SLEEF provides sincospi only as u05 and u35, not u10.
TLI_DEFINE_VECFUNC("sincospi", "Sleef_sincospidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8l8")
TLI_DEFINE_VECFUNC("sincospif", "Sleef_sincospifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4l4")

// Remaining single-operand functions. Only the libm names are mapped in this
// group; there are no llvm.* intrinsic rows for them here.
TLI_DEFINE_VECFUNC("sinh", "Sleef_sinhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("sinhf", "Sleef_sinhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("sinpi", "Sleef_sinpidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("sinpif", "Sleef_sinpifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("sqrt", "Sleef_sqrtdx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("sqrtf", "Sleef_sqrtfx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("tan", "Sleef_tandx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("tanf", "Sleef_tanfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("tanh", "Sleef_tanhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("tanhf", "Sleef_tanhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

TLI_DEFINE_VECFUNC("tgamma", "Sleef_tgammadx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
TLI_DEFINE_VECFUNC("tgammaf", "Sleef_tgammafx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")

#elif defined(TLI_DEFINE_ARMPL_VECFUNCS)

TLI_DEFINE_VECFUNC("acos", "armpl_vacosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/VFABIDemangler.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ enum class VFParamKind {
enum class VFISAKind {
AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
SVE, // AArch64 Scalable Vector Extension
RVV, // RISC-V Scalable Vector Extension
SSE, // x86 SSE
AVX, // x86 AVX
AVX2, // x86 AVX2
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Analysis/TargetLibraryInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1322,6 +1322,14 @@ static const VecDesc VecFuncs_SLEEFGNUABI_VFScalable[] = {
#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
};

// Table of SLEEF scalable-vector mappings for RISC-V. The rows come from the
// TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV section of VecFuncs.def; each
// TLI_DEFINE_VECFUNC row expands to one brace-initialized VecDesc entry.
// NOTE(review): "SKEEF" in the array name looks like a typo for "SLEEF" (the
// sibling tables are spelled VecFuncs_SLEEFGNUABI_*). Renaming it requires
// updating the riscv64 use site in addVectorizableFunctionsFromVecLib in the
// same change.
static const VecDesc VecFuncs_SKEEFGNUABI_VFScalableRISCV[] = {
#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \
{SCAL, VEC, VF, MASK, VABI_PREFIX},
#include "llvm/Analysis/VecFuncs.def"
#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV
};

static const VecDesc VecFuncs_ArmPL[] = {
#define TLI_DEFINE_ARMPL_VECFUNCS
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \
Expand Down Expand Up @@ -1371,6 +1379,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF4);
addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VFScalable);
break;
case llvm::Triple::riscv64:
addVectorizableFunctions(VecFuncs_SKEEFGNUABI_VFScalableRISCV);
break;
}
break;
}
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/IR/VFABIDemangler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ static ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
.Case("n", VFISAKind::AdvancedSIMD)
.Case("s", VFISAKind::SVE)
.Case("v", VFISAKind::RVV)
.Case("b", VFISAKind::SSE)
.Case("c", VFISAKind::AVX)
.Case("d", VFISAKind::AVX2)
Expand Down Expand Up @@ -79,9 +80,9 @@ static ParseRet tryParseVLEN(StringRef &ParseString, VFISAKind ISA,
std::pair<unsigned, bool> &ParsedVF) {
if (ParseString.consume_front("x")) {
// SVE and RVV are the only scalable ISAs currently supported.
if (ISA != VFISAKind::SVE) {
if (ISA != VFISAKind::SVE && ISA != VFISAKind::RVV) {
LLVM_DEBUG(dbgs() << "Vector function variant declared with scalable VF "
<< "but ISA is not SVE\n");
<< "but ISA supported for SVE and RVV only\n");
return ParseRet::Error;
}
// We can't determine the VF of a scalable vector by looking at the vlen
Expand Down Expand Up @@ -301,9 +302,8 @@ static ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
// the number of elements of the given type which would fit in such a vector.
static std::optional<ElementCount> getElementCountForTy(const VFISAKind ISA,
const Type *Ty) {
// Only AArch64 SVE is supported at present.
assert(ISA == VFISAKind::SVE &&
"Scalable VF decoding only implemented for SVE\n");
assert((ISA == VFISAKind::SVE || ISA == VFISAKind::RVV) &&
"Scalable VF decoding only implemented for SVE and RVV\n");

if (Ty->isIntegerTy(64) || Ty->isDoubleTy() || Ty->isPointerTy())
return ElementCount::getScalable(2);
Expand Down
Loading

0 comments on commit 529fde0

Please sign in to comment.