From 529fde0a813aa7949d11de0d321937ffaa67b2b4 Mon Sep 17 00:00:00 2001
From: Goncharov Mark
Date: Thu, 24 Oct 2024 14:02:03 +0000
Subject: [PATCH] [RISCV][SLEEF]: Support SLEEF vector library for RISC-V target.

The SLEEF math vector library supports the RISC-V target as of
https://github.com/shibatch/sleef/pull/477

This patch enables auto-vectorization of scalar math calls on RISC-V, with
each vectorized call subsequently replaced by the corresponding SLEEF
vector function.
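For example (illustrative only; this mirrors the new tests added below), a
scalar call inside a vectorizable loop such as

  %call = tail call double @sin(double %in)

is widened by the loop vectorizer and, through the TLI mapping added here,
ends up as

  %v = call <vscale x 2 x double> @Sleef_sindx_u10rvvm2(<vscale x 2 x double> %wide.load)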
"Sleef_coshdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("coshf", "Sleef_coshfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("cospi", "Sleef_cospidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("cospif", "Sleef_cospifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("erf", "Sleef_erfdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("erff", "Sleef_erffx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("erfc", "Sleef_erfcdx_u15rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("erfcf", "Sleef_erfcfx_u15rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("exp", "Sleef_expdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "Sleef_expdx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("expf", "Sleef_expfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "Sleef_expfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("exp10", "Sleef_exp10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.exp10.f64", "Sleef_exp10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("exp10f", "Sleef_exp10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "Sleef_exp10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("exp2", "Sleef_exp2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("exp2f", "Sleef_exp2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "Sleef_exp2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "Sleef_exp2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("expm1", "Sleef_expm1dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("expm1f", "Sleef_expm1fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("fdim", "Sleef_fdimdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("fdimf", "Sleef_fdimfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("fma", "Sleef_fmadx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvvv") +TLI_DEFINE_VECFUNC("fmaf", "Sleef_fmafx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvvv") + +TLI_DEFINE_VECFUNC("fmax", "Sleef_fmaxdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("fmaxf", "Sleef_fmaxfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("fmin", "Sleef_fmindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("fminf", "Sleef_fminfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("fmod", "Sleef_fmoddx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("fmodf", "Sleef_fmodfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("hypot", "Sleef_hypotdx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("hypotf", "Sleef_hypotfx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("ilogb", "Sleef_ilogbdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("ilogbf", "Sleef_ilogbfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("ldexp", "Sleef_ldexpdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("ldexpf", "Sleef_ldexpfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("lgamma", "Sleef_lgammadx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("lgammaf", "Sleef_lgammafx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("log", 
"Sleef_logdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("logf", "Sleef_logfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.log.f64", "Sleef_logdx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.log.f32", "Sleef_logfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("log10", "Sleef_log10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "Sleef_log10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("log10f", "Sleef_log10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "Sleef_log10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("log1p", "Sleef_log1pdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("log1pf", "Sleef_log1pfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("log2", "Sleef_log2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("log2f", "Sleef_log2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "Sleef_log2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "Sleef_log2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("modf", "Sleef_modfdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8") +TLI_DEFINE_VECFUNC("modff", "Sleef_modffx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4") + +TLI_DEFINE_VECFUNC("nextafter", "Sleef_nextafterdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("nextafterf", "Sleef_nextafterfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("pow", "Sleef_powdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("powf", "Sleef_powfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", "Sleef_powdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "Sleef_powfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv") + +TLI_DEFINE_VECFUNC("sin", "Sleef_sindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("sinf", "Sleef_sinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "Sleef_sindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("llvm.sin.f32", "Sleef_sinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("sincos", "Sleef_sincosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8l8") +TLI_DEFINE_VECFUNC("sincosf", "Sleef_sincosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4l4") + +TLI_DEFINE_VECFUNC("sincospi", "Sleef_sincospidx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8l8") +TLI_DEFINE_VECFUNC("sincospif", "Sleef_sincospifx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4l4") + +TLI_DEFINE_VECFUNC("sinh", "Sleef_sinhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("sinhf", "Sleef_sinhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("sinpi", "Sleef_sinpidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("sinpif", "Sleef_sinpifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("sqrt", "Sleef_sqrtdx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("sqrtf", "Sleef_sqrtfx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("tan", "Sleef_tandx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("tanf", "Sleef_tanfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("tanh", "Sleef_tanhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("tanhf", 
"Sleef_tanhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + +TLI_DEFINE_VECFUNC("tgamma", "Sleef_tgammadx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv") +TLI_DEFINE_VECFUNC("tgammaf", "Sleef_tgammafx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv") + #elif defined(TLI_DEFINE_ARMPL_VECFUNCS) TLI_DEFINE_VECFUNC("acos", "armpl_vacosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") diff --git a/llvm/include/llvm/IR/VFABIDemangler.h b/llvm/include/llvm/IR/VFABIDemangler.h index 8450d4ce541e13..e142ae5cf84f3d 100644 --- a/llvm/include/llvm/IR/VFABIDemangler.h +++ b/llvm/include/llvm/IR/VFABIDemangler.h @@ -44,6 +44,7 @@ enum class VFParamKind { enum class VFISAKind { AdvancedSIMD, // AArch64 Advanced SIMD (NEON) SVE, // AArch64 Scalable Vector Extension + RVV, // RISC-V Scalable Vector Extension SSE, // x86 SSE AVX, // x86 AVX AVX2, // x86 AVX2 diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 0ee83d217a5001..65e439417fb06f 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1322,6 +1322,14 @@ static const VecDesc VecFuncs_SLEEFGNUABI_VFScalable[] = { #undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS }; +static const VecDesc VecFuncs_SKEEFGNUABI_VFScalableRISCV[] = { +#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ + {SCAL, VEC, VF, MASK, VABI_PREFIX}, +#include "llvm/Analysis/VecFuncs.def" +#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV +}; + static const VecDesc VecFuncs_ArmPL[] = { #define TLI_DEFINE_ARMPL_VECFUNCS #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ @@ -1371,6 +1379,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF4); addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VFScalable); break; + case llvm::Triple::riscv64: + addVectorizableFunctions(VecFuncs_SKEEFGNUABI_VFScalableRISCV); + break; } break; } diff --git a/llvm/lib/IR/VFABIDemangler.cpp b/llvm/lib/IR/VFABIDemangler.cpp index cdfb9fbfaa084d..2b17dccd39d5d8 100644 --- a/llvm/lib/IR/VFABIDemangler.cpp +++ b/llvm/lib/IR/VFABIDemangler.cpp @@ -42,6 +42,7 @@ static ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) { ISA = StringSwitch(MangledName.take_front(1)) .Case("n", VFISAKind::AdvancedSIMD) .Case("s", VFISAKind::SVE) + .Case("v", VFISAKind::RVV) .Case("b", VFISAKind::SSE) .Case("c", VFISAKind::AVX) .Case("d", VFISAKind::AVX2) @@ -79,9 +80,9 @@ static ParseRet tryParseVLEN(StringRef &ParseString, VFISAKind ISA, std::pair &ParsedVF) { if (ParseString.consume_front("x")) { // SVE is the only scalable ISA currently supported. - if (ISA != VFISAKind::SVE) { + if (ISA != VFISAKind::SVE && ISA != VFISAKind::RVV) { LLVM_DEBUG(dbgs() << "Vector function variant declared with scalable VF " - << "but ISA is not SVE\n"); + << "but ISA supported for SVE and RVV only\n"); return ParseRet::Error; } // We can't determine the VF of a scalable vector by looking at the vlen @@ -301,9 +302,8 @@ static ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) { // the number of elements of the given type which would fit in such a vector. static std::optional getElementCountForTy(const VFISAKind ISA, const Type *Ty) { - // Only AArch64 SVE is supported at present. 
- assert(ISA == VFISAKind::SVE && - "Scalable VF decoding only implemented for SVE\n"); + assert((ISA == VFISAKind::SVE || ISA == VFISAKind::RVV) && + "Scalable VF decoding only implemented for SVE and RVV\n"); if (Ty->isIntegerTy(64) || Ty->isDoubleTy() || Ty->isPointerTy()) return ElementCount::getScalable(2); diff --git a/llvm/test/CodeGen/RISCV/replace-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/RISCV/replace-with-veclib-sleef-scalable.ll new file mode 100644 index 00000000000000..5bfe345ae8b9cc --- /dev/null +++ b/llvm/test/CodeGen/RISCV/replace-with-veclib-sleef-scalable.ll @@ -0,0 +1,492 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt -mtriple=riscv64-unknown-linux-gnu -mattr=+v -vector-library=sleefgnuabi -replace-with-veclib -S < %s | FileCheck %s + +;. +; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @Sleef_cosdx_u10rvvm2, ptr @Sleef_cosfx_u10rvvm2, ptr @Sleef_expfx_u10rvvm2, ptr @Sleef_exp10dx_u10rvvm2, ptr @Sleef_exp10fx_u10rvvm2, ptr @Sleef_exp2dx_u10rvvm2, ptr @Sleef_exp2fx_u10rvvm2, ptr @Sleef_logfx_u10rvvm2, ptr @Sleef_log10dx_u10rvvm2, ptr @Sleef_log10fx_u10rvvm2, ptr @Sleef_log2dx_u10rvvm2, ptr @Sleef_log2fx_u10rvvm2, ptr @Sleef_sindx_u10rvvm2, ptr @Sleef_sinfx_u10rvvm2, ptr @Sleef_fmindx_rvvm2, ptr @Sleef_fminfx_rvvm2], section "llvm.metadata" +;. +define @llvm_ceil_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_ceil_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.ceil.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.ceil.nxv2f64( %in) + ret %1 +} + +define @llvm_ceil_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_ceil_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.ceil.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.ceil.nxv4f32( %in) + ret %1 +} + +define @llvm_copysign_vscale_f64( %mag, %sgn) { +; CHECK-LABEL: define @llvm_copysign_vscale_f64( +; CHECK-SAME: [[MAG:%.*]], [[SGN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.copysign.nxv2f64( [[MAG]], [[SGN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.copysign.nxv2f64( %mag, %sgn) + ret %1 +} + +define @llvm_copysign_vscale_f32( %mag, %sgn) { +; CHECK-LABEL: define @llvm_copysign_vscale_f32( +; CHECK-SAME: [[MAG:%.*]], [[SGN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.copysign.nxv4f32( [[MAG]], [[SGN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.copysign.nxv4f32( %mag, %sgn) + ret %1 +} + +define @llvm_cos_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_cos_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_cosdx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.cos.nxv2f64( %in) + ret %1 +} + +define @llvm_cos_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_cos_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_cosfx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.cos.nxv4f32( %in) + ret %1 +} + +define @llvm_exp_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_exp_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.exp.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp.nxv2f64( %in) + ret %1 +} + +define @llvm_exp_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_exp_vscale_f32( +; CHECK-SAME: 
[[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_expfx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp.nxv4f32( %in) + ret %1 +} + +define @llvm_exp10_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_exp10_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_exp10dx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp10.nxv2f64( %in) + ret %1 +} + +define @llvm_exp10_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_exp10_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_exp10fx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp10.nxv4f32( %in) + ret %1 +} + +define @llvm_exp2_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_exp2_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_exp2dx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp2.nxv2f64( %in) + ret %1 +} + +define @llvm_exp2_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_exp2_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_exp2fx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp2.nxv4f32( %in) + ret %1 +} + +define @llvm_fabs_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_fabs_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fabs.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.fabs.nxv2f64( %in) + ret %1 +} + +define @llvm_fabs_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_fabs_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fabs.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.fabs.nxv4f32( %in) + ret %1 +} + +define @llvm_floor_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_floor_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.floor.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.floor.nxv2f64( %in) + ret %1 +} + +define @llvm_floor_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_floor_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.floor.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.floor.nxv4f32( %in) + ret %1 +} + +define @llvm_fma_vscale_f64( %a, %b, %c ) { +; CHECK-LABEL: define @llvm_fma_vscale_f64( +; CHECK-SAME: [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fma.nxv2f64( [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.fma.nxv2f64( %a, %b, %c) + ret %1 +} + +define @llvm_fma_vscale_f32( %a, %b, %c) { +; CHECK-LABEL: define @llvm_fma_vscale_f32( +; CHECK-SAME: [[A:%.*]], [[B:%.*]], [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fma.nxv4f32( [[A]], [[B]], [[C]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.fma.nxv4f32( %a, %b, %c) + ret %1 +} + +define @llvm_log_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_log_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.log.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log.nxv2f64( %in) + ret %1 +} + +define @llvm_log_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_log_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_logfx_u10rvvm2( 
[[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log.nxv4f32( %in) + ret %1 +} + +define @llvm_log10_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_log10_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_log10dx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log10.nxv2f64( %in) + ret %1 +} + +define @llvm_log10_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_log10_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_log10fx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log10.nxv4f32( %in) + ret %1 +} + +define @llvm_log2_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_log2_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_log2dx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log2.nxv2f64( %in) + ret %1 +} + +define @llvm_log2_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_log2_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_log2fx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log2.nxv4f32( %in) + ret %1 +} + +define @llvm_maxnum_vscale_f64( %in0, %in1) { +; CHECK-LABEL: define @llvm_maxnum_vscale_f64( +; CHECK-SAME: [[IN0:%.*]], [[IN1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.maxnum.nxv2f64( [[IN0]], [[IN1]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.maxnum.nxv2f64( %in0, %in1) + ret %1 +} + +define @llvm_maxnum_vscale_f32( %in0, %in1) { +; CHECK-LABEL: define @llvm_maxnum_vscale_f32( +; CHECK-SAME: [[IN0:%.*]], [[IN1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.maxnum.nxv4f32( [[IN0]], [[IN1]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.maxnum.nxv4f32( %in0, %in1) + ret %1 +} + +define @llvm_minnum_vscale_f64( %in0, %in1) { +; CHECK-LABEL: define @llvm_minnum_vscale_f64( +; CHECK-SAME: [[IN0:%.*]], [[IN1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.minnum.nxv2f64( [[IN0]], [[IN1]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.minnum.nxv2f64( %in0, %in1) + ret %1 +} + +define @llvm_minnum_vscale_f32( %in0, %in1) { +; CHECK-LABEL: define @llvm_minnum_vscale_f32( +; CHECK-SAME: [[IN0:%.*]], [[IN1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.minnum.nxv4f32( [[IN0]], [[IN1]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.minnum.nxv4f32( %in0, %in1) + ret %1 +} + +define @llvm_nearbyint_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_nearbyint_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.nearbyint.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.nearbyint.nxv2f64( %in) + ret %1 +} + +define @llvm_nearbyint_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_nearbyint_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.nearbyint.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.nearbyint.nxv4f32( %in) + ret %1 +} + +define @llvm_pow_vscale_f64( %in, %pow) { +; CHECK-LABEL: define @llvm_pow_vscale_f64( +; CHECK-SAME: [[IN:%.*]], [[POW:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.pow.nxv2f64( [[IN]], [[POW]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.pow.nxv2f64( %in, %pow) + ret %1 +} + +define @llvm_pow_vscale_f32( %in, %pow) { +; CHECK-LABEL: define @llvm_pow_vscale_f32( +; CHECK-SAME: 
[[IN:%.*]], [[POW:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.pow.nxv4f32( [[IN]], [[POW]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.pow.nxv4f32( %in, %pow) + ret %1 +} + +define @llvm_rint_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_rint_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.rint.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.rint.nxv2f64( %in) + ret %1 +} + +define @llvm_rint_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_rint_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.rint.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.rint.nxv4f32( %in) + ret %1 +} + +define @llvm_round_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_round_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.round.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.round.nxv2f64( %in) + ret %1 +} + +define @llvm_round_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_round_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.round.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.round.nxv4f32( %in) + ret %1 +} + +define @llvm_sin_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_sin_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_sindx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sin.nxv2f64( %in) + ret %1 +} + +define @llvm_sin_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_sin_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @Sleef_sinfx_u10rvvm2( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sin.nxv4f32( %in) + ret %1 +} + +define @llvm_sqrt_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_sqrt_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.sqrt.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sqrt.nxv2f64( %in) + ret %1 +} + +define @llvm_sqrt_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_sqrt_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.sqrt.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sqrt.nxv4f32( %in) + ret %1 +} + +define @llvm_trunc_vscale_f64( %in) { +; CHECK-LABEL: define @llvm_trunc_vscale_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.trunc.nxv2f64( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.trunc.nxv2f64( %in) + ret %1 +} + +define @llvm_trunc_vscale_f32( %in) { +; CHECK-LABEL: define @llvm_trunc_vscale_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.trunc.nxv4f32( [[IN]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.trunc.nxv4f32( %in) + ret %1 +} + +define @frem_f64( %in) { +; CHECK-LABEL: define @frem_f64( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = frem %in, %in +; CHECK-NEXT: ret [[TMP1]] +; + %1= frem %in, %in + ret %1 +} + +define @frem_f32( %in) { +; CHECK-LABEL: define @frem_f32( +; CHECK-SAME: [[IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = frem %in, %in +; CHECK-NEXT: ret [[TMP1]] +; + %1= frem %in, %in + ret %1 +} + +declare @llvm.ceil.nxv2f64() +declare @llvm.ceil.nxv4f32() +declare @llvm.copysign.nxv2f64(, ) +declare 
@llvm.copysign.nxv4f32(, ) +declare @llvm.cos.nxv2f64() +declare @llvm.cos.nxv4f32() +declare @llvm.exp.nxv2f64() +declare @llvm.exp.nxv4f32() +declare @llvm.exp2.nxv2f64() +declare @llvm.exp2.nxv4f32() +declare @llvm.exp10.nxv2f64() +declare @llvm.exp10.nxv4f32() +declare @llvm.fabs.nxv2f64() +declare @llvm.fabs.nxv4f32() +declare @llvm.floor.nxv2f64() +declare @llvm.floor.nxv4f32() +declare @llvm.fma.nxv2f64(, , ) +declare @llvm.fma.nxv4f32(, , ) +declare @llvm.log.nxv2f64() +declare @llvm.log.nxv4f32() +declare @llvm.log10.nxv2f64() +declare @llvm.log10.nxv4f32() +declare @llvm.log2.nxv2f64() +declare @llvm.log2.nxv4f32() +declare @llvm.maxnum.nxv2f64(, ) +declare @llvm.maxnum.nxv4f32(, ) +declare @llvm.minnum.nxv2f64(, ) +declare @llvm.minnum.nxv4f32(, ) +declare @llvm.nearbyint.nxv2f64() +declare @llvm.nearbyint.nxv4f32() +declare @llvm.pow.nxv2f64(, ) +declare @llvm.pow.nxv4f32(, ) +declare @llvm.rint.nxv2f64() +declare @llvm.rint.nxv4f32() +declare @llvm.round.nxv2f64() +declare @llvm.round.nxv4f32() +declare @llvm.sin.nxv2f64() +declare @llvm.sin.nxv4f32() +declare @llvm.sqrt.nxv2f64() +declare @llvm.sqrt.nxv4f32() +declare @llvm.trunc.nxv2f64() +declare @llvm.trunc.nxv4f32() +;. +; CHECK: attributes #[[ATTR0]] = { "target-features"="+v" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+v" } +;. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll new file mode 100644 index 00000000000000..86bb4bec8ff3d4 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll @@ -0,0 +1,2559 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --filter "call.*(cos|sin|tan|cbrt|erf|exp[^e]|gamma|log|sqrt|copysign|dim|min|mod|hypot|nextafter|pow|fma)" --version 2 +; RUN: opt -mattr=+v -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -S < %s | FileCheck %s + +target triple = "riscv64-unknown-linux-gnu" + +; We are checking whether loops containing function calls can be vectorized, +; when the compiler provides TLI mappings to their vector variants. + +declare double @acos(double) +declare float @acosf(float) + +;. 
+; CHECK: @llvm.compiler.used = appending global [86 x ptr] [ptr @Sleef_acosdx_u10rvvm2, ptr @Sleef_acosfx_u10rvvm2, ptr @Sleef_acoshdx_u10rvvm2, ptr @Sleef_acoshfx_u10rvvm2, ptr @Sleef_asindx_u10rvvm2, ptr @Sleef_asinfx_u10rvvm2, ptr @Sleef_asinhdx_u10rvvm2, ptr @Sleef_asinhfx_u10rvvm2, ptr @Sleef_atandx_u10rvvm2, ptr @Sleef_atanfx_u10rvvm2, ptr @Sleef_atan2dx_u10rvvm2, ptr @Sleef_atan2fx_u10rvvm2, ptr @Sleef_atanhdx_u10rvvm2, ptr @Sleef_atanhfx_u10rvvm2, ptr @Sleef_cbrtdx_u10rvvm2, ptr @Sleef_cbrtfx_u10rvvm2, ptr @Sleef_copysigndx_rvvm2, ptr @Sleef_copysignfx_rvvm2, ptr @Sleef_cosdx_u10rvvm2, ptr @Sleef_cosfx_u10rvvm2, ptr @Sleef_coshdx_u10rvvm2, ptr @Sleef_coshfx_u10rvvm2, ptr @Sleef_cospidx_u05rvvm2, ptr @Sleef_cospifx_u05rvvm2, ptr @Sleef_erfdx_u10rvvm2, ptr @Sleef_erffx_u10rvvm2, ptr @Sleef_erfcdx_u15rvvm2, ptr @Sleef_erfcfx_u15rvvm2, ptr @Sleef_expdx_u10rvvm2, ptr @Sleef_expfx_u10rvvm2, ptr @Sleef_exp10dx_u10rvvm2, ptr @Sleef_exp10fx_u10rvvm2, ptr @Sleef_exp2dx_u10rvvm2, ptr @Sleef_exp2fx_u10rvvm2, ptr @Sleef_expm1dx_u10rvvm2, ptr @Sleef_expm1fx_u10rvvm2, ptr @Sleef_fdimdx_rvvm2, ptr @Sleef_fdimfx_rvvm2, ptr @Sleef_fmadx_rvvm2, ptr @Sleef_fmafx_rvvm2, ptr @Sleef_fmaxdx_rvvm2, ptr @Sleef_fmaxfx_rvvm2, ptr @Sleef_fmindx_rvvm2, ptr @Sleef_fminfx_rvvm2, ptr @Sleef_fmoddx_rvvm2, ptr @Sleef_fmodfx_rvvm2, ptr @Sleef_hypotdx_u05rvvm2, ptr @Sleef_hypotfx_u05rvvm2, ptr @Sleef_ilogbdx_rvvm2, ptr @Sleef_ilogbfx_rvvm2, ptr @Sleef_ldexpdx_rvvm2, ptr @Sleef_ldexpfx_rvvm2, ptr @Sleef_lgammadx_u10rvvm2, ptr @Sleef_lgammafx_u10rvvm2, ptr @Sleef_logdx_u10rvvm2, ptr @Sleef_logfx_u10rvvm2, ptr @Sleef_log10dx_u10rvvm2, ptr @Sleef_log10fx_u10rvvm2, ptr @Sleef_log1pdx_u10rvvm2, ptr @Sleef_log1pfx_u10rvvm2, ptr @Sleef_log2dx_u10rvvm2, ptr @Sleef_log2fx_u10rvvm2, ptr @Sleef_modfdx_rvvm2, ptr @Sleef_modffx_rvvm2, ptr @Sleef_nextafterdx_rvvm2, ptr @Sleef_nextafterfx_rvvm2, ptr @Sleef_powdx_u10rvvm2, ptr @Sleef_powfx_u10rvvm2, ptr @Sleef_sindx_u10rvvm2, ptr @Sleef_sinfx_u10rvvm2, ptr @Sleef_sincosdx_u10rvvm2, ptr @Sleef_sincosfx_u10rvvm2, ptr @Sleef_sincospidx_u10rvvm2, ptr @Sleef_sincospifx_u10rvvm2, ptr @Sleef_sinhdx_u10rvvm2, ptr @Sleef_sinhfx_u10rvvm2, ptr @Sleef_sinpidx_u05rvvm2, ptr @Sleef_sinpifx_u05rvvm2, ptr @Sleef_sqrtdx_u05rvvm2, ptr @Sleef_sqrtfx_u05rvvm2, ptr @Sleef_tandx_u10rvvm2, ptr @Sleef_tanfx_u10rvvm2, ptr @Sleef_tanhdx_u10rvvm2, ptr @Sleef_tanhfx_u10rvvm2, ptr @Sleef_tgammadx_u10rvvm2, ptr @Sleef_tgammafx_u10rvvm2], section "llvm.metadata"
+;.
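+
+; Illustrative note (not part of the FileCheck assertions): inject-tli-mappings
+; records each mapping as a "vector-function-abi-variant" attribute on the
+; scalar call site, e.g. for acos:
+;   "vector-function-abi-variant"="_ZGVvNxv_acos(Sleef_acosdx_u10rvvm2)"
+; The loop vectorizer then picks the variant whose VFABI shape matches.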
+define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @acos_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_acosdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @acos(double [[IN:%.*]]) #[[ATTR2:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @acos(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @acos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @acos_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_acosfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @acosf(float [[IN:%.*]]) #[[ATTR3:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @acosf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @acosh(double) +declare float @acoshf(float) + +define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @acosh_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_acoshdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @acosh(double [[IN:%.*]]) #[[ATTR4:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @acosh(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @acosh_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_acoshfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @acoshf(float [[IN:%.*]]) #[[ATTR5:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @acoshf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void 
+} + +declare double @asin(double) +declare float @asinf(float) + +define void @asin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @asin_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_asindx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @asin(double [[IN:%.*]]) #[[ATTR6:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @asin(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @asin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @asin_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_asinfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @asinf(float [[IN:%.*]]) #[[ATTR7:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @asinf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @asinh(double) +declare float @asinhf(float) + +define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @asinh_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_asinhdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @asinh(double [[IN:%.*]]) #[[ATTR8:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @asinh(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @asinh_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_asinhfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @asinhf(float [[IN:%.*]]) #[[ATTR9:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @asinhf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 
%exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atan(double) +declare float @atanf(float) + +define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @atan_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_atandx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @atan(double [[IN:%.*]]) #[[ATTR10:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @atan(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @atan_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_atanfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @atanf(float [[IN:%.*]]) #[[ATTR11:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @atanf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atan2(double, double) +declare float @atan2f(float, float) + +define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @atan2_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_atan2dx_u10rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @atan2(double [[IN:%.*]], double [[IN]]) #[[ATTR12:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @atan2(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @atan2_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_atan2fx_u10rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @atan2f(float [[IN:%.*]], float [[IN]]) #[[ATTR13:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @atan2f(float %in, float %in) + %out.gep = getelementptr 
inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atanh(double) +declare float @atanhf(float) + +define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @atanh_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_atanhdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @atanh(double [[IN:%.*]]) #[[ATTR14:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @atanh(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @atanh_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_atanhfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @atanhf(float [[IN:%.*]]) #[[ATTR15:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @atanhf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cbrt(double) +declare float @cbrtf(float) + +define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cbrt_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_cbrtdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @cbrt(double [[IN:%.*]]) #[[ATTR16:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @cbrt(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cbrt_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_cbrtfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @cbrtf(float [[IN:%.*]]) #[[ATTR17:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call 
= tail call float @cbrtf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @copysign(double, double) +declare float @copysignf(float, float) + +define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @copysign_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_copysigndx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) #[[ATTR18:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @copysign(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @copysign_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_copysignfx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) #[[ATTR19:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @copysignf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cos(double) +declare float @cosf(float) + +define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cos_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_cosdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @cos(double [[IN:%.*]]) #[[ATTR20:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @cos(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cos_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_cosfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @cosf(float [[IN:%.*]]) #[[ATTR21:[0-9]+]] +; + entry: + br label %for.body + 
+ for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @cosf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cosh(double) +declare float @coshf(float) + +define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cosh_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_coshdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @cosh(double [[IN:%.*]]) #[[ATTR22:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @cosh(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cosh_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_coshfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @coshf(float [[IN:%.*]]) #[[ATTR23:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @coshf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cospi(double) +declare float @cospif(float) + +define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cospi_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_cospidx_u05rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) #[[ATTR24:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @cospi(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @cospi_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_cospifx_u05rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @cospif(float 
[[IN:%.*]]) #[[ATTR25:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @cospif(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @erf(double) +declare float @erff(float) + +define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @erf_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_erfdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @erf(double [[IN:%.*]]) #[[ATTR26:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @erf(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @erf_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_erffx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @erff(float [[IN:%.*]]) #[[ATTR27:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @erff(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @erfc(double) +declare float @erfcf(float) + +define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @erfc_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_erfcdx_u15rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @erfc(double [[IN:%.*]]) #[[ATTR28:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @erfc(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @erfc_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_erfcfx_u15rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: 
[[CALL:%.*]] = tail call float @erfcf(float [[IN:%.*]]) #[[ATTR29:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @erfcf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp(double) +declare float @expf(float) + +define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @exp_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_expdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @exp(double [[IN:%.*]]) #[[ATTR30:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @exp(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @exp_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_expfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @expf(float [[IN:%.*]]) #[[ATTR31:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @expf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp10(double) +declare float @exp10f(float) + +define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @exp10_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_exp10dx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @exp10(double [[IN:%.*]]) #[[ATTR32:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @exp10(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @exp10_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call 
@Sleef_exp10fx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @exp10f(float [[IN:%.*]]) #[[ATTR33:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @exp10f(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp2(double) +declare float @exp2f(float) + +define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @exp2_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_exp2dx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @exp2(double [[IN:%.*]]) #[[ATTR34:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @exp2(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @exp2_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_exp2fx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @exp2f(float [[IN:%.*]]) #[[ATTR35:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @exp2f(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @expm1(double) +declare float @expm1f(float) + +define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @expm1_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_expm1dx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @expm1(double [[IN:%.*]]) #[[ATTR36:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @expm1(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @expm1_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr 
noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_expm1fx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @expm1f(float [[IN:%.*]]) #[[ATTR37:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @expm1f(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @fdim(double, double) +declare float @fdimf(float, float) + +define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fdim_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fdimdx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) #[[ATTR38:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @fdim(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fdim_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fdimfx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) #[[ATTR39:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @fdimf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @fma(double, double, double) +declare float @fmaf(float, float, float) + +define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fma_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fmadx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) #[[ATTR40:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @fma(double %in, double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 
%iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fma_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fmafx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) #[[ATTR41:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @fmaf(float %in, float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @fmax(double, double) +declare float @fmaxf(float, float) + +define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fmax_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fmaxdx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) #[[ATTR42:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @fmax(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fmax_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fmaxfx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) #[[ATTR43:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @fmaxf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @fmin(double, double) +declare float @fminf(float, float) + +define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fmin_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fmindx_u10rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) #[[ATTR44:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv 
+ %in = load double, ptr %in.gep, align 8 + %call = tail call double @fmin(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fmin_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fminfx_u10rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) #[[ATTR45:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @fminf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @fmod(double, double) +declare float @fmodf(float, float) + +define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fmod_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fmoddx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) #[[ATTR46:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @fmod(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @fmod_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_fmodfx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) #[[ATTR47:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @fmodf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @hypot(double, double) +declare float @hypotf(float, float) + +define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @hypot_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_hypotdx_u05rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] 
= tail call double @hypot(double [[IN:%.*]], double [[IN]]) #[[ATTR48:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @hypot(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @hypot_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_hypotfx_u05rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @hypotf(float [[IN:%.*]], float [[IN]]) #[[ATTR49:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @hypotf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare i32 @ilogb(double) +declare i32 @ilogbf(float) + +define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @ilogb_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_ilogbdx_rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) #[[ATTR50:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call i32 @ilogb(double %in) + %out.gep = getelementptr inbounds i32, ptr %out.ptr, i64 %iv + store i32 %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @ilogb_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_ilogbfx_rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) #[[ATTR51:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call i32 @ilogbf(float %in) + %out.gep = getelementptr inbounds i32, ptr %out.ptr, i64 %iv + store i32 %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @ldexp(double, i32) +declare float @ldexpf(float, i32) + +define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @ldexp_f64 +; CHECK-SAME: (ptr noalias 
[[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP11:%.*]] = call @Sleef_ldexpdx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD1:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) #[[ATTR52:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in1.gep = getelementptr inbounds double, ptr %in1.ptr, i64 %iv + %in1 = load double, ptr %in1.gep, align 8 + %in2.gep = getelementptr inbounds i32, ptr %in2.ptr, i64 %iv + %in2 = load i32, ptr %in2.gep, align 8 + %call = tail call double @ldexp(double %in1, i32 %in2) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @ldexp_f32 +; CHECK-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP11:%.*]] = call @Sleef_ldexpfx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD1:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) #[[ATTR53:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in1.gep = getelementptr inbounds float, ptr %in1.ptr, i64 %iv + %in1 = load float, ptr %in1.gep, align 8 + %in2.gep = getelementptr inbounds i32, ptr %in2.ptr, i64 %iv + %in2 = load i32, ptr %in2.gep, align 8 + %call = tail call float @ldexpf(float %in1, i32 %in2) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @lgamma(double) +declare float @lgammaf(float) + +define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @lgamma_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_lgammadx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]]) #[[ATTR54:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @lgamma(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @lgamma_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_lgammafx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) #[[ATTR55:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float 
@lgammaf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log(double) +declare float @logf(float) + +define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_logdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @log(double [[IN:%.*]]) #[[ATTR56:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @log(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_logfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @logf(float [[IN:%.*]]) #[[ATTR57:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @logf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log10(double) +declare float @log10f(float) + +define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log10_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_log10dx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @log10(double [[IN:%.*]]) #[[ATTR58:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @log10(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log10_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_log10fx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @log10f(float [[IN:%.*]]) #[[ATTR59:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next,
%for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @log10f(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log1p(double) +declare float @log1pf(float) + +define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log1p_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_log1pdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @log1p(double [[IN:%.*]]) #[[ATTR60:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @log1p(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log1p_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_log1pfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @log1pf(float [[IN:%.*]]) #[[ATTR61:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @log1pf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log2(double) +declare float @log2f(float) + +define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log2_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_log2dx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @log2(double [[IN:%.*]]) #[[ATTR62:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @log2(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @log2_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_log2fx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @log2f(float [[IN:%.*]]) #[[ATTR63:[0-9]+]] +; + entry: + br label 
%for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @log2f(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @modf(double, ptr) +declare float @modff(float, ptr) + +define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; +; CHECK-LABEL: define void @modf_f64 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP17:%.*]] = call @Sleef_modfdx_rvvm2( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]]) +; CHECK: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR64:[0-9]+]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr double, ptr %a, i64 %indvars.iv + %num = load double, ptr %gepa, align 8 + %gepb = getelementptr double, ptr %b, i64 %indvars.iv + %data = call double @modf(double %num, ptr %gepb) + %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv + store double %data, ptr %gepc, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; +; CHECK-LABEL: define void @modf_f32 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP17:%.*]] = call @Sleef_modffx_rvvm2( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]]) +; CHECK: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr float, ptr %a, i64 %indvars.iv + %num = load float, ptr %gepa, align 8 + %gepb = getelementptr float, ptr %b, i64 %indvars.iv + %data = call float @modff(float %num, ptr %gepb) + %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv + store float %data, ptr %gepc, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +declare double @nextafter(double, double) +declare float @nextafterf(float, float) + +define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @nextafter_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_nextafterdx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) #[[ATTR66:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @nextafter(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 
+ br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @nextafter_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_nextafterfx_rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) #[[ATTR67:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @nextafterf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @pow(double, double) +declare float @powf(float, float) + +define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @pow_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_powdx_u10rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call double @pow(double [[IN:%.*]], double [[IN]]) #[[ATTR68:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @pow(double %in, double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @pow_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_powfx_u10rvvm2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; CHECK: [[CALL:%.*]] = tail call float @powf(float [[IN:%.*]], float [[IN]]) #[[ATTR69:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @powf(float %in, float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @sin(double) +declare float @sinf(float) + +define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sin_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sindx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @sin(double [[IN:%.*]]) #[[ATTR70:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @sin(double 
%in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sin_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sinfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @sinf(float [[IN:%.*]]) #[[ATTR71:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @sinf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare void @sincos(double, ptr, ptr) +declare void @sincosf(float, ptr, ptr) + +define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; +; CHECK-LABEL: define void @sincos_f64 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; CHECK: call void @Sleef_sincosdx_u10rvvm2( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) +; CHECK: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR72:[0-9]+]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr double, ptr %a, i64 %indvars.iv + %num = load double, ptr %gepa, align 8 + %gepb = getelementptr double, ptr %b, i64 %indvars.iv + %gepc = getelementptr double, ptr %c, i64 %indvars.iv + call void @sincos(double %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; +; CHECK-LABEL: define void @sincos_f32 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; CHECK: call void @Sleef_sincosfx_u10rvvm2( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) +; CHECK: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR73:[0-9]+]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr float, ptr %a, i64 %indvars.iv + %num = load float, ptr %gepa, align 8 + %gepb = getelementptr float, ptr %b, i64 %indvars.iv + %gepc = getelementptr float, ptr %c, i64 %indvars.iv + call void @sincosf(float %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +declare void @sincospi(double, ptr, ptr) +declare void @sincospif(float, ptr, ptr) + +define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; +; CHECK-LABEL: define void @sincospi_f64 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; CHECK: call void 
@Sleef_sincospidx_u05rvvm2( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) +; CHECK: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR74:[0-9]+]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr double, ptr %a, i64 %indvars.iv + %num = load double, ptr %gepa, align 8 + %gepb = getelementptr double, ptr %b, i64 %indvars.iv + %gepc = getelementptr double, ptr %c, i64 %indvars.iv + call void @sincospi(double %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; +; CHECK-LABEL: define void @sincospi_f32 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; CHECK: call void @Sleef_sincospifx_u05rvvm2( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]]) +; CHECK: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR75:[0-9]+]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr float, ptr %a, i64 %indvars.iv + %num = load float, ptr %gepa, align 8 + %gepb = getelementptr float, ptr %b, i64 %indvars.iv + %gepc = getelementptr float, ptr %c, i64 %indvars.iv + call void @sincospif(float %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +declare double @sinh(double) +declare float @sinhf(float) + +define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sinh_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sinhdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @sinh(double [[IN:%.*]]) #[[ATTR76:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @sinh(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sinh_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sinhfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @sinhf(float [[IN:%.*]]) #[[ATTR77:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @sinhf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +
for.end: + ret void +} + +declare double @sinpi(double) +declare float @sinpif(float) + +define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sinpi_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sinpidx_u05rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) #[[ATTR78:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @sinpi(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sinpi_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sinpifx_u05rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) #[[ATTR79:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @sinpif(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @sqrt(double) +declare float @sqrtf(float) + +define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sqrt_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sqrtdx_u05rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) #[[ATTR80:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @sqrt(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @sqrt_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_sqrtfx_u05rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) #[[ATTR81:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @sqrtf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 
%iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tan(double) +declare float @tanf(float) + +define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @tan_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_tandx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @tan(double [[IN:%.*]]) #[[ATTR82:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @tan(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @tan_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_tanfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @tanf(float [[IN:%.*]]) #[[ATTR83:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @tanf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tanh(double) +declare float @tanhf(float) + +define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @tanh_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_tanhdx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @tanh(double [[IN:%.*]]) #[[ATTR84:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @tanh(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @tanh_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_tanhfx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @tanhf(float [[IN:%.*]]) #[[ATTR85:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @tanhf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw 
nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tgamma(double) +declare float @tgammaf(float) + +define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @tgamma_f64 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_tgammadx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) #[[ATTR86:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv + %in = load double, ptr %in.gep, align 8 + %call = tail call double @tgamma(double %in) + %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv + store double %call, ptr %out.gep, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; +; CHECK-LABEL: define void @tgamma_f32 +; CHECK-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; CHECK: [[TMP9:%.*]] = call @Sleef_tgammafx_u10rvvm2( [[WIDE_LOAD:%.*]]) +; CHECK: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) #[[ATTR87:[0-9]+]] +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv + %in = load float, ptr %in.gep, align 8 + %call = tail call float @tgammaf(float %in) + %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv + store float %call, ptr %out.gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} +;. 
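+; Note on the mangled names checked below: the variant strings follow
+; LLVM's vector-function ABI, for which this patch adds the RVV ISA
+; token "v" to the demangler. The shape is
+;   _ZGV<isa><mask><vlen><parameters>_<scalar-name>(<vector-name>)
+; so "_ZGVvNxv" reads: RVV ("v"), unmasked ("N"), scalable VF ("x"),
+; one vector parameter ("v"). "l4"/"l8" mark linear pointer parameters
+; advancing 4/8 bytes per iteration (used by modf, sincos, sincospi).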
+; CHECK: attributes #[[ATTR0]] = { "target-features"="+v" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2]] = { "vector-function-abi-variant"="_ZGVvNxv_acos(Sleef_acosdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR3]] = { "vector-function-abi-variant"="_ZGVvNxv_acosf(Sleef_acosfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR4]] = { "vector-function-abi-variant"="_ZGVvNxv_acosh(Sleef_acoshdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR5]] = { "vector-function-abi-variant"="_ZGVvNxv_acoshf(Sleef_acoshfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR6]] = { "vector-function-abi-variant"="_ZGVvNxv_asin(Sleef_asindx_u10rvvm2)" } +; CHECK: attributes #[[ATTR7]] = { "vector-function-abi-variant"="_ZGVvNxv_asinf(Sleef_asinfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR8]] = { "vector-function-abi-variant"="_ZGVvNxv_asinh(Sleef_asinhdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR9]] = { "vector-function-abi-variant"="_ZGVvNxv_asinhf(Sleef_asinhfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR10]] = { "vector-function-abi-variant"="_ZGVvNxv_atan(Sleef_atandx_u10rvvm2)" } +; CHECK: attributes #[[ATTR11]] = { "vector-function-abi-variant"="_ZGVvNxv_atanf(Sleef_atanfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR12]] = { "vector-function-abi-variant"="_ZGVvNxvv_atan2(Sleef_atan2dx_u10rvvm2)" } +; CHECK: attributes #[[ATTR13]] = { "vector-function-abi-variant"="_ZGVvNxvv_atan2f(Sleef_atan2fx_u10rvvm2)" } +; CHECK: attributes #[[ATTR14]] = { "vector-function-abi-variant"="_ZGVvNxv_atanh(Sleef_atanhdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR15]] = { "vector-function-abi-variant"="_ZGVvNxv_atanhf(Sleef_atanhfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR16]] = { "vector-function-abi-variant"="_ZGVvNxv_cbrt(Sleef_cbrtdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR17]] = { "vector-function-abi-variant"="_ZGVvNxv_cbrtf(Sleef_cbrtfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR18]] = { "vector-function-abi-variant"="_ZGVvNxvv_copysign(Sleef_copysigndx_rvvm2)" } +; CHECK: attributes #[[ATTR19]] = { "vector-function-abi-variant"="_ZGVvNxvv_copysignf(Sleef_copysignfx_rvvm2)" } +; CHECK: attributes #[[ATTR20]] = { "vector-function-abi-variant"="_ZGVvNxv_cos(Sleef_cosdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR21]] = { "vector-function-abi-variant"="_ZGVvNxv_cosf(Sleef_cosfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR22]] = { "vector-function-abi-variant"="_ZGVvNxv_cosh(Sleef_coshdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR23]] = { "vector-function-abi-variant"="_ZGVvNxv_coshf(Sleef_coshfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR24]] = { "vector-function-abi-variant"="_ZGVvNxv_cospi(Sleef_cospidx_u05rvvm2)" } +; CHECK: attributes #[[ATTR25]] = { "vector-function-abi-variant"="_ZGVvNxv_cospif(Sleef_cospifx_u05rvvm2)" } +; CHECK: attributes #[[ATTR26]] = { "vector-function-abi-variant"="_ZGVvNxv_erf(Sleef_erfdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR27]] = { "vector-function-abi-variant"="_ZGVvNxv_erff(Sleef_erffx_u10rvvm2)" } +; CHECK: attributes #[[ATTR28]] = { "vector-function-abi-variant"="_ZGVvNxv_erfc(Sleef_erfcdx_u15rvvm2)" } +; CHECK: attributes #[[ATTR29]] = { "vector-function-abi-variant"="_ZGVvNxv_erfcf(Sleef_erfcfx_u15rvvm2)" } +; CHECK: attributes #[[ATTR30]] = { "vector-function-abi-variant"="_ZGVvNxv_exp(Sleef_expdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR31]] = { "vector-function-abi-variant"="_ZGVvNxv_expf(Sleef_expfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR32]] = { "vector-function-abi-variant"="_ZGVvNxv_exp10(Sleef_exp10dx_u10rvvm2)" } +; CHECK: 
attributes #[[ATTR33]] = { "vector-function-abi-variant"="_ZGVvNxv_exp10f(Sleef_exp10fx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR34]] = { "vector-function-abi-variant"="_ZGVvNxv_exp2(Sleef_exp2dx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR35]] = { "vector-function-abi-variant"="_ZGVvNxv_exp2f(Sleef_exp2fx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR36]] = { "vector-function-abi-variant"="_ZGVvNxv_expm1(Sleef_expm1dx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR37]] = { "vector-function-abi-variant"="_ZGVvNxv_expm1f(Sleef_expm1fx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR38]] = { "vector-function-abi-variant"="_ZGVvNxvv_fdim(Sleef_fdimdx_rvvm2)" }
+; CHECK: attributes #[[ATTR39]] = { "vector-function-abi-variant"="_ZGVvNxvv_fdimf(Sleef_fdimfx_rvvm2)" }
+; CHECK: attributes #[[ATTR40]] = { "vector-function-abi-variant"="_ZGVvNxvvv_fma(Sleef_fmadx_rvvm2)" }
+; CHECK: attributes #[[ATTR41]] = { "vector-function-abi-variant"="_ZGVvNxvvv_fmaf(Sleef_fmafx_rvvm2)" }
+; CHECK: attributes #[[ATTR42]] = { "vector-function-abi-variant"="_ZGVvNxvv_fmax(Sleef_fmaxdx_rvvm2)" }
+; CHECK: attributes #[[ATTR43]] = { "vector-function-abi-variant"="_ZGVvNxvv_fmaxf(Sleef_fmaxfx_rvvm2)" }
+; CHECK: attributes #[[ATTR44]] = { "vector-function-abi-variant"="_ZGVvNxvv_fmin(Sleef_fmindx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR45]] = { "vector-function-abi-variant"="_ZGVvNxvv_fminf(Sleef_fminfx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR46]] = { "vector-function-abi-variant"="_ZGVvNxvv_fmod(Sleef_fmoddx_rvvm2)" }
+; CHECK: attributes #[[ATTR47]] = { "vector-function-abi-variant"="_ZGVvNxvv_fmodf(Sleef_fmodfx_rvvm2)" }
+; CHECK: attributes #[[ATTR48]] = { "vector-function-abi-variant"="_ZGVvNxvv_hypot(Sleef_hypotdx_u05rvvm2)" }
+; CHECK: attributes #[[ATTR49]] = { "vector-function-abi-variant"="_ZGVvNxvv_hypotf(Sleef_hypotfx_u05rvvm2)" }
+; CHECK: attributes #[[ATTR50]] = { "vector-function-abi-variant"="_ZGVvNxv_ilogb(Sleef_ilogbdx_rvvm2)" }
+; CHECK: attributes #[[ATTR51]] = { "vector-function-abi-variant"="_ZGVvNxv_ilogbf(Sleef_ilogbfx_rvvm2)" }
+; CHECK: attributes #[[ATTR52]] = { "vector-function-abi-variant"="_ZGVvNxvv_ldexp(Sleef_ldexpdx_rvvm2)" }
+; CHECK: attributes #[[ATTR53]] = { "vector-function-abi-variant"="_ZGVvNxvv_ldexpf(Sleef_ldexpfx_rvvm2)" }
+; CHECK: attributes #[[ATTR54]] = { "vector-function-abi-variant"="_ZGVvNxv_lgamma(Sleef_lgammadx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR55]] = { "vector-function-abi-variant"="_ZGVvNxv_lgammaf(Sleef_lgammafx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR56]] = { "vector-function-abi-variant"="_ZGVvNxv_log(Sleef_logdx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR57]] = { "vector-function-abi-variant"="_ZGVvNxv_logf(Sleef_logfx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR58]] = { "vector-function-abi-variant"="_ZGVvNxv_log10(Sleef_log10dx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR59]] = { "vector-function-abi-variant"="_ZGVvNxv_log10f(Sleef_log10fx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR60]] = { "vector-function-abi-variant"="_ZGVvNxv_log1p(Sleef_log1pdx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR61]] = { "vector-function-abi-variant"="_ZGVvNxv_log1pf(Sleef_log1pfx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR62]] = { "vector-function-abi-variant"="_ZGVvNxv_log2(Sleef_log2dx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR63]] = { "vector-function-abi-variant"="_ZGVvNxv_log2f(Sleef_log2fx_u10rvvm2)" }
+; CHECK: attributes #[[ATTR64]] = { "vector-function-abi-variant"="_ZGVvNxvl8_modf(Sleef_modfdx_rvvm2)" }
+; CHECK: attributes #[[ATTR65]] = { 
"vector-function-abi-variant"="_ZGVvNxvl4_modff(Sleef_modffx_rvvm2)" } +; CHECK: attributes #[[ATTR66]] = { "vector-function-abi-variant"="_ZGVvNxvv_nextafter(Sleef_nextafterdx_rvvm2)" } +; CHECK: attributes #[[ATTR67]] = { "vector-function-abi-variant"="_ZGVvNxvv_nextafterf(Sleef_nextafterfx_rvvm2)" } +; CHECK: attributes #[[ATTR68]] = { "vector-function-abi-variant"="_ZGVvNxvv_pow(Sleef_powdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR69]] = { "vector-function-abi-variant"="_ZGVvNxvv_powf(Sleef_powfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR70]] = { "vector-function-abi-variant"="_ZGVvNxv_sin(Sleef_sindx_u10rvvm2)" } +; CHECK: attributes #[[ATTR71]] = { "vector-function-abi-variant"="_ZGVvNxv_sinf(Sleef_sinfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR72]] = { "vector-function-abi-variant"="_ZGVvNxvl8l8_sincos(Sleef_sincosdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR73]] = { "vector-function-abi-variant"="_ZGVvNxvl4l4_sincosf(Sleef_sincosfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR74]] = { "vector-function-abi-variant"="_ZGVvNxvl8l8_sincospi(Sleef_sincospidx_u10rvvm2)" } +; CHECK: attributes #[[ATTR75]] = { "vector-function-abi-variant"="_ZGVvNxvl4l4_sincospif(Sleef_sincospifx_u10rvvm2)" } +; CHECK: attributes #[[ATTR76]] = { "vector-function-abi-variant"="_ZGVvNxv_sinh(Sleef_sinhdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR77]] = { "vector-function-abi-variant"="_ZGVvNxv_sinhf(Sleef_sinhfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR78]] = { "vector-function-abi-variant"="_ZGVvNxv_sinpi(Sleef_sinpidx_u05rvvm2)" } +; CHECK: attributes #[[ATTR79]] = { "vector-function-abi-variant"="_ZGVvNxv_sinpif(Sleef_sinpifx_u05rvvm2)" } +; CHECK: attributes #[[ATTR80]] = { "vector-function-abi-variant"="_ZGVvNxv_sqrt(Sleef_sqrtdx_u05rvvm2)" } +; CHECK: attributes #[[ATTR81]] = { "vector-function-abi-variant"="_ZGVvNxv_sqrtf(Sleef_sqrtfx_u05rvvm2)" } +; CHECK: attributes #[[ATTR82]] = { "vector-function-abi-variant"="_ZGVvNxv_tan(Sleef_tandx_u10rvvm2)" } +; CHECK: attributes #[[ATTR83]] = { "vector-function-abi-variant"="_ZGVvNxv_tanf(Sleef_tanfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR84]] = { "vector-function-abi-variant"="_ZGVvNxv_tanh(Sleef_tanhdx_u10rvvm2)" } +; CHECK: attributes #[[ATTR85]] = { "vector-function-abi-variant"="_ZGVvNxv_tanhf(Sleef_tanhfx_u10rvvm2)" } +; CHECK: attributes #[[ATTR86]] = { "vector-function-abi-variant"="_ZGVvNxv_tgamma(Sleef_tgammadx_u10rvvm2)" } +; CHECK: attributes #[[ATTR87]] = { "vector-function-abi-variant"="_ZGVvNxv_tgammaf(Sleef_tgammafx_u10rvvm2)" } +;. 
+; CHECK: [[META0:![0-9]+]] = distinct !{[[META0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[META3:![0-9]+]] = distinct !{[[META3]], [[META2]], [[META1]]} +; CHECK: [[META4:![0-9]+]] = distinct !{[[META4]], [[META1]], [[META2]]} +; CHECK: [[META5:![0-9]+]] = distinct !{[[META5]], [[META2]], [[META1]]} +; CHECK: [[META6:![0-9]+]] = distinct !{[[META6]], [[META1]], [[META2]]} +; CHECK: [[META7:![0-9]+]] = distinct !{[[META7]], [[META2]], [[META1]]} +; CHECK: [[META8:![0-9]+]] = distinct !{[[META8]], [[META1]], [[META2]]} +; CHECK: [[META9:![0-9]+]] = distinct !{[[META9]], [[META2]], [[META1]]} +; CHECK: [[META10:![0-9]+]] = distinct !{[[META10]], [[META1]], [[META2]]} +; CHECK: [[META11:![0-9]+]] = distinct !{[[META11]], [[META2]], [[META1]]} +; CHECK: [[META12:![0-9]+]] = distinct !{[[META12]], [[META1]], [[META2]]} +; CHECK: [[META13:![0-9]+]] = distinct !{[[META13]], [[META2]], [[META1]]} +; CHECK: [[META14:![0-9]+]] = distinct !{[[META14]], [[META1]], [[META2]]} +; CHECK: [[META15:![0-9]+]] = distinct !{[[META15]], [[META2]], [[META1]]} +; CHECK: [[META16:![0-9]+]] = distinct !{[[META16]], [[META1]], [[META2]]} +; CHECK: [[META17:![0-9]+]] = distinct !{[[META17]], [[META2]], [[META1]]} +; CHECK: [[META18:![0-9]+]] = distinct !{[[META18]], [[META1]], [[META2]]} +; CHECK: [[META19:![0-9]+]] = distinct !{[[META19]], [[META2]], [[META1]]} +; CHECK: [[META20:![0-9]+]] = distinct !{[[META20]], [[META1]], [[META2]]} +; CHECK: [[META21:![0-9]+]] = distinct !{[[META21]], [[META2]], [[META1]]} +; CHECK: [[META22:![0-9]+]] = distinct !{[[META22]], [[META1]], [[META2]]} +; CHECK: [[META23:![0-9]+]] = distinct !{[[META23]], [[META2]], [[META1]]} +; CHECK: [[META24:![0-9]+]] = distinct !{[[META24]], [[META1]], [[META2]]} +; CHECK: [[META25:![0-9]+]] = distinct !{[[META25]], [[META2]], [[META1]]} +; CHECK: [[META26:![0-9]+]] = distinct !{[[META26]], [[META1]], [[META2]]} +; CHECK: [[META27:![0-9]+]] = distinct !{[[META27]], [[META2]], [[META1]]} +; CHECK: [[META28:![0-9]+]] = distinct !{[[META28]], [[META1]], [[META2]]} +; CHECK: [[META29:![0-9]+]] = distinct !{[[META29]], [[META2]], [[META1]]} +; CHECK: [[META30:![0-9]+]] = distinct !{[[META30]], [[META1]], [[META2]]} +; CHECK: [[META31:![0-9]+]] = distinct !{[[META31]], [[META2]], [[META1]]} +; CHECK: [[META32:![0-9]+]] = distinct !{[[META32]], [[META1]], [[META2]]} +; CHECK: [[META33:![0-9]+]] = distinct !{[[META33]], [[META2]], [[META1]]} +; CHECK: [[META34:![0-9]+]] = distinct !{[[META34]], [[META1]], [[META2]]} +; CHECK: [[META35:![0-9]+]] = distinct !{[[META35]], [[META2]], [[META1]]} +; CHECK: [[META36:![0-9]+]] = distinct !{[[META36]], [[META1]], [[META2]]} +; CHECK: [[META37:![0-9]+]] = distinct !{[[META37]], [[META2]], [[META1]]} +; CHECK: [[META38:![0-9]+]] = distinct !{[[META38]], [[META1]], [[META2]]} +; CHECK: [[META39:![0-9]+]] = distinct !{[[META39]], [[META2]], [[META1]]} +; CHECK: [[META40:![0-9]+]] = distinct !{[[META40]], [[META1]], [[META2]]} +; CHECK: [[META41:![0-9]+]] = distinct !{[[META41]], [[META2]], [[META1]]} +; CHECK: [[META42:![0-9]+]] = distinct !{[[META42]], [[META1]], [[META2]]} +; CHECK: [[META43:![0-9]+]] = distinct !{[[META43]], [[META2]], [[META1]]} +; CHECK: [[META44:![0-9]+]] = distinct !{[[META44]], [[META1]], [[META2]]} +; CHECK: [[META45:![0-9]+]] = distinct !{[[META45]], [[META2]], [[META1]]} +; CHECK: [[META46:![0-9]+]] = distinct !{[[META46]], [[META1]], [[META2]]} +; CHECK: 
[[META47:![0-9]+]] = distinct !{[[META47]], [[META2]], [[META1]]} +; CHECK: [[META48:![0-9]+]] = distinct !{[[META48]], [[META1]], [[META2]]} +; CHECK: [[META49:![0-9]+]] = distinct !{[[META49]], [[META2]], [[META1]]} +; CHECK: [[META50:![0-9]+]] = distinct !{[[META50]], [[META1]], [[META2]]} +; CHECK: [[META51:![0-9]+]] = distinct !{[[META51]], [[META2]], [[META1]]} +; CHECK: [[META52:![0-9]+]] = distinct !{[[META52]], [[META1]], [[META2]]} +; CHECK: [[META53:![0-9]+]] = distinct !{[[META53]], [[META2]], [[META1]]} +; CHECK: [[META54:![0-9]+]] = distinct !{[[META54]], [[META1]], [[META2]]} +; CHECK: [[META55:![0-9]+]] = distinct !{[[META55]], [[META2]], [[META1]]} +; CHECK: [[META56:![0-9]+]] = distinct !{[[META56]], [[META1]], [[META2]]} +; CHECK: [[META57:![0-9]+]] = distinct !{[[META57]], [[META2]], [[META1]]} +; CHECK: [[META58:![0-9]+]] = distinct !{[[META58]], [[META1]], [[META2]]} +; CHECK: [[META59:![0-9]+]] = distinct !{[[META59]], [[META2]], [[META1]]} +; CHECK: [[META60:![0-9]+]] = distinct !{[[META60]], [[META1]], [[META2]]} +; CHECK: [[META61:![0-9]+]] = distinct !{[[META61]], [[META2]], [[META1]]} +; CHECK: [[META62:![0-9]+]] = distinct !{[[META62]], [[META1]], [[META2]]} +; CHECK: [[META63:![0-9]+]] = distinct !{[[META63]], [[META2]], [[META1]]} +; CHECK: [[META64:![0-9]+]] = distinct !{[[META64]], [[META1]], [[META2]]} +; CHECK: [[META65:![0-9]+]] = distinct !{[[META65]], [[META2]], [[META1]]} +; CHECK: [[META66:![0-9]+]] = distinct !{[[META66]], [[META1]], [[META2]]} +; CHECK: [[META67:![0-9]+]] = distinct !{[[META67]], [[META2]], [[META1]]} +; CHECK: [[META68:![0-9]+]] = distinct !{[[META68]], [[META1]], [[META2]]} +; CHECK: [[META69:![0-9]+]] = distinct !{[[META69]], [[META2]], [[META1]]} +; CHECK: [[META70:![0-9]+]] = distinct !{[[META70]], [[META1]], [[META2]]} +; CHECK: [[META71:![0-9]+]] = distinct !{[[META71]], [[META2]], [[META1]]} +; CHECK: [[META72:![0-9]+]] = distinct !{[[META72]], [[META1]], [[META2]]} +; CHECK: [[META73:![0-9]+]] = distinct !{[[META73]], [[META2]], [[META1]]} +; CHECK: [[META74:![0-9]+]] = distinct !{[[META74]], [[META1]], [[META2]]} +; CHECK: [[META75:![0-9]+]] = distinct !{[[META75]], [[META2]], [[META1]]} +; CHECK: [[META76:![0-9]+]] = distinct !{[[META76]], [[META1]], [[META2]]} +; CHECK: [[META77:![0-9]+]] = distinct !{[[META77]], [[META2]], [[META1]]} +; CHECK: [[META78:![0-9]+]] = distinct !{[[META78]], [[META1]], [[META2]]} +; CHECK: [[META79:![0-9]+]] = distinct !{[[META79]], [[META2]], [[META1]]} +; CHECK: [[META80:![0-9]+]] = distinct !{[[META80]], [[META1]], [[META2]]} +; CHECK: [[META81:![0-9]+]] = distinct !{[[META81]], [[META2]], [[META1]]} +; CHECK: [[META82:![0-9]+]] = distinct !{[[META82]], [[META1]], [[META2]]} +; CHECK: [[META83:![0-9]+]] = distinct !{[[META83]], [[META2]], [[META1]]} +; CHECK: [[META84:![0-9]+]] = distinct !{[[META84]], [[META1]], [[META2]]} +; CHECK: [[META85:![0-9]+]] = distinct !{[[META85]], [[META2]], [[META1]]} +; CHECK: [[META86:![0-9]+]] = distinct !{[[META86]], [[META1]], [[META2]]} +; CHECK: [[META87:![0-9]+]] = distinct !{[[META87]], [[META2]], [[META1]]} +; CHECK: [[META88:![0-9]+]] = distinct !{[[META88]], [[META1]], [[META2]]} +; CHECK: [[META89:![0-9]+]] = distinct !{[[META89]], [[META2]], [[META1]]} +; CHECK: [[META90:![0-9]+]] = distinct !{[[META90]], [[META1]], [[META2]]} +; CHECK: [[META91:![0-9]+]] = distinct !{[[META91]], [[META2]], [[META1]]} +; CHECK: [[META92:![0-9]+]] = distinct !{[[META92]], [[META1]], [[META2]]} +; CHECK: [[META93:![0-9]+]] = distinct !{[[META93]], [[META2]], 
[[META1]]} +; CHECK: [[META94:![0-9]+]] = distinct !{[[META94]], [[META1]], [[META2]]} +; CHECK: [[META95:![0-9]+]] = distinct !{[[META95]], [[META2]], [[META1]]} +; CHECK: [[META96:![0-9]+]] = distinct !{[[META96]], [[META1]], [[META2]]} +; CHECK: [[META97:![0-9]+]] = distinct !{[[META97]], [[META2]], [[META1]]} +; CHECK: [[META98:![0-9]+]] = distinct !{[[META98]], [[META1]], [[META2]]} +; CHECK: [[META99:![0-9]+]] = distinct !{[[META99]], [[META2]], [[META1]]} +; CHECK: [[META100:![0-9]+]] = distinct !{[[META100]], [[META1]], [[META2]]} +; CHECK: [[META101:![0-9]+]] = distinct !{[[META101]], [[META2]], [[META1]]} +; CHECK: [[META102:![0-9]+]] = distinct !{[[META102]], [[META1]], [[META2]]} +; CHECK: [[META103:![0-9]+]] = distinct !{[[META103]], [[META2]], [[META1]]} +; CHECK: [[META104:![0-9]+]] = distinct !{[[META104]], [[META1]], [[META2]]} +; CHECK: [[META105:![0-9]+]] = distinct !{[[META105]], [[META2]], [[META1]]} +; CHECK: [[META106:![0-9]+]] = distinct !{[[META106]], [[META1]], [[META2]]} +; CHECK: [[META107:![0-9]+]] = distinct !{[[META107]], [[META2]], [[META1]]} +; CHECK: [[META108:![0-9]+]] = distinct !{[[META108]], [[META1]], [[META2]]} +; CHECK: [[META109:![0-9]+]] = distinct !{[[META109]], [[META2]], [[META1]]} +; CHECK: [[META110:![0-9]+]] = distinct !{[[META110]], [[META1]], [[META2]]} +; CHECK: [[META111:![0-9]+]] = distinct !{[[META111]], [[META2]], [[META1]]} +; CHECK: [[META112:![0-9]+]] = distinct !{[[META112]], [[META1]], [[META2]]} +; CHECK: [[META113:![0-9]+]] = distinct !{[[META113]], [[META2]], [[META1]]} +; CHECK: [[META114:![0-9]+]] = distinct !{[[META114]], [[META1]], [[META2]]} +; CHECK: [[META115:![0-9]+]] = distinct !{[[META115]], [[META2]], [[META1]]} +; CHECK: [[META116:![0-9]+]] = distinct !{[[META116]], [[META1]], [[META2]]} +; CHECK: [[META117:![0-9]+]] = distinct !{[[META117]], [[META2]], [[META1]]} +; CHECK: [[META118:![0-9]+]] = distinct !{[[META118]], [[META1]], [[META2]]} +; CHECK: [[META119:![0-9]+]] = distinct !{[[META119]], [[META2]], [[META1]]} +; CHECK: [[META120:![0-9]+]] = distinct !{[[META120]], [[META1]], [[META2]]} +; CHECK: [[META121:![0-9]+]] = distinct !{[[META121]], [[META2]], [[META1]]} +; CHECK: [[META122:![0-9]+]] = distinct !{[[META122]], [[META1]], [[META2]]} +; CHECK: [[META123:![0-9]+]] = distinct !{[[META123]], [[META2]], [[META1]]} +; CHECK: [[META124:![0-9]+]] = distinct !{[[META124]], [[META1]], [[META2]]} +; CHECK: [[META125:![0-9]+]] = distinct !{[[META125]], [[META2]], [[META1]]} +; CHECK: [[META126:![0-9]+]] = distinct !{[[META126]], [[META1]], [[META2]]} +; CHECK: [[META127:![0-9]+]] = distinct !{[[META127]], [[META2]], [[META1]]} +; CHECK: [[META128:![0-9]+]] = distinct !{[[META128]], [[META1]], [[META2]]} +; CHECK: [[META129:![0-9]+]] = distinct !{[[META129]], [[META2]], [[META1]]} +; CHECK: [[META130:![0-9]+]] = distinct !{[[META130]], [[META1]], [[META2]]} +; CHECK: [[META131:![0-9]+]] = distinct !{[[META131]], [[META2]], [[META1]]} +; CHECK: [[META132:![0-9]+]] = distinct !{[[META132]], [[META1]], [[META2]]} +; CHECK: [[META133:![0-9]+]] = distinct !{[[META133]], [[META2]], [[META1]]} +; CHECK: [[META134:![0-9]+]] = distinct !{[[META134]], [[META1]], [[META2]]} +; CHECK: [[META135:![0-9]+]] = distinct !{[[META135]], [[META2]], [[META1]]} +; CHECK: [[META136:![0-9]+]] = distinct !{[[META136]], [[META1]], [[META2]]} +; CHECK: [[META137:![0-9]+]] = distinct !{[[META137]], [[META2]], [[META1]]} +; CHECK: [[META138:![0-9]+]] = distinct !{[[META138]], [[META1]], [[META2]]} +; CHECK: [[META139:![0-9]+]] = distinct 
!{[[META139]], [[META2]], [[META1]]} +; CHECK: [[META140:![0-9]+]] = distinct !{[[META140]], [[META1]], [[META2]]} +; CHECK: [[META141:![0-9]+]] = distinct !{[[META141]], [[META2]], [[META1]]} +; CHECK: [[META142:![0-9]+]] = distinct !{[[META142]], [[META1]], [[META2]]} +; CHECK: [[META143:![0-9]+]] = distinct !{[[META143]], [[META2]], [[META1]]} +; CHECK: [[META144:![0-9]+]] = distinct !{[[META144]], [[META1]], [[META2]]} +; CHECK: [[META145:![0-9]+]] = distinct !{[[META145]], [[META2]], [[META1]]} +; CHECK: [[META146:![0-9]+]] = distinct !{[[META146]], [[META1]], [[META2]]} +; CHECK: [[META147:![0-9]+]] = distinct !{[[META147]], [[META2]], [[META1]]} +; CHECK: [[META148:![0-9]+]] = distinct !{[[META148]], [[META1]], [[META2]]} +; CHECK: [[META149:![0-9]+]] = distinct !{[[META149]], [[META2]], [[META1]]} +; CHECK: [[META150:![0-9]+]] = distinct !{[[META150]], [[META1]], [[META2]]} +; CHECK: [[META151:![0-9]+]] = distinct !{[[META151]], [[META2]], [[META1]]} +; CHECK: [[META152:![0-9]+]] = distinct !{[[META152]], [[META1]], [[META2]]} +; CHECK: [[META153:![0-9]+]] = distinct !{[[META153]], [[META2]], [[META1]]} +; CHECK: [[META154:![0-9]+]] = distinct !{[[META154]], [[META1]], [[META2]]} +; CHECK: [[META155:![0-9]+]] = distinct !{[[META155]], [[META2]], [[META1]]} +; CHECK: [[META156:![0-9]+]] = distinct !{[[META156]], [[META1]], [[META2]]} +; CHECK: [[META157:![0-9]+]] = distinct !{[[META157]], [[META2]], [[META1]]} +; CHECK: [[META158:![0-9]+]] = distinct !{[[META158]], [[META1]], [[META2]]} +; CHECK: [[META159:![0-9]+]] = distinct !{[[META159]], [[META2]], [[META1]]} +; CHECK: [[META160:![0-9]+]] = distinct !{[[META160]], [[META1]], [[META2]]} +; CHECK: [[META161:![0-9]+]] = distinct !{[[META161]], [[META2]], [[META1]]} +; CHECK: [[META162:![0-9]+]] = distinct !{[[META162]], [[META1]], [[META2]]} +; CHECK: [[META163:![0-9]+]] = distinct !{[[META163]], [[META2]], [[META1]]} +; CHECK: [[META164:![0-9]+]] = distinct !{[[META164]], [[META1]], [[META2]]} +; CHECK: [[META165:![0-9]+]] = distinct !{[[META165]], [[META2]], [[META1]]} +; CHECK: [[META166:![0-9]+]] = distinct !{[[META166]], [[META1]], [[META2]]} +; CHECK: [[META167:![0-9]+]] = distinct !{[[META167]], [[META2]], [[META1]]} +; CHECK: [[META168:![0-9]+]] = distinct !{[[META168]], [[META1]], [[META2]]} +; CHECK: [[META169:![0-9]+]] = distinct !{[[META169]], [[META2]], [[META1]]} +; CHECK: [[META170:![0-9]+]] = distinct !{[[META170]], [[META1]], [[META2]]} +; CHECK: [[META171:![0-9]+]] = distinct !{[[META171]], [[META2]], [[META1]]} +; CHECK: [[META172:![0-9]+]] = distinct !{[[META172]], [[META1]], [[META2]]} +; CHECK: [[META173:![0-9]+]] = distinct !{[[META173]], [[META2]], [[META1]]} +;. 
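
The two tests above exercise the whole pipeline: the loop vectorizer widens a
scalar libm call, and the TLI mapping added in VecFuncs.def replaces it with
the corresponding SLEEF RVV entry point. As a rough sketch of the user-visible
effect (illustrative only, not part of this patch; the build line is
hypothetical and assumes a clang carrying this patch plus a SLEEF build for
riscv64), a plain C loop like the following may end up calling
Sleef_tgammadx_u10rvvm2:

  #include <math.h>

  /* Hypothetical build line:
   *   clang --target=riscv64-linux-gnu -march=rv64gcv -O2 -fveclib=SLEEF -c loop.c
   * With the mappings above, the loop vectorizer may replace the scalar
   * tgamma() calls with a scalable-vector call to Sleef_tgammadx_u10rvvm2
   * per vector-loop iteration, which is what the CHECK lines in
   * veclib-function-calls.ll verify. */
  void tgamma_loop(const double *restrict in, double *restrict out, int n) {
    for (int i = 0; i < n; ++i)
      out[i] = tgamma(in[i]);
  }
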
diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll index 4e4b81e89a3270..75f731f71b7335 100644 --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -4,6 +4,7 @@ ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE ; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=sleefgnuabi -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SLEEFGNUABI +; RUN: opt -mtriple=riscv64-unknown-linux-gnu -vector-library=sleefgnuabi -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SLEEFGNUABI_RISCV ; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=ArmPL -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ARMPL ; COMMON-LABEL: @llvm.compiler.used = appending global @@ -86,14 +87,15 @@ define float @modf_f32(float %in, ptr %iptr) { declare float @modff(float, ptr) #0 define double @sin_f64(double %in) { -; COMMON-LABEL: @sin_f64( -; SVML: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] -; AMDLIBM: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] -; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] -; ACCELERATE: call double @sin(double %{{.*}}) -; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] -; SLEEFGNUABI: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] -; ARMPL: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; COMMON-LABEL: @sin_f64( +; SVML: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; AMDLIBM: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; ACCELERATE: call double @sin(double %{{.*}}) +; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; SLEEFGNUABI: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; SLEEFGNUABI_RISCV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] +; ARMPL: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; No mapping of "sin" to a vector function for Accelerate. 
; ACCELERATE-NOT: _ZGV_LLVM_{{.*}}_sin({{.*}})
  %call = tail call double @sin(double %in)

@@ -143,14 +145,15 @@ define void @sincospi_f32(float %in, ptr %sin, ptr %cos) {
declare void @sincospif(float, ptr, ptr) #0

define float @call_llvm.log10.f32(float %in) {
-; COMMON-LABEL: @call_llvm.log10.f32(
-; SVML: call float @llvm.log10.f32(float %{{.*}})
-; AMDLIBM: call float @llvm.log10.f32(float %{{.*}})
-; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}})
-; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
-; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
-; SLEEFGNUABI: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
-; ARMPL: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
+; COMMON-LABEL: @call_llvm.log10.f32(
+; SVML: call float @llvm.log10.f32(float %{{.*}})
+; AMDLIBM: call float @llvm.log10.f32(float %{{.*}})
+; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}})
+; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
+; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
+; SLEEFGNUABI: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
+; SLEEFGNUABI_RISCV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
+; ARMPL: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
; No mapping of "llvm.log10.f32" to a vector function for SVML.
; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
; AMDLIBM-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
@@ -200,6 +203,9 @@ declare float @llvm.log10.f32(float) #0
; SLEEFGNUABI: declare <4 x float> @_ZGVnN4v_log10f(<4 x float>)
; SLEEFGNUABI: declare <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float>, <vscale x 4 x i1>)
+; SLEEFGNUABI_RISCV: declare <vscale x 2 x double> @Sleef_sindx_u10rvvm2(<vscale x 2 x double>)
+; SLEEFGNUABI_RISCV: declare <vscale x 4 x float> @Sleef_log10fx_u10rvvm2(<vscale x 4 x float>)
+
; ARMPL: declare <2 x double> @armpl_vmodfq_f64(<2 x double>, ptr)
; ARMPL: declare <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double>, ptr, <vscale x 2 x i1>)
; ARMPL: declare <4 x float> @armpl_vmodfq_f32(<4 x float>, ptr)
@@ -266,6 +272,11 @@ attributes #0 = { nounwind readnone }
; SLEEFGNUABI-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(_ZGVnN4v_log10f),
; SLEEFGNUABI-SAME: _ZGVsMxv_llvm.log10.f32(_ZGVsMxv_log10f)" }
+; SLEEFGNUABI_RISCV: attributes #[[SIN]] = { "vector-function-abi-variant"=
+; SLEEFGNUABI_RISCV-SAME: "_ZGVvNxv_sin(Sleef_sindx_u10rvvm2)" }
+; SLEEFGNUABI_RISCV: attributes #[[LOG10]] = { "vector-function-abi-variant"=
+; SLEEFGNUABI_RISCV-SAME: "_ZGVvNxv_llvm.log10.f32(Sleef_log10fx_u10rvvm2)" }
+
; ARMPL: attributes #[[MODF]] = { "vector-function-abi-variant"=
; ARMPL-SAME: "_ZGV_LLVM_N2vl8_modf(armpl_vmodfq_f64),
; ARMPL-SAME: _ZGVsMxvl8_modf(armpl_svmodf_f64_x)" }
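
A note on the mangled names checked throughout these tests: in a pair such as
_ZGVvNxv_sin(Sleef_sindx_u10rvvm2), _ZGV is the vector-function ABI prefix;
the next letter names the ISA ('v' for RVV in this patch, alongside 'n' for
NEON and 's' for SVE in the AArch64 checks); 'N' marks an unmasked variant,
matching the NOMASK entries in VecFuncs.def, while 'M' marks a masked one;
'x' denotes a scalable vectorization factor. The remaining letters describe
the parameters: one 'v' per vector argument (hence 'vv' and 'vvv' for the
two- and three-argument functions) and 'l8'/'l4' for a linear pointer
argument stepping by 8 or 4 bytes per lane, as in the modf and sincos
mappings. After the final underscore comes the scalar name, with the SLEEF
vector function it maps to in parentheses.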