openjdk · XiaohongGong · Jul 18, 2023 · Oct 23, 2023 · Oct 23, 2023 · Nov 14, 2023
diff --git a/doc/building.md b/doc/building.md
@@ -546,6 +546,20 @@ Hotspot.
 Use `--with-libffi=<path>` if `configure` does not properly locate your libffi
 files.
 
+### libsleef
+
+libsleef, the [SIMD Library for Evaluating Elementary Functions](
+https://sleef.org/), is required when building libvmath.so on Linux+AArch64
+platforms.
+
+  * To install on an apt-based Linux, try running `sudo apt-get install
+    libsleef-dev`.
+  * To install on an rpm-based Linux, try running `sudo yum install
+    sleef-devel`.
+
+Use `--with-libsleef=<path>` if `configure` does not properly locate your libsleef
+files.
+
 ## Build Tools Requirements
 
 ### Autoconf

diff --git a/make/autoconf/lib-vmath.m4 b/make/autoconf/lib-vmath.m4
@@ -0,0 +1,105 @@
+#
+# Copyright (c) 2023, Arm Limited. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.  Oracle designates this
+# particular file as subject to the "Classpath" exception as provided
+# by Oracle in the LICENSE file that accompanied this code.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+###############################################################################
+#
+# Setup the vmath framework: locate its dependency libsleef and, on aarch64,
+# probe the compiler for SVE. Exports LIBSLEEF_FOUND, LIBVMATH_CFLAGS, LIBVMATH_LIBS.
+AC_DEFUN_ONCE([LIB_SETUP_VMATH],
+[
+  AC_ARG_WITH(libsleef, [AS_HELP_STRING([--with-libsleef],
+      [specify prefix directory for the libsleef library
+      (expecting the libraries under PATH/lib and the headers under PATH/include)])])
+
+  LIBSLEEF_FOUND=no
+  LIBVMATH_CFLAGS=
+  LIBVMATH_LIBS=
+
+  if test "x${with_libsleef}" = xno; then
+    AC_MSG_NOTICE([libvmath will not be compiled, because its dependence libsleef is disabled in command line])
+  else
+    # A bare --with-libsleef yields the value yes which is not a prefix: fall through to the system search
+    if test "x${with_libsleef}" != x && test "x${with_libsleef}" != xyes; then
+      AC_MSG_CHECKING([the specified LIBSLEEF])
+      if test -e "${with_libsleef}/lib/libsleef.so" &&
+         test -e "${with_libsleef}/include/sleef.h"; then
+        LIBSLEEF_FOUND=yes
+        LIBVMATH_LIBS="-L${with_libsleef}/lib"
+        LIBVMATH_CFLAGS="-I${with_libsleef}/include"
+      else
+        AC_MSG_RESULT([no])
+        AC_MSG_ERROR([Could not locate libsleef.so or sleef.h in ${with_libsleef}])
+      fi
+      AC_MSG_RESULT([${LIBSLEEF_FOUND}])
+    fi
+
+    # No usable prefix was given: try pkg-config when no sysroot is set and then a plain header check
+    if test "x$SYSROOT" = x && test "x${LIBSLEEF_FOUND}" = "xno"; then
+      PKG_CHECK_MODULES([LIBSLEEF], [sleef], [LIBSLEEF_FOUND=yes], [LIBSLEEF_FOUND=no])
+    fi
+    if test "x$LIBSLEEF_FOUND" = xno; then
+      AC_CHECK_HEADERS([sleef.h],
+          [LIBSLEEF_FOUND=yes],
+          []
+      )
+    fi
+
+    if test "x${LIBSLEEF_FOUND}" = "xyes"; then
+      LIBVMATH_LIBS="${LIBVMATH_LIBS} -lsleef"
+
+      if test "x${OPENJDK_TARGET_CPU}" = "xaarch64"; then
+        # Compile a tiny arm_sve.h program with SVE_CFLAGS and keep the flag only if that works
+        SVE_FEATURE_SUPPORT=no
+        SVE_CFLAGS="-march=armv8-a+sve"
+
+        AC_LANG_PUSH(C)
+        OLD_CFLAGS="$CFLAGS"
+        CFLAGS="$CFLAGS $SVE_CFLAGS"
+
+        AC_MSG_CHECKING([if ARM SVE feature is supported])
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <arm_sve.h>],
+            [
+              svint32_t r = svdup_n_s32(1);
+              return 0;
+            ])],
+            [
+              SVE_FEATURE_SUPPORT=yes
+              LIBVMATH_CFLAGS="${LIBVMATH_CFLAGS} ${SVE_CFLAGS}"
+            ],
+            []
+        )
+        AC_MSG_RESULT([${SVE_FEATURE_SUPPORT}])
+
+        CFLAGS="$OLD_CFLAGS"
+        AC_LANG_POP(C)
+      fi
+    fi
+
+  fi
+
+  AC_SUBST(LIBSLEEF_FOUND)
+  AC_SUBST(LIBVMATH_CFLAGS)
+  AC_SUBST(LIBVMATH_LIBS)
+])
diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4
@@ -32,6 +32,7 @@ m4_include([lib-fontconfig.m4])
 m4_include([lib-freetype.m4])
 m4_include([lib-hsdis.m4])
 m4_include([lib-std.m4])
+m4_include([lib-vmath.m4])
 m4_include([lib-x11.m4])
 
 m4_include([lib-tests.m4])
@@ -125,6 +126,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES],
   LIB_SETUP_HSDIS
   LIB_SETUP_LIBFFI
   LIB_SETUP_MISC_LIBS
+  LIB_SETUP_VMATH
   LIB_SETUP_X11
 
   LIB_TESTS_SETUP_GTEST

diff --git a/make/autoconf/spec.gmk.in b/make/autoconf/spec.gmk.in
@@ -889,6 +889,9 @@ USE_EXTERNAL_LIBPNG:=@USE_EXTERNAL_LIBPNG@
 PNG_LIBS:=@PNG_LIBS@
 PNG_CFLAGS:=@PNG_CFLAGS@
 
+LIBSLEEF_FOUND:=@LIBSLEEF_FOUND@
+LIBVMATH_CFLAGS:=@LIBVMATH_CFLAGS@
+LIBVMATH_LIBS:=@LIBVMATH_LIBS@
 ####################################################
 #
 # Misc

diff --git a/make/modules/jdk.incubator.vector/Lib.gmk b/make/modules/jdk.incubator.vector/Lib.gmk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -39,4 +39,16 @@ ifeq ($(call isTargetOs, linux windows)+$(call isTargetCpu, x86_64)+$(INCLUDE_CO
   TARGETS += $(BUILD_LIBJSVML)
 endif
 
+ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, aarch64)+$(INCLUDE_COMPILER2)+$(LIBSLEEF_FOUND), true+true+true+yes)
+  $(eval $(call SetupJdkLibrary, BUILD_LIBVMATH, \
+      NAME := vmath, \
+      CFLAGS := $(CFLAGS_JDKLIB) $(LIBVMATH_CFLAGS) -fvisibility=default, \
+      LDFLAGS := $(LDFLAGS_JDKLIB) \
+          $(call SET_SHARED_LIBRARY_ORIGIN), \
+      LIBS := $(JDKLIB_LIBS) $(LIBVMATH_LIBS) \
+  ))
+  # LIBVMATH_CFLAGS/LIBVMATH_LIBS come from configure (lib-vmath.m4), which also sets LIBSLEEF_FOUND.
+  TARGETS += $(BUILD_LIBVMATH)
+endif # linux + aarch64 + INCLUDE_COMPILER2 + LIBSLEEF_FOUND
+
 ################################################################################
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -2326,14 +2326,18 @@ const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length)
   return new TypeVectMask(elemTy, length);
 }
 
-// Vector calling convention not yet implemented.
 bool Matcher::supports_vector_calling_convention(void) {
-  return false;
+  return EnableVectorSupport && UseVectorStubs;  // available only when both flags are enabled
 }
 
 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
-  Unimplemented();
-  return OptoRegPair(0, 0);
+  assert(EnableVectorSupport && UseVectorStubs, "sanity");
+  // The pair always begins at V0; VecX and VecA end at V0_K, anything else at V0_H.
+  const bool ends_at_k = (ideal_reg == Op_VecA || ideal_reg == Op_VecX);
+  const int pair_lo = V0_num;
+  const int pair_hi = ends_at_k ? V0_K_num : V0_H_num;
+  // OptoRegPair is constructed high-first.
+  return OptoRegPair(pair_hi, pair_lo);
 }
 
 // Is this branch offset short enough that a short branch can be used?
@@ -16548,6 +16552,22 @@ instruct CallLeafDirect(method meth)
   ins_pipe(pipe_class_call);
 %}
 
+// Call Runtime Instruction without safepoint and with vector arguments (matches CallLeafVector)
+instruct CallLeafDirectVector(method meth)
+%{
+  match(CallLeafVector);
+
+  effect(USE meth);
+
+  ins_cost(CALL_COST);
+
+  format %{ "CALL, runtime leaf vector $meth" %}
+
+  ins_encode(aarch64_enc_java_to_runtime(meth));
+
+  ins_pipe(pipe_class_call);
+%}
+
 // Call Runtime Instruction
 
 instruct CallLeafNoFPDirect(method meth)

diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -891,7 +891,20 @@ static int c_calling_convention_priv(const BasicType *sig_bt,
 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                              uint num_bits,
                                              uint total_args_passed) {
-  Unimplemented();
+  // Arguments are assigned to v0..v7 only; longer argument lists are not supported yet.
+  assert(total_args_passed <= Argument::n_float_register_parameters_c, "unsupported");
+  assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
+
+  static const FloatRegister VEC_ArgReg[Argument::n_float_register_parameters_c] = {
+    v0, v1, v2, v3, v4, v5, v6, v7
+  };
+
+  // Offset handed to VMReg::next() for the pair's second half: 1 for 64-bit vectors, 3 otherwise (SVE reuses the 128-bit NEON registers).
+  const int second_offset = (num_bits != 64) ? 3 : 1;
+  for (uint arg = 0; arg < total_args_passed; ++arg) {
+    const VMReg first = VEC_ArgReg[arg]->as_VMReg();
+    regs[arg].set_pair(first->next(second_offset), first);
+  }
   return 0;
 }
 

diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -42,6 +42,7 @@
 #include "oops/oop.inline.hpp"
 #include "prims/methodHandles.hpp"
 #include "prims/upcallLinker.hpp"
+#include "runtime/arguments.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/continuation.hpp"
 #include "runtime/continuationEntry.inline.hpp"
@@ -8506,6 +8507,72 @@ class StubGenerator: public StubCodeGenerator {
     if (UseAdler32Intrinsics) {
       StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
     }
+
+#ifdef COMPILER2
+    // Get native vector math stub routine addresses
+    void* libvmath = nullptr;
+    char ebuf[1024];
+    char dll_name[JVM_MAXPATHLEN];
+    if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "vmath")) {
+      libvmath = os::dll_load(dll_name, ebuf, sizeof ebuf);
+    }
+    if (libvmath != nullptr) {
+      // Method naming convention
+      //   All the methods are named as <OP><T><N>_<U><suffix>
+      //   Where:
+      //     <OP>     is the operation name, e.g. sin
+      //     <T>      is optional to indicate float/double
+      //              "f/d" for vector float/double operation
+      //     <N>      is the number of elements in the vector
+      //              "2/4" for neon, and "x" for sve
+      //     <U>      is the precision level
+      //              "u10/u05" represents 1.0/0.5 ULP error bounds
+      //               We use "u10" for all operations by default,
+      //               but for functions that do not have u10 support we use "u05" instead
+      //     <suffix> indicates neon/sve
+      //              "sve/advsimd" for sve/neon implementations
+      //     e.g. sinfx_u10sve is the method for computing vector float sin using SVE instructions
+      //          cosd2_u10advsimd is the method for computing 2 elements vector double cos using NEON instructions
+      //
+      log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "vmath" JNI_LIB_SUFFIX, p2i(libvmath));
+
+      // Math vector stubs implemented with SVE for scalable vector size.
+      if (UseSVE > 0) {
+        for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+          int vop = VectorSupport::VECTOR_OP_MATH_START + op;
+
+          // The native library does not support u10 level of "hypot".
+          const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
+
+          snprintf(ebuf, sizeof(ebuf), "%sfx_%ssve", VectorSupport::mathname[op], ulf);
+          StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libvmath, ebuf);
+
+          snprintf(ebuf, sizeof(ebuf), "%sdx_%ssve", VectorSupport::mathname[op], ulf);
+          StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_SCALABLE][op] = (address)os::dll_lookup(libvmath, ebuf);
+        }
+      }
+
+      // Math vector stubs implemented with NEON for 64/128 bits vector size.
+      for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+        int vop = VectorSupport::VECTOR_OP_MATH_START + op;
+
+        // The native library does not support u10 level of "hypot".
+        const char* ulf = (vop == VectorSupport::VECTOR_OP_HYPOT) ? "u05" : "u10";
+
+        snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf);
+        StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libvmath, ebuf);
+
+        snprintf(ebuf, sizeof(ebuf), "%sf4_%sadvsimd", VectorSupport::mathname[op], ulf);
+        StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libvmath, ebuf);
+
+        snprintf(ebuf, sizeof(ebuf), "%sd2_%sadvsimd", VectorSupport::mathname[op], ulf);
+        StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libvmath, ebuf);
+      }
+    } else {
+      log_info(library)("Failed to load native vector math library!");
+    }
+#endif // COMPILER2
+
 #endif // COMPILER2_OR_JVMCI
   }
 

diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -4238,41 +4238,41 @@ void StubGenerator::generate_compiler_stubs() {
 
     log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
     if (UseAVX > 2) {
-      for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
-        int vop = VectorSupport::VECTOR_OP_SVML_START + op;
+      for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+        int vop = VectorSupport::VECTOR_OP_MATH_START + op;
         if ((!VM_Version::supports_avx512dq()) &&
             (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
           continue;
         }
-        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
+        snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::mathname[op]);
         StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
+        snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::mathname[op]);
         StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
       }
     }
     const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
-    for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
-      int vop = VectorSupport::VECTOR_OP_SVML_START + op;
+    for (int op = 0; op < VectorSupport::NUM_VECTOR_OP_MATH; op++) {
+      int vop = VectorSupport::VECTOR_OP_MATH_START + op;
       if (vop == VectorSupport::VECTOR_OP_POW) {
         continue;
       }
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
 
-      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
+      snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::mathname[op], avx_sse_str);
       StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
     }
   }

diff --git a/src/hotspot/share/opto/callnode.cpp b/src/hotspot/share/opto/callnode.cpp
@@ -755,7 +755,7 @@ Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
 
     if (Opcode() == Op_CallLeafVector) {
       // If the return is in vector, compute appropriate regmask taking into account the whole range
-      if(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ) {
+      if(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ) {
         if(OptoReg::is_valid(regs.second())) {
           for (OptoReg::Name r = regs.first(); r <= regs.second(); r = OptoReg::add(r, 1)) {
             rm.Insert(r);

diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp
@@ -363,7 +363,7 @@ class LibraryCallKit : public GraphKit {
   bool inline_index_vector();
   bool inline_index_partially_in_upper_range();
 
-  Node* gen_call_to_svml(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
+  Node* gen_call_to_vector_math(int vector_api_op_id, BasicType bt, int num_elem, Node* opd1, Node* opd2);
 
   enum VectorMaskUseType {
     VecMaskUseLoad  = 1 << 0,