From 446aae8c7b50def60ce91492613739b4cbbc3ddd Mon Sep 17 00:00:00 2001 From: Llibert Areste Salo Date: Wed, 21 Feb 2024 08:34:35 +0000 Subject: [PATCH] changes to simd --- Examples/Fluid_Kerr/PerfectFluidLevel.cpp | 2 +- Examples/Fluid_Kerr/PrimitiveRecovery.hpp | 6 ++++-- Source/simd/arm/neon.hpp | 17 +++++++++++++++++ Source/simd/arm/sve.hpp | 12 ++++++++++++ Source/simd/simd.hpp | 10 ++++++++++ Source/simd/x64/avx.hpp | 12 ++++++++++++ Source/simd/x64/avx512.hpp | 12 ++++++++++++ Source/simd/x64/sse.hpp | 12 ++++++++++++ 8 files changed, 80 insertions(+), 3 deletions(-) diff --git a/Examples/Fluid_Kerr/PerfectFluidLevel.cpp b/Examples/Fluid_Kerr/PerfectFluidLevel.cpp index 8727982c4..a7cb1d768 100644 --- a/Examples/Fluid_Kerr/PerfectFluidLevel.cpp +++ b/Examples/Fluid_Kerr/PerfectFluidLevel.cpp @@ -90,7 +90,7 @@ void PerfectFluidLevel::specificEvalRHS(GRLevelData &a_soln, GRLevelData &a_rhs, BoxLoops::loop(make_compute_pack(TraceARemoval(), PositiveDensity(), PositiveChiAndAlpha(), PrimitiveRecovery()), - a_soln, a_soln, INCLUDE_GHOST_CELLS, disable_simd()); + a_soln, a_soln, INCLUDE_GHOST_CELLS/*, disable_simd()*/); // Calculate MatterCCZ4 right hand side with matter_t = ScalarField EoS eos(m_p.eos_params); diff --git a/Examples/Fluid_Kerr/PrimitiveRecovery.hpp b/Examples/Fluid_Kerr/PrimitiveRecovery.hpp index e438753ac..599156277 100644 --- a/Examples/Fluid_Kerr/PrimitiveRecovery.hpp +++ b/Examples/Fluid_Kerr/PrimitiveRecovery.hpp @@ -58,8 +58,10 @@ class PrimitiveRecovery int i = 0; - while (diff > tolerance) - // while (simd_compare_lt(tolerance, diff)) + auto cond = simd_all_false(simd_compare_lt(diff, tolerance)); + //while (simd_all_false(simd_compare_lt(diff, tolerance))) + while (i<20) + // while (simd_compare_lt(tolerance, diff)) { i++; Wa = sqrt(pow(xa, 2.) / (pow(xa, 2.) - r)); diff --git a/Source/simd/arm/neon.hpp b/Source/simd/arm/neon.hpp index 4525bd18d..b80de2304 100644 --- a/Source/simd/arm/neon.hpp +++ b/Source/simd/arm/neon.hpp @@ -95,6 +95,14 @@ template <> struct simd : public simd_base return vmaxq_f64(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + uint64x2_t high_bits = vshrq_n_u64(input, 63); + //Lanes are indexed like bits (big-endian) + int moved_mask = vgetq_lane_u64(high_bits, 0) | (vgetq_lane_u64(high_bits, 1) << 1); + return moved_mask == 0 ? true : false; + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return vsqrtq_f64(a); } }; @@ -170,6 +178,15 @@ template <> struct simd : public simd_base return vmaxq_f32(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + uint32x4_t high_bits = vshrq_n_u32(input, 31); + //Lanes are indexed like bits (big-endian) + int moved_mask = vgetq_lane_u32(high_bits, 0) | (vgetq_lane_u32(high_bits, 1) << 1) + | (vgetq_lane_u32(high_bits, 2) << 2) (vgetq_lane_u32(high_bits, 3) << 3); + return moved_mask == 0 ? true : false; + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return vsqrtq_f32(a); } }; diff --git a/Source/simd/arm/sve.hpp b/Source/simd/arm/sve.hpp index b5ed9d127..c4a368ab0 100644 --- a/Source/simd/arm/sve.hpp +++ b/Source/simd/arm/sve.hpp @@ -122,6 +122,12 @@ template <> struct simd : public simd_base return svmax_f64_z(svptrue_b64(), a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + //Would be nice to implement a movemask here, for extensions to lane-specific info + return !svptest_any(svptrue_b64(), cond); + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return svsqrt_f64_z(svptrue_b64(), a); @@ -206,6 +212,12 @@ template <> struct simd : public simd_base return svmax_f32_z(svptrue_b32(), a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + //Would be nice to implement a movemask here, for extensions to lane-specific info + return !svptest_any(svptrue_b32(), cond); + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return svsqrt_f32_z(svptrue_b32(), a); diff --git a/Source/simd/simd.hpp b/Source/simd/simd.hpp index 03f36bdc3..940152fee 100644 --- a/Source/simd/simd.hpp +++ b/Source/simd/simd.hpp @@ -126,6 +126,11 @@ template struct simd { return (a > b) ? a : b; } + + friend ALWAYS_INLINE bool simd_all_false(const bool cond) + { + return !cond; + } }; // Define all the simd-functions whose implementation does not depend on the @@ -170,6 +175,11 @@ template ALWAYS_INLINE t simd_max(const t &a, const t &b) return (a > b) ? a : b; } +template ALWAYS_INLINE bool simd_all_false(const bool cond) +{ + return !cond; +} + //<-- End: Defining the simd specific calls for non-simd datatypes. #include "simdify.hpp" diff --git a/Source/simd/x64/avx.hpp b/Source/simd/x64/avx.hpp index c11e74fa9..5681e12e2 100644 --- a/Source/simd/x64/avx.hpp +++ b/Source/simd/x64/avx.hpp @@ -109,6 +109,12 @@ template <> struct simd : public simd_base return _mm256_max_pd(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + int mask = _mm256_movemask_pd(cond); + return mask == 0 ? true : false; + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return _mm256_sqrt_pd(a); @@ -193,6 +199,12 @@ template <> struct simd : public simd_base return _mm256_max_ps(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + int mask = _mm256_movemask_ps(cond); + return mask == 0 ? true : false; + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return _mm256_sqrt_ps(a); diff --git a/Source/simd/x64/avx512.hpp b/Source/simd/x64/avx512.hpp index b9efab03e..3ba08a44b 100644 --- a/Source/simd/x64/avx512.hpp +++ b/Source/simd/x64/avx512.hpp @@ -103,6 +103,12 @@ template <> struct simd : public simd_base return _mm512_max_pd(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + int mask = static_cast(cond); + return mask == 0 ? true : false; + } + #ifdef __AVX512ER__ friend ALWAYS_INLINE simd exp2(const simd &a) { @@ -198,6 +204,12 @@ template <> struct simd : public simd_base return _mm512_max_ps(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + int mask = static_cast(cond); + return mask == 0 ? true : false; + } + #ifdef __AVX512ER__ friend ALWAYS_INLINE simd exp2(const simd &a) { diff --git a/Source/simd/x64/sse.hpp b/Source/simd/x64/sse.hpp index 3aa46ab32..e6a548c61 100644 --- a/Source/simd/x64/sse.hpp +++ b/Source/simd/x64/sse.hpp @@ -121,6 +121,12 @@ template <> struct simd : public simd_base return _mm_max_pd(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + int mask = _mm_movemask_pd(cond); + return mask == 0 ? true : false; + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return _mm_sqrt_pd(a); @@ -215,6 +221,12 @@ template <> struct simd : public simd_base return _mm_max_ps(a, b); } + friend ALWAYS_INLINE bool simd_all_false(const mask_t cond) + { + int mask = _mm_movemask_ps(cond); + return mask == 0 ? true : false; + } + friend ALWAYS_INLINE simd simd_sqrt(const simd &a) { return _mm_sqrt_ps(a);