From e20b78987346dc3c3b7ed539ff28a2dedc1392d8 Mon Sep 17 00:00:00 2001 From: Lee James O'Riordan Date: Thu, 30 May 2024 14:56:12 -0400 Subject: [PATCH] Fix ambiguous function overload with newer compiler when using streaming AVX ops (#729) * Fix ambiguous function overload with newer compiler when using streaming ops * Auto update version from '0.37.0-dev11' to '0.37.0-dev12' * Update changelog * Fix incorrect label in github CI * Auto update version from '0.37.0-dev13' to '0.37.0-dev14' * Update .github/workflows/tests_linux_cpp.yml Co-authored-by: Vincent Michaud-Rioux * Auto update version from '0.37.0-dev17' to '0.37.0-dev18' * Fix order of stream vs store * Auto update version from '0.37.0-dev18' to '0.37.0-dev23' --------- Co-authored-by: ringo-but-quantum Co-authored-by: Vincent Michaud-Rioux --- .github/CHANGELOG.md | 10 +++-- .github/workflows/tests_linux_cpp.yml | 16 ++++---- pennylane_lightning/core/_version.py | 2 +- .../cpu_kernels/avx_common/AVX2Concept.hpp | 28 ++++++++++++- .../cpu_kernels/avx_common/AVX512Concept.hpp | 41 +++++++++++++++---- 5 files changed, 75 insertions(+), 22 deletions(-) diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 651a43ec7d..18de54b56a 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -48,19 +48,21 @@ * Changed the name of `lightning.tensor` to `default.tensor` with the `quimb` backend. [(#719)](https://github.com/PennyLaneAI/pennylane-lightning/pull/719) -<<<<<<< maa/probs-py-cpp-dispatch * Patch the C++ `Measurements.probs(wires)` method in Lightning-Qubit and Lighnting-Kokkos to `Measurements.probs()` when called with all wires. This will trigger a more optimized implementation for calculating the probabilities of the entire system. [(#744)](https://github.com/PennyLaneAI/pennylane-lightning/pull/744) -======= + * Remove the daily schedule from the "Compat Check w/PL - release/release" GitHub action. [(#746)](https://github.com/PennyLaneAI/pennylane-lightning/pull/746) ->>>>>>> master + ### Documentation ### Bug fixes +* Fix AVX streaming operation support with newer GCC. + [(#729)](https://github.com/PennyLaneAI/pennylane-lightning/pull/729) + * Revert changes calling `IMAG`, `ONE`, `ZERO` templated functions in Kokkos kernels since they are incompatible with device execution. [(#733)](https://github.com/PennyLaneAI/pennylane-lightning/pull/733) @@ -80,7 +82,7 @@ This release contains contributions from (in alphabetical order): -Ali Asadi, Amintor Dusko, Pietropaolo Frisoni, Vincent Michaud-Rioux, Mudit Pandey, Shuli Shu +Ali Asadi, Amintor Dusko, Pietropaolo Frisoni, Vincent Michaud-Rioux, Lee James O'Riordan, Mudit Pandey, Shuli Shu --- diff --git a/.github/workflows/tests_linux_cpp.yml b/.github/workflows/tests_linux_cpp.yml index fb4461461c..2f538b5daf 100644 --- a/.github/workflows/tests_linux_cpp.yml +++ b/.github/workflows/tests_linux_cpp.yml @@ -46,12 +46,12 @@ jobs: matrix: pl_backend: ["lightning_qubit"] enable_kernel_omp: ["OFF", "ON"] - enable_kernel_avx_stream: ["OFF", "ON"] + enable_kernel_avx_streaming: ["OFF", "ON"] exclude: - enable_kernel_omp: OFF - enable_kernel_avx_stream: ON + enable_kernel_avx_streaming: ON timeout-minutes: 60 - name: C++ Tests (${{ matrix.pl_backend }}, ENABLE_KERNEL_OMP=${{ matrix.enable_kernel_omp }}, ENABLE_KERNEL_AVX_STREAM=${{ matrix.enable_kernel_avx_stream }}) + name: C++ Tests (${{ matrix.pl_backend }}, ENABLE_KERNEL_OMP=${{ matrix.enable_kernel_omp }}, ENABLE_KERNEL_AVX_STREAMING=${{ matrix.enable_kernel_avx_streaming }}) runs-on: ${{ needs.determine_runner.outputs.runner_group }} steps: @@ -77,7 +77,7 @@ jobs: -DPL_BACKEND=${{ matrix.pl_backend }} \ -DCMAKE_CXX_COMPILER=$(which g++-$GCC_VERSION) \ -DENABLE_COVERAGE=ON \ - -DLQ_ENABLE_KERNEL_AVX_STREAM=${{ matrix.enable_kernel_avx_stream }} \ + -DLQ_ENABLE_KERNEL_AVX_STREAMING=${{ matrix.enable_kernel_avx_streaming }} \ -DLQ_ENABLE_KERNEL_OMP=${{ matrix.enable_kernel_omp }} cmake --build ./Build @@ -87,13 +87,13 @@ jobs: for file in *runner ; do ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done; lcov --directory . -b ../pennylane_lightning/core/src --capture --output-file coverage.info lcov --remove coverage.info '/usr/*' --output-file coverage.info - mv coverage.info coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }}.info + mv coverage.info coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }}.info - name: Upload test results uses: actions/upload-artifact@v3 if: always() with: - name: ubuntu-tests-reports-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }} + name: ubuntu-tests-reports-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }} path: | ./Build/tests/results/ @@ -102,8 +102,8 @@ jobs: - name: Upload code coverage results uses: actions/upload-artifact@v3 with: - name: ubuntu-codecov-results-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }} - path: ./Build/coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }}.info + name: ubuntu-codecov-results-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }} + path: ./Build/coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }}.info if-no-files-found: error cpptestswithOpenBLAS: diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index 110ca74c5f..9d704fc017 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.37.0-dev22" +__version__ = "0.37.0-dev23" diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX2Concept.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX2Concept.hpp index 34fcbbe67d..1a29403dfe 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX2Concept.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX2Concept.hpp @@ -55,6 +55,18 @@ template struct AVX2Concept { } } + PL_FORCE_INLINE + static auto load(const PrecisionT *p) -> IntrinsicType { + if constexpr (std::is_same_v) { + return _mm256_load_ps(p); + } else if (std::is_same_v) { + return _mm256_load_pd(p); + } else { + static_assert(std::is_same_v || + std::is_same_v); + } + } + PL_FORCE_INLINE static auto loadu(const std::complex *p) -> IntrinsicType { if constexpr (std::is_same_v) { @@ -91,6 +103,18 @@ template struct AVX2Concept { } } + PL_FORCE_INLINE + static void store_(PrecisionT *p, IntrinsicType value) { + if constexpr (std::is_same_v) { + _mm256_store_ps(p, value); + } else if (std::is_same_v) { + _mm256_store_pd(p, value); + } else { + static_assert(std::is_same_v || + std::is_same_v); + } + } + PL_FORCE_INLINE static void stream_(std::complex *p, IntrinsicType value) { if constexpr (std::is_same_v) { @@ -122,9 +146,9 @@ template struct AVX2Concept { PL_FORCE_INLINE static void store(PrecisionT *p, IntrinsicType value) { #ifdef PL_LQ_KERNEL_AVX_STREAMING - store_(p, value); -#else stream_(p, value); +#else + store_(p, value); #endif } diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX512Concept.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX512Concept.hpp index 4fb2e3a449..279a2adfac 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX512Concept.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX512Concept.hpp @@ -46,6 +46,17 @@ template struct AVX512Concept { PL_FORCE_INLINE static auto load(std::complex *p) -> IntrinsicType { + if constexpr (std::is_same_v) { + return _mm512_load_ps(reinterpret_cast(p)); + } else if (std::is_same_v) { + return _mm512_load_pd(reinterpret_cast(p)); + } else { + static_assert(std::is_same_v || + std::is_same_v); + } + } + PL_FORCE_INLINE + static auto load(PrecisionT *p) -> IntrinsicType { if constexpr (std::is_same_v) { return _mm512_load_ps(p); } else if (std::is_same_v) { @@ -59,9 +70,9 @@ template struct AVX512Concept { PL_FORCE_INLINE static auto loadu(std::complex *p) -> IntrinsicType { if constexpr (std::is_same_v) { - return _mm512_loadu_ps(p); + return _mm512_loadu_ps(reinterpret_cast(p)); } else if (std::is_same_v) { - return _mm512_loadu_pd(p); + return _mm512_loadu_pd(reinterpret_cast(p)); } else { static_assert(std::is_same_v || std::is_same_v); @@ -82,6 +93,18 @@ template struct AVX512Concept { PL_FORCE_INLINE static void store_(std::complex *p, IntrinsicType value) { + if constexpr (std::is_same_v) { + _mm512_store_ps(reinterpret_cast(p), value); + } else if (std::is_same_v) { + _mm512_store_pd(reinterpret_cast(p), value); + } else { + static_assert(std::is_same_v || + std::is_same_v); + } + } + + PL_FORCE_INLINE + static void store_(PrecisionT *p, IntrinsicType value) { if constexpr (std::is_same_v) { _mm512_store_ps(p, value); } else if (std::is_same_v) { @@ -95,9 +118,9 @@ template struct AVX512Concept { PL_FORCE_INLINE static void stream_(std::complex *p, IntrinsicType value) { if constexpr (std::is_same_v) { - _mm512_stream_ps(p, value); + _mm512_stream_ps(reinterpret_cast(p), value); } else if (std::is_same_v) { - _mm512_stream_pd(p, value); + _mm512_stream_pd(reinterpret_cast(p), value); } else { static_assert(std::is_same_v || std::is_same_v); @@ -118,15 +141,19 @@ template struct AVX512Concept { PL_FORCE_INLINE static void store(std::complex *p, IntrinsicType value) { - store(reinterpret_cast(p), value); +#ifdef PL_LQ_KERNEL_AVX_STREAMING + stream_(reinterpret_cast(p), value); +#else + store_(reinterpret_cast(p), value); +#endif } PL_FORCE_INLINE static void store(PrecisionT *p, IntrinsicType value) { #ifdef PL_LQ_KERNEL_AVX_STREAMING - store_(p, value); -#else stream_(p, value); +#else + store_(p, value); #endif }