Fix ambiguous function overload with newer compiler when using streaming AVX ops (#729)

* Fix ambiguous function overload with newer compiler when using streaming ops
* Auto update version from '0.37.0-dev11' to '0.37.0-dev12'
* Update changelog
* Fix incorrect label in github CI
* Auto update version from '0.37.0-dev13' to '0.37.0-dev14'
* Update .github/workflows/tests_linux_cpp.yml

  Co-authored-by: Vincent Michaud-Rioux <[email protected]>
* Auto update version from '0.37.0-dev17' to '0.37.0-dev18'
* Fix order of stream vs store
* Auto update version from '0.37.0-dev18' to '0.37.0-dev23'

---------

Co-authored-by: ringo-but-quantum <[email protected]>
Co-authored-by: Vincent Michaud-Rioux <[email protected]>
PennyLaneAI · May 30, 2024 · e20b789 · e20b789
1 parent 15096e7
commit e20b789
Show file tree

Hide file tree

Showing 5 changed files with 75 additions and 22 deletions.
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
@@ -48,19 +48,21 @@
 * Changed the name of `lightning.tensor` to `default.tensor` with the `quimb` backend.
   [(#719)](https://github.com/PennyLaneAI/pennylane-lightning/pull/719)
 
-<<<<<<< maa/probs-py-cpp-dispatch
 * Patch the C++ `Measurements.probs(wires)` method in Lightning-Qubit and Lightning-Kokkos to `Measurements.probs()` when called with all wires.
   This will trigger a more optimized implementation for calculating the probabilities of the entire system.
   [(#744)](https://github.com/PennyLaneAI/pennylane-lightning/pull/744)
-=======
+
 * Remove the daily schedule from the "Compat Check w/PL - release/release" GitHub action.
   [(#746)](https://github.com/PennyLaneAI/pennylane-lightning/pull/746)
->>>>>>> master
+
 
 ### Documentation
 
 ### Bug fixes
 
+* Fix AVX streaming operation support with newer GCC.
+  [(#729)](https://github.com/PennyLaneAI/pennylane-lightning/pull/729)
+
 * Revert changes calling `IMAG`, `ONE`, `ZERO` templated functions in Kokkos kernels since they are incompatible with device execution.
   [(#733)](https://github.com/PennyLaneAI/pennylane-lightning/pull/733)
 
@@ -80,7 +82,7 @@
 
 This release contains contributions from (in alphabetical order):
 
-Ali Asadi, Amintor Dusko, Pietropaolo Frisoni, Vincent Michaud-Rioux, Mudit Pandey, Shuli Shu
+Ali Asadi, Amintor Dusko, Pietropaolo Frisoni, Vincent Michaud-Rioux, Lee James O'Riordan, Mudit Pandey, Shuli Shu
 
 ---
 

diff --git a/.github/workflows/tests_linux_cpp.yml b/.github/workflows/tests_linux_cpp.yml
@@ -46,12 +46,12 @@ jobs:
       matrix:
         pl_backend: ["lightning_qubit"]
         enable_kernel_omp: ["OFF", "ON"]
-        enable_kernel_avx_stream: ["OFF", "ON"]
+        enable_kernel_avx_streaming: ["OFF", "ON"]
         exclude:
         - enable_kernel_omp: OFF
-          enable_kernel_avx_stream: ON
+          enable_kernel_avx_streaming: ON
     timeout-minutes: 60
-    name: C++ Tests (${{ matrix.pl_backend }}, ENABLE_KERNEL_OMP=${{ matrix.enable_kernel_omp }}, ENABLE_KERNEL_AVX_STREAM=${{ matrix.enable_kernel_avx_stream }})
+    name: C++ Tests (${{ matrix.pl_backend }}, ENABLE_KERNEL_OMP=${{ matrix.enable_kernel_omp }}, ENABLE_KERNEL_AVX_STREAMING=${{ matrix.enable_kernel_avx_streaming }})
     runs-on: ${{ needs.determine_runner.outputs.runner_group }}
 
     steps:
@@ -77,7 +77,7 @@ jobs:
               -DPL_BACKEND=${{ matrix.pl_backend }} \
               -DCMAKE_CXX_COMPILER=$(which g++-$GCC_VERSION) \
               -DENABLE_COVERAGE=ON \
-              -DLQ_ENABLE_KERNEL_AVX_STREAM=${{ matrix.enable_kernel_avx_stream }} \
+              -DLQ_ENABLE_KERNEL_AVX_STREAMING=${{ matrix.enable_kernel_avx_streaming }} \
               -DLQ_ENABLE_KERNEL_OMP=${{ matrix.enable_kernel_omp }}
 
             cmake --build ./Build
@@ -87,13 +87,13 @@ jobs:
             for file in *runner ; do ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done;
             lcov --directory . -b ../pennylane_lightning/core/src --capture --output-file coverage.info
             lcov --remove coverage.info '/usr/*' --output-file coverage.info
-            mv coverage.info coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }}.info
+            mv coverage.info coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }}.info
 
       - name: Upload test results
         uses: actions/upload-artifact@v3
         if: always()
         with:
-          name: ubuntu-tests-reports-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }}
+          name: ubuntu-tests-reports-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }}
           path: |
             ./Build/tests/results/
 
@@ -102,8 +102,8 @@ jobs:
       - name: Upload code coverage results
         uses: actions/upload-artifact@v3
         with:
-          name: ubuntu-codecov-results-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }}
-          path: ./Build/coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_stream }}-${{ matrix.enable_kernel_omp }}.info
+          name: ubuntu-codecov-results-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }}
+          path: ./Build/coverage-${{ github.job }}-${{ matrix.pl_backend }}-${{ matrix.enable_kernel_avx_streaming }}-${{ matrix.enable_kernel_omp }}.info
           if-no-files-found: error
 
   cpptestswithOpenBLAS:

diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.37.0-dev22"
+__version__ = "0.37.0-dev23"
diff --git a/...ightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX2Concept.hpp b/...ightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX2Concept.hpp
@@ -55,6 +55,18 @@ template <typename T> struct AVX2Concept {
         }
     }
 
+    PL_FORCE_INLINE
+    static auto load(const PrecisionT *p) -> IntrinsicType {
+        if constexpr (std::is_same_v<PrecisionT, float>) {
+            return _mm256_load_ps(p);
+        } else if (std::is_same_v<PrecisionT, double>) {
+            return _mm256_load_pd(p);
+        } else {
+            static_assert(std::is_same_v<PrecisionT, float> ||
+                          std::is_same_v<PrecisionT, double>);
+        }
+    }
+
     PL_FORCE_INLINE
     static auto loadu(const std::complex<PrecisionT> *p) -> IntrinsicType {
         if constexpr (std::is_same_v<PrecisionT, float>) {
@@ -91,6 +103,18 @@ template <typename T> struct AVX2Concept {
         }
     }
 
+    PL_FORCE_INLINE
+    static void store_(PrecisionT *p, IntrinsicType value) {
+        if constexpr (std::is_same_v<PrecisionT, float>) {
+            _mm256_store_ps(p, value);
+        } else if (std::is_same_v<PrecisionT, double>) {
+            _mm256_store_pd(p, value);
+        } else {
+            static_assert(std::is_same_v<PrecisionT, float> ||
+                          std::is_same_v<PrecisionT, double>);
+        }
+    }
+
     PL_FORCE_INLINE
     static void stream_(std::complex<PrecisionT> *p, IntrinsicType value) {
         if constexpr (std::is_same_v<PrecisionT, float>) {
@@ -122,9 +146,9 @@ template <typename T> struct AVX2Concept {
     PL_FORCE_INLINE
     static void store(PrecisionT *p, IntrinsicType value) {
 #ifdef PL_LQ_KERNEL_AVX_STREAMING
-        store_(p, value);
-#else
         stream_(p, value);
+#else
+        store_(p, value);
 #endif
     }
 

diff --git a/...htning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX512Concept.hpp b/...htning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVX512Concept.hpp
@@ -46,6 +46,17 @@ template <typename T> struct AVX512Concept {
 
     PL_FORCE_INLINE
     static auto load(std::complex<PrecisionT> *p) -> IntrinsicType {
+        if constexpr (std::is_same_v<PrecisionT, float>) {
+            return _mm512_load_ps(reinterpret_cast<PrecisionT *>(p));
+        } else if (std::is_same_v<PrecisionT, double>) {
+            return _mm512_load_pd(reinterpret_cast<PrecisionT *>(p));
+        } else {
+            static_assert(std::is_same_v<PrecisionT, float> ||
+                          std::is_same_v<PrecisionT, double>);
+        }
+    }
+    PL_FORCE_INLINE
+    static auto load(PrecisionT *p) -> IntrinsicType {
         if constexpr (std::is_same_v<PrecisionT, float>) {
             return _mm512_load_ps(p);
         } else if (std::is_same_v<PrecisionT, double>) {
@@ -59,9 +70,9 @@ template <typename T> struct AVX512Concept {
     PL_FORCE_INLINE
     static auto loadu(std::complex<PrecisionT> *p) -> IntrinsicType {
         if constexpr (std::is_same_v<PrecisionT, float>) {
-            return _mm512_loadu_ps(p);
+            return _mm512_loadu_ps(reinterpret_cast<PrecisionT *>(p));
         } else if (std::is_same_v<PrecisionT, double>) {
-            return _mm512_loadu_pd(p);
+            return _mm512_loadu_pd(reinterpret_cast<PrecisionT *>(p));
         } else {
             static_assert(std::is_same_v<PrecisionT, float> ||
                           std::is_same_v<PrecisionT, double>);
@@ -82,6 +93,18 @@ template <typename T> struct AVX512Concept {
 
     PL_FORCE_INLINE
     static void store_(std::complex<PrecisionT> *p, IntrinsicType value) {
+        if constexpr (std::is_same_v<PrecisionT, float>) {
+            _mm512_store_ps(reinterpret_cast<PrecisionT *>(p), value);
+        } else if (std::is_same_v<PrecisionT, double>) {
+            _mm512_store_pd(reinterpret_cast<PrecisionT *>(p), value);
+        } else {
+            static_assert(std::is_same_v<PrecisionT, float> ||
+                          std::is_same_v<PrecisionT, double>);
+        }
+    }
+
+    PL_FORCE_INLINE
+    static void store_(PrecisionT *p, IntrinsicType value) {
         if constexpr (std::is_same_v<PrecisionT, float>) {
             _mm512_store_ps(p, value);
         } else if (std::is_same_v<PrecisionT, double>) {
@@ -95,9 +118,9 @@ template <typename T> struct AVX512Concept {
     PL_FORCE_INLINE
     static void stream_(std::complex<PrecisionT> *p, IntrinsicType value) {
         if constexpr (std::is_same_v<PrecisionT, float>) {
-            _mm512_stream_ps(p, value);
+            _mm512_stream_ps(reinterpret_cast<PrecisionT *>(p), value);
         } else if (std::is_same_v<PrecisionT, double>) {
-            _mm512_stream_pd(p, value);
+            _mm512_stream_pd(reinterpret_cast<PrecisionT *>(p), value);
         } else {
             static_assert(std::is_same_v<PrecisionT, float> ||
                           std::is_same_v<PrecisionT, double>);
@@ -118,15 +141,19 @@ template <typename T> struct AVX512Concept {
 
     PL_FORCE_INLINE
     static void store(std::complex<PrecisionT> *p, IntrinsicType value) {
-        store(reinterpret_cast<PrecisionT *>(p), value);
+#ifdef PL_LQ_KERNEL_AVX_STREAMING
+        stream_(reinterpret_cast<PrecisionT *>(p), value);
+#else
+        store_(reinterpret_cast<PrecisionT *>(p), value);
+#endif
     }
 
     PL_FORCE_INLINE
     static void store(PrecisionT *p, IntrinsicType value) {
 #ifdef PL_LQ_KERNEL_AVX_STREAMING
-        store_(p, value);
-#else
         stream_(p, value);
+#else
+        store_(p, value);
 #endif
     }