From e96a53fa9789253b4a766c86b078f5cbf62d603f Mon Sep 17 00:00:00 2001 From: Vincent Michaud-Rioux Date: Thu, 7 Sep 2023 10:35:37 -0400 Subject: [PATCH] Template/expval (#489) * M pennylane_lightning/core/src/bindings/Bindings.hpp; hack `JacobianData` to work with devices. M pennylane_lightning/core/src/simulators/lightning_kokkos/StateVectorKokkos.hpp; `applyMatrix` bugfix: use intermediate hostview to copy matrix data; same bugfix for `getDataVector`. M pennylane_lightning/core/src/simulators/lightning_kokkos/algorithms/AdjointJacobianKokkos.hpp; use copy constructor. M pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp; use copy constructor. M pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.hpp; use copy constructor. M requirements-dev.txt; add clang-format-14. * Auto update version * Update changelog. * Auto update version * Auto update version * Add an argument to adjointJacobian to avoid syncing and copying state vector data in adjoint-diff. * Reformat * trigger CI * [skip ci] Update changelog. * Introduce std::unordered_map expval_funcs_. * Introduce applyExpectationValueFunctor. * Add binding to LKokkos expval(matrix, wires). Combine expval functor calls into two templated methods. Call specialized expval methods when possible. Remove obsolete 'Apply directly' tests. * Update changelog. * Add test for arbitrary expval(Hermitian). * Add getExpectationValueMultiQubitOpFunctor. * Add typename hint for macos. * Add typename macos. * Use Kokkos::ThreadVectorRange policy for innerloop in getExpectationValueMultiQubitOpFunctor. * Auto update version * Auto update version * Couple fix for HIP. * WIP * Add specialized 3-5 qubit expval functors. * Add implementation notes. * [skip ci] Polish getExpValMatrix and limit getExpValNQubitOpFunctor to N == 4 (seems optimal on OPENMP/HIP backends although CUDA can go further). * Fix whitespace warning. * Auto update version * Use simplified bitshifts in 1- & 2-qubit expval kernels. * Update tests. * Update changelog. * Bump pennylane version. * Auto update version * Reimplement expval functors with macros. * Auto update version * Update CHANGELOG.md * Auto update version * trigger CI * trigger CI * Bump Kokkos to 4.1.00 in CI. * Revert kokkos ver. * Add tests for macroed expval functors. * Remove redundant black lines. * Use matrix interface to get expval of HermitianObs in LKokkos. * Cover kokkos_args error. --------- Co-authored-by: Dev version update bot Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> --- .github/CHANGELOG.md | 3 + pennylane_lightning/core/_version.py | 2 +- .../lightning_kokkos/gates/README.md | 75 +++ .../measurements/CMakeLists.txt | 1 + .../measurements/ExpValFunctors.hpp | 478 ++++++++++++++---- .../measurements/MeasurementsKokkos.hpp | 65 ++- .../tests/Test_StateVectorKokkos_Expval.cpp | 154 ++++++ .../lightning_kokkos/lightning_kokkos.py | 2 +- tests/test_device.py | 9 + tests/test_expval.py | 10 +- tests/test_gates.py | 27 + 11 files changed, 709 insertions(+), 117 deletions(-) create mode 100644 pennylane_lightning/core/src/simulators/lightning_kokkos/gates/README.md diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 9d081c9de2..b165f9e656 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -12,6 +12,9 @@ ### Improvements +* Refactor LKokkos `Measurements` class to use Kokkos `RangePolicy` together with special functors to obtain the expectation value of 1- to 4-wire generic unitary gates. For more than 4 wires, the general implementation using Kokkos `TeamPolicy` is employed to yield the best all-around performance. + [(#489)] (https://github.com/PennyLaneAI/pennylane-lightning/pull/489) + * Add tests to increase LKokkos coverage. [(#485)](https://github.com/PennyLaneAI/pennylane-lightning/pull/485) diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index 57254db4dd..22113c5ec6 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.33.0-dev3" +__version__ = "0.33.0-dev4" diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/README.md b/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/README.md new file mode 100644 index 0000000000..1957e4c206 --- /dev/null +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/README.md @@ -0,0 +1,75 @@ +# Implementation + +## Expval kernels + +The general multi-qubit operator kernel requires a private variable `coeffs_in` to store state vector coefficients. +In the Kokkos framework, this variable cannot be a private member of the functor, say, because all threads will overwrite it. +One must then use `TeamPolicy`s together with `scratch_memory_space` which allows creating and manipulating thread-local variables. +This implementation however appears suboptimal compared with the straightforward `RangePolicy` with bit-injection one. + +The last being more verbose, it is only implemented for 1- and 2-qubit observables. +It is however possible to generate the code automatically for higher qubit counts with the following Python script. + +```python +for n_wires in range(1, 6): + name = f"getExpVal{n_wires}QubitOpFunctor" + nU = 2**n_wires + + print( + f"""template struct {name} {{ + + using ComplexT = Kokkos::complex; + using KokkosComplexVector = Kokkos::View; + using KokkosIntVector = Kokkos::View; + + KokkosComplexVector arr; + KokkosComplexVector matrix; + KokkosIntVector wires; + std::size_t dim; + std::size_t num_qubits; + + {name}(const KokkosComplexVector &arr_, + const std::size_t num_qubits_, + const KokkosComplexVector &matrix_, + const KokkosIntVector &wires_) {{ + wires = wires_; + arr = arr_; + matrix = matrix_; + num_qubits = num_qubits_; + dim = 1U << wires.size(); + }} + + KOKKOS_INLINE_FUNCTION + void operator()(const std::size_t k, PrecisionT &expval) const {{ + const std::size_t kdim = k * dim; + """ + ) + + for k in range(nU): + print( + f""" + std::size_t i{k:0{n_wires}b} = kdim | {k}; + for (std::size_t pos = 0; pos < n_wires; pos++) {{ + std::size_t x = + ((i{k:0{n_wires}b} >> (n_wires - pos - 1)) ^ + (i{k:0{n_wires}b} >> (num_qubits - wires(pos) - 1))) & + 1U; + i{k:0{n_wires}b} = i{k:0{n_wires}b} ^ ((x << (n_wires - pos - 1)) | + (x << (num_qubits - wires(pos) - 1))); + }} + """ + ) + + # print("expval += real(") + for k in range(nU): + tmp = f"expval += real(conj(arr(i{k:0{n_wires}b})) * (" + tmp += f"matrix(0B{k:0{n_wires}b}{0:0{n_wires}b}) * arr(i{0:0{n_wires}b})" + for j in range(1, nU): + tmp += ( + f" + matrix(0B{k:0{n_wires}b}{j:0{n_wires}b}) * arr(i{j:0{n_wires}b})" + ) + print(tmp, end="") + print("));") + print("}") + print("};") +``` \ No newline at end of file diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/CMakeLists.txt b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/CMakeLists.txt index 47050770ca..6c8effc873 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/CMakeLists.txt +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/CMakeLists.txt @@ -11,6 +11,7 @@ target_link_libraries(${PL_BACKEND}_measurements INTERFACE lightning_compile_op lightning_observables lightning_utils ${PL_BACKEND} + ${PL_BACKEND}_observables ${PL_BACKEND}_utils ) diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp index 8cb1eded63..89972d05c2 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp @@ -161,102 +161,7 @@ template struct getExpectationValueHadamardFunctor { } }; -template struct getExpectationValueSingleQubitOpFunctor { - Kokkos::View *> arr; - Kokkos::View *> matrix; - - std::size_t rev_wire; - std::size_t rev_wire_shift; - std::size_t wire_parity; - std::size_t wire_parity_inv; - - getExpectationValueSingleQubitOpFunctor( - Kokkos::View *> &arr_, - std::size_t num_qubits, - const Kokkos::View *> &matrix_, - const std::vector &wires) { - arr = arr_; - matrix = matrix_; - rev_wire = num_qubits - wires[0] - 1; - rev_wire_shift = (static_cast(1U) << rev_wire); - wire_parity = fillTrailingOnes(rev_wire); - wire_parity_inv = fillLeadingOnes(rev_wire + 1); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const std::size_t k, PrecisionT &expval) const { - const std::size_t i0 = - ((k << 1U) & wire_parity_inv) | (wire_parity & k); - const std::size_t i1 = i0 | rev_wire_shift; - - expval += real( - conj(arr[i0]) * (matrix[0B00] * arr[i0] + matrix[0B01] * arr[i1]) + - conj(arr[i1]) * (matrix[0B10] * arr[i0] + matrix[0B11] * arr[i1])); - } -}; - -template struct getExpectationValueTwoQubitOpFunctor { - Kokkos::View *> arr; - Kokkos::View *> matrix; - - std::size_t rev_wire0; - std::size_t rev_wire1; - std::size_t rev_wire0_shift; - std::size_t rev_wire1_shift; - std::size_t rev_wire_min; - std::size_t rev_wire_max; - std::size_t parity_low; - std::size_t parity_high; - std::size_t parity_middle; - - getExpectationValueTwoQubitOpFunctor( - Kokkos::View *> &arr_, - std::size_t num_qubits, - const Kokkos::View *> &matrix_, - const std::vector &wires) { - rev_wire0 = num_qubits - wires[1] - 1; - rev_wire1 = num_qubits - wires[0] - 1; - - rev_wire0_shift = static_cast(1U) << rev_wire0; - rev_wire1_shift = static_cast(1U) << rev_wire1; - - rev_wire_min = std::min(rev_wire0, rev_wire1); - rev_wire_max = std::max(rev_wire0, rev_wire1); - - parity_low = fillTrailingOnes(rev_wire_min); - parity_high = fillLeadingOnes(rev_wire_max + 1); - parity_middle = - fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max); - - arr = arr_; - matrix = matrix_; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const std::size_t k, PrecisionT &expval) const { - const std::size_t i00 = ((k << 2U) & parity_high) | - ((k << 1U) & parity_middle) | (k & parity_low); - const std::size_t i10 = i00 | rev_wire1_shift; - const std::size_t i01 = i00 | rev_wire0_shift; - const std::size_t i11 = i00 | rev_wire0_shift | rev_wire1_shift; - - expval += - real(conj(arr[i00]) * - (matrix[0B0000] * arr[i00] + matrix[0B0001] * arr[i01] + - matrix[0B0010] * arr[i10] + matrix[0B0011] * arr[i11]) + - conj(arr[i01]) * - (matrix[0B0100] * arr[i00] + matrix[0B0101] * arr[i01] + - matrix[0B0110] * arr[i10] + matrix[0B0111] * arr[i11]) + - conj(arr[i10]) * - (matrix[0B1000] * arr[i00] + matrix[0B1001] * arr[i01] + - matrix[0B1010] * arr[i10] + matrix[0B1011] * arr[i11]) + - conj(arr[i11]) * - (matrix[0B1100] * arr[i00] + matrix[0B1101] * arr[i01] + - matrix[0B1110] * arr[i10] + matrix[0B1111] * arr[i11])); - } -}; - -template struct getExpectationValueMultiQubitOpFunctor { +template struct getExpValMultiQubitOpFunctor { using ComplexT = Kokkos::complex; using KokkosComplexVector = Kokkos::View; using KokkosIntVector = Kokkos::View; @@ -272,13 +177,18 @@ template struct getExpectationValueMultiQubitOpFunctor { std::size_t dim; std::size_t num_qubits; - getExpectationValueMultiQubitOpFunctor(const KokkosComplexVector &arr_, - std::size_t num_qubits_, - const KokkosComplexVector &matrix_, - KokkosIntVector &wires_) { + getExpValMultiQubitOpFunctor(const KokkosComplexVector &arr_, + std::size_t num_qubits_, + const KokkosComplexVector &matrix_, + const std::vector &wires_) { + Kokkos::View> + wires_host(wires_.data(), wires_.size()); + Kokkos::resize(wires, wires_.size()); + Kokkos::deep_copy(wires, wires_host); + dim = 1U << wires_.size(); num_qubits = num_qubits_; - wires = wires_; arr = arr_; matrix = matrix_; } @@ -355,4 +265,370 @@ template struct getExpectationValueSparseFunctor { } }; +template struct getExpVal1QubitOpFunctor { + using ComplexT = Kokkos::complex; + using KokkosComplexVector = Kokkos::View; + using KokkosIntVector = Kokkos::View; + + KokkosComplexVector arr; + KokkosComplexVector matrix; + const std::size_t n_wires = 1; + const std::size_t dim = 1U << n_wires; + std::size_t num_qubits; + std::size_t rev_wire; + std::size_t rev_wire_shift; + std::size_t wire_parity; + std::size_t wire_parity_inv; + + getExpVal1QubitOpFunctor( + const KokkosComplexVector &arr_, const std::size_t num_qubits_, + const KokkosComplexVector &matrix_, + [[maybe_unused]] const std::vector &wires_) { + arr = arr_; + matrix = matrix_; + num_qubits = num_qubits_; + rev_wire = num_qubits - wires_[0] - 1; + rev_wire_shift = (static_cast(1U) << rev_wire); + wire_parity = fillTrailingOnes(rev_wire); + wire_parity_inv = fillLeadingOnes(rev_wire + 1); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const std::size_t k, PrecisionT &expval) const { + const std::size_t i0 = + ((k << 1U) & wire_parity_inv) | (wire_parity & k); + const std::size_t i1 = i0 | rev_wire_shift; + + expval += real(conj(arr(i0)) * + (matrix(0B00) * arr(i0) + matrix(0B01) * arr(i1))); + expval += real(conj(arr(i1)) * + (matrix(0B10) * arr(i0) + matrix(0B11) * arr(i1))); + } +}; + +#define ENTRY2(xx, yy) xx << 2 | yy +#define TERM2(xx, yy, iyy) matrix(ENTRY2(xx, yy)) * arr(iyy) +#define EXPVAL2(ixx, xx) \ + conj(arr(ixx)) * (TERM2(xx, 0B00, i00) + TERM2(xx, 0B01, i01) + \ + TERM2(xx, 0B10, i10) + TERM2(xx, 0B11, i11)) + +template struct getExpVal2QubitOpFunctor { + using ComplexT = Kokkos::complex; + using KokkosComplexVector = Kokkos::View; + using KokkosIntVector = Kokkos::View; + + KokkosComplexVector arr; + KokkosComplexVector matrix; + const std::size_t n_wires = 2; + const std::size_t dim = 1U << n_wires; + std::size_t num_qubits; + std::size_t rev_wire0; + std::size_t rev_wire1; + std::size_t rev_wire0_shift; + std::size_t rev_wire1_shift; + std::size_t rev_wire_min; + std::size_t rev_wire_max; + std::size_t parity_low; + std::size_t parity_high; + std::size_t parity_middle; + + getExpVal2QubitOpFunctor( + const KokkosComplexVector &arr_, const std::size_t num_qubits_, + const KokkosComplexVector &matrix_, + [[maybe_unused]] const std::vector &wires_) { + arr = arr_; + matrix = matrix_; + num_qubits = num_qubits_; + + rev_wire0 = num_qubits - wires_[1] - 1; + rev_wire1 = num_qubits - wires_[0] - 1; + rev_wire0_shift = static_cast(1U) << rev_wire0; + rev_wire1_shift = static_cast(1U) << rev_wire1; + rev_wire_min = std::min(rev_wire0, rev_wire1); + rev_wire_max = std::max(rev_wire0, rev_wire1); + parity_low = fillTrailingOnes(rev_wire_min); + parity_high = fillLeadingOnes(rev_wire_max + 1); + parity_middle = + fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const std::size_t k, PrecisionT &expval) const { + const std::size_t i00 = ((k << 2U) & parity_high) | + ((k << 1U) & parity_middle) | (k & parity_low); + const std::size_t i10 = i00 | rev_wire1_shift; + const std::size_t i01 = i00 | rev_wire0_shift; + const std::size_t i11 = i00 | rev_wire0_shift | rev_wire1_shift; + + expval += real(EXPVAL2(i00, 0B00)); + expval += real(EXPVAL2(i10, 0B10)); + expval += real(EXPVAL2(i01, 0B01)); + expval += real(EXPVAL2(i11, 0B11)); + } +}; + +#define ENTRY3(xx, yy) xx << 3 | yy +#define TERM3(xx, yy, iyy) matrix(ENTRY3(xx, yy)) * arr(iyy) +#define EXPVAL3(ixx, xx) \ + conj(arr(ixx)) * (TERM3(xx, 0B000, i000) + TERM3(xx, 0B001, i001) + \ + TERM3(xx, 0B010, i010) + TERM3(xx, 0B011, i011) + \ + TERM3(xx, 0B100, i100) + TERM3(xx, 0B101, i101) + \ + TERM3(xx, 0B110, i110) + TERM3(xx, 0B111, i111)) +#define INDEX(ivar, xx) \ + std::size_t ivar = kdim | xx; \ + for (std::size_t pos = 0; pos < n_wires; pos++) { \ + std::size_t x = ((ivar >> (n_wires - pos - 1)) ^ \ + (ivar >> (num_qubits - wires(pos) - 1))) & \ + 1U; \ + ivar = ivar ^ ((x << (n_wires - pos - 1)) | \ + (x << (num_qubits - wires(pos) - 1))); \ + } + +template struct getExpVal3QubitOpFunctor { + using ComplexT = Kokkos::complex; + using KokkosComplexVector = Kokkos::View; + using KokkosIntVector = Kokkos::View; + + KokkosComplexVector arr; + KokkosComplexVector matrix; + KokkosIntVector wires; + const std::size_t n_wires = 3; + const std::size_t dim = 1U << n_wires; + std::size_t num_qubits; + + getExpVal3QubitOpFunctor(const KokkosComplexVector &arr_, + const std::size_t num_qubits_, + const KokkosComplexVector &matrix_, + const std::vector &wires_) { + Kokkos::View> + wires_host(wires_.data(), wires_.size()); + Kokkos::resize(wires, wires_.size()); + Kokkos::deep_copy(wires, wires_host); + + arr = arr_; + matrix = matrix_; + num_qubits = num_qubits_; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const std::size_t k, PrecisionT &expval) const { + const std::size_t kdim = k * dim; + INDEX(i000, 0B000); + INDEX(i001, 0B001); + INDEX(i010, 0B010); + INDEX(i011, 0B011); + INDEX(i100, 0B100); + INDEX(i101, 0B101); + INDEX(i110, 0B110); + INDEX(i111, 0B111); + + expval += real(EXPVAL3(i000, 0B000)); + expval += real(EXPVAL3(i001, 0B001)); + expval += real(EXPVAL3(i010, 0B010)); + expval += real(EXPVAL3(i011, 0B011)); + expval += real(EXPVAL3(i100, 0B100)); + expval += real(EXPVAL3(i101, 0B101)); + expval += real(EXPVAL3(i110, 0B110)); + expval += real(EXPVAL3(i111, 0B111)); + } +}; + +#define ENTRY4(xx, yy) xx << 4 | yy +#define TERM4(xx, yy, iyy) matrix(ENTRY4(xx, yy)) * arr(iyy) +#define EXPVAL4(ixx, xx) \ + conj(arr(ixx)) * (TERM4(xx, 0B0000, i0000) + TERM4(xx, 0B0001, i0001) + \ + TERM4(xx, 0B0010, i0010) + TERM4(xx, 0B0011, i0011) + \ + TERM4(xx, 0B0100, i0100) + TERM4(xx, 0B0101, i0101) + \ + TERM4(xx, 0B0110, i0110) + TERM4(xx, 0B0111, i0111) + \ + TERM4(xx, 0B1000, i1000) + TERM4(xx, 0B1001, i1001) + \ + TERM4(xx, 0B1010, i1010) + TERM4(xx, 0B1011, i1011) + \ + TERM4(xx, 0B1100, i1100) + TERM4(xx, 0B1101, i1101) + \ + TERM4(xx, 0B1110, i1110) + TERM4(xx, 0B1111, i1111)) + +template struct getExpVal4QubitOpFunctor { + using ComplexT = Kokkos::complex; + using KokkosComplexVector = Kokkos::View; + using KokkosIntVector = Kokkos::View; + + KokkosComplexVector arr; + KokkosComplexVector matrix; + KokkosIntVector wires; + const std::size_t n_wires = 4; + const std::size_t dim = 1U << n_wires; + std::size_t num_qubits; + + getExpVal4QubitOpFunctor(const KokkosComplexVector &arr_, + const std::size_t num_qubits_, + const KokkosComplexVector &matrix_, + const std::vector &wires_) { + Kokkos::View> + wires_host(wires_.data(), wires_.size()); + Kokkos::resize(wires, wires_.size()); + Kokkos::deep_copy(wires, wires_host); + arr = arr_; + matrix = matrix_; + num_qubits = num_qubits_; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const std::size_t k, PrecisionT &expval) const { + const std::size_t kdim = k * dim; + + INDEX(i0000, 0B0000); + INDEX(i0001, 0B0001); + INDEX(i0010, 0B0010); + INDEX(i0011, 0B0011); + INDEX(i0100, 0B0100); + INDEX(i0101, 0B0101); + INDEX(i0110, 0B0110); + INDEX(i0111, 0B0111); + INDEX(i1000, 0B1000); + INDEX(i1001, 0B1001); + INDEX(i1010, 0B1010); + INDEX(i1011, 0B1011); + INDEX(i1100, 0B1100); + INDEX(i1101, 0B1101); + INDEX(i1110, 0B1110); + INDEX(i1111, 0B1111); + + expval += real(EXPVAL4(i0000, 0B0000)); + expval += real(EXPVAL4(i0001, 0B0001)); + expval += real(EXPVAL4(i0010, 0B0010)); + expval += real(EXPVAL4(i0011, 0B0011)); + expval += real(EXPVAL4(i0100, 0B0100)); + expval += real(EXPVAL4(i0101, 0B0101)); + expval += real(EXPVAL4(i0110, 0B0110)); + expval += real(EXPVAL4(i0111, 0B0111)); + expval += real(EXPVAL4(i1000, 0B1000)); + expval += real(EXPVAL4(i1001, 0B1001)); + expval += real(EXPVAL4(i1010, 0B1010)); + expval += real(EXPVAL4(i1011, 0B1011)); + expval += real(EXPVAL4(i1100, 0B1100)); + expval += real(EXPVAL4(i1101, 0B1101)); + expval += real(EXPVAL4(i1110, 0B1110)); + expval += real(EXPVAL4(i1111, 0B1111)); + } +}; + +#define ENTRY5(xx, yy) xx << 5 | yy +#define TERM5(xx, yy, iyy) matrix(ENTRY5(xx, yy)) * arr(iyy) +#define EXPVAL5(ixx, xx) \ + conj(arr(ixx)) * \ + (TERM5(xx, 0B00000, i00000) + TERM5(xx, 0B00001, i00001) + \ + TERM5(xx, 0B00010, i00010) + TERM5(xx, 0B00011, i00011) + \ + TERM5(xx, 0B00100, i00100) + TERM5(xx, 0B00101, i00101) + \ + TERM5(xx, 0B00110, i00110) + TERM5(xx, 0B00111, i00111) + \ + TERM5(xx, 0B01000, i01000) + TERM5(xx, 0B01001, i01001) + \ + TERM5(xx, 0B01010, i01010) + TERM5(xx, 0B01011, i01011) + \ + TERM5(xx, 0B01100, i01100) + TERM5(xx, 0B01101, i01101) + \ + TERM5(xx, 0B01110, i01110) + TERM5(xx, 0B01111, i01111) + \ + TERM5(xx, 0B10000, i10000) + TERM5(xx, 0B10001, i10001) + \ + TERM5(xx, 0B10010, i10010) + TERM5(xx, 0B10011, i10011) + \ + TERM5(xx, 0B10100, i10100) + TERM5(xx, 0B10101, i10101) + \ + TERM5(xx, 0B10110, i10110) + TERM5(xx, 0B10111, i10111) + \ + TERM5(xx, 0B11000, i11000) + TERM5(xx, 0B11001, i11001) + \ + TERM5(xx, 0B11010, i11010) + TERM5(xx, 0B11011, i11011) + \ + TERM5(xx, 0B11100, i11100) + TERM5(xx, 0B11101, i11101) + \ + TERM5(xx, 0B11110, i11110) + TERM5(xx, 0B11111, i11111)) + +template struct getExpVal5QubitOpFunctor { + using ComplexT = Kokkos::complex; + using KokkosComplexVector = Kokkos::View; + using KokkosIntVector = Kokkos::View; + + KokkosComplexVector arr; + KokkosComplexVector matrix; + KokkosIntVector wires; + const std::size_t n_wires = 5; + const std::size_t dim = 1U << n_wires; + std::size_t num_qubits; + + getExpVal5QubitOpFunctor(const KokkosComplexVector &arr_, + const std::size_t num_qubits_, + const KokkosComplexVector &matrix_, + const std::vector &wires_) { + Kokkos::View> + wires_host(wires_.data(), wires_.size()); + Kokkos::resize(wires, wires_.size()); + Kokkos::deep_copy(wires, wires_host); + arr = arr_; + matrix = matrix_; + num_qubits = num_qubits_; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const std::size_t k, PrecisionT &expval) const { + const std::size_t kdim = k * dim; + + INDEX(i00000, 0B00000); + INDEX(i00001, 0B00001); + INDEX(i00010, 0B00010); + INDEX(i00011, 0B00011); + INDEX(i00100, 0B00100); + INDEX(i00101, 0B00101); + INDEX(i00110, 0B00110); + INDEX(i00111, 0B00111); + INDEX(i01000, 0B01000); + INDEX(i01001, 0B01001); + INDEX(i01010, 0B01010); + INDEX(i01011, 0B01011); + INDEX(i01100, 0B01100); + INDEX(i01101, 0B01101); + INDEX(i01110, 0B01110); + INDEX(i01111, 0B01111); + INDEX(i10000, 0B10000); + INDEX(i10001, 0B10001); + INDEX(i10010, 0B10010); + INDEX(i10011, 0B10011); + INDEX(i10100, 0B10100); + INDEX(i10101, 0B10101); + INDEX(i10110, 0B10110); + INDEX(i10111, 0B10111); + INDEX(i11000, 0B11000); + INDEX(i11001, 0B11001); + INDEX(i11010, 0B11010); + INDEX(i11011, 0B11011); + INDEX(i11100, 0B11100); + INDEX(i11101, 0B11101); + INDEX(i11110, 0B11110); + INDEX(i11111, 0B11111); + + expval += real(EXPVAL5(i00000, 0B00000)); + expval += real(EXPVAL5(i00001, 0B00001)); + expval += real(EXPVAL5(i00010, 0B00010)); + expval += real(EXPVAL5(i00011, 0B00011)); + expval += real(EXPVAL5(i00100, 0B00100)); + expval += real(EXPVAL5(i00101, 0B00101)); + expval += real(EXPVAL5(i00110, 0B00110)); + expval += real(EXPVAL5(i00111, 0B00111)); + expval += real(EXPVAL5(i01000, 0B01000)); + expval += real(EXPVAL5(i01001, 0B01001)); + expval += real(EXPVAL5(i01010, 0B01010)); + expval += real(EXPVAL5(i01011, 0B01011)); + expval += real(EXPVAL5(i01100, 0B01100)); + expval += real(EXPVAL5(i01101, 0B01101)); + expval += real(EXPVAL5(i01110, 0B01110)); + expval += real(EXPVAL5(i01111, 0B01111)); + expval += real(EXPVAL5(i10000, 0B10000)); + expval += real(EXPVAL5(i10001, 0B10001)); + expval += real(EXPVAL5(i10010, 0B10010)); + expval += real(EXPVAL5(i10011, 0B10011)); + expval += real(EXPVAL5(i10100, 0B10100)); + expval += real(EXPVAL5(i10101, 0B10101)); + expval += real(EXPVAL5(i10110, 0B10110)); + expval += real(EXPVAL5(i10111, 0B10111)); + expval += real(EXPVAL5(i11000, 0B11000)); + expval += real(EXPVAL5(i11001, 0B11001)); + expval += real(EXPVAL5(i11010, 0B11010)); + expval += real(EXPVAL5(i11011, 0B11011)); + expval += real(EXPVAL5(i11100, 0B11100)); + expval += real(EXPVAL5(i11101, 0B11101)); + expval += real(EXPVAL5(i11110, 0B11110)); + expval += real(EXPVAL5(i11111, 0B11111)); + } +}; + } // namespace Pennylane::LightningKokkos::Functors diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp index e78b590561..dc9e92a342 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp @@ -21,6 +21,7 @@ #include "MeasurementsBase.hpp" #include "MeasuresFunctors.hpp" #include "Observables.hpp" +#include "ObservablesKokkos.hpp" #include "StateVectorKokkos.hpp" #include "Util.hpp" @@ -29,6 +30,7 @@ namespace { using namespace Pennylane::Measures; using namespace Pennylane::Observables; using Pennylane::LightningKokkos::StateVectorKokkos; +using Pennylane::LightningKokkos::Observables::HermitianObs; using Pennylane::LightningKokkos::Util::getRealOfComplexInnerProduct; using Pennylane::LightningKokkos::Util::SparseMV_Kokkos; using Pennylane::Util::exp2; @@ -128,18 +130,49 @@ class Measurements final */ auto getExpValMatrix(const KokkosVector &matrix, const std::vector &wires) -> PrecisionT { - if (wires.size() == 1) { - return applyExpValFunctor(matrix, wires); - } else if (wires.size() == 2) { - return applyExpValFunctor( - matrix, wires); - } else { - StateVectorT ob_sv{this->_statevector}; - ob_sv.applyMultiQubitOp(matrix, wires); - return getRealOfComplexInnerProduct(this->_statevector.getView(), - ob_sv.getView()); + std::size_t num_qubits = this->_statevector.getNumQubits(); + std::size_t two2N = std::exp2(num_qubits - wires.size()); + std::size_t dim = std::exp2(wires.size()); + const KokkosVector arr_data = this->_statevector.getView(); + + PrecisionT expval = 0.0; + switch (wires.size()) { + case 1: + Kokkos::parallel_reduce(two2N, + getExpVal1QubitOpFunctor( + arr_data, num_qubits, matrix, wires), + expval); + break; + case 2: + Kokkos::parallel_reduce(two2N, + getExpVal2QubitOpFunctor( + arr_data, num_qubits, matrix, wires), + expval); + break; + case 3: + Kokkos::parallel_reduce(two2N, + getExpVal3QubitOpFunctor( + arr_data, num_qubits, matrix, wires), + expval); + break; + case 4: + Kokkos::parallel_reduce(two2N, + getExpVal4QubitOpFunctor( + arr_data, num_qubits, matrix, wires), + expval); + break; + default: + std::size_t scratch_size = ScratchViewComplex::shmem_size(dim); + Kokkos::parallel_reduce( + "getExpValMultiQubitOpFunctor", + TeamPolicy(two2N, Kokkos::AUTO, dim) + .set_scratch_size(0, Kokkos::PerTeam(scratch_size)), + getExpValMultiQubitOpFunctor(arr_data, num_qubits, + matrix, wires), + expval); + break; } + return expval; } /** @@ -155,6 +188,16 @@ class Measurements final ob_sv.getView()); } + /** + * @brief Calculate expectation value for a HermitianObs. + * + * @param ob HermitianObs. + * @return Expectation value with respect to the given observable. + */ + PrecisionT expval(const HermitianObs &ob) { + return expval(ob.getMatrix(), ob.getWires()); + } + /** * @brief Expected value of an observable. * diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/tests/Test_StateVectorKokkos_Expval.cpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/tests/Test_StateVectorKokkos_Expval.cpp index 7eec6715b3..63204a5109 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/tests/Test_StateVectorKokkos_Expval.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/tests/Test_StateVectorKokkos_Expval.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ using namespace Pennylane::LightningKokkos::Observables; using Pennylane::Util::createNonTrivialState; using Pennylane::Util::write_CSR_vectors; using std::size_t; +std::mt19937_64 re{1337}; } // namespace /// @endcond @@ -379,6 +381,158 @@ TEMPLATE_TEST_CASE("Test expectation value of TensorProdObs", } } +TEMPLATE_TEST_CASE("Test expectation value of NQubit Hermitian", + "[StateVectorKokkos_Expval]", float, double) { + using ComplexT = StateVectorKokkos::ComplexT; + using VectorT = TestVector>; + const size_t num_qubits = 7; + VectorT sv_data = createRandomStateVectorData(re, num_qubits); + + StateVectorKokkos kokkos_sv( + reinterpret_cast(sv_data.data()), sv_data.size()); + auto m = Measurements(kokkos_sv); + + auto X0 = std::make_shared>>( + "PauliX", std::vector{0}); + auto Y1 = std::make_shared>>( + "PauliY", std::vector{1}); + auto Z2 = std::make_shared>>( + "PauliZ", std::vector{2}); + auto X3 = std::make_shared>>( + "PauliX", std::vector{3}); + auto Y4 = std::make_shared>>( + "PauliY", std::vector{4}); + + ComplexT j{0.0, 1.0}; + ComplexT u{1.0, 0.0}; + ComplexT z{0.0, 0.0}; + + SECTION("3Qubit") { + auto ob = + TensorProdObs>::create({X0, Y1, Z2}); + auto expected = m.expval(*ob); + std::vector matrix{z, z, z, z, z, z, -j, z, z, z, z, z, z, + z, z, j, z, z, z, z, j, z, z, z, z, z, + z, z, z, -j, z, z, z, z, -j, z, z, z, z, + z, z, z, z, j, z, z, z, z, j, z, z, z, + z, z, z, z, z, -j, z, z, z, z, z, z}; + SECTION("Hermitian") { + auto hermitian = + HermitianObs>(matrix, {0, 1, 2}); + auto res = m.expval(hermitian); + + CHECK(expected == Approx(res)); + } + SECTION("Matrix") { + auto res = m.expval(matrix, {0, 1, 2}); + CHECK(expected == Approx(res)); + } + } + + SECTION("4Qubit") { + auto ob = TensorProdObs>::create( + {X0, Y1, Z2, X3}); + auto expected = m.expval(*ob); + std::vector matrix{ + z, z, z, z, z, z, z, z, z, z, z, z, z, -j, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, -j, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, j, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, j, z, z, z, z, z, z, z, z, z, z, j, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, j, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, -j, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, -j, z, z, z, z, z, z, z, z, z, z, -j, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, -j, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, j, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, j, z, z, z, z, z, z, z, z, z, z, j, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, j, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, -j, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, -j, z, z, z, z, z, z, z, z, z, z, z, z, z}; + SECTION("Hermitian") { + auto hermitian = + HermitianObs>(matrix, {0, 1, 2, 3}); + auto res = m.expval(hermitian); + + CHECK(expected == Approx(res)); + } + SECTION("Matrix") { + auto res = m.expval(matrix, {0, 1, 2, 3}); + CHECK(expected == Approx(res)); + } + } + + SECTION("5Qubit") { + auto ob = TensorProdObs>::create( + {X0, Y1, Z2, X3, Y4}); + auto expected = m.expval(*ob); + std::vector matrix{ + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, -u, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, -u, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, u, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, u, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + -u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, u, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, -u, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, u, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, -u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, u, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, -u, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, -u, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, -u, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, u, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, -u, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, -u, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, u, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, u, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, -u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, u, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, -u, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, u, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, -u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, u, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, -u, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, -u, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, u, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, -u, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, u, z, z, z, z, z, z, z, z, z, z, + z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z}; + SECTION("Hermitian") { + auto hermitian = HermitianObs>( + matrix, {0, 1, 2, 3, 4}); + auto res = m.expval(hermitian); + + CHECK(expected == Approx(res)); + } + SECTION("Matrix") { + auto res = m.expval(matrix, {0, 1, 2, 3, 4}); + CHECK(expected == Approx(res)); + } + } +} + TEMPLATE_TEST_CASE("StateVectorKokkos::Hamiltonian_expval_Sparse", "[StateVectorKokkos_Expval]", float, double) { using ComplexT = StateVectorKokkos::ComplexT; diff --git a/pennylane_lightning/lightning_kokkos/lightning_kokkos.py b/pennylane_lightning/lightning_kokkos/lightning_kokkos.py index 671a5d8611..56cda8f92c 100644 --- a/pennylane_lightning/lightning_kokkos/lightning_kokkos.py +++ b/pennylane_lightning/lightning_kokkos/lightning_kokkos.py @@ -205,7 +205,7 @@ def __init__( else: type0 = type(InitializationSettings()) raise TypeError( - f"Argument kokkos_args must be of {type0} but it is of {type(kokkos_args)}." + f"Argument kokkos_args must be of type {type0} but it is of {type(kokkos_args)}." ) self._sync = sync diff --git a/tests/test_device.py b/tests/test_device.py index 001e502009..a45f269fb1 100644 --- a/tests/test_device.py +++ b/tests/test_device.py @@ -39,3 +39,12 @@ def test_create_device_with_dtype(C): def test_create_device_with_unsupported_dtype(): with pytest.raises(TypeError, match="Unsupported complex Type:"): dev = qml.device(device_name, wires=1, c_dtype=np.complex256) + + +@pytest.mark.skipif( + device_name != "lightning.kokkos" or not ld._CPP_BINARY_AVAILABLE, + reason="Only lightning.kokkos has a kwarg kokkos_args.", +) +def test_create_device_with_unsupported_kokkos_args(): + with pytest.raises(TypeError, match="Argument kokkos_args must be of type"): + dev = qml.device(device_name, wires=1, kokkos_args=np.complex256) diff --git a/tests/test_expval.py b/tests/test_expval.py index 69584187c0..9041eb363a 100644 --- a/tests/test_expval.py +++ b/tests/test_expval.py @@ -101,17 +101,21 @@ def test_hadamard_expectation(self, theta, phi, qubit_device, tol): ) / np.sqrt(2) assert np.allclose(res, expected, tol) - @pytest.mark.parametrize("n_wires", range(1, 5)) + @pytest.mark.parametrize("n_wires", range(1, 7)) def test_hermitian_expectation(self, n_wires, theta, phi, qubit_device, tol): """Test that Hadamard expectation value is correct""" - dev_def = qml.device("default.qubit", wires=6) - dev = qubit_device(wires=6) + n_qubits = 7 + dev_def = qml.device("default.qubit", wires=n_qubits) + dev = qubit_device(wires=n_qubits) m = 2**n_wires U = np.random.rand(m, m) + 1j * np.random.rand(m, m) U = U + np.conj(U.T) obs = qml.Hermitian(U, wires=range(n_wires)) + init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) + init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) def circuit(): + qml.StatePrep(init_state, wires=range(n_qubits)) qml.RY(theta, wires=[0]) qml.RY(phi, wires=[1]) qml.CNOT(wires=[0, 1]) diff --git a/tests/test_gates.py b/tests/test_gates.py index bcdfca6306..d3d33b8ade 100644 --- a/tests/test_gates.py +++ b/tests/test_gates.py @@ -16,6 +16,7 @@ """ import pytest from conftest import LightningDevice, device_name +from conftest import THETA, PHI import itertools import numpy as np @@ -236,3 +237,29 @@ def test_get_diagonalizing_gates(obs, has_rotation): assert qml.equal(rot_actual, rot_expected) else: assert len(actual) == 0 + + +@pytest.mark.parametrize("theta,phi", list(zip(THETA, PHI))) +@pytest.mark.parametrize("n_wires", range(1, 7)) +def test_qubit_unitary(n_wires, theta, phi, tol): + """Test that Hadamard expectation value is correct""" + n_qubits = 10 + dev_def = qml.device("default.qubit", wires=n_qubits) + dev = qml.device(device_name, wires=n_qubits) + m = 2**n_wires + U = np.random.rand(m, m) + 1j * np.random.rand(m, m) + U, _ = np.linalg.qr(U) + init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) + init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) + + def circuit(): + qml.StatePrep(init_state, wires=range(n_qubits)) + qml.RY(theta, wires=[0]) + qml.RY(phi, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.QubitUnitary(U, wires=range(2, 2 + n_wires)) + return qml.state() + + circ = qml.QNode(circuit, dev) + circ_def = qml.QNode(circuit, dev_def) + assert np.allclose(circ(), circ_def(), tol)