PennyLaneAI · vincentmr · Aug 25, 2023 · Aug 21, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
@@ -26,7 +26,10 @@
 
 * Update the CMake internal references to enable sub-project compilation with affecting the parent package.
   [(#478)](https://github.com/PennyLaneAI/pennylane-lightning/pull/478)
-
+
+* Modify `registerAdjointJacobian` and Lightning-Kokkos' `applyMatrix` method to support device execution (with CUDA-12).
+  [(#477)](https://github.com/PennyLaneAI/pennylane-lightning/pull/477)
+
 * `apply` no longer mutates the inputted list of operations.
   [(#474)](https://github.com/PennyLaneAI/pennylane-lightning/pull/474)
 

diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.32.0-dev10"
+__version__ = "0.32.0-dev11"
diff --git a/pennylane_lightning/core/src/algorithms/AdjointJacobianBase.hpp b/pennylane_lightning/core/src/algorithms/AdjointJacobianBase.hpp
@@ -157,8 +157,9 @@ template <class StateVectorT, class Derived> class AdjointJacobianBase {
      */
     inline void adjointJacobian(std::span<PrecisionT> jac,
                                 const JacobianData<StateVectorT> &jd,
+                                const StateVectorT &ref_data = {0},
                                 bool apply_operations = false) {
-        return static_cast<Derived *>(this)->adjointJacobian(jac, jd,
+        return static_cast<Derived *>(this)->adjointJacobian(jac, jd, ref_data,
                                                              apply_operations);
     }
 

diff --git a/pennylane_lightning/core/src/algorithms/tests/Test_AdjointJacobian.cpp b/pennylane_lightning/core/src/algorithms/tests/Test_AdjointJacobian.cpp
@@ -90,7 +90,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{
                 num_params, psi.getLength(), psi.getData(), {obs}, ops, tp};
             PL_REQUIRE_THROWS_MATCHES(
-                adj.adjointJacobian(std::span{jacobian}, tape, true),
+                adj.adjointJacobian(std::span{jacobian}, tape, psi, true),
                 LightningException,
                 "The size of preallocated jacobian must be same as");
         }
@@ -116,7 +116,7 @@ template <typename TypeList> void testAdjointJacobian() {
                 JacobianData<StateVectorT> tape{
                     num_params, psi.getLength(), psi.getData(), {obs}, ops, tp};
                 REQUIRE_NOTHROW(
-                    adj.adjointJacobian(std::span{jacobian}, tape, true));
+                    adj.adjointJacobian(std::span{jacobian}, tape, psi, true));
             }
         }
 
@@ -141,7 +141,7 @@ template <typename TypeList> void testAdjointJacobian() {
 
                 JacobianData<StateVectorT> tape{
                     num_params, psi.getLength(), psi.getData(), {obs}, ops, tp};
-                adj.adjointJacobian(std::span{jacobian}, tape, true);
+                adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
                 CAPTURE(jacobian);
                 CHECK(-sin(p) == Approx(jacobian[0]));
@@ -169,7 +169,7 @@ template <typename TypeList> void testAdjointJacobian() {
 
                 JacobianData<StateVectorT> tape{
                     num_params, psi.getLength(), psi.getData(), {obs}, ops, tp};
-                adj.adjointJacobian(std::span{jacobian}, tape, true);
+                adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
                 CAPTURE(jacobian);
                 CHECK(cos(p) == Approx(jacobian[0]).margin(1e-7));
@@ -199,7 +199,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{num_params,    psi.getLength(),
                                             psi.getData(), {obs1, obs2},
                                             ops,           tp};
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             CAPTURE(jacobian);
             CHECK(-sin(param[0]) == Approx(jacobian[0]).margin(1e-7));
@@ -233,7 +233,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{num_params,    psi.getLength(),
                                             psi.getData(), {obs1, obs2, obs3},
                                             ops,           tp};
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             CAPTURE(jacobian);
             CHECK(-sin(param[0]) == Approx(jacobian[0]).margin(1e-7));
@@ -271,7 +271,7 @@ template <typename TypeList> void testAdjointJacobian() {
                                             psi.getData(), {obs1, obs2, obs3},
                                             ops,           t_params};
 
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             CAPTURE(jacobian);
             CHECK(-sin(param[0]) == Approx(jacobian[0]).margin(1e-7));
@@ -307,7 +307,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{
                 num_params, psi.getLength(), psi.getData(), {obs}, ops, tp};
 
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             CAPTURE(jacobian);
 
@@ -353,7 +353,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{
                 num_params, psi.getLength(), psi.getData(), {obs}, ops, tp};
 
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             CAPTURE(jacobian);
 
@@ -405,7 +405,7 @@ template <typename TypeList> void testAdjointJacobian() {
 
                 JacobianData<StateVectorT> tape{
                     num_params, psi.getLength(), psi.getData(), {obs}, ops, tp};
-                adj.adjointJacobian(std::span{jacobian}, tape, true);
+                adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
                 CAPTURE(theta);
                 CAPTURE(jacobian);
@@ -473,7 +473,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{
                 t_params.size(), psi.getLength(), psi.getData(), {obs}, ops,
                 t_params};
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             std::vector<PrecisionT> expected{-0.71429188, 0.04998561,
                                              -0.71904837};
@@ -510,7 +510,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{
                 num_params, psi.getLength(), psi.getData(), {ham}, ops, tp};
 
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             CAPTURE(jacobian);
             CHECK(-0.3 * sin(param[0]) == Approx(jacobian[0]).margin(1e-7));
@@ -546,7 +546,7 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape{num_params,    psi.getLength(),
                                             psi.getData(), {ham},
                                             ops,           t_params};
-            adj.adjointJacobian(std::span{jacobian}, tape, true);
+            adj.adjointJacobian(std::span{jacobian}, tape, psi, true);
 
             CAPTURE(jacobian);
             CHECK((-0.47 * sin(param[0]) == Approx(jacobian[0]).margin(1e-7)));
@@ -588,8 +588,8 @@ template <typename TypeList> void testAdjointJacobian() {
             JacobianData<StateVectorT> tape2{num_params,    psi.getLength(),
                                              psi.getData(), {obs2},
                                              ops,           t_params};
-            adj.adjointJacobian(std::span{jacobian1}, tape1, true);
-            adj.adjointJacobian(std::span{jacobian2}, tape2, true);
+            adj.adjointJacobian(std::span{jacobian1}, tape1, psi, true);
+            adj.adjointJacobian(std::span{jacobian2}, tape2, psi, true);
 
             CHECK((jacobian1 == PLApprox(jacobian2).margin(1e-7)));
         }

diff --git a/pennylane_lightning/core/src/bindings/Bindings.hpp b/pennylane_lightning/core/src/bindings/Bindings.hpp
@@ -468,9 +468,7 @@ auto registerAdjointJacobian(
                                         observables,
                                         operations,
                                         trainableParams};
-
-    adjoint_jacobian.adjointJacobian(std::span{jac}, jd);
-
+    adjoint_jacobian.adjointJacobian(std::span{jac}, jd, sv);
     return py::array_t<PrecisionT>(py::cast(jac));
 }
 

diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/StateVectorKokkos.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/StateVectorKokkos.hpp
@@ -62,6 +62,7 @@ class StateVectorKokkos final
     using PrecisionT = fp_t;
     using ComplexT = Kokkos::complex<fp_t>;
     using KokkosExecSpace = Kokkos::DefaultExecutionSpace;
+    using HostExecSpace = Kokkos::DefaultHostExecutionSpace;
     using KokkosVector = Kokkos::View<ComplexT *>;
     using KokkosSizeTVector = Kokkos::View<size_t *>;
     using KokkosRangePolicy = Kokkos::RangePolicy<KokkosExecSpace>;
@@ -81,12 +82,10 @@ class StateVectorKokkos final
         Kokkos::View<PrecisionT *, Kokkos::HostSpace,
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
     using ScratchViewComplex =
-        Kokkos::View<ComplexT *,
-                     Kokkos::DefaultExecutionSpace::scratch_memory_space,
+        Kokkos::View<ComplexT *, KokkosExecSpace::scratch_memory_space,
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
     using ScratchViewSizeT =
-        Kokkos::View<size_t *,
-                     Kokkos::DefaultExecutionSpace::scratch_memory_space,
+        Kokkos::View<size_t *, KokkosExecSpace::scratch_memory_space,
                      Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
     using TeamPolicy = Kokkos::TeamPolicy<>;
 
@@ -398,10 +397,14 @@ class StateVectorKokkos final
                             bool inverse = false) {
         PL_ABORT_IF(wires.empty(), "Number of wires must be larger than 0");
         size_t n = 1U << wires.size();
-        KokkosVector matrix_("matrix_", n * n);
-        for (size_t i = 0; i < n * n; i++) {
-            matrix_(i) = matrix[i];
-        }
+        size_t n2 = n * n;
+        KokkosVector matrix_("matrix_", n2);
+        typename KokkosVector::HostMirror matrix_h =
+            Kokkos::create_mirror_view(matrix_);
+        Kokkos::parallel_for(
+            Kokkos::RangePolicy<HostExecSpace>(0, n2),
+            KOKKOS_LAMBDA(const size_t i) { matrix_h(i) = matrix[i]; });
+        Kokkos::deep_copy(matrix_, matrix_h);
         applyMultiQubitOp(matrix_, wires, inverse);
     }
 
@@ -760,13 +763,14 @@ class StateVectorKokkos final
      * @brief Get underlying data vector
      */
     [[nodiscard]] auto getDataVector() -> std::vector<ComplexT> {
-        std::vector<ComplexT> data_(getData(), getData() + this->getLength());
+        std::vector<ComplexT> data_(this->getLength());
+        DeviceToHost(data_.data(), data_.size());
         return data_;
     }
 
     [[nodiscard]] auto getDataVector() const -> const std::vector<ComplexT> {
-        const std::vector<ComplexT> data_(getData(),
-                                          getData() + this->getLength());
+        std::vector<ComplexT> data_(this->getLength());
+        DeviceToHost(data_.data(), data_.size());
         return data_;
     }
 
@@ -782,7 +786,7 @@ class StateVectorKokkos final
      * @brief Copy data from the device space to the host space.
      *
      */
-    inline void DeviceToHost(ComplexT *sv, size_t length) {
+    inline void DeviceToHost(ComplexT *sv, size_t length) const {
         Kokkos::deep_copy(UnmanagedComplexHostView(sv, length), *data_);
     }
 

diff --git a/...ylane_lightning/core/src/simulators/lightning_kokkos/algorithms/AdjointJacobianKokkos.hpp b/...ylane_lightning/core/src/simulators/lightning_kokkos/algorithms/AdjointJacobianKokkos.hpp
@@ -83,6 +83,7 @@ class AdjointJacobian final
      */
     void adjointJacobian(std::span<PrecisionT> jac,
                          const JacobianData<StateVectorT> &jd,
+                         const StateVectorT &ref_data,
                          bool apply_operations = false) {
         const OpsData<StateVectorT> &ops = jd.getOperations();
         const std::vector<std::string> &ops_name = ops.getOpsName();
@@ -112,12 +113,8 @@ class AdjointJacobian final
         auto tp_it = tp.rbegin();
         const auto tp_rend = tp.rend();
 
-        StateVectorKokkos<PrecisionT> ref_data(jd.getPtrStateVec(),
-                                               jd.getSizeStateVec());
-
         // Create $U_{1:p}\vert \lambda \rangle$
-        StateVectorT lambda(ref_data.getNumQubits());
-        lambda.DeviceToDevice(ref_data.getView());
+        StateVectorT lambda{ref_data};
 
         // Apply given operations to statevector if requested
         if (apply_operations) {
@@ -129,7 +126,7 @@ class AdjointJacobian final
                                            StateVectorT(lambda.getNumQubits()));
         this->applyObservables(H_lambda, lambda, obs);
 
-        StateVectorT mu(lambda.getNumQubits());
+        StateVectorT mu{lambda.getNumQubits()};
 
         for (int op_idx = static_cast<int>(ops_name.size() - 1); op_idx >= 0;
              op_idx--) {

diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp
@@ -312,8 +312,7 @@ class Measurements final
      * @return Expectation value with respect to the given observable.
      */
     PrecisionT expval(const Observable<StateVectorT> &ob) {
-        StateVectorT ob_sv(this->_statevector.getNumQubits());
-        ob_sv.DeviceToDevice(this->_statevector.getView());
+        StateVectorT ob_sv{this->_statevector};
         ob.applyInPlace(ob_sv);
         return getRealOfComplexInnerProduct(this->_statevector.getView(),
                                             ob_sv.getView());
@@ -328,8 +327,7 @@ class Measurements final
      */
     PrecisionT expval(const std::vector<ComplexT> &matrix,
                       const std::vector<size_t> &wires) {
-        StateVectorT ob_sv(this->_statevector.getNumQubits());
-        ob_sv.DeviceToDevice(this->_statevector.getView());
+        StateVectorT ob_sv{this->_statevector};
         ob_sv.applyMatrix(matrix, wires);
         return getRealOfComplexInnerProduct(this->_statevector.getView(),
                                             ob_sv.getView());
@@ -344,8 +342,7 @@ class Measurements final
      */
     PrecisionT expval(const std::string &operation,
                       const std::vector<size_t> &wires) {
-        StateVectorT ob_sv(this->_statevector.getNumQubits());
-        ob_sv.DeviceToDevice(this->_statevector.getView());
+        StateVectorT ob_sv{this->_statevector};
         ob_sv.applyOperation(operation, wires);
         return getRealOfComplexInnerProduct(this->_statevector.getView(),
                                             ob_sv.getView());
@@ -425,8 +422,7 @@ class Measurements final
      * @return Variance with respect to the given observable.
      */
     auto var(const Observable<StateVectorT> &ob) -> PrecisionT {
-        StateVectorT ob_sv(this->_statevector.getNumQubits());
-        ob_sv.DeviceToDevice(this->_statevector.getView());
+        StateVectorT ob_sv{this->_statevector};
         ob.applyInPlace(ob_sv);
 
         const PrecisionT mean_square =
@@ -447,8 +443,7 @@ class Measurements final
      */
     PrecisionT var(const std::string &operation,
                    const std::vector<size_t> &wires) {
-        StateVectorT ob_sv(this->_statevector.getNumQubits());
-        ob_sv.DeviceToDevice(this->_statevector.getView());
+        StateVectorT ob_sv{this->_statevector};
         ob_sv.applyOperation(operation, wires);
 
         const PrecisionT mean_square =
@@ -469,8 +464,7 @@ class Measurements final
      */
     PrecisionT var(const std::vector<ComplexT> &matrix,
                    const std::vector<size_t> &wires) {
-        StateVectorT ob_sv(this->_statevector.getNumQubits());
-        ob_sv.DeviceToDevice(this->_statevector.getView());
+        StateVectorT ob_sv{this->_statevector};
         ob_sv.applyMatrix(matrix, wires);
 
         const PrecisionT mean_square =
@@ -533,8 +527,7 @@ class Measurements final
             (this->_statevector.getLength() != (size_t(row_map_size) - 1)),
             "Statevector and Hamiltonian have incompatible sizes.");
 
-        StateVectorT ob_sv(this->_statevector.getNumQubits());
-        ob_sv.DeviceToDevice(this->_statevector.getView());
+        StateVectorT ob_sv{this->_statevector};
 
         SparseMV_Kokkos<PrecisionT>(this->_statevector.getView(),
                                     ob_sv.getView(), row_map_ptr, row_map_size,
@@ -582,7 +575,7 @@ class Measurements final
      * @return Floating point std::vector with probabilities.
      * The basis columns are rearranged according to wires.
      */
-    auto probs(const std::vector<size_t> &wires) {
+    std::vector<PrecisionT> probs(const std::vector<size_t> &wires) {
         using MDPolicyType_2D =
             Kokkos::MDRangePolicy<Kokkos::Rank<2, Kokkos::Iterate::Left>>;
 

diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.hpp
@@ -186,11 +186,10 @@ class Hamiltonian final : public HamiltonianBase<StateVectorT> {
      * @param sv The statevector to update
      */
     void applyInPlace(StateVectorT &sv) const override {
-        StateVectorT buffer(sv.getNumQubits());
+        StateVectorT buffer{sv.getNumQubits()};
         buffer.initZeros();
-
         for (size_t term_idx = 0; term_idx < this->coeffs_.size(); term_idx++) {
-            StateVectorT tmp(sv);
+            StateVectorT tmp{sv};
             this->obs_[term_idx]->applyInPlace(tmp);
             LightningKokkos::Util::axpy_Kokkos<PrecisionT>(
                 ComplexT{this->coeffs_[term_idx], 0.0}, tmp.getView(),
@@ -215,8 +214,7 @@ template <class StateVectorT, bool use_openmp> struct HamiltonianApplyInPlace {
         KokkosVector res("results", sv.getLength());
         Kokkos::deep_copy(res, ComplexT{0.0, 0.0});
         for (size_t term_idx = 0; term_idx < coeffs.size(); term_idx++) {
-            StateVectorT tmp(sv.getNumQubits());
-            tmp.DeviceToDevice(sv.getView());
+            StateVectorT tmp{sv};
             terms[term_idx]->applyInPlace(tmp);
             LightningKokkos::Util::axpy_Kokkos<PrecisionT>(
                 ComplexT{coeffs[term_idx], 0.0}, tmp.getView(), res,

diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/AdjointJacobianLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/AdjointJacobianLQubit.hpp
@@ -210,6 +210,7 @@ class AdjointJacobian final
      */
     void adjointJacobian(std::span<PrecisionT> jac,
                          const JacobianData<StateVectorT> &jd,
+                         [[maybe_unused]] const StateVectorT &ref_data = {0},
                          bool apply_operations = false) {
         const OpsData<StateVectorT> &ops = jd.getOperations();
         const std::vector<std::string> &ops_name = ops.getOpsName();

diff --git a/...tning/core/src/simulators/lightning_qubit/algorithms/tests/Test_VectorJacobianProduct.cpp b/...tning/core/src/simulators/lightning_qubit/algorithms/tests/Test_VectorJacobianProduct.cpp
@@ -338,7 +338,7 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVector VJP", "[Algorithms]",
         }();
 
         std::vector<PrecisionT> jac(num_params);
-        adj.adjointJacobian(std::span{jac}, jd);
+        adj.adjointJacobian(std::span{jac}, jd, sv);
 
         REQUIRE(grad_vjp == approx(jac).margin(1e-5));
     }

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -6,4 +6,5 @@ pybind11
 pytest
 pytest-cov
 pytest-mock
-black==23.7.0
+black==23.7.0
+clang-format==14