Add ObservablesTNCuda & MeasurementTNCuda Class to `Lightning.Ten…

…sor` (#728) ### Before submitting Please complete the following checklist when submitting a PR: - [ ] All new features must include a unit test. If you've fixed a bug or added code that should be tested, add a test to the [`tests`](../tests) directory! - [ ] All new functions and code must be clearly commented and documented. If you do make documentation changes, make sure that the docs build and render correctly by running `make docs`. - [x] Ensure that the test suite passes, by running `make test`. - [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing the change, and including a link back to the PR. - [x] Ensure that code is properly formatted by running `make format`. When all the above are checked, delete everything above the dashed line and fill in the pull request template. ------------------------------------------------------------------------------------------------------------ **Context:** [SC-61961] & [SC-61960] Add `Observables` and `Measurement` classes to the `lightning.tensor` backend. **Description of the Change:** **Benefits:** **Possible Drawbacks:** **Related GitHub Issues:** --------- Co-authored-by: ringo-but-quantum <[email protected]> Co-authored-by: Rashid N H M <[email protected]> Co-authored-by: Ali Asadi <[email protected]> Co-authored-by: Vincent Michaud-Rioux <[email protected]> Co-authored-by: Vincent Michaud-Rioux <[email protected]> Co-authored-by: Amintor Dusko <[email protected]> Co-authored-by: Dev version update bot <github-actions[bot]@users.noreply.github.com> Co-authored-by: Pietropaolo Frisoni <[email protected]> Co-authored-by: Christina Lee <[email protected]> Co-authored-by: Lee James O'Riordan <[email protected]>
PennyLaneAI · Jun 3, 2024 · e24ba31 · e24ba31
1 parent 40ce6f8
commit e24ba31
Show file tree

Hide file tree

Showing 23 changed files with 2,237 additions and 78 deletions.
diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
@@ -1,6 +1,8 @@
 # Release 0.37.0-dev
 
 ### New features since last release
+
+* Add `observable` and `expval` support to `cutensornet` backed `lightning.tensor` C++ layer.
+  [(#728)](https://github.com/PennyLaneAI/pennylane-lightning/pull/728)
 
 * Add gate support to `cutensornet` backed `lightning.tensor` C++ layer.
   [(#718)](https://github.com/PennyLaneAI/pennylane-lightning/pull/718)

diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.37.0-dev25"
+__version__ = "0.37.0-dev26"
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/base/TensorBase.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/base/TensorBase.hpp
@@ -48,6 +48,18 @@ template <class PrecisionT, class Derived> class TensorBase {
         length_ = std::accumulate(extents.begin(), extents.end(),
                                   std::size_t{1}, std::multiplies<>());
     }
+    /**
+     * @brief Construct a tensor object with given extents.
+     *
+     * The rank is taken from the number of extents, every mode is set to
+     * zero, and the length is the product of all extents.
+     *
+     * @param extents Extents of a tensor object.
+     */
+    explicit TensorBase(const std::vector<std::size_t> &extents)
+        : rank_(extents.size()),
+          // Sized from `extents` directly so this initialiser does not rely
+          // on `rank_` being declared (and thus initialised) before `modes_`.
+          modes_(std::vector<std::size_t>(extents.size(), std::size_t{0})),
+          // `extents` is a const reference, so it can only be copied.
+          extents_(extents) {
+        length_ = std::accumulate(extents_.begin(), extents_.end(),
+                                  std::size_t{1}, std::multiplies<>());
+    }
 
     ~TensorBase() {}
 

diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/tncuda/TensorCuda.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/tncuda/TensorCuda.hpp
@@ -48,6 +48,15 @@ class TensorCuda final : public TensorBase<PrecisionT, TensorCuda<PrecisionT>> {
     using BaseType = TensorBase<PrecisionT, TensorCuda>;
     using CFP_t = decltype(cuUtil::getCudaType(PrecisionT{}));
 
+    /**
+     * @brief Construct a new TensorCuda object.
+     *
+     * @param rank Tensor rank.
+     * @param modes Tensor modes.
+     * @param extents Tensor extents.
+     * @param dev_tag Device tag.
+     * @param device_alloc If true, allocate memory on device.
+     */
     explicit TensorCuda(const std::size_t rank,
                         const std::vector<std::size_t> &modes,
                         const std::vector<std::size_t> &extents,
@@ -56,6 +65,24 @@ class TensorCuda final : public TensorBase<PrecisionT, TensorCuda<PrecisionT>> {
           data_buffer_{std::make_shared<DataBuffer<CFP_t>>(
               BaseType::getLength(), dev_tag, device_alloc)} {}
 
+    /**
+     * @brief Construct a new TensorCuda object from host data.
+     *
+     * The base class is constructed from `extents` alone, a data buffer of
+     * `getLength()` elements is created, and the first `getLength()` elements
+     * of `host_tensor` are copied into that buffer.
+     *
+     * @param extents Tensor extents.
+     * @param host_tensor Host tensor data. Must hold at least `getLength()`
+     * elements.
+     * @param dev_tag Device tag.
+     * @param device_alloc If true, allocate memory on device.
+     * NOTE(review): the host-to-device copy is issued regardless of this
+     * flag -- confirm that `false` is a supported value for this overload.
+     */
+    explicit TensorCuda(const std::vector<std::size_t> &extents,
+                        const std::vector<CFP_t> &host_tensor,
+                        const DevTag<int> &dev_tag, bool device_alloc = true)
+        : TensorBase<PrecisionT, TensorCuda<PrecisionT>>(extents),
+          data_buffer_{std::make_shared<DataBuffer<CFP_t>>(
+              BaseType::getLength(), dev_tag, device_alloc)} {
+        // The copy below reads getLength() elements from the host vector;
+        // reject shorter inputs instead of reading past their end.
+        PL_ABORT_IF(host_tensor.size() < BaseType::getLength(),
+                    "Host tensor data holds fewer elements than required by "
+                    "the tensor extents.");
+        data_buffer_->CopyHostDataToGpu(host_tensor.data(),
+                                        BaseType::getLength());
+    }
+
+
     TensorCuda() = delete;
 
     ~TensorCuda() = default;

diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/CMakeLists.txt b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/CMakeLists.txt
@@ -59,6 +59,8 @@ endif()
 ###############################################################################
 set(COMPONENT_SUBDIRS      base
                            gates
+                           measurements
+                           observables
                            utils
 )
 foreach(COMP ${COMPONENT_SUBDIRS})

diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/MPSTNCuda.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/MPSTNCuda.hpp
@@ -32,6 +32,7 @@
 #include "TNCudaBase.hpp"
 #include "TensorCuda.hpp"
 #include "TensornetBase.hpp"
+#include "Util.hpp"
 #include "cuda_helpers.hpp"
 #include "tncudaError.hpp"
 #include "tncuda_helpers.hpp"
@@ -61,6 +62,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
     using BaseType = TNCudaBase<Precision, MPSTNCuda>;
 
     MPSStatus MPSInitialized_ = MPSStatus::MPSInitNotSet;
+    MPSStatus MPSFinalized_ = MPSStatus::MPSFinalizedNotSet;
 
     const std::size_t maxBondDim_;
 
@@ -70,9 +72,12 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
 
     std::vector<TensorCuda<Precision>> tensors_;
 
+    std::vector<TensorCuda<Precision>> tensors_out_;
+
   public:
     using CFP_t = decltype(cuUtil::getCudaType(Precision{}));
     using ComplexT = std::complex<Precision>;
+    using PrecisionT = Precision;
 
   public:
     MPSTNCuda() = delete;
@@ -133,6 +138,19 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
         return tensorsDataPtr;
     }
 
+    /**
+     * @brief Collect the data pointer of the output tensor of every site.
+     *
+     * One entry is produced per qubit, in site order, each taken from the
+     * data buffer of the matching element of `tensors_out_`.
+     *
+     * @return std::vector<CFP_t *>
+     */
+    [[nodiscard]] auto getTensorsOutDataPtr() -> std::vector<CFP_t *> {
+        const std::size_t numSites = BaseType::getNumQubits();
+        std::vector<CFP_t *> sitePtrs;
+        sitePtrs.reserve(numSites);
+        for (std::size_t site = 0; site < numSites; ++site) {
+            sitePtrs.push_back(tensors_out_[site].getDataBuffer().getData());
+        }
+        return sitePtrs;
+    }
+
+
     /**
      * @brief Set current quantum state as zero state.
      */
@@ -160,8 +178,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
                         "Please ensure all elements of a basis state should be "
                         "either 0 or 1.");
 
-        CFP_t value_cu =
-            Pennylane::LightningGPU::Util::complexToCu<ComplexT>({1.0, 0.0});
+        CFP_t value_cu = cuUtil::complexToCu<ComplexT>(ComplexT{1.0, 0.0});
 
         for (std::size_t i = 0; i < BaseType::getNumQubits(); i++) {
             tensors_[i].getDataBuffer().zeroInit();
@@ -186,6 +203,39 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
         }
     };
 
+    /**
+     * @brief Get final state of the quantum circuit.
+     *
+     * On the first call the quantum state is finalized as an MPS with open
+     * boundary conditions, using the site extents of this object. Every call
+     * then sets the MPS SVD algorithm to GESVDJ and computes the state into
+     * the output site tensors (`tensors_out_`).
+     */
+    void get_final_state() {
+        if (MPSFinalized_ == MPSStatus::MPSFinalizedNotSet) {
+            PL_CUTENSORNET_IS_SUCCESS(cutensornetStateFinalizeMPS(
+                /* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
+                /* cutensornetState_t */ BaseType::getQuantumState(),
+                /* cutensornetBoundaryCondition_t */
+                CUTENSORNET_BOUNDARY_CONDITION_OPEN,
+                /* const int64_t *const extentsOut[] */
+                getSitesExtentsPtr().data(),
+                /*strides=*/nullptr));
+            // Record the finalization only once the call above has passed
+            // the success check, so a failed attempt is never marked as done.
+            MPSFinalized_ = MPSStatus::MPSFinalizedSet;
+        }
+
+        // Optional: SVD
+        cutensornetTensorSVDAlgo_t algo =
+            CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; // default
+
+        PL_CUTENSORNET_IS_SUCCESS(cutensornetStateConfigure(
+            /* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
+            /* cutensornetState_t */ BaseType::getQuantumState(),
+            /* cutensornetStateAttributes_t */
+            CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO,
+            /* const void * */ &algo,
+            /* size_t */ sizeof(algo)));
+
+        // Both temporaries live until the end of this full expression, so the
+        // pointers handed to computeState stay valid for the whole call.
+        BaseType::computeState(
+            const_cast<int64_t **>(getSitesExtentsPtr().data()),
+            reinterpret_cast<void **>(getTensorsOutDataPtr().data()));
+    }
+
+
     /**
      * @brief Get the full state vector representation of a MPS quantum state.
      *
@@ -208,7 +258,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
         void *output_tensorPtr[] = {
             static_cast<void *>(output_tensor.getDataBuffer().getData())};
 
-        this->computeState(output_tensorPtr);
+        BaseType::computeState(nullptr, output_tensorPtr);
 
         std::vector<ComplexT> results(output_extent.front());
         output_tensor.CopyGpuDataToHost(results.data(), results.size());
@@ -281,16 +331,10 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
     std::vector<std::vector<int64_t>> setSitesExtents_int64_() {
         std::vector<std::vector<int64_t>> localSitesExtents_int64;
 
-        for (std::size_t i = 0; i < BaseType::getNumQubits(); i++) {
-            // Convert datatype of sitesExtents to int64 as required by
-            // cutensornet backend
-            std::vector<int64_t> siteExtents_int64(sitesExtents_[i].size());
-            std::transform(sitesExtents_[i].begin(), sitesExtents_[i].end(),
-                           siteExtents_int64.begin(), [](std::size_t x) {
-                               return static_cast<int64_t>(x);
-                           });
-
-            localSitesExtents_int64.push_back(std::move(siteExtents_int64));
+        // One converted entry is appended per site, so allocate once.
+        localSitesExtents_int64.reserve(sitesExtents_.size());
+        for (const auto &siteExtents : sitesExtents_) {
+            // Convert datatype of sitesExtents to int64 as required by
+            // cutensornet backend. The call result is already a temporary,
+            // so it is moved into the vector without an explicit std::move.
+            localSitesExtents_int64.push_back(
+                Pennylane::Util::cast_vector<std::size_t, int64_t>(
+                    siteExtents));
         }
         return localSitesExtents_int64;
     }
@@ -303,6 +347,9 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
             // construct mps tensors reprensentation
             tensors_.emplace_back(sitesModes_[i].size(), sitesModes_[i],
                                   sitesExtents_[i], BaseType::getDevTag());
+
+            tensors_out_.emplace_back(sitesModes_[i].size(), sitesModes_[i],
+                                      sitesExtents_[i], BaseType::getDevTag());
         }
     }
 

diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp
@@ -127,6 +127,15 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
         PL_CUTENSORNET_IS_SUCCESS(cutensornetDestroyState(quantumState_));
     }
 
+    /**
+     * @brief Report the CUDA data type held by this object.
+     *
+     * @return cudaDataType_t Value of the `typeData_` member.
+     */
+    [[nodiscard]] auto getCudaDataType() const -> cudaDataType_t {
+        return this->typeData_;
+    }
+
     /**
      * @brief Get the cutensornet handle that the object is using.
      *
@@ -181,8 +190,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
             "Invalid arguments: number of operations, wires and inverses"
             "must all be equal");
         for (std::size_t i = 0; i < numOperations; i++) {
-            this->applyOperation(ops[i], ops_wires[i], ops_adjoint[i],
-                                 ops_params[i]);
+            applyOperation(ops[i], ops_wires[i], ops_adjoint[i], ops_params[i]);
         }
     }
 
@@ -209,7 +217,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
             "Invalid arguments: number of operations, wires and inverses"
             "must all be equal");
         for (std::size_t i = 0; i < numOperations; i++) {
-            this->applyOperation(ops[i], ops_wires[i], ops_adjoint[i], {});
+            applyOperation(ops[i], ops_wires[i], ops_adjoint[i], {});
         }
     }
 
@@ -232,11 +240,10 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
         DataBuffer<PrecisionT, int> dummy_device_data(
             Pennylane::Util::exp2(wires.size()), getDevTag());
         int64_t id;
-        std::vector<int32_t> stateModes(wires.size());
-        std::transform(
-            wires.begin(), wires.end(), stateModes.begin(), [&](std::size_t x) {
-                return static_cast<int32_t>(BaseType::getNumQubits() - 1 - x);
-            });
+
+        std::vector<int32_t> stateModes =
+            cuUtil::NormalizeCastIndices<std::size_t, int32_t>(
+                wires, BaseType::getNumQubits());
 
         // TODO: Need changes to support to the controlled gate tensor API once
         // the API is finalized in cutensornet lib.
@@ -256,12 +263,9 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
             /* const int32_t unitary */ 1,
             /* int64_t * */ &id));
         if (!gate_matrix.empty()) {
-            std::vector<CFP_t> matrix_cu(gate_matrix.size());
-            std::transform(gate_matrix.begin(), gate_matrix.end(),
-                           matrix_cu.begin(), [](const ComplexT &x) {
-                               return cuUtil::complexToCu<ComplexT>(x);
-                           });
             auto gate_key = std::make_pair(opName, par);
+            std::vector<CFP_t> matrix_cu =
+                cuUtil::complexToCu<ComplexT>(gate_matrix);
             gate_cache_->add_gate(static_cast<std::size_t>(id), gate_key,
                                   matrix_cu);
         } else {
@@ -278,54 +282,12 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
     }
 
   protected:
-    /**
-     * @brief Returns the workspace size.
-     *
-     * @return std::size_t
-     */
-    std::size_t
-    getWorkSpaceMemorySize(cutensornetWorkspaceDescriptor_t &workDesc) {
-        int64_t worksize{0};
-
-        PL_CUTENSORNET_IS_SUCCESS(cutensornetWorkspaceGetMemorySize(
-            /* const cutensornetHandle_t */ getTNCudaHandle(),
-            /* cutensornetWorkspaceDescriptor_t */ workDesc,
-            /* cutensornetWorksizePref_t */
-            CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
-            /* cutensornetMemspace_t*/ CUTENSORNET_MEMSPACE_DEVICE,
-            /* cutensornetWorkspaceKind_t */ CUTENSORNET_WORKSPACE_SCRATCH,
-            /*  int64_t * */ &worksize));
-
-        // Ensure data is aligned by 256 bytes
-        worksize += int64_t{256} - worksize % int64_t{256};
-
-        return static_cast<std::size_t>(worksize);
-    }
-
-    /**
-     * @brief Set memory for a workspace.
-     *
-     * @param workDesc cutensornet work space descriptor
-     * @param scratchPtr Pointer to scratch memory
-     * @param worksize Memory size of a work space
-     */
-    void setWorkSpaceMemory(cutensornetWorkspaceDescriptor_t &workDesc,
-                            void *scratchPtr, std::size_t &worksize) {
-        PL_CUTENSORNET_IS_SUCCESS(cutensornetWorkspaceSetMemory(
-            /* const cutensornetHandle_t */ getTNCudaHandle(),
-            /* cutensornetWorkspaceDescriptor_t */ workDesc,
-            /* cutensornetMemspace_t*/ CUTENSORNET_MEMSPACE_DEVICE,
-            /* cutensornetWorkspaceKind_t */ CUTENSORNET_WORKSPACE_SCRATCH,
-            /* void *const */ scratchPtr,
-            /* int64_t */ static_cast<int64_t>(worksize)));
-    }
-
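     /**
      * @brief Save quantumState information to data provided by a user
      *
+     * @param extentsPtr Pointer to the tensor extents forwarded to
+     * cutensornetStateCompute; may be nullptr
      * @param tensorPtr Pointer to tensors provided by a user
      */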
-    void computeState(void **tensorPtr) {
+    void computeState(int64_t **extentsPtr, void **tensorPtr) {
         cutensornetWorkspaceDescriptor_t workDesc;
         PL_CUTENSORNET_IS_SUCCESS(
             cutensornetCreateWorkspaceDescriptor(getTNCudaHandle(), &workDesc));
@@ -341,7 +303,8 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
             /* cutensornetWorkspaceDescriptor_t */ workDesc,
             /*  cudaStream_t unused in v24.03*/ 0x0));
 
-        std::size_t worksize = getWorkSpaceMemorySize(workDesc);
+        std::size_t worksize =
+            getWorkSpaceMemorySize(getTNCudaHandle(), workDesc);
 
         PL_ABORT_IF(worksize > scratchSize,
                     "Insufficient workspace size on Device!");
@@ -350,14 +313,15 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
         DataBuffer<std::size_t, int> d_scratch(d_scratch_length, getDevTag(),
                                                true);
 
-        setWorkSpaceMemory(
-            workDesc, reinterpret_cast<void *>(d_scratch.getData()), worksize);
+        setWorkSpaceMemory(getTNCudaHandle(), workDesc,
+                           reinterpret_cast<void *>(d_scratch.getData()),
+                           worksize);
 
         PL_CUTENSORNET_IS_SUCCESS(cutensornetStateCompute(
             /* const cutensornetHandle_t */ getTNCudaHandle(),
             /* cutensornetState_t */ getQuantumState(),
             /* cutensornetWorkspaceDescriptor_t */ workDesc,
-            /* int64_t * */ nullptr,
+            /* int64_t * */ extentsPtr,
             /* int64_t *stridesOut */ nullptr,
             /* void * */ tensorPtr,
             /* cudaStream_t */ getDevTag().getStreamID()));

diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/measurements/CMakeLists.txt b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/measurements/CMakeLists.txt
@@ -0,0 +1,19 @@
+# INTERFACE target for the ${PL_BACKEND} measurements component: it carries
+# only an include path and link dependencies for its consumers.
+cmake_minimum_required(VERSION 3.20)
+
+project(${PL_BACKEND}_measurements LANGUAGES CXX)
+
+add_library(${PL_BACKEND}_measurements INTERFACE)
+
+# Expose this directory as an include path to anything linking the target.
+target_include_directories(
+    ${PL_BACKEND}_measurements
+    INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+# Dependencies propagated to consumers of the target.
+target_link_libraries(
+    ${PL_BACKEND}_measurements
+    INTERFACE
+        lightning_compile_options
+        lightning_external_libs
+        ${PL_TENSOR}
+        ${PL_BACKEND}_utils
+        ${PL_BACKEND}_observables
+)
+
+# The tests subdirectory is only configured when BUILD_TESTS is set.
+if(BUILD_TESTS)
+    enable_testing()
+    add_subdirectory("tests")
+endif()