Skip to content

Commit

Permalink
Add ObservablesTNCuda & MeasurementTNCuda Class to `Lightning.Ten…
Browse files Browse the repository at this point in the history
…sor` (#728)

### Before submitting

Please complete the following checklist when submitting a PR:

- [ ] All new features must include a unit test.
If you've fixed a bug or added code that should be tested, add a test to
the
      [`tests`](../tests) directory!

- [ ] All new functions and code must be clearly commented and
documented.
If you do make documentation changes, make sure that the docs build and
      render correctly by running `make docs`.

- [x] Ensure that the test suite passes, by running `make test`.

- [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing
the
      change, and including a link back to the PR.

- [x] Ensure that code is properly formatted by running `make format`. 

When all the above are checked, delete everything above the dashed
line and fill in the pull request template.


------------------------------------------------------------------------------------------------------------

**Context:**

[SC-61961] & [SC-61960]

Add `Observables` and `Measurement` classes to the `lightning.tensor`
backend.

**Description of the Change:**

**Benefits:**

**Possible Drawbacks:**

**Related GitHub Issues:**

---------

Co-authored-by: ringo-but-quantum <[email protected]>
Co-authored-by: Rashid N H M <[email protected]>
Co-authored-by: Ali Asadi <[email protected]>
Co-authored-by: Vincent Michaud-Rioux <[email protected]>
Co-authored-by: Vincent Michaud-Rioux <[email protected]>
Co-authored-by: Amintor Dusko <[email protected]>
Co-authored-by: Dev version update bot <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Pietropaolo Frisoni <[email protected]>
Co-authored-by: Christina Lee <[email protected]>
Co-authored-by: Lee James O'Riordan <[email protected]>
  • Loading branch information
11 people authored Jun 3, 2024
1 parent 40ce6f8 commit e24ba31
Show file tree
Hide file tree
Showing 23 changed files with 2,237 additions and 78 deletions.
2 changes: 2 additions & 0 deletions .github/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Release 0.37.0-dev

### New features since last release
* Add `observable` and `expval` support to the `cutensornet`-backed `lightning.tensor` C++ layer.
[(#728)](https://github.com/PennyLaneAI/pennylane-lightning/pull/728)

* Add gate support to `cutensornet` backed `lightning.tensor` C++ layer.
[(#718)](https://github.com/PennyLaneAI/pennylane-lightning/pull/718)
Expand Down
2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.37.0-dev25"
__version__ = "0.37.0-dev26"
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ template <class PrecisionT, class Derived> class TensorBase {
length_ = std::accumulate(extents.begin(), extents.end(),
std::size_t{1}, std::multiplies<>());
}
/**
 * @brief Construct a tensor object with given extents.
 *
 * The rank is the number of extents, every mode is initialized to zero and
 * the length is the product of all extents (1 when `extents` is empty).
 *
 * @param extents Extents of a tensor object.
 */
explicit TensorBase(const std::vector<std::size_t> &extents)
    : rank_(extents.size()),
      // Sized from `extents` directly so this initializer does not depend on
      // `rank_` having been initialized first (member declaration order).
      modes_(std::vector<std::size_t>(extents.size(), std::size_t{0})),
      // `extents` is a const reference: it can only be copied, so no
      // std::move is applied to it.
      extents_(extents) {
    length_ = std::accumulate(extents_.begin(), extents_.end(),
                              std::size_t{1}, std::multiplies<>());
}

~TensorBase() {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ class TensorCuda final : public TensorBase<PrecisionT, TensorCuda<PrecisionT>> {
using BaseType = TensorBase<PrecisionT, TensorCuda>;
using CFP_t = decltype(cuUtil::getCudaType(PrecisionT{}));

/**
* @brief Construct a new TensorCuda object.
*
* @param rank Tensor rank.
* @param modes Tensor modes.
* @param extents Tensor extents.
* @param dev_tag Device tag.
* @param device_alloc If true, allocate memory on device.
*/
explicit TensorCuda(const std::size_t rank,
const std::vector<std::size_t> &modes,
const std::vector<std::size_t> &extents,
Expand All @@ -56,6 +65,24 @@ class TensorCuda final : public TensorBase<PrecisionT, TensorCuda<PrecisionT>> {
data_buffer_{std::make_shared<DataBuffer<CFP_t>>(
BaseType::getLength(), dev_tag, device_alloc)} {}

/**
* @brief Construct a new TensorCuda object from a host data.
*
* @param extents Tensor extents.
* @param host_tensor Host tensor data.
* @param dev_tag Device tag.
* @param device_alloc If true, allocate memory on device.
*/
explicit TensorCuda(const std::vector<std::size_t> &extents,
const std::vector<CFP_t> &host_tensor,
const DevTag<int> &dev_tag, bool device_alloc = true)
: TensorBase<PrecisionT, TensorCuda<PrecisionT>>(extents),
data_buffer_{std::make_shared<DataBuffer<CFP_t>>(
BaseType::getLength(), dev_tag, device_alloc)} {
data_buffer_->CopyHostDataToGpu(host_tensor.data(),
BaseType::getLength());
}

TensorCuda() = delete;

~TensorCuda() = default;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ endif()
###############################################################################
set(COMPONENT_SUBDIRS base
gates
measurements
observables
utils
)
foreach(COMP ${COMPONENT_SUBDIRS})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "TNCudaBase.hpp"
#include "TensorCuda.hpp"
#include "TensornetBase.hpp"
#include "Util.hpp"
#include "cuda_helpers.hpp"
#include "tncudaError.hpp"
#include "tncuda_helpers.hpp"
Expand Down Expand Up @@ -61,6 +62,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
using BaseType = TNCudaBase<Precision, MPSTNCuda>;

MPSStatus MPSInitialized_ = MPSStatus::MPSInitNotSet;
MPSStatus MPSFinalized_ = MPSStatus::MPSFinalizedNotSet;

const std::size_t maxBondDim_;

Expand All @@ -70,9 +72,12 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {

std::vector<TensorCuda<Precision>> tensors_;

std::vector<TensorCuda<Precision>> tensors_out_;

public:
using CFP_t = decltype(cuUtil::getCudaType(Precision{}));
using ComplexT = std::complex<Precision>;
using PrecisionT = Precision;

public:
MPSTNCuda() = delete;
Expand Down Expand Up @@ -133,6 +138,19 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
return tensorsDataPtr;
}

/**
 * @brief Get a vector of pointers to the output tensor data of each site.
 *
 * One pointer is collected per qubit, taken from the data buffer of the
 * matching entry of `tensors_out_`.
 *
 * @return std::vector<CFP_t *>
 */
[[nodiscard]] auto getTensorsOutDataPtr() -> std::vector<CFP_t *> {
    const std::size_t numSites = BaseType::getNumQubits();

    std::vector<CFP_t *> sitePtrs;
    sitePtrs.reserve(numSites);
    for (std::size_t site = 0; site < numSites; ++site) {
        sitePtrs.push_back(tensors_out_[site].getDataBuffer().getData());
    }
    return sitePtrs;
}

/**
* @brief Set current quantum state as zero state.
*/
Expand Down Expand Up @@ -160,8 +178,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
"Please ensure all elements of a basis state should be "
"either 0 or 1.");

CFP_t value_cu =
Pennylane::LightningGPU::Util::complexToCu<ComplexT>({1.0, 0.0});
CFP_t value_cu = cuUtil::complexToCu<ComplexT>(ComplexT{1.0, 0.0});

for (std::size_t i = 0; i < BaseType::getNumQubits(); i++) {
tensors_[i].getDataBuffer().zeroInit();
Expand All @@ -186,6 +203,39 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
}
};

/**
* @brief Get final state of the quantum circuit.
*/
void get_final_state() {
if (MPSFinalized_ == MPSStatus::MPSFinalizedNotSet) {
MPSFinalized_ = MPSStatus::MPSFinalizedSet;
PL_CUTENSORNET_IS_SUCCESS(cutensornetStateFinalizeMPS(
/* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
/* cutensornetState_t */ BaseType::getQuantumState(),
/* cutensornetBoundaryCondition_t */
CUTENSORNET_BOUNDARY_CONDITION_OPEN,
/* const int64_t *const extentsOut[] */
getSitesExtentsPtr().data(),
/*strides=*/nullptr));
}

// Optional: SVD
cutensornetTensorSVDAlgo_t algo =
CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; // default

PL_CUTENSORNET_IS_SUCCESS(cutensornetStateConfigure(
/* const cutensornetHandle_t */ BaseType::getTNCudaHandle(),
/* cutensornetState_t */ BaseType::getQuantumState(),
/* cutensornetStateAttributes_t */
CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO,
/* const void * */ &algo,
/* size_t */ sizeof(algo)));

BaseType::computeState(
const_cast<int64_t **>(getSitesExtentsPtr().data()),
reinterpret_cast<void **>(getTensorsOutDataPtr().data()));
}

/**
* @brief Get the full state vector representation of a MPS quantum state.
*
Expand All @@ -208,7 +258,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
void *output_tensorPtr[] = {
static_cast<void *>(output_tensor.getDataBuffer().getData())};

this->computeState(output_tensorPtr);
BaseType::computeState(nullptr, output_tensorPtr);

std::vector<ComplexT> results(output_extent.front());
output_tensor.CopyGpuDataToHost(results.data(), results.size());
Expand Down Expand Up @@ -281,16 +331,10 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
std::vector<std::vector<int64_t>> setSitesExtents_int64_() {
std::vector<std::vector<int64_t>> localSitesExtents_int64;

for (std::size_t i = 0; i < BaseType::getNumQubits(); i++) {
// Convert datatype of sitesExtents to int64 as required by
// cutensornet backend
std::vector<int64_t> siteExtents_int64(sitesExtents_[i].size());
std::transform(sitesExtents_[i].begin(), sitesExtents_[i].end(),
siteExtents_int64.begin(), [](std::size_t x) {
return static_cast<int64_t>(x);
});

localSitesExtents_int64.push_back(std::move(siteExtents_int64));
for (const auto &siteExtents : sitesExtents_) {
localSitesExtents_int64.push_back(
std::move(Pennylane::Util::cast_vector<std::size_t, int64_t>(
siteExtents)));
}
return localSitesExtents_int64;
}
Expand All @@ -303,6 +347,9 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
// construct mps tensors representation
tensors_.emplace_back(sitesModes_[i].size(), sitesModes_[i],
sitesExtents_[i], BaseType::getDevTag());

tensors_out_.emplace_back(sitesModes_[i].size(), sitesModes_[i],
sitesExtents_[i], BaseType::getDevTag());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,15 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
PL_CUTENSORNET_IS_SUCCESS(cutensornetDestroyState(quantumState_));
}

/**
 * @brief Return the CUDA data type held in `typeData_`.
 *
 * @return cudaDataType_t
 */
[[nodiscard]] cudaDataType_t getCudaDataType() const { return typeData_; }

/**
* @brief Get the cutensornet handle that the object is using.
*
Expand Down Expand Up @@ -181,8 +190,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
"Invalid arguments: number of operations, wires and inverses"
"must all be equal");
for (std::size_t i = 0; i < numOperations; i++) {
this->applyOperation(ops[i], ops_wires[i], ops_adjoint[i],
ops_params[i]);
applyOperation(ops[i], ops_wires[i], ops_adjoint[i], ops_params[i]);
}
}

Expand All @@ -209,7 +217,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
"Invalid arguments: number of operations, wires and inverses"
"must all be equal");
for (std::size_t i = 0; i < numOperations; i++) {
this->applyOperation(ops[i], ops_wires[i], ops_adjoint[i], {});
applyOperation(ops[i], ops_wires[i], ops_adjoint[i], {});
}
}

Expand All @@ -232,11 +240,10 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
DataBuffer<PrecisionT, int> dummy_device_data(
Pennylane::Util::exp2(wires.size()), getDevTag());
int64_t id;
std::vector<int32_t> stateModes(wires.size());
std::transform(
wires.begin(), wires.end(), stateModes.begin(), [&](std::size_t x) {
return static_cast<int32_t>(BaseType::getNumQubits() - 1 - x);
});

std::vector<int32_t> stateModes =
cuUtil::NormalizeCastIndices<std::size_t, int32_t>(
wires, BaseType::getNumQubits());

// TODO: Need changes to support to the controlled gate tensor API once
// the API is finalized in cutensornet lib.
Expand All @@ -256,12 +263,9 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* const int32_t unitary */ 1,
/* int64_t * */ &id));
if (!gate_matrix.empty()) {
std::vector<CFP_t> matrix_cu(gate_matrix.size());
std::transform(gate_matrix.begin(), gate_matrix.end(),
matrix_cu.begin(), [](const ComplexT &x) {
return cuUtil::complexToCu<ComplexT>(x);
});
auto gate_key = std::make_pair(opName, par);
std::vector<CFP_t> matrix_cu =
cuUtil::complexToCu<ComplexT>(gate_matrix);
gate_cache_->add_gate(static_cast<std::size_t>(id), gate_key,
matrix_cu);
} else {
Expand All @@ -278,54 +282,12 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
}

protected:
/**
 * @brief Query the workspace size and pad it to a multiple of 256 bytes.
 *
 * The size is requested from cutensornet with the recommended work-size
 * preference, for device memory and the scratch workspace kind.
 *
 * @param workDesc cutensornet work space descriptor
 *
 * @return std::size_t
 */
std::size_t
getWorkSpaceMemorySize(cutensornetWorkspaceDescriptor_t &workDesc) {
    constexpr int64_t alignment{256};
    int64_t requiredBytes{0};

    PL_CUTENSORNET_IS_SUCCESS(cutensornetWorkspaceGetMemorySize(
        /* const cutensornetHandle_t */ getTNCudaHandle(),
        /* cutensornetWorkspaceDescriptor_t */ workDesc,
        /* cutensornetWorksizePref_t */
        CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
        /* cutensornetMemspace_t*/ CUTENSORNET_MEMSPACE_DEVICE,
        /* cutensornetWorkspaceKind_t */ CUTENSORNET_WORKSPACE_SCRATCH,
        /* int64_t * */ &requiredBytes));

    // Pad up to the next multiple of 256 bytes. A size that is already a
    // multiple of 256 still grows by one full 256-byte step.
    const int64_t padding = alignment - requiredBytes % alignment;

    return static_cast<std::size_t>(requiredBytes + padding);
}

/**
 * @brief Attach scratch memory to a workspace descriptor.
 *
 * The memory is registered with cutensornet as device memory of the
 * scratch workspace kind.
 *
 * @param workDesc cutensornet work space descriptor
 * @param scratchPtr Pointer to scratch memory
 * @param worksize Memory size of a work space
 */
void setWorkSpaceMemory(cutensornetWorkspaceDescriptor_t &workDesc,
                        void *scratchPtr, std::size_t &worksize) {
    // cutensornet takes the size as a signed 64-bit integer.
    const auto worksizeBytes = static_cast<int64_t>(worksize);

    PL_CUTENSORNET_IS_SUCCESS(cutensornetWorkspaceSetMemory(
        /* const cutensornetHandle_t */ getTNCudaHandle(),
        /* cutensornetWorkspaceDescriptor_t */ workDesc,
        /* cutensornetMemspace_t*/ CUTENSORNET_MEMSPACE_DEVICE,
        /* cutensornetWorkspaceKind_t */ CUTENSORNET_WORKSPACE_SCRATCH,
        /* void *const */ scratchPtr,
        /* int64_t */ worksizeBytes));
}

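/**
* @brief Save quantumState information to data provided by a user
*
* @param extentsPtr Pointer to the extents of the output tensors; it is
* forwarded to cutensornetStateCompute and may be nullptr
* @param tensorPtr Pointer to tensors provided by a user
*/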
void computeState(void **tensorPtr) {
void computeState(int64_t **extentsPtr, void **tensorPtr) {
cutensornetWorkspaceDescriptor_t workDesc;
PL_CUTENSORNET_IS_SUCCESS(
cutensornetCreateWorkspaceDescriptor(getTNCudaHandle(), &workDesc));
Expand All @@ -341,7 +303,8 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* cutensornetWorkspaceDescriptor_t */ workDesc,
/* cudaStream_t unused in v24.03*/ 0x0));

std::size_t worksize = getWorkSpaceMemorySize(workDesc);
std::size_t worksize =
getWorkSpaceMemorySize(getTNCudaHandle(), workDesc);

PL_ABORT_IF(worksize > scratchSize,
"Insufficient workspace size on Device!");
Expand All @@ -350,14 +313,15 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
DataBuffer<std::size_t, int> d_scratch(d_scratch_length, getDevTag(),
true);

setWorkSpaceMemory(
workDesc, reinterpret_cast<void *>(d_scratch.getData()), worksize);
setWorkSpaceMemory(getTNCudaHandle(), workDesc,
reinterpret_cast<void *>(d_scratch.getData()),
worksize);

PL_CUTENSORNET_IS_SUCCESS(cutensornetStateCompute(
/* const cutensornetHandle_t */ getTNCudaHandle(),
/* cutensornetState_t */ getQuantumState(),
/* cutensornetWorkspaceDescriptor_t */ workDesc,
/* int64_t * */ nullptr,
/* int64_t * */ extentsPtr,
/* int64_t *stridesOut */ nullptr,
/* void * */ tensorPtr,
/* cudaStream_t */ getDevTag().getStreamID()));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
cmake_minimum_required(VERSION 3.20)

project(${PL_BACKEND}_measurements LANGUAGES CXX)

# INTERFACE library: it has no sources of its own and only forwards include
# directories and link dependencies to its consumers.
add_library(${PL_BACKEND}_measurements INTERFACE)

target_include_directories(
    ${PL_BACKEND}_measurements
    INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}
)

target_link_libraries(
    ${PL_BACKEND}_measurements
    INTERFACE
        lightning_compile_options
        lightning_external_libs
        ${PL_TENSOR}
        ${PL_BACKEND}_utils
        ${PL_BACKEND}_observables
)

# The tests subdirectory is only added when BUILD_TESTS is enabled.
if(BUILD_TESTS)
    enable_testing()
    add_subdirectory("tests")
endif()
Loading

0 comments on commit e24ba31

Please sign in to comment.