Skip to content

Commit

Permalink
Optimize gate cache recording for lightning.tensor (#879)
Browse files Browse the repository at this point in the history
### Before submitting

Please complete the following checklist when submitting a PR:

- [ ] All new features must include a unit test.
If you've fixed a bug or added code that should be tested, add a test to
the
      [`tests`](../tests) directory!

- [ ] All new functions and code must be clearly commented and
documented.
If you do make documentation changes, make sure that the docs build and
      render correctly by running `make docs`.

- [ ] Ensure that the test suite passes, by running `make test`.

- [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing
the
      change, and including a link back to the PR.

- [x] Ensure that code is properly formatted by running `make format`. 

When all the above are checked, delete everything above the dashed
line and fill in the pull request template.


------------------------------------------------------------------------------------------------------------

**Context:**

[SC-72517]

The updated implementation of `applyOperation` avoids the overhead of a
`cutensornetStateUpdateTensorOperator` call and the creation of a temporary
`DataBuffer` object. A new `gate_ids_` private data member is added for the
quick generation of a fresh key that does not yet exist in the `gate_cache_`.

**Description of the Change:**

**Benefits:**

**Possible Drawbacks:**

**Related GitHub Issues:**

---------

Co-authored-by: ringo-but-quantum <[email protected]>
Co-authored-by: Luis Alfredo Nuñez Meneses <[email protected]>
  • Loading branch information
3 people authored Aug 30, 2024
1 parent 756eb7b commit b9d3334
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 22 deletions.
5 changes: 4 additions & 1 deletion .github/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,11 @@

### Improvements

* Optimize gate cache recording for `lightning.tensor` C++ layer.
[(#879)](https://github.com/PennyLaneAI/pennylane-lightning/pull/879)

* Updated calls of ``size_t`` to ``std::size_t`` everywhere.
[(#816)](https://github.com/PennyLaneAI/pennylane-lightning/pull/816/)
[(#816)](https://github.com/PennyLaneAI/pennylane-lightning/pull/816)

* Update `ctrl_decomp_zyz` tests with `len(control_wires) > 1`.
[(#821)](https://github.com/PennyLaneAI/pennylane-lightning/pull/821)
Expand Down
2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.38.0-dev52"
__version__ = "0.38.0-dev53"
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <complex>
#include <memory>
#include <set>
#include <type_traits>
#include <vector>

Expand Down Expand Up @@ -70,6 +71,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
// states as v24.03

std::shared_ptr<TNCudaGateCache<PrecisionT>> gate_cache_;
std::set<int64_t> gate_ids_;

public:
TNCudaBase() = delete;
Expand Down Expand Up @@ -265,8 +267,18 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
"Unsupported gate: MPS method only supports 1, 2-wires gates");

auto &&par = (params.empty()) ? std::vector<PrecisionT>{0.0} : params;
DataBuffer<PrecisionT, int> dummy_device_data(
Pennylane::Util::exp2(wires.size()), getDevTag());

int64_t dummy_id = gate_ids_.empty() ? 1 : *gate_ids_.rbegin() + 1;

if (gate_matrix.empty()) [[likely]] {
gate_cache_->add_gate(dummy_id, opName, par, adjoint);
} else [[unlikely]] {
auto gate_key = std::make_pair(opName, par);
std::vector<CFP_t> matrix_cu =
cuUtil::complexToCu<ComplexT>(gate_matrix);
gate_cache_->add_gate(dummy_id, gate_key, matrix_cu, adjoint);
}

int64_t id;

std::vector<int32_t> stateModes =
Expand All @@ -284,30 +296,19 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
/* cutensornetState_t */ getQuantumState(),
/* int32_t numStateModes */ stateModes.size(),
/* const int32_t * stateModes */ stateModes.data(),
/* void * */ static_cast<void *>(dummy_device_data.getData()),
/* void * */
static_cast<void *>(gate_cache_->get_gate_device_ptr(dummy_id)),
/* const int64_t *tensorModeStrides */ nullptr,
/* const int32_t immutable */ 0,
/* const int32_t adjoint */ 0,
/* const int32_t unitary */ 1,
/* int64_t * */ &id));
if (!gate_matrix.empty()) {
auto gate_key = std::make_pair(opName, par);
std::vector<CFP_t> matrix_cu =
cuUtil::complexToCu<ComplexT>(gate_matrix);
gate_cache_->add_gate(static_cast<std::size_t>(id), gate_key,
matrix_cu, adjoint);
} else {
gate_cache_->add_gate(static_cast<std::size_t>(id), opName, par,
adjoint);

if (dummy_id != id) {
gate_cache_->update_key(dummy_id, id);
}
PL_CUTENSORNET_IS_SUCCESS(cutensornetStateUpdateTensorOperator(
/* const cutensornetHandle_t */ getTNCudaHandle(),
/* cutensornetState_t */ getQuantumState(),
/* int64_t tensorId*/ id,
/* void* */
static_cast<void *>(
gate_cache_->get_gate_device_ptr(static_cast<std::size_t>(id))),
/* int32_t unitary*/ 1));

gate_ids_.insert(id);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,18 @@ template <class PrecisionT> class TNCudaGateCache {
*/
auto is_empty() const -> bool { return device_gates_.empty(); }

/**
 * @brief Re-key a cached gate entry: the tensor data stored under
 * `old_key` becomes indexed by `new_key` without copying the
 * underlying device buffer.
 *
 * If `old_key` is not present in the cache, the call is a no-op.
 *
 * @param old_key Existing key in `device_gates_` to be re-keyed.
 * @param new_key Replacement key for the same cache entry.
 */
void update_key(const std::size_t old_key, const std::size_t new_key) {
    // extract() detaches the map node in place, so the gate's device
    // buffer is neither copied nor reallocated. Guard against a missing
    // old_key: calling key() on an empty node handle is undefined
    // behavior.
    auto node = device_gates_.extract(old_key);
    if (!node.empty()) {
        node.key() = new_key;
        device_gates_.insert(std::move(node));
    }
}

private:
const DevTag<int> device_tag_;
std::size_t total_alloc_bytes_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,33 @@ TEMPLATE_TEST_CASE("MPSTNCuda::Gates::PauliX", "[MPSTNCuda_Nonparam]", float,
}
}

TEMPLATE_TEST_CASE("MPSTNCuda::Gates::applyOperation-gatematrix",
                   "[MPSTNCuda_Nonparam]", float, double) {
    using ComplexT = std::complex<TestType>;
    const std::size_t n_wires = 3;
    const std::size_t max_bond = 2;
    DevTag<int> dev_tag{0, 0};

    SECTION("Apply different wire indices") {
        const std::size_t target = GENERATE(0, 1, 2);
        MPSTNCuda<TestType> state{n_wires, max_bond, dev_tag};

        // PauliX supplied as a raw gate matrix ({{0, 1}, {1, 0}}) to
        // exercise the matrix-based applyOperation code path.
        const std::vector<ComplexT> mtx{
            cuUtil::ZERO<ComplexT>(), cuUtil::ONE<ComplexT>(),
            cuUtil::ONE<ComplexT>(), cuUtil::ZERO<ComplexT>()};

        state.applyOperation("applyMatrix", {target}, false, {}, mtx);

        const auto host_data = state.getDataVector();

        // Starting from |0...0>, flipping `target` moves the amplitude
        // from index 0 to the basis state with that single bit set.
        CHECK(host_data[0] == cuUtil::ZERO<ComplexT>());
        CHECK(host_data[std::size_t{1} << (n_wires - target - 1)] ==
              cuUtil::ONE<ComplexT>());
    }
}

TEMPLATE_TEST_CASE("MPSTNCuda::Gates::PauliY", "[MPSTNCuda_Nonparam]", float,
double) {
const bool inverse = GENERATE(false, true);
Expand Down

0 comments on commit b9d3334

Please sign in to comment.