From b9d33349bdeac878cd9c949bf49fa2a371cf90df Mon Sep 17 00:00:00 2001 From: Shuli Shu <31480676+multiphaseCFD@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:12:18 -0400 Subject: [PATCH] Optimize gate cache recording for `lightning.tensor` (#879) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Before submitting Please complete the following checklist when submitting a PR: - [ ] All new features must include a unit test. If you've fixed a bug or added code that should be tested, add a test to the [`tests`](../tests) directory! - [ ] All new functions and code must be clearly commented and documented. If you do make documentation changes, make sure that the docs build and render correctly by running `make docs`. - [ ] Ensure that the test suite passes, by running `make test`. - [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing the change, and including a link back to the PR. - [x] Ensure that code is properly formatted by running `make format`. When all the above are checked, delete everything above the dashed line and fill in the pull request template. ------------------------------------------------------------------------------------------------------------ **Context:** [SC-72517] The updated implementation of `applyOperation` avoids the overhead of a `cutensornetStateUpdateTensorOperator` call and the creation of a new `DataBuffer` object. A new private data member, `gate_ids_`, is added to quickly generate a new key that does not yet exist in the `gate_cache`. 
**Description of the Change:** **Benefits:** **Possible Drawbacks:** **Related GitHub Issues:** --------- Co-authored-by: ringo-but-quantum Co-authored-by: Luis Alfredo Nuñez Meneses --- .github/CHANGELOG.md | 5 ++- pennylane_lightning/core/_version.py | 2 +- .../lightning_tensor/tncuda/TNCudaBase.hpp | 41 ++++++++++--------- .../tncuda/gates/TNCudaGateCache.hpp | 12 ++++++ .../gates/tests/Test_MPSTNCuda_NonParam.cpp | 27 ++++++++++++ 5 files changed, 65 insertions(+), 22 deletions(-) diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 18cd6b8aff..1c791e1ee9 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -36,8 +36,11 @@ ### Improvements +* Optimize gate cache recording for `lightning.tensor` C++ layer. + [(#879)](https://github.com/PennyLaneAI/pennylane-lightning/pull/879) + * Updated calls of ``size_t`` to ``std::size_t`` everywhere. - [(#816)](https://github.com/PennyLaneAI/pennylane-lightning/pull/816/) + [(#816)](https://github.com/PennyLaneAI/pennylane-lightning/pull/816) * Update `ctrl_decomp_zyz` tests with `len(control_wires) > 1`. 
[(#821)](https://github.com/PennyLaneAI/pennylane-lightning/pull/821) diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index 30103c25ab..d7b122382e 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.38.0-dev52" +__version__ = "0.38.0-dev53" diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp index b545ef890d..df3594ebc2 100644 --- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -70,6 +71,7 @@ class TNCudaBase : public TensornetBase { // states as v24.03 std::shared_ptr> gate_cache_; + std::set gate_ids_; public: TNCudaBase() = delete; @@ -265,8 +267,18 @@ class TNCudaBase : public TensornetBase { "Unsupported gate: MPS method only supports 1, 2-wires gates"); auto &&par = (params.empty()) ? std::vector{0.0} : params; - DataBuffer dummy_device_data( - Pennylane::Util::exp2(wires.size()), getDevTag()); + + int64_t dummy_id = gate_ids_.empty() ? 
1 : *gate_ids_.rbegin() + 1; + + if (gate_matrix.empty()) [[likely]] { + gate_cache_->add_gate(dummy_id, opName, par, adjoint); + } else [[unlikely]] { + auto gate_key = std::make_pair(opName, par); + std::vector matrix_cu = + cuUtil::complexToCu(gate_matrix); + gate_cache_->add_gate(dummy_id, gate_key, matrix_cu, adjoint); + } + int64_t id; std::vector stateModes = @@ -284,30 +296,19 @@ class TNCudaBase : public TensornetBase { /* cutensornetState_t */ getQuantumState(), /* int32_t numStateModes */ stateModes.size(), /* const int32_t * stateModes */ stateModes.data(), - /* void * */ static_cast(dummy_device_data.getData()), + /* void * */ + static_cast(gate_cache_->get_gate_device_ptr(dummy_id)), /* const int64_t *tensorModeStrides */ nullptr, /* const int32_t immutable */ 0, /* const int32_t adjoint */ 0, /* const int32_t unitary */ 1, /* int64_t * */ &id)); - if (!gate_matrix.empty()) { - auto gate_key = std::make_pair(opName, par); - std::vector matrix_cu = - cuUtil::complexToCu(gate_matrix); - gate_cache_->add_gate(static_cast(id), gate_key, - matrix_cu, adjoint); - } else { - gate_cache_->add_gate(static_cast(id), opName, par, - adjoint); + + if (dummy_id != id) { + gate_cache_->update_key(dummy_id, id); } - PL_CUTENSORNET_IS_SUCCESS(cutensornetStateUpdateTensorOperator( - /* const cutensornetHandle_t */ getTNCudaHandle(), - /* cutensornetState_t */ getQuantumState(), - /* int64_t tensorId*/ id, - /* void* */ - static_cast( - gate_cache_->get_gate_device_ptr(static_cast(id))), - /* int32_t unitary*/ 1)); + + gate_ids_.insert(id); } /** diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp index 30e5d824f1..d1d08e266e 100644 --- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp @@ 
-173,6 +173,18 @@ template class TNCudaGateCache { */ auto is_empty() const -> bool { return device_gates_.empty(); } + /** + * @brief Re-key a cached gate: extract the entry stored under old_key from device_gates_ and re-insert it under new_key. + * + * @param old_key Key of the entry to move. The code does not check that it exists; assuming device_gates_ is a standard map-like container, extract() on a missing key returns an empty node handle and calling key() on it is undefined behaviour, so callers must pass a key that is present. + * @param new_key Key the extracted entry is re-inserted under. NOTE(review): if device_gates_ has unique keys and new_key is already present, insert() would not insert and the extracted entry would be dropped -- confirm against the container type. + */ + void update_key(const std::size_t old_key, const std::size_t new_key) { + auto it = device_gates_.extract(old_key); + it.key() = new_key; + device_gates_.insert(std::move(it)); + } + private: const DevTag device_tag_; std::size_t total_alloc_bytes_; diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/tests/Test_MPSTNCuda_NonParam.cpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/tests/Test_MPSTNCuda_NonParam.cpp index 8718cb1934..83d3d73230 100644 --- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/tests/Test_MPSTNCuda_NonParam.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/tests/Test_MPSTNCuda_NonParam.cpp @@ -120,6 +120,33 @@ TEMPLATE_TEST_CASE("MPSTNCuda::Gates::PauliX", "[MPSTNCuda_Nonparam]", float, } } +TEMPLATE_TEST_CASE("MPSTNCuda::Gates::applyOperation-gatematrix", + "[MPSTNCuda_Nonparam]", float, double) { /* Applies the 2x2 gate_matrix {ZERO, ONE, ONE, ZERO} through applyOperation to each wire index in turn, then checks amplitude 0 against ZERO and amplitude 1 << (num_qubits - index - 1) against ONE. */ + std::size_t num_qubits = 3; + std::size_t maxExtent = 2; + DevTag dev_tag{0, 0}; + + SECTION("Apply different wire indices") { + const std::size_t index = GENERATE(0, 1, 2); + MPSTNCuda mps_state{num_qubits, maxExtent, dev_tag}; + + std::vector> gate_matrix = { + cuUtil::ZERO>(), + cuUtil::ONE>(), + cuUtil::ONE>(), + cuUtil::ZERO>()}; + + mps_state.applyOperation("applyMatrix", {index}, false, {}, + gate_matrix); + + auto results = mps_state.getDataVector(); + + CHECK(results[0] == cuUtil::ZERO>()); + CHECK(results[0b1 << (num_qubits - index - 1)] == + cuUtil::ONE>()); + } +} + TEMPLATE_TEST_CASE("MPSTNCuda::Gates::PauliY", "[MPSTNCuda_Nonparam]", float, double) { const bool inverse = GENERATE(false, true);