From c6f3f3f75aeedf6ce9c43dac50a0b1f08c8a6282 Mon Sep 17 00:00:00 2001
From: Shuli Shu <31480676+multiphaseCFD@users.noreply.github.com>
Date: Mon, 7 Oct 2024 20:03:08 -0400
Subject: [PATCH] Add native `setStateVector` support to `lightning.gpu` (#930)

### Before submitting

Please complete the following checklist when submitting a PR:

- [ ] All new features must include a unit test.
      If you've fixed a bug or added code that should be tested, add a test to the
      [`tests`](../tests) directory!

- [ ] All new functions and code must be clearly commented and documented.
      If you do make documentation changes, make sure that the docs build and
      render correctly by running `make docs`.

- [x] Ensure that the test suite passes, by running `make test`.

- [x] Add a new entry to the `.github/CHANGELOG.md` file, summarizing the
      change, and including a link back to the PR.

- [x] Ensure that code is properly formatted by running `make format`.

When all the above are checked, delete everything above the dashed
line and fill in the pull request template.

------------------------------------------------------------------------------------------------------------ **Context:** [SC-74668] `setStateVector` via state and wires is supported in the C++ layer in `lightning.gpu` **Description of the Change:** **Benefits:** **Possible Drawbacks:** **Related GitHub Issues:** --------- Co-authored-by: ringo-but-quantum --- .github/CHANGELOG.md | 3 + pennylane_lightning/core/_version.py | 2 +- .../lightning_gpu/StateVectorCudaMPI.hpp | 156 ++++++++++++------ .../lightning_gpu/StateVectorCudaManaged.hpp | 101 ++++++++---- .../lightning_gpu/bindings/LGPUBindings.hpp | 25 +-- .../bindings/LGPUBindingsMPI.hpp | 25 +-- .../Test_StateVectorCudaManaged_NonParam.cpp | 63 +------ .../mpi/Test_StateVectorCudaMPI_NonParam.cpp | 21 +-- .../lightning_gpu/_state_vector.py | 29 +--- .../lightning_gpu/lightning_gpu.py | 11 +- 10 files changed, 222 insertions(+), 214 deletions(-) diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 6ac7b1fb2..27ecd62a7 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -43,6 +43,9 @@ ### Improvements +* Add `setStateVector(state, wire)` support to the `lightning.gpu` C++ layer. + [(#930)](https://github.com/PennyLaneAI/pennylane-lightning/pull/930) + * Add zero-state initialization to both `StateVectorCudaManaged` and `StateVectorCudaMPI` constructors to remove the `reset_state` in the python layer ctor and refactor `setBasisState(state, wires)` in the C++ layer. 
[(#933)](https://github.com/PennyLaneAI/pennylane-lightning/pull/933) diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index 4f9f65022..c6e0b5050 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.39.0-dev39" +__version__ = "0.39.0-dev40" diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp index 577c510f9..964c5e69c 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp @@ -289,61 +289,48 @@ class StateVectorCudaMPI final BaseType::getDataBuffer().zeroInit(); setBasisState_(value, index, use_async); } + /** - * @brief Set values for a batch of elements of the state-vector. This - * method is implemented by the customized CUDA kernel defined in the - * DataBuffer class. + * @brief Set values for a batch of elements of the state-vector. * - * @param num_indices Number of elements to be passed to the state vector. - * @param values Pointer to values to be set for the target elements. - * @param indices Pointer to indices of the target elements. - * @param async Use an asynchronous memory copy. + * @param state_ptr Pointer to initial state data. + * @param num_states Length of initial state data. + * @param wires Wires. + * @param use_async Use an asynchronous memory copy. Default is false. 
*/ - template - void setStateVector(const index_type num_indices, - const std::complex *values, - const index_type *indices, const bool async = false) { - BaseType::getDataBuffer().zeroInit(); - - std::vector indices_local; - std::vector> values_local; - - for (std::size_t i = 0; i < static_cast(num_indices); - i++) { - int index = indices[i]; - PL_ASSERT(index >= 0); - std::size_t rankId = - static_cast(index) >> BaseType::getNumQubits(); - - if (rankId == mpi_manager_.getRank()) { - int local_index = static_cast( - compute_local_index(static_cast(index), - this->getNumLocalQubits())); - indices_local.push_back(local_index); - values_local.push_back(values[i]); + void setStateVector(const ComplexT *state_ptr, const std::size_t num_states, + const std::vector &wires, + bool use_async = false) { + PL_ABORT_IF_NOT(num_states == Pennylane::Util::exp2(wires.size()), + "Inconsistent state and wires dimensions."); + + const auto num_qubits = this->getTotalNumQubits(); + + PL_ABORT_IF_NOT(std::find_if(wires.begin(), wires.end(), + [&num_qubits](const auto i) { + return i >= num_qubits; + }) == wires.end(), + "Invalid wire index."); + + using index_type = + typename std::conditional::value, + int32_t, int64_t>::type; + + // Calculate the indices of the state-vector to be set. + // TODO: Could move to GPU/MPI calculation if the state size is large. 
+ std::vector indices(num_states); + const std::size_t num_wires = wires.size(); + constexpr std::size_t one{1U}; + for (std::size_t i = 0; i < num_states; i++) { + std::size_t index{0U}; + for (std::size_t j = 0; j < num_wires; j++) { + const std::size_t bit = (i & (one << j)) >> j; + index |= bit << (num_qubits - 1 - wires[num_wires - 1 - j]); } + indices[i] = static_cast(index); } - - auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); - auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); - - index_type num_elements = indices_local.size(); - - DataBuffer d_indices{ - static_cast(num_elements), device_id, stream_id, true}; - - DataBuffer d_values{static_cast(num_elements), - device_id, stream_id, true}; - - d_indices.CopyHostDataToGpu(indices_local.data(), d_indices.getLength(), - async); - d_values.CopyHostDataToGpu(values_local.data(), d_values.getLength(), - async); - - setStateVector_CUDA(BaseType::getData(), num_elements, - d_values.getData(), d_indices.getData(), - thread_per_block, stream_id); - PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + setStateVector_(num_states, state_ptr, indices.data(), + use_async); mpi_manager_.Barrier(); } @@ -1548,6 +1535,62 @@ class StateVectorCudaMPI final return t_indices; } + /** + * @brief Set values for a batch of elements of the state-vector. This + * method is implemented by the customized CUDA kernel defined in the + * DataBuffer class. + * + * @param num_indices Number of elements to be passed to the state vector. + * @param values Pointer to values to be set for the target elements. + * @param indices Pointer to indices of the target elements. + * @param async Use an asynchronous memory copy. 
+ */ + template + void setStateVector_(const index_type num_indices, + const std::complex *values, + const index_type *indices, const bool async = false) { + BaseType::getDataBuffer().zeroInit(); + + std::vector indices_local; + std::vector> values_local; + + for (std::size_t i = 0; i < static_cast(num_indices); + i++) { + int index = indices[i]; + PL_ASSERT(index >= 0); + std::size_t rankId = + static_cast(index) >> BaseType::getNumQubits(); + + if (rankId == mpi_manager_.getRank()) { + int local_index = static_cast( + compute_local_index(static_cast(index), + this->getNumLocalQubits())); + indices_local.push_back(local_index); + values_local.push_back(values[i]); + } + } + + auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); + auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); + + index_type num_elements = indices_local.size(); + + DataBuffer d_indices{ + static_cast(num_elements), device_id, stream_id, true}; + + DataBuffer d_values{static_cast(num_elements), + device_id, stream_id, true}; + + d_indices.CopyHostDataToGpu(indices_local.data(), d_indices.getLength(), + async); + d_values.CopyHostDataToGpu(values_local.data(), d_values.getLength(), + async); + + setStateVector_CUDA(BaseType::getData(), num_elements, + d_values.getData(), d_indices.getData(), + thread_per_block, stream_id); + } + /** * @brief Set value for a single element of the state-vector on device. This * method is implemented by cudaMemcpy. @@ -1637,8 +1680,8 @@ class StateVectorCudaMPI final } /** - * @brief Apply parametric Pauli gates to local statevector using custateVec - * calls. + * @brief Apply parametric Pauli gates to local statevector using + * custateVec calls. * * @param pauli_words List of Pauli words representing operation. 
* @param ctrls Control wires @@ -1708,7 +1751,8 @@ class StateVectorCudaMPI final }); // Initialize a vector to store the status of wires and default its - // elements as zeros, which assumes there is no target and control wire. + // elements as zeros, which assumes there is no target and control + // wire. std::vector statusWires(this->getTotalNumQubits(), WireStatus::Default); @@ -1868,7 +1912,8 @@ class StateVectorCudaMPI final }); // Initialize a vector to store the status of wires and default its - // elements as zeros, which assumes there is no target and control wire. + // elements as zeros, which assumes there is no target and control + // wire. std::vector statusWires(this->getTotalNumQubits(), WireStatus::Default); @@ -2009,7 +2054,8 @@ class StateVectorCudaMPI final }); // Initialize a vector to store the status of wires and default its - // elements as zeros, which assumes there is no target and control wire. + // elements as zeros, which assumes there is no target and control + // wire. std::vector statusWires(this->getTotalNumQubits(), WireStatus::Default); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp index a4e40f1d2..9e68592df 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp @@ -215,36 +215,46 @@ class StateVectorCudaManaged } /** - * @brief Set values for a batch of elements of the state-vector. This - * method is implemented by the customized CUDA kernel defined in the - * DataBuffer class. + * @brief Set values for a batch of elements of the state-vector. * - * @param num_indices Number of elements to be passed to the state vector. - * @param values Pointer to values to be set for the target elements. - * @param indices Pointer to indices of the target elements. 
- * @param async Use an asynchronous memory copy. + * @param state_ptr Pointer to the initial state data. + * @param num_states Length of the initial state data. + * @param wires Wires. + * @param use_async Use an asynchronous memory copy. Default is false. */ - template - void setStateVector(const index_type num_indices, - const std::complex *values, - const index_type *indices, const bool async = false) { - BaseType::getDataBuffer().zeroInit(); - - auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); - auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); - - index_type num_elements = num_indices; - DataBuffer d_indices{ - static_cast(num_elements), device_id, stream_id, true}; - DataBuffer d_values{static_cast(num_elements), - device_id, stream_id, true}; + void setStateVector(const ComplexT *state_ptr, const std::size_t num_states, + const std::vector &wires, + bool use_async = false) { + PL_ABORT_IF_NOT(num_states == Pennylane::Util::exp2(wires.size()), + "Inconsistent state and wires dimensions."); - d_indices.CopyHostDataToGpu(indices, d_indices.getLength(), async); - d_values.CopyHostDataToGpu(values, d_values.getLength(), async); + const auto num_qubits = BaseType::getNumQubits(); - setStateVector_CUDA(BaseType::getData(), num_elements, - d_values.getData(), d_indices.getData(), - thread_per_block, stream_id); + PL_ABORT_IF_NOT(std::find_if(wires.begin(), wires.end(), + [&num_qubits](const auto i) { + return i >= num_qubits; + }) == wires.end(), + "Invalid wire index."); + + using index_type = + typename std::conditional::value, + int32_t, int64_t>::type; + + // Calculate the indices of the state-vector to be set. + // TODO: Could move to GPU calculation if the state size is large. 
+ std::vector indices(num_states); + const std::size_t num_wires = wires.size(); + constexpr std::size_t one{1U}; + for (std::size_t i = 0; i < num_states; i++) { + std::size_t index{0U}; + for (std::size_t j = 0; j < num_wires; j++) { + const std::size_t bit = (i & (one << j)) >> j; + index |= bit << (num_qubits - 1 - wires[num_wires - 1 - j]); + } + indices[i] = static_cast(index); + } + setStateVector_(num_states, state_ptr, indices.data(), + use_async); } /** @@ -1346,9 +1356,8 @@ class StateVectorCudaManaged return t_indices; } - /** - * @brief Set value for a single element of the state-vector on device. This - * method is implemented by cudaMemcpy. + /** @brief Set value for a single element of the state-vector on device. + * This method is implemented by cudaMemcpy. * * @param value Value to be set for the target element. * @param index Index of the target element. @@ -1362,6 +1371,40 @@ class StateVectorCudaManaged stream_id); } + /** + * @brief Set values for a batch of elements of the state-vector. This + * method is implemented by the customized CUDA kernel defined in the + * DataBuffer class. + * + * @param num_indices Number of elements to be passed to the state vector. + * @param values Pointer to values to be set for the target elements. + * @param indices Pointer to indices of the target elements. + * @param async Use an asynchronous memory copy. 
+ */ + template + void setStateVector_(const index_type num_indices, + const std::complex *values, + const index_type *indices, const bool async = false) { + BaseType::getDataBuffer().zeroInit(); + + auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); + auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); + + index_type num_elements = num_indices; + DataBuffer d_indices{ + static_cast(num_elements), device_id, stream_id, true}; + DataBuffer d_values{static_cast(num_elements), + device_id, stream_id, true}; + + d_indices.CopyHostDataToGpu(indices, d_indices.getLength(), async); + d_values.CopyHostDataToGpu(values, d_values.getLength(), async); + + setStateVector_CUDA(BaseType::getData(), num_elements, + d_values.getData(), d_indices.getData(), + thread_per_block, stream_id); + PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + } + /** * @brief Apply parametric Pauli gates using custateVec calls. * diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp index b2d03eba5..c361bd6ed 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp @@ -63,10 +63,6 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) { using ParamT = PrecisionT; // Parameter's data precision using np_arr_c = py::array_t, py::array::c_style | py::array::forcecast>; - using np_arr_sparse_ind = typename std::conditional< - std::is_same::value, - py::array_t, - py::array_t>::type; registerGatesForStateVector(pyclass); @@ -91,20 +87,15 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) { "Set the state vector to a basis state on GPU.") .def( "setStateVector", - [](StateVectorT &sv, const np_arr_sparse_ind &indices, - const np_arr_c &state, const bool use_async) { - using index_type = typename std::conditional< - 
std::is_same::value, int32_t, int64_t>::type; - - sv.template setStateVector( - static_cast(indices.request().size), - static_cast *>( - state.request().ptr), - static_cast(indices.request().ptr), - use_async); + [](StateVectorT &sv, const np_arr_c &state, + const std::vector &wires, const bool async) { + const auto state_buffer = state.request(); + const auto state_ptr = + static_cast *>(state_buffer.ptr); + sv.setStateVector(state_ptr, state_buffer.size, wires, async); }, - "Set State Vector on GPU with values and their corresponding " - "indices for the state vector on device") + "Set State Vector on GPU with values for the state vector and " + "wires on the host memory.") .def( "DeviceToDevice", [](StateVectorT &sv, const StateVectorT &other, bool async) { diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp index f6a933aca..2d3313f69 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp @@ -63,10 +63,6 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { using ParamT = PrecisionT; // Parameter's data precision using np_arr_c = py::array_t, py::array::c_style | py::array::forcecast>; - using np_arr_sparse_ind = typename std::conditional< - std::is_same::value, - py::array_t, - py::array_t>::type; registerGatesForStateVector(pyclass); @@ -95,20 +91,15 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { "Set the state vector to a basis state on GPU.") .def( "setStateVector", - [](StateVectorT &sv, const np_arr_sparse_ind &indices, - const np_arr_c &state, const bool use_async) { - using index_type = typename std::conditional< - std::is_same::value, int32_t, int64_t>::type; - - sv.template setStateVector( - static_cast(indices.request().size), - static_cast *>( - 
state.request().ptr), - static_cast(indices.request().ptr), - use_async); + [](StateVectorT &sv, const np_arr_c &state, + const std::vector &wires, const bool async) { + const auto state_buffer = state.request(); + const auto state_ptr = + static_cast *>(state_buffer.ptr); + sv.setStateVector(state_ptr, state_buffer.size, wires, async); }, - "Set State Vector on GPU with values and their corresponding " - "indices for the state vector on device") + "Set State Vector on GPU with values for the state vector and " + "wires on the host memory.") .def( "DeviceToDevice", [](StateVectorT &sv, const StateVectorT &other, bool async) { diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp index dc0c1a712..af864d8b0 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp @@ -1069,68 +1069,15 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetStateVector", } StateVectorCudaManaged sv{num_qubits}; - sv.CopyHostDataToGpu(init_state.data(), init_state.size()); - - using index_type = - typename std::conditional::value, - int32_t, int64_t>::type; - // The setStates will shuffle the state vector values on the device with - // the following indices and values setting on host. For example, the - // values[i] is used to set the indices[i] th element of state vector on - // the device. For example, values[2] (init_state[5]) will be copied to - // indices[2]th or (4th) element of the state vector. 
- std::vector indices = {0, 2, 4, 6, 1, 3, 5, 7}; - - std::vector> values = { - init_state[1], init_state[3], init_state[5], init_state[7], - init_state[0], init_state[2], init_state[4], init_state[6]}; - - sv.template setStateVector(values.size(), values.data(), - indices.data(), false); - CHECK(expected_state == Pennylane::Util::approx(sv.getDataVector())); - } -} -// LCOV_EXCL_START -TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetStateVectorwith_thread_setting", - "[StateVectorCudaManaged_Nonparam]", float, double) { - using PrecisionT = TestType; - const std::size_t num_qubits = 3; - std::mt19937 re{1337}; - - SECTION("SetStates with a non-default GPU thread setting") { - auto init_state = - createRandomStateVectorData(re, num_qubits); - auto expected_state = init_state; + std::vector> values(init_state.begin(), + init_state.end()); - for (std::size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1); - i++) { - std::swap(expected_state[i * 2], expected_state[i * 2 + 1]); - } - - StateVectorCudaManaged sv{num_qubits}; - sv.CopyHostDataToGpu(init_state.data(), init_state.size()); - - using index_type = - typename std::conditional::value, - int32_t, int64_t>::type; - - std::vector indices = {0, 2, 4, 6, 1, 3, 5, 7}; - - std::vector> values = { - init_state[1], init_state[3], init_state[5], init_state[7], - init_state[0], init_state[2], init_state[4], init_state[6]}; - - // default setting of the number of threads in a block is 256. 
- const std::size_t threads_per_block = 1024; - - sv.template setStateVector( - values.size(), values.data(), indices.data(), false); - - CHECK(expected_state == Pennylane::Util::approx(sv.getDataVector())); + sv.setStateVector(values.data(), values.size(), + std::vector{0, 1, 2}); + CHECK(init_state == Pennylane::Util::approx(sv.getDataVector())); } } -// LCOV_EXCL_STOP TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetIthStates", "[StateVectorCudaManaged_Nonparam]", float, double) { diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp index b9ed7fcbe..968badd4d 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -158,21 +159,17 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetStateVector", "the host") { StateVectorCudaMPI sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - // The setStates will shuffle the state vector values on the device with - // the following indices and values setting on host. For example, the - // values[i] is used to set the indices[i] th element of state vector on - // the device. For example, values[2] (init_state[5]) will be copied to - // indices[2]th or (4th) element of the state vector. 
- sv.template setStateVector( - init_state.size(), init_state.data(), indices.data(), false); + std::vector> values(init_state.begin(), + init_state.end()); + std::vector wires(num_qubits); + std::iota(wires.begin(), wires.end(), 0); + sv.setStateVector(values.data(), values.size(), wires); - mpi_manager.Barrier(); - sv.CopyGpuDataToHost(local_state.data(), - static_cast(subSvLength)); - mpi_manager.Barrier(); + auto expected_local_state_vector = mpi_manager.scatter(values, 0); - CHECK(expected_local_state == Pennylane::Util::approx(local_state)); + CHECK(expected_local_state_vector == + Pennylane::Util::approx(sv.getDataVector())); } } diff --git a/pennylane_lightning/lightning_gpu/_state_vector.py b/pennylane_lightning/lightning_gpu/_state_vector.py index faca301e8..a00044356 100644 --- a/pennylane_lightning/lightning_gpu/_state_vector.py +++ b/pennylane_lightning/lightning_gpu/_state_vector.py @@ -31,7 +31,6 @@ except ImportError as ex: warn(str(ex), UserWarning) -from itertools import product from typing import Union import numpy as np @@ -69,7 +68,7 @@ class LightningGPUStateVector(LightningBaseStateVector): device_name(string): state vector device name. Options: ["lightning.gpu"] mpi_handler(MPIHandler): MPI handler for PennyLane Lightning GPU device. Provides functionality to distribute the state-vector to multiple devices. - sync (bool): is host-device data copy synchronized or not. + use_async (bool): is host-device data copy asynchronized or not. 
""" def __init__( @@ -77,7 +76,7 @@ def __init__( num_wires: int, dtype: Union[np.complex128, np.complex64] = np.complex128, mpi_handler: MPIHandler = None, - sync: bool = True, + use_async: bool = False, ): super().__init__(num_wires, dtype) @@ -92,7 +91,7 @@ def __init__( self._num_local_wires = mpi_handler.num_local_wires self._mpi_handler = mpi_handler - self._sync = sync + self._use_async = use_async # Initialize the state vector if self._mpi_handler.use_mpi: # using MPI @@ -120,7 +119,7 @@ def _state_dtype(self): # without MPI return StateVectorC128 if self.dtype == np.complex128 else StateVectorC64 - def syncD2H(self, state_vector, use_async=False): + def syncD2H(self, state_vector, use_async: bool = False): """Copy the state vector data on device to a state vector on the host provided by the user. Args: state_vector(array[complex]): the state vector array on host. @@ -155,7 +154,7 @@ def state(self): self.syncD2H(state) return state - def syncH2D(self, state_vector, use_async=False): + def syncH2D(self, state_vector, use_async: bool = False): """Copy the state vector data on host provided by the user to the state vector on the device Args: state_vector(array[complex]): the state vector array on host. @@ -189,7 +188,7 @@ def _asarray(arr, dtype=None): return arr - def _apply_state_vector(self, state, device_wires, use_async=False): + def _apply_state_vector(self, state, device_wires, use_async: bool = False): """Initialize the state vector on GPU with a specified state on host. Note that any use of this method will introduce host-overheads. 
Args: @@ -224,20 +223,8 @@ def _apply_state_vector(self, state, device_wires, use_async=False): self.syncH2D(np.reshape(local_state, output_shape)) return - # generate basis states on subset of qubits via the cartesian product - basis_states = np.array(list(product([0, 1], repeat=len(device_wires)))) - - # get basis states to alter on full set of qubits - unravelled_indices = np.zeros((2 ** len(device_wires), self.num_wires), dtype=int) - unravelled_indices[:, device_wires] = basis_states - - # get indices for which the state is changed to input state vector elements - ravelled_indices = np.ravel_multi_index(unravelled_indices.T, [2] * self.num_wires) - - # set the state vector on GPU with the unravelled_indices and their corresponding values - self._qubit_state.setStateVector( - ravelled_indices, state, use_async - ) # this operation on device + # set the state vector on GPU with provided state and their corresponding wires + self._qubit_state.setStateVector(state, list(device_wires), use_async) def _apply_lightning_controlled(self, operation): """Apply an arbitrary controlled operation to the state tensor. diff --git a/pennylane_lightning/lightning_gpu/lightning_gpu.py b/pennylane_lightning/lightning_gpu/lightning_gpu.py index 84d7dd31e..2b295c499 100644 --- a/pennylane_lightning/lightning_gpu/lightning_gpu.py +++ b/pennylane_lightning/lightning_gpu/lightning_gpu.py @@ -296,7 +296,7 @@ class LightningGPU(LightningBase): is built with MPI. Default is False. mpi (bool): declare if the device will use the MPI support. mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB. - sync (bool): is host-device data copy synchronized or not. + use_async (bool): is host-device data copy asynchronized or not. 
""" # General device options @@ -326,7 +326,7 @@ def __init__( # pylint: disable=too-many-arguments # GPU and MPI arguments mpi: bool = False, mpi_buf_size: int = 0, - sync: bool = False, + use_async: bool = False, ): if not self._CPP_BINARY_AVAILABLE: raise ImportError( @@ -349,13 +349,16 @@ def __init__( # pylint: disable=too-many-arguments # GPU specific options self._dp = DevPool() - self._sync = sync + self._use_async = use_async # Creating the state vector self._mpi_handler = MPIHandler(mpi, mpi_buf_size, len(self.wires), c_dtype) self._statevector = self.LightningStateVector( - num_wires=len(self.wires), dtype=c_dtype, mpi_handler=self._mpi_handler, sync=self._sync + num_wires=len(self.wires), + dtype=c_dtype, + mpi_handler=self._mpi_handler, + use_async=self._use_async, ) @property