diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 484cf2ae29..1f4059f9e2 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -2,6 +2,9 @@ ### New features since last release +* Add `mid-circuit measurements` support to `lightning.gpu`'s single-GPU backend. + [(#931)](https://github.com/PennyLaneAI/pennylane-lightning/pull/931) + * Add Matrix Product Operator (MPO) for all gates support to `lightning.tensor`. Note current C++ implementation only works for MPO sites data provided by users. [(#859)](https://github.com/PennyLaneAI/pennylane-lightning/pull/859) @@ -21,8 +24,14 @@ * Lightning-Kokkos migrated to the new device API. [(#810)](https://github.com/PennyLaneAI/pennylane-lightning/pull/810) +* Lightning-GPU migrated to the new device API. + [(#853)](https://github.com/PennyLaneAI/pennylane-lightning/pull/853) + ### Breaking changes +* Deprecate `initSV()` and add `resetStateVector()` to `lightning.gpu`. + [(#933)](https://github.com/PennyLaneAI/pennylane-lightning/pull/933) + * Deprecate PI gates implementation. [(#925)](https://github.com/PennyLaneAI/pennylane-lightning/pull/925) @@ -37,6 +46,21 @@ ### Improvements +* Optimize the cartesian product to reduce the amount of memory necessary to set the StatePrep with LightningTensor. + [(#943)](https://github.com/PennyLaneAI/pennylane-lightning/pull/943) + +* The `prob` data return `lightning.gpu` C++ layer is aligned with other state-vector backends and `lightning.gpu` supports out-of-order `qml.prob`. + [(#941)](https://github.com/PennyLaneAI/pennylane-lightning/pull/941) + +* Add `setStateVector(state, wire)` support to the `lightning.gpu` C++ layer. + [(#930)](https://github.com/PennyLaneAI/pennylane-lightning/pull/930) + +* Add zero-state initialization to both `StateVectorCudaManaged` and `StateVectorCudaMPI` constructors to remove the `reset_state` in the python layer ctor and refactor `setBasisState(state, wires)` in the C++ layer. 
+ [(#933)](https://github.com/PennyLaneAI/pennylane-lightning/pull/933) + +* The `generate_samples` methods of lightning.{qubit/kokkos} can now take in a seed number to make the generated samples deterministic. This can be useful when, among other things, fixing flaky tests in CI. + [(#927)](https://github.com/PennyLaneAI/pennylane-lightning/pull/927) + * Always decompose `qml.QFT` in Lightning. [(#924)](https://github.com/PennyLaneAI/pennylane-lightning/pull/924) @@ -95,6 +119,15 @@ ### Bug fixes +* Fix missing `liblightning_kokkos_catalyst.so` in Lightning-Kokkos editable installation. + [(#945)](https://github.com/PennyLaneAI/pennylane-lightning/pull/945) + +* Add a concept restriction to ensure the `ConstMult` inline function is only selected for arithmetic values multiplied by complex values. Fixes build failures with the test suite when enabling OpenMP and disabling BLAS and Python under clang. + [(#936)](https://github.com/PennyLaneAI/pennylane-lightning/pull/936) + +* Bug fix for `applyMatrix` in `lightning.tensor`. Matrix operator data is not stored in the `cuGateCache` object to support `TensorProd` obs with multiple `Hermitian` obs. + [(#932)](https://github.com/PennyLaneAI/pennylane-lightning/pull/932) + * Bug fix for `_pauli_word` of `QuantumScriptSerializer`. `_pauli_word` can process `PauliWord` object: `I`. [(#919)](https://github.com/PennyLaneAI/pennylane-lightning/pull/919) @@ -105,7 +138,7 @@ This release contains contributions from (in alphabetical order): -Ali Asadi, Amintor Dusko, Luis Alfredo Nuñez Meneses, Vincent Michaud-Rioux, Lee J. O'Riordan, Mudit Pandey, Shuli Shu +Ali Asadi, Amintor Dusko, Luis Alfredo Nuñez Meneses, Vincent Michaud-Rioux, Lee J. 
O'Riordan, Mudit Pandey, Shuli Shu, Haochen Paul Wang --- diff --git a/.github/workflows/wheel_linux_aarch64.yml b/.github/workflows/wheel_linux_aarch64.yml index 63bc629a77..bc21a56822 100644 --- a/.github/workflows/wheel_linux_aarch64.yml +++ b/.github/workflows/wheel_linux_aarch64.yml @@ -123,8 +123,13 @@ jobs: mkdir Kokkos cp -rf ${{ github.workspace }}/Kokkos_install/${{ matrix.exec_model }}/* Kokkos/ + - name: Install Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install dependencies - run: python -m pip install cibuildwheel~=2.20.0 tomlkit + run: python3.10 -m pip install cibuildwheel~=2.20.0 tomlkit - name: Configure pyproject.toml file run: PL_BACKEND="${{ matrix.pl_backend }}" python scripts/configure_pyproject_toml.py diff --git a/.github/workflows/wheel_linux_aarch64_cuda.yml b/.github/workflows/wheel_linux_aarch64_cuda.yml index cc87f033c9..4864fa0167 100644 --- a/.github/workflows/wheel_linux_aarch64_cuda.yml +++ b/.github/workflows/wheel_linux_aarch64_cuda.yml @@ -48,8 +48,13 @@ jobs: - name: Checkout PennyLane-Lightning uses: actions/checkout@v4 + - name: Install Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install cibuildwheel - run: python -m pip install cibuildwheel~=2.20.0 tomlkit + run: python3.10 -m pip install cibuildwheel~=2.20.0 tomlkit - name: Configure pyproject.toml file run: PL_BACKEND="${{ matrix.pl_backend }}" python scripts/configure_pyproject_toml.py diff --git a/.github/workflows/wheel_noarch.yml b/.github/workflows/wheel_noarch.yml index 11460cac1e..0414fcd7b8 100644 --- a/.github/workflows/wheel_noarch.yml +++ b/.github/workflows/wheel_noarch.yml @@ -50,7 +50,6 @@ jobs: if: ${{ matrix.pl_backend == 'lightning_qubit'}} uses: actions/checkout@v4 - - uses: actions/setup-python@v5 if: ${{ matrix.pl_backend == 'lightning_qubit'}} with: diff --git a/MANIFEST.in b/MANIFEST.in index 4c1a79b51d..23ba93b561 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,7 
+3,7 @@ include cmake/* include requirements.txt include .github/CHANGELOG.md include pennylane_lightning/lightning_qubit/lightning_qubit.toml -include pennylane_lightning/lightning_qpu/lightning_gpu.toml +include pennylane_lightning/lightning_gpu/lightning_gpu.toml include pennylane_lightning/lightning_kokkos/lightning_kokkos.toml include pennylane_lightning/core/_version.py graft pennylane_lightning/core/src/ diff --git a/Makefile b/Makefile index f43c9e903f..5973200c52 100644 --- a/Makefile +++ b/Makefile @@ -35,9 +35,11 @@ help: @echo " test-cpp [verbose=1] to run the C++ test suite (requires CMake)" @echo " use with 'verbose=1' for building with verbose flag" @echo " test-cpp [target=?] to run a specific C++ test target (requires CMake)." + @echo " test-cpp-mpi [backend=?] to run the C++ test suite with MPI (requires CMake and MPI)" + @echo " Default: lightning_gpu" @echo " test-python [device=?] to run the Python test suite" @echo " Default: lightning.qubit" - @echo " wheel [backend=?] to configure and build Python wheels + @echo " wheel [backend=?] to configure and build Python wheels" @echo " Default: lightning_qubit" @echo " coverage [device=?] to generate a coverage report for python interface" @echo " Default: lightning.qubit" @@ -98,7 +100,7 @@ coverage-cpp: lcov --directory . 
-b ../pennylane_lightning/core/src/ --capture --output-file coverage.info; \ genhtml coverage.info --output-directory out -.PHONY: test-python test-builtin test-suite test-cpp +.PHONY: test-python test-builtin test-suite test-cpp test-cpp-mpi test-python: test-builtin test-suite test-builtin: @@ -124,6 +126,27 @@ else cmake --build ./BuildTests $(VERBOSE) --target test endif +test-cpp-mpi: + rm -rf ./BuildTests + cmake -BBuildTests -G Ninja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_TESTS=ON \ + -DENABLE_WARNINGS=ON \ + -DPL_BACKEND=lightning_gpu \ + -DENABLE_MPI=ON \ + $(OPTIONS) +ifdef target + cmake --build ./BuildTests $(VERBOSE) --target $(target) + mpirun -np 2 ./BuildTests/$(target) +else + cmake --build ./BuildTests $(VERBOSE) + for file in ./BuildTests/*_test_runner_mpi; do \ + echo "Running $$file"; \ + mpirun -np 2 $$file ; \ + done +endif + + .PHONY: format format-cpp format-python format: format-cpp format-python diff --git a/doc/lightning_gpu/device.rst b/doc/lightning_gpu/device.rst index a5162c7579..405ea9764d 100644 --- a/doc/lightning_gpu/device.rst +++ b/doc/lightning_gpu/device.rst @@ -11,9 +11,9 @@ A ``lightning.gpu`` device can be loaded using: import pennylane as qml dev = qml.device("lightning.gpu", wires=2) -If the NVIDIA cuQuantum libraries are available, the above device will allow all operations to be performed on a CUDA capable GPU of generation SM 7.0 (Volta) and greater. If the libraries are not correctly installed, or available on path, the device will fall-back to ``lightning.qubit`` and perform all simulation on the CPU. +If the NVIDIA cuQuantum libraries are available, the above device will allow all operations to be performed on a CUDA capable GPU of generation SM 7.0 (Volta) and greater. If the libraries are not correctly installed, or available on path, the device will raise an error. -The ``lightning.gpu`` device also directly supports quantum circuit gradients using the adjoint differentiation method. 
This can be enabled at the PennyLane QNode level with: .. code-block:: python @@ -281,3 +281,6 @@ To enable the memory-optimized adjoint method with MPI support, ``batch_obs`` sh dev = qml.device('lightning.gpu', wires= n_wires, mpi=True, batch_obs=True) For the adjoint method, each MPI process will provide the overall simulation results. + +.. note:: + The observable ``Projector`` is not supported by the multi-GPU backend. diff --git a/mpitests/conftest.py b/mpitests/conftest.py index a2084f2a5d..552cf9f330 100644 --- a/mpitests/conftest.py +++ b/mpitests/conftest.py @@ -98,6 +98,13 @@ def get_device(): # Device specification if device_name == "lightning.gpu": from pennylane_lightning.lightning_gpu import LightningGPU as LightningDevice + from pennylane_lightning.lightning_gpu._measurements import ( + LightningGPUMeasurements as LightningMeasurements, + ) + from pennylane_lightning.lightning_gpu._state_vector import ( + LightningGPUStateVector as LightningStateVector, + ) + else: raise qml.DeviceError(f"The MPI tests do not apply to the {device_name} device.") diff --git a/mpitests/test_adjoint_jacobian.py b/mpitests/test_adjoint_jacobian.py index 6f3b5c7f5b..9d56dfdb1a 100644 --- a/mpitests/test_adjoint_jacobian.py +++ b/mpitests/test_adjoint_jacobian.py @@ -26,17 +26,15 @@ from pennylane import QNode from pennylane import numpy as np from pennylane import qnode +from pennylane.devices import ExecutionConfig +from pennylane.tape import QuantumScript from scipy.stats import unitary_group +from pennylane_lightning.lightning_gpu_ops import LightningException + if not ld._CPP_BINARY_AVAILABLE: pytest.skip("No binary module found. 
Skipping.", allow_module_level=True) -I, X, Y, Z = ( - np.eye(2), - qml.PauliX.compute_matrix(), - qml.PauliY.compute_matrix(), - qml.PauliZ.compute_matrix(), -) # Tuple passed to distributed device ctor # np.complex for data type and True or False @@ -59,265 +57,255 @@ def fixture_dev(request): ) -def Rx(theta): - r"""One-qubit rotation about the x axis. - - Args: - theta (float): rotation angle - Returns: - array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_x \theta/2}` - """ - return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * X - - -def Ry(theta): - r"""One-qubit rotation about the y axis. - - Args: - theta (float): rotation angle - Returns: - array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_y \theta/2}` - """ - return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * Y - - -def Rz(theta): - r"""One-qubit rotation about the z axis. - - Args: - theta (float): rotation angle - Returns: - array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_z \theta/2}` - """ - return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * Z - - class TestAdjointJacobian: # pylint: disable=too-many-public-methods """Tests for the adjoint_jacobian method""" - def test_not_expval(self, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_not_expval(self, dev, batch_obs): """Test if a QuantumFunctionError is raised for a tape with measurements that are not expectation values""" - with qml.tape.QuantumTape() as tape: - qml.RX(0.1, wires=0) - qml.var(qml.PauliZ(0)) + qs = QuantumScript([qml.RX(1.23, 0)], [qml.var(qml.PauliZ(0))], trainable_params=[0]) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) with pytest.raises( qml.QuantumFunctionError, match="Adjoint differentiation method does not" ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - with qml.tape.QuantumTape() as tape: - qml.RX(0.1, wires=0) - qml.state() + qs = QuantumScript([qml.RX(1.23, 0)], [qml.state()], 
trainable_params=[0]) - if device_name == "lightning.gpu": - message = "Adjoint differentiation does not support State measurements." - else: - message = "Adjoint differentiation method does not support measurement StateMP." with pytest.raises( qml.QuantumFunctionError, - match=message, + match="Adjoint differentiation method does not support measurement StateMP.", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - def test_finite_shots_warns(self): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_finite_shots_warns(self, dev, batch_obs): """Tests warning raised when finite shots specified""" - dev = qml.device(device_name, wires=8, mpi=True, shots=1) - - with qml.tape.QuantumTape() as tape: - qml.expval(qml.PauliZ(0)) + qs = QuantumScript( + [qml.RX(1.23, 0)], [qml.expval(qml.Z(0))], shots=10, trainable_params=[0] + ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - with pytest.warns( - UserWarning, + with pytest.raises( + qml.QuantumFunctionError, match="Requested adjoint differentiation to be computed with finite shots.", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) def test_empty_measurements(self, dev): """Tests if an empty array is returned when the measurements of the tape is empty.""" - with qml.tape.QuantumTape() as tape: + def circuit(): qml.RX(0.4, wires=[0]) + return qml.expval(qml.PauliZ(0)) + + result = QNode(circuit, dev, diff_method="adjoint") + + jac = qml.grad(result)() - jac = dev.adjoint_jacobian(tape) assert len(jac) == 0 - def test_unsupported_op(self, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_unsupported_op(self, batch_obs, dev): """Test if a QuantumFunctionError is raised for an unsupported operation, i.e., multi-parameter operations that are not qml.Rot""" - with qml.tape.QuantumTape() as tape: - qml.CRot(0.1, 0.2, 0.3, wires=[0, 1]) - qml.expval(qml.PauliZ(0)) + qs = QuantumScript( + [qml.CRot(0.1, 
0.2, 0.3, wires=[0, 1])], + [qml.expval(qml.PauliZ(0))], + trainable_params=[0], + ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) with pytest.raises( - qml.QuantumFunctionError, - match="The CRot operation is not supported using the", + LightningException, + match="The operation is not supported using the adjoint differentiation method", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - def test_proj_unsupported(self, dev): + @pytest.mark.skip("WIP: Need a deep review if LGPU accept Projector") + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_proj_unsupported(self, batch_obs, dev): """Test if a QuantumFunctionError is raised for a Projector observable""" - with qml.tape.QuantumTape() as tape: - qml.CRX(0.1, wires=[0, 1]) - qml.expval(qml.Projector([0, 1], wires=[0, 1])) + + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) + + qs = QuantumScript( + [qml.CRX(0.1, wires=[0, 1])], + [qml.expval(qml.Projector([0, 1], wires=[0, 1]))], + trainable_params=[0], + ) with pytest.raises( qml.QuantumFunctionError, match="differentiation method does not support the Projector", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) - with qml.tape.QuantumTape() as tape: - qml.CRX(0.1, wires=[0, 1]) - qml.expval(qml.Projector([0], wires=[0]) @ qml.PauliZ(0)) + qs = QuantumScript( + [qml.CRX(0.1, wires=[0, 1])], + [qml.expval(qml.Projector([0], wires=[0]) @ qml.PauliZ(0))], + trainable_params=[0], + ) with pytest.raises( qml.QuantumFunctionError, match="differentiation method does not support the Projector", ): - dev.adjoint_jacobian(tape) + dev.compute_derivatives(qs, config) + + @staticmethod + def tol_for_allclose(c_dtype): + """Compute the tolerance for allclose""" + return 1e-3 if c_dtype == np.complex64 else 1e-7 @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) @pytest.mark.parametrize("G", [qml.RX, qml.RY, 
qml.RZ]) @pytest.mark.parametrize("stateprep", [qml.QubitStateVector, qml.StatePrep]) - def test_pauli_rotation_gradient(self, stateprep, G, theta, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_pauli_rotation_gradient( + self, stateprep, G, theta, batch_obs, dev + ): # pylint: disable=too-many-arguments """Tests that the automatic gradients of Pauli rotations are correct.""" random_state = np.array( [0.43593284 - 0.02945156j, 0.40812291 + 0.80158023j], requires_grad=False ) - tape = qml.tape.QuantumScript( - [stateprep(random_state, 0), G(theta, 0)], [qml.expval(qml.PauliZ(0))] + qs = QuantumScript( + [stateprep(random_state, 0), G(theta, 0)], + [qml.expval(qml.PauliZ(0))], + trainable_params=[1], ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - tape.trainable_params = {1} - - calculated_val = dev.adjoint_jacobian(tape) + calculated_val = dev.compute_derivatives(qs, config) - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = self.tol_for_allclose(dev.c_dtype) # compare to finite differences - tapes, fn = qml.gradients.param_shift(tape) + tapes, fn = qml.gradients.param_shift(qs) numeric_val = fn(qml.execute(tapes, dev, None)) assert np.allclose(calculated_val, numeric_val, atol=tol, rtol=0) @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) @pytest.mark.parametrize("stateprep", [qml.QubitStateVector, qml.StatePrep]) - def test_Rot_gradient(self, stateprep, theta, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_Rot_gradient(self, stateprep, theta, batch_obs, dev): """Tests that the device gradient of an arbitrary Euler-angle-parameterized gate is correct.""" params = np.array([theta, theta**3, np.sqrt(2) * theta]) - with qml.tape.QuantumTape() as tape: - stateprep(np.array([1.0, -1.0], requires_grad=False) / np.sqrt(2), wires=0) - qml.Rot(*params, wires=[0]) - qml.expval(qml.PauliZ(0)) + qs = QuantumScript( + [ + stateprep(np.array([1.0, 
-1.0], requires_grad=False) / np.sqrt(2), wires=0), + qml.Rot(*params, wires=[0]), + ], + [qml.expval(qml.PauliZ(0))], + trainable_params=[1, 2, 3], + ) - tape.trainable_params = {1, 2, 3} + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - calculated_val = dev.adjoint_jacobian(tape) + calculated_val = dev.compute_derivatives(qs, config) - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = self.tol_for_allclose(dev.c_dtype) # compare to finite differences - tapes, fn = qml.gradients.param_shift(tape) + tapes, fn = qml.gradients.param_shift(qs) numeric_val = fn(qml.execute(tapes, dev, None)) assert np.allclose(calculated_val, numeric_val, atol=tol, rtol=0) - @pytest.mark.parametrize("par", [1, -2, 1.623, -0.051, 0]) # integers, floats, zero - def test_ry_gradient(self, par, tol, dev): - """Test that the gradient of the RY gate matches the exact analytic formula.""" - with qml.tape.QuantumTape() as tape: - qml.RY(par, wires=[0]) - qml.expval(qml.PauliX(0)) - - tape.trainable_params = {0} + @pytest.mark.parametrize("param", [1, -2, 1.623, -0.051, 0]) # integers, floats, zero + @pytest.mark.parametrize( + "rotation, meas, expected_func", + [ + (qml.RY, qml.PauliX, lambda x: np.cos(x)), # pylint: disable=unnecessary-lambda + (qml.RX, qml.PauliZ, lambda x: -np.sin(x)), # pylint: disable=unnecessary-lambda + ], + ) + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_r_gradient( + self, tol, param, rotation, meas, expected_func, batch_obs, dev + ): # pylint: disable=too-many-arguments + """Test for the gradient of the rotation gate matches the known formula.""" - # gradients - exact = np.cos(par) - grad_A = dev.adjoint_jacobian(tape) + qs = QuantumScript( + [rotation(param, wires=0)], + [qml.expval(meas(0))], + trainable_params=[0], + ) - # different methods must agree - assert np.allclose(grad_A, exact, atol=tol, rtol=0) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": 
batch_obs}) - def test_rx_gradient(self, tol, dev): - """Test that the gradient of the RX gate matches the known formula.""" - a = 0.7418 + # circuit jacobians + dev_jacobian = dev.compute_derivatives(qs, config) + expected_jacobian = expected_func(param) + assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - with qml.tape.QuantumTape() as tape: - qml.RX(a, wires=0) - qml.expval(qml.PauliZ(0)) + @staticmethod + def process_and_execute_multiple_rx(dev, params, meas, batch_obs): + """Compute the circuit with multiple RX gates""" + qs = QuantumScript( + [qml.RX(params[0], wires=0), qml.RX(params[1], wires=1), qml.RX(params[2], wires=2)], + meas, + trainable_params=[0, 1, 2], + ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) # circuit jacobians - dev_jacobian = dev.adjoint_jacobian(tape) - expected_jacobian = -np.sin(a) - assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) + dev_jacobian = dev.compute_derivatives(qs, config) - def test_multiple_rx_gradient_pauliz(self, tol, dev): + return dev_jacobian + + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_multiple_rx_gradient_pauliz(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result.""" params = np.array([np.pi, np.pi / 2, np.pi / 3]) - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) - - for idx in range(3): - qml.expval(qml.PauliZ(idx)) + meas = [qml.expval(qml.PauliZ(idx)) for idx in range(3)] # circuit jacobians - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = -np.diag(np.sin(params)) assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - def test_multiple_rx_gradient_hermitian(self, tol, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def 
test_multiple_rx_gradient_hermitian(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result with Hermitian observable """ - params = np.array([np.pi, np.pi / 2, np.pi / 3]) - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) + params = np.array([np.pi, np.pi / 2, np.pi / 3]) - for idx in range(3): - qml.expval(qml.Hermitian([[1, 0], [0, -1]], wires=[idx])) + meas = [qml.expval(qml.Hermitian([[1, 0], [0, -1]], wires=[idx])) for idx in range(3)] - tape.trainable_params = {0, 1, 2} # circuit jacobians - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = -np.diag(np.sin(params)) assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__] # pylint: disable=no-member - ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot} - - def test_multiple_rx_gradient_expval_hermitian(self, tol, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_multiple_rx_gradient_expval_hermitian(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result with Hermitian observable """ params = np.array([np.pi / 3, np.pi / 4, np.pi / 5]) - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) - + meas = [ qml.expval( qml.Hermitian( [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], wires=[0, 2], ) ) + ] - tape.trainable_params = {0, 1, 2} - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = np.array( [ -np.sin(params[0]) * np.cos(params[2]), @@ -328,37 +316,31 @@ def test_multiple_rx_gradient_expval_hermitian(self, tol, 
dev): assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__] # pylint: disable=no-member - ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot} - - def test_multiple_rx_gradient_expval_hamiltonian(self, tol, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_multiple_rx_gradient_expval_hamiltonian(self, tol, batch_obs, dev): """Tests that the gradient of multiple RX gates in a circuit yields the correct result with Hermitian observable """ params = np.array([np.pi / 3, np.pi / 4, np.pi / 5]) - ham = qml.Hamiltonian( - [1.0, 0.3, 0.3, 0.4], - [ - qml.PauliX(0) @ qml.PauliX(1), - qml.PauliZ(0), - qml.PauliZ(1), - qml.Hermitian( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], - wires=[0, 2], - ), - ], - ) - - with qml.tape.QuantumTape() as tape: - qml.RX(params[0], wires=0) - qml.RX(params[1], wires=1) - qml.RX(params[2], wires=2) - - qml.expval(ham) + meas = [ + qml.expval( + qml.Hamiltonian( + [1.0, 0.3, 0.3, 0.4], + [ + qml.PauliX(0) @ qml.PauliX(1), + qml.PauliZ(0), + qml.PauliZ(1), + qml.Hermitian( + [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], + wires=[0, 2], + ), + ], + ) + ) + ] - tape.trainable_params = {0, 1, 2} - dev_jacobian = dev.adjoint_jacobian(tape) + dev_jacobian = self.process_and_execute_multiple_rx(dev, params, meas, batch_obs) expected_jacobian = ( 0.3 * np.array([-np.sin(params[0]), 0, 0]) + 0.3 * np.array([0, -np.sin(params[1]), 0]) @@ -374,51 +356,21 @@ def test_multiple_rx_gradient_expval_hamiltonian(self, tol, dev): assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) - qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__] # pylint: disable=no-member - ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot} - - @pytest.mark.parametrize("obs", [qml.PauliX, qml.PauliY]) @pytest.mark.parametrize( - "op", + "meas", [ - 
qml.RX(0.4, wires=0), - qml.RY(0.6, wires=0), - qml.RZ(0.8, wires=0), - qml.CRX(1.0, wires=[0, 1]), - qml.CRY(2.0, wires=[0, 1]), - qml.CRZ(3.0, wires=[0, 1]), - qml.Rot(0.2, -0.1, 0.2, wires=0), + [qml.expval(qml.PauliX(wires=0)), qml.expval(qml.PauliZ(wires=1))], + [qml.expval(qml.PauliY(wires=0)), qml.expval(qml.PauliZ(wires=1))], + [ + qml.expval( + qml.Hermitian( + [[0, 0, 1, 1], [0, 1, 2, 1], [1, 2, 1, 0], [1, 1, 0, 0]], + wires=[0, 1], + ) + ) + ], ], ) - def test_gradients_pauliz(self, op, obs, dev): - """Tests that the gradients of circuits match between the finite difference and device - methods.""" - # op.num_wires and op.num_params must be initialized a priori - with qml.tape.QuantumTape() as tape: - qml.Hadamard(wires=0) - qml.RX(0.543, wires=0) - qml.CNOT(wires=[0, 1]) - - op # pylint: disable=pointless-statement - - qml.Rot(1.3, -2.3, 0.5, wires=[0]) - qml.RZ(-0.5, wires=0) - qml.adjoint(qml.RY(0.5, wires=1), lazy=False) - qml.CNOT(wires=[0, 1]) - - qml.expval(obs(wires=0)) - qml.expval(qml.PauliZ(wires=1)) - - tape.trainable_params = set(range(1, 1 + op.num_params)) - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - # pylint: disable=unnecessary-direct-lambda-call - grad_F = (lambda t, fn: fn(qml.execute(t, dev, None)))(*qml.gradients.param_shift(tape)) - grad_D = dev.adjoint_jacobian(tape) - - assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - @pytest.mark.parametrize( "op", [ @@ -431,119 +383,72 @@ def test_gradients_pauliz(self, op, obs, dev): qml.Rot(0.2, -0.1, 0.2, wires=0), ], ) - def test_gradients_hermitian(self, op, dev): + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_gradients_pauliz_hermitian(self, op, meas, batch_obs, dev): """Tests that the gradients of circuits match between the finite difference and device methods.""" # op.num_wires and op.num_params must be initialized a priori - with qml.tape.QuantumTape() as tape: - qml.Hadamard(wires=0) - qml.RX(0.543, wires=0) - qml.CNOT(wires=[0, 1]) - - op.queue() 
- - qml.Rot(1.3, -2.3, 0.5, wires=[0]) - qml.RZ(-0.5, wires=0) - qml.adjoint(qml.RY(0.5, wires=1), lazy=False) - qml.CNOT(wires=[0, 1]) - - qml.expval( - qml.Hermitian( - [[0, 0, 1, 1], [0, 1, 2, 1], [1, 2, 1, 0], [1, 1, 0, 0]], - wires=[0, 1], - ) - ) - - tape.trainable_params = set(range(1, 1 + op.num_params)) - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - # pylint: disable=unnecessary-direct-lambda-call - grad_F = (lambda t, fn: fn(qml.execute(t, dev, None)))(*qml.gradients.param_shift(tape)) - grad_D = dev.adjoint_jacobian(tape) - - assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - - def test_gradient_gate_with_multiple_parameters_pauliz(self, dev): - """Tests that gates with multiple free parameters yield correct gradients.""" - x, y, z = [0.5, 0.3, -0.7] - - tape = qml.tape.QuantumScript( + qs = QuantumScript( [ - qml.RX(0.4, wires=[0]), - qml.Rot(x, y, z, wires=[0]), - qml.RY(-0.2, wires=[0]), + qml.Hadamard(wires=0), + qml.RX(0.543, wires=0), + qml.CNOT(wires=[0, 1]), + op, + qml.Rot(1.3, -2.3, 0.5, wires=[0]), + qml.RZ(-0.5, wires=0), + qml.adjoint(qml.RY(0.5, wires=1), lazy=False), + qml.CNOT(wires=[0, 1]), ], - [qml.expval(qml.PauliZ(0))], + meas, + trainable_params=list(range(1, 1 + op.num_params)), ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - tape.trainable_params = {1, 2, 3} + tol = self.tol_for_allclose(dev.c_dtype) - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - grad_D = dev.adjoint_jacobian(tape) - tapes, fn = qml.gradients.param_shift(tape) + tapes, fn = qml.gradients.param_shift(qs) grad_F = fn(qml.execute(tapes, dev, None)) - # gradient has the correct shape and every element is nonzero - assert len(grad_D) == 3 - assert all(isinstance(v, np.ndarray) for v in grad_D) - assert np.count_nonzero(grad_D) == 3 - # the different methods agree + # circuit jacobians + grad_D = dev.compute_derivatives(qs, config) assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - def 
test_gradient_gate_with_multiple_parameters_hermitian(self, dev): - """Tests that gates with multiple free parameters yield correct gradients.""" - x, y, z = [0.5, 0.3, -0.7] - - tape = qml.tape.QuantumScript( + @pytest.mark.parametrize( + "meas", + [ + [qml.expval(qml.PauliZ(0))], + [qml.expval(qml.Hermitian([[0, 1], [1, 1]], wires=0))], [ - qml.RX(0.4, wires=[0]), - qml.Rot(x, y, z, wires=[0]), - qml.RY(-0.2, wires=[0]), + qml.expval( + qml.Hamiltonian( + [1.0, 0.3, 0.3], + [qml.PauliX(0) @ qml.PauliX(1), qml.PauliZ(0), qml.PauliZ(1)], + ) + ) ], - [qml.expval(qml.Hermitian([[0, 1], [1, 1]], wires=0))], - ) - - tape.trainable_params = {1, 2, 3} - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - - grad_D = dev.adjoint_jacobian(tape) - tapes, fn = qml.gradients.param_shift(tape) - grad_F = fn(qml.execute(tapes, dev, None)) - - # gradient has the correct shape and every element is nonzero - assert len(grad_D) == 3 - assert all(isinstance(v, np.ndarray) for v in grad_D) - assert np.count_nonzero(grad_D) == 3 - # the different methods agree - assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - - def test_gradient_gate_with_multiple_parameters_hamiltonian(self, dev): + ], + ) + @pytest.mark.parametrize("batch_obs", [True, False]) + def test_gradient_gate_with_multiple_parameters(self, meas, batch_obs, dev): """Tests that gates with multiple free parameters yield correct gradients.""" x, y, z = [0.5, 0.3, -0.7] - ham = qml.Hamiltonian( - [1.0, 0.3, 0.3], - [qml.PauliX(0) @ qml.PauliX(1), qml.PauliZ(0), qml.PauliZ(1)], - ) - - tape = qml.tape.QuantumScript( + qs = QuantumScript( [ qml.RX(0.4, wires=[0]), qml.Rot(x, y, z, wires=[0]), qml.RY(-0.2, wires=[0]), ], - [qml.expval(ham)], + meas, + trainable_params=[1, 2, 3], ) + config = ExecutionConfig(gradient_method="adjoint", device_options={"batch_obs": batch_obs}) - tape.trainable_params = {1, 2, 3} - - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = self.tol_for_allclose(dev.c_dtype) - grad_D = 
dev.adjoint_jacobian(tape) - tapes, fn = qml.gradients.param_shift(tape) + # circuit jacobians + grad_D = dev.compute_derivatives(qs, config) + tapes, fn = qml.gradients.param_shift(qs) grad_F = fn(qml.execute(tapes, dev, None)) # gradient has the correct shape and every element is nonzero @@ -553,101 +458,45 @@ def test_gradient_gate_with_multiple_parameters_hamiltonian(self, dev): # the different methods agree assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) - def test_use_device_state(self, tol, dev): - """Tests that when using the device state, the correct answer is still returned.""" - - x, y, z = [0.5, 0.3, -0.7] - - with qml.tape.QuantumTape() as tape: - qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) - - tape.trainable_params = {1, 2, 3} - - dM1 = dev.adjoint_jacobian(tape) - - qml.execute([tape], dev, None) - dM2 = dev.adjoint_jacobian(tape, use_device_state=True) - - assert np.allclose(dM1, dM2, atol=tol, rtol=0) - - def test_provide_starting_state(self, tol, dev): - """Tests provides correct answer when provided starting state.""" - comm = MPI.COMM_WORLD - - x, y, z = [0.5, 0.3, -0.7] - - with qml.tape.QuantumTape() as tape: - qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) - - tape.trainable_params = {1, 2, 3} - - dM1 = dev.adjoint_jacobian(tape) - if device_name == "lightning.gpu": - local_state_vector = dev.state - complex_type = np.complex128 if dev.R_DTYPE == np.float64 else np.complex64 - state_vector = np.zeros(1 << 8).astype(complex_type) - comm.Allgather(local_state_vector, state_vector) - qml.execute([tape], dev, None) - dM2 = dev.adjoint_jacobian(tape, starting_state=state_vector) - assert np.allclose(dM1, dM2, atol=tol, rtol=0) - - def test_provide_wrong_starting_state(self, dev): - """Tests raise an exception when provided starting state mismatches.""" - x, y, z = [0.5, 0.3, -0.7] - - with qml.tape.QuantumTape() as tape: 
- qml.RX(0.4, wires=[0]) - qml.Rot(x, y, z, wires=[0]) - qml.RY(-0.2, wires=[0]) - qml.expval(qml.PauliZ(0)) - - tape.trainable_params = {1, 2, 3} +class TestAdjointJacobianQNode: + """Test QNode integration with the adjoint_jacobian method""" - with pytest.raises( - qml.QuantumFunctionError, - match="The number of qubits of starting_state must be the same as", - ): - dev.adjoint_jacobian(tape, starting_state=np.ones(7)) + # def analytic_rotation(self): + I = np.eye(2) + X = qml.PauliX.compute_matrix() + Y = qml.PauliY.compute_matrix() + Z = qml.PauliZ.compute_matrix() - @pytest.mark.skipif( - device_name == "lightning.gpu", - reason="Adjoint differentiation does not support State measurements.", - ) - def test_state_return_type(self, dev): - """Tests raise an exception when the return type is State""" - with qml.tape.QuantumTape() as tape: - qml.RX(0.4, wires=[0]) - qml.state() + def Rx(self, theta): + r"""One-qubit rotation about the x axis. - tape.trainable_params = {0} + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_x \theta/2}` + """ + return math.cos(theta / 2) * self.I + 1j * math.sin(-theta / 2) * self.X - with pytest.raises( - qml.QuantumFunctionError, - match="Adjoint differentiation method does not support measurement StateMP.", - ): - dev.adjoint_jacobian(tape) + def Ry(self, theta): + r"""One-qubit rotation about the y axis. + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_y \theta/2}` + """ + return math.cos(theta / 2) * self.I + 1j * math.sin(-theta / 2) * self.Y -class TestAdjointJacobianQNode: - """Test QNode integration with the adjoint_jacobian method""" + def Rz(self, theta): + r"""One-qubit rotation about the z axis. 
- @pytest.fixture(params=fixture_params) - def dev(self, request): - """Returns a PennyLane device.""" - return qml.device( - device_name, - wires=8, - mpi=True, - c_dtype=request.param[0], - batch_obs=request.param[1], - ) + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_z \theta/2}` + """ + return math.cos(theta / 2) * self.I + 1j * math.sin(-theta / 2) * self.Z def test_finite_shots_error(self): """Tests that an error is raised when computing the adjoint diff on a device with finite shots""" @@ -665,6 +514,11 @@ def circ(x): qml.grad(circ)(0.1) + @staticmethod + def tol_for_allclose(c_dtype): + """Compute the tolerance for allclose""" + return 1e-3 if c_dtype == np.complex64 else 1e-7 + def test_qnode(self, mocker, dev): """Test that specifying diff_method allows the adjoint method to be selected""" args = np.array([0.54, 0.1, 0.5], requires_grad=True) @@ -684,15 +538,15 @@ def circuit(x, y, z): return qml.expval(qml.PauliX(0) @ qml.PauliZ(1)) qnode1 = QNode(circuit, dev, diff_method="adjoint") - spy = mocker.spy(dev.target_device, "adjoint_jacobian") + spy = mocker.spy(dev, "LightningAdjointJacobian") grad_fn = qml.grad(qnode1) grad_A = grad_fn(*args) spy.assert_called() - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) qnode2 = QNode(circuit, dev, diff_method="finite-diff", h=h) grad_fn = qml.grad(qnode2) @@ -726,7 +580,7 @@ def cost(p1, p2): zero_state = np.array([1.0, 0.0]) cost(reused_p, other_p) - spy = mocker.spy(dev.target_device, "adjoint_jacobian") + spy = mocker.spy(dev, "LightningAdjointJacobian") # analytic gradient grad_fn = qml.grad(cost) @@ -737,18 +591,34 @@ def cost(p1, p2): # manual gradient grad_true0 = ( expZ( - Rx(reused_p) @ Rz(other_p) @ Ry(reused_p + np.pi / 2) @ Rx(extra_param) @ zero_state + self.Rx(reused_p) + @ self.Rz(other_p) + @ 
self.Ry(reused_p + np.pi / 2) + @ self.Rx(extra_param) + @ zero_state ) - expZ( - Rx(reused_p) @ Rz(other_p) @ Ry(reused_p - np.pi / 2) @ Rx(extra_param) @ zero_state + self.Rx(reused_p) + @ self.Rz(other_p) + @ self.Ry(reused_p - np.pi / 2) + @ self.Rx(extra_param) + @ zero_state ) ) / 2 grad_true1 = ( expZ( - Rx(reused_p + np.pi / 2) @ Rz(other_p) @ Ry(reused_p) @ Rx(extra_param) @ zero_state + self.Rx(reused_p + np.pi / 2) + @ self.Rz(other_p) + @ self.Ry(reused_p) + @ self.Rx(extra_param) + @ zero_state ) - expZ( - Rx(reused_p - np.pi / 2) @ Rz(other_p) @ Ry(reused_p) @ Rx(extra_param) @ zero_state + self.Rx(reused_p - np.pi / 2) + @ self.Rz(other_p) + @ self.Ry(reused_p) + @ self.Rx(extra_param) + @ zero_state ) ) / 2 expected = grad_true0 + grad_true1 # product rule @@ -765,10 +635,10 @@ def circuit(params): qml.Rot(params[1], params[0], 2 * params[0], wires=[0]) return qml.expval(qml.PauliX(0)) - spy_analytic = mocker.spy(dev.target_device, "adjoint_jacobian") + spy_analytic = mocker.spy(dev, "LightningAdjointJacobian") - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) cost = QNode(circuit, dev, diff_method="finite-diff", h=h) @@ -798,7 +668,7 @@ def f(params1, params2): qml.RY(tf.cos(params2), wires=[0]) return qml.expval(qml.PauliZ(0)) - if dev.R_DTYPE == np.float32: + if dev.r_dtype == np.float32: tf_r_dtype = tf.float32 else: tf_r_dtype = tf.float64 @@ -806,8 +676,8 @@ def f(params1, params2): params1 = tf.Variable(0.3, dtype=tf_r_dtype) params2 = tf.Variable(0.4, dtype=tf_r_dtype) - h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) qnode1 = QNode(f, dev, interface="tf", diff_method="adjoint") qnode2 = QNode(f, dev, interface="tf", diff_method="finite-diff", h=h) @@ -839,7 +709,7 @@ def 
f(params1, params2): params1 = torch.tensor(0.3, requires_grad=True) params2 = torch.tensor(0.4, requires_grad=True) - h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) qnode1 = QNode(f, dev, interface="torch", diff_method="adjoint") qnode2 = QNode(f, dev, interface="torch", diff_method="finite-diff", h=h) @@ -861,7 +731,7 @@ def test_interface_jax(self, dev): jax interface""" jax = pytest.importorskip("jax") - if dev.R_DTYPE == np.float64: + if dev.c_dtype == np.complex128: from jax import config # pylint: disable=import-outside-toplevel config.update("jax_enable_x64", True) @@ -872,11 +742,13 @@ def f(params1, params2): qml.RY(jax.numpy.cos(params2), wires=[0]) return qml.expval(qml.PauliZ(0)) - params1 = jax.numpy.array(0.3, dev.R_DTYPE) - params2 = jax.numpy.array(0.4, dev.R_DTYPE) + r_dtype = np.float32 if dev.c_dtype == np.complex64 else np.float64 + + params1 = jax.numpy.array(0.3, r_dtype) + params2 = jax.numpy.array(0.4, r_dtype) - h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 - tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + h = self.tol_for_allclose(dev.c_dtype) + tol = self.tol_for_allclose(dev.c_dtype) qnode_adjoint = QNode(f, dev, interface="jax", diff_method="adjoint") qnode_fd = QNode(f, dev, interface="jax", diff_method="finite-diff", h=h) @@ -1379,8 +1251,8 @@ def test_qubit_unitary(dev, n_targets): """Tests that ``qml.QubitUnitary`` can be included in circuits differentiated with the adjoint method.""" n_wires = len(dev.wires) dev_def = qml.device("default.qubit", wires=n_wires) - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - c_dtype = np.complex64 if dev.R_DTYPE == np.float32 else np.complex128 + h = 1e-3 if dev.c_dtype == np.complex64 else 1e-7 + c_dtype = dev.c_dtype np.random.seed(1337) par = 2 * np.pi * np.random.rand(n_wires) @@ -1427,8 +1299,8 @@ def test_diff_qubit_unitary(dev, n_targets): """Tests that ``qml.QubitUnitary`` can be differentiated with the adjoint method.""" n_wires = 
len(dev.wires) dev_def = qml.device("default.qubit", wires=n_wires) - h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 - c_dtype = np.complex64 if dev.R_DTYPE == np.float32 else np.complex128 + h = 1e-3 if dev.c_dtype == np.complex64 else 1e-7 + c_dtype = dev.c_dtype np.random.seed(1337) par = 2 * np.pi * np.random.rand(n_wires) diff --git a/mpitests/test_apply.py b/mpitests/test_apply.py index 17d91cd2d7..5987626f1f 100644 --- a/mpitests/test_apply.py +++ b/mpitests/test_apply.py @@ -34,14 +34,17 @@ ) -def create_random_init_state(numWires, R_DTYPE, seed_value=48): +def create_random_init_state(numWires, c_dtype, seed_value=48): """Returns a random initial state of a certain type.""" np.random.seed(seed_value) - num_elements = 1 << numWires - init_state = np.random.rand(num_elements).astype(R_DTYPE) + 1j * np.random.rand( + + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + + num_elements = 2**numWires + init_state = np.random.rand(num_elements).astype(r_dtype) + 1j * np.random.rand( num_elements - ).astype(R_DTYPE) - scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(R_DTYPE) + ).astype(r_dtype) + scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(r_dtype) init_state = init_state / scale_sum return init_state @@ -54,16 +57,13 @@ def apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 + c_dtype = dev_mpi.c_dtype - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - 
state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) comm.Scatter(state_vector, local_state_vector, root=0) @@ -84,45 +84,6 @@ def circuit(*params): assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) -def apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires): - """Wrapper applying a parametric gate with the apply method.""" - num_wires = numQubits - comm = MPI.COMM_WORLD - commSize = comm.Get_size() - num_global_wires = commSize.bit_length() - 1 - num_local_wires = num_wires - num_global_wires - - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 - - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) - - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) - comm.Bcast(state_vector, root=0) - - comm.Scatter(state_vector, local_state_vector, root=0) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - - @qml.qnode(dev_cpu) - def circuit(*params): - qml.StatePrep(state_vector, wires=range(num_wires)) - operation(*params, wires=Wires) - return qml.state() - - expected_output_cpu = np.array(circuit(*par)).astype(c_dtype) - comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) - - dev_mpi.syncH2D(local_state_vector) - dev_mpi.apply([operation(*par, wires=Wires)]) - dev_mpi.syncD2H(local_state_vector) - - assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) - - def apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires): """Wrapper applying a non-parametric gate with QNode function.""" num_wires = numQubits @@ -131,16 +92,13 @@ def apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires): 
num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 + c_dtype = dev_mpi.c_dtype - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) comm.Scatter(state_vector, local_state_vector, root=0) @@ -161,45 +119,6 @@ def circuit(): assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) -def apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires): - """Wrapper applying a non-parametric gate with the apply method.""" - num_wires = numQubits - comm = MPI.COMM_WORLD - commSize = comm.Get_size() - num_global_wires = commSize.bit_length() - 1 - num_local_wires = num_wires - num_global_wires - - if dev_mpi.R_DTYPE == np.float32: - c_dtype = np.complex64 - else: - c_dtype = np.complex128 - - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) - - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) - comm.Bcast(state_vector, root=0) - - comm.Scatter(state_vector, local_state_vector, root=0) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - - @qml.qnode(dev_cpu) - def circuit(): - qml.StatePrep(state_vector, wires=range(num_wires)) - operation(wires=Wires) - return qml.state() - - 
expected_output_cpu = np.array(circuit()).astype(c_dtype) - comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) - - dev_mpi.syncH2D(local_state_vector) - dev_mpi.apply([operation(wires=Wires)]) - dev_mpi.syncD2H(local_state_vector) - - assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) - - class TestApply: # pylint: disable=missing-function-docstring,too-many-arguments """Tests whether the device can apply supported quantum gates.""" @@ -220,13 +139,11 @@ def dev_mpi(self, request): @pytest.mark.parametrize("Wires", [0, 1, numQubits - 2, numQubits - 1]) def test_apply_operation_single_wire_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.CNOT, qml.SWAP, qml.CY, qml.CZ]) @pytest.mark.parametrize("Wires", [[0, 1], [numQubits - 2, numQubits - 1], [0, numQubits - 1]]) def test_apply_operation_two_wire_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) @pytest.mark.parametrize( @@ -240,7 +157,6 @@ def test_apply_operation_two_wire_nonparam(self, tol, operation, Wires, dev_mpi) ) def test_apply_operation_three_wire_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) @pytest.mark.parametrize( @@ -254,7 +170,6 @@ def test_apply_operation_three_wire_nonparam(self, tol, operation, Wires, dev_mp ) def test_apply_operation_three_wire_qnode_nonparam(self, tol, operation, Wires, dev_mpi): apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) - 
apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) @pytest.mark.parametrize("operation", [qml.PhaseShift, qml.RX, qml.RY, qml.RZ]) @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) @@ -263,7 +178,6 @@ def test_apply_operation_1gatequbit_1param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize("operation", [qml.Rot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @@ -272,7 +186,6 @@ def test_apply_operation_1gatequbit_3param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize("operation", [qml.CRot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @@ -281,7 +194,6 @@ def test_apply_operation_1gatequbit_3param_cgate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize( "operation", @@ -304,7 +216,6 @@ def test_apply_operation_2gatequbit_1param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) @pytest.mark.parametrize( "operation", @@ -323,7 +234,6 @@ def test_apply_operation_4gatequbit_1param_gate_qnode_param( self, tol, operation, par, Wires, dev_mpi ): apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) - apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) # BasisState test @pytest.mark.parametrize("operation", [qml.BasisState]) @@ -337,17 +247,17 @@ def test_state_prep(self, tol, 
operation, index, dev_mpi): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: + if dev_mpi.c_dtype == np.float32: c_dtype = np.complex64 else: c_dtype = np.complex128 - state_vector = np.zeros(1 << num_wires).astype(c_dtype) - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + state_vector = np.zeros(2**num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) @@ -399,17 +309,17 @@ def test_qubit_state_prep(self, tol, par, Wires, dev_mpi): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: + if dev_mpi.c_dtype == np.float32: c_dtype = np.complex64 else: c_dtype = np.complex128 - state_vector = np.zeros(1 << num_wires).astype(c_dtype) - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + state_vector = np.zeros(2**num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, 
dev_mpi.c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) @@ -435,17 +345,17 @@ def test_dev_reset(self, tol, dev_mpi): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - if dev_mpi.R_DTYPE == np.float32: + if dev_mpi.c_dtype == np.float32: c_dtype = np.complex64 else: c_dtype = np.complex128 - state_vector = np.zeros(1 << num_wires).astype(c_dtype) - expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) - local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) - local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + state_vector = np.zeros(2**num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) @@ -462,10 +372,10 @@ def circuit(): expected_output_cpu = cpu_qnode().astype(c_dtype) comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) - dev_mpi.reset() + dev_mpi._statevector.reset_state() gpumpi_qnode = qml.QNode(circuit, dev_mpi) - dev_mpi.reset() + dev_mpi._statevector.reset_state() local_state_vector = gpumpi_qnode() assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) @@ -474,8 +384,8 @@ def circuit(): class TestSparseHamExpval: # pylint: disable=too-few-public-methods,missing-function-docstring """Tests sparse hamiltonian expectation values.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sparse_hamiltonian_expectation(self, C_DTYPE): + 
@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sparse_hamiltonian_expectation(self, c_dtype): comm = MPI.COMM_WORLD commSize = comm.Get_size() num_global_wires = commSize.bit_length() - 1 @@ -496,32 +406,38 @@ def test_sparse_hamiltonian_expectation(self, C_DTYPE): 0.3 + 0.3j, 0.3 + 0.5j, ], - dtype=C_DTYPE, + dtype=c_dtype, ) - local_state_vector = np.zeros(1 << num_local_wires).astype(C_DTYPE) + state_vector /= np.linalg.norm(state_vector) + + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) - dev_gpu = qml.device("lightning.gpu", wires=3, mpi=False, c_dtype=C_DTYPE) - dev_mpi = qml.device("lightning.gpu", wires=3, mpi=True, c_dtype=C_DTYPE) + H_sparse = qml.SparseHamiltonian(Hmat, wires=range(3)) - dev_mpi.syncH2D(local_state_vector) - dev_gpu.syncH2D(state_vector) + def circuit(): + qml.StatePrep(state_vector, wires=range(3)) + return qml.expval(H_sparse) - H_sparse = qml.SparseHamiltonian(Hmat, wires=range(3)) + dev_gpu = qml.device("lightning.gpu", wires=3, mpi=False, c_dtype=c_dtype) + gpu_qnode = qml.QNode(circuit, dev_gpu) + expected_output_gpu = gpu_qnode() + comm.Bcast(np.array(expected_output_gpu), root=0) - comm.Barrier() + dev_mpi = qml.device("lightning.gpu", wires=3, mpi=True, c_dtype=c_dtype) + mpi_qnode = qml.QNode(circuit, dev_mpi) + expected_output_mpi = mpi_qnode() - res = dev_mpi.expval(H_sparse) - expected = dev_gpu.expval(H_sparse) + comm.Barrier() - assert np.allclose(res, expected) + assert np.allclose(expected_output_mpi, expected_output_gpu) class TestExpval: """Tests that expectation values are properly calculated or that the proper errors are raised.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) @pytest.mark.parametrize( "operation", [ @@ -533,7 +449,7 @@ class TestExpval: ], ) @pytest.mark.parametrize("wires", [0, 1, 2, numQubits - 3, 
numQubits - 2, numQubits - 1]) - def test_expval_single_wire_no_parameters(self, tol, operation, wires, C_DTYPE): + def test_expval_single_wire_no_parameters(self, tol, operation, wires, c_dtype): """Tests that expectation values are properly calculated for single-wire observables without parameters.""" num_wires = numQubits comm = MPI.COMM_WORLD @@ -541,14 +457,14 @@ def test_expval_single_wire_no_parameters(self, tol, operation, wires, C_DTYPE): num_global_wires = commSize.bit_length() - 1 num_local_wires = num_wires - num_global_wires - dev_mpi = qml.device("lightning.gpu", wires=numQubits, mpi=True, c_dtype=C_DTYPE) + dev_mpi = qml.device("lightning.gpu", wires=numQubits, mpi=True, c_dtype=c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) - local_state_vector = np.zeros(1 << num_local_wires).astype(C_DTYPE) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) comm.Scatter(state_vector, local_state_vector, root=0) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) def circuit(): qml.StatePrep(state_vector, wires=range(num_wires)) @@ -563,7 +479,7 @@ def circuit(): assert np.allclose(expected_output_mpi, expected_output_cpu, atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) @pytest.mark.parametrize( "obs", [ @@ -575,12 +491,12 @@ def circuit(): qml.PauliZ(numQubits - 2) @ qml.PauliZ(numQubits - 1), ], ) - def test_expval_multiple_obs(self, obs, tol, C_DTYPE): + def test_expval_multiple_obs(self, obs, tol, c_dtype): """Test expval with Hamiltonian""" num_wires = numQubits - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, 
c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) def circuit(): qml.RX(0.4, wires=[0]) @@ -592,7 +508,7 @@ def circuit(): assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) @pytest.mark.parametrize( "obs, coeffs", [ @@ -620,14 +536,14 @@ def circuit(): ), ], ) - def test_expval_hamiltonian(self, obs, coeffs, tol, C_DTYPE): + def test_expval_hamiltonian(self, obs, coeffs, tol, c_dtype): """Test expval with Hamiltonian""" num_wires = numQubits ham = qml.Hamiltonian(coeffs, obs) - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) def circuit(): qml.RX(0.4, wires=[0]) @@ -665,14 +581,14 @@ def circuit(): class TestGenerateSample: """Tests that samples are properly calculated.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sample_dimensions(self, C_DTYPE): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sample_dimensions(self, c_dtype): """Tests if the samples returned by sample have the correct dimensions """ num_wires = numQubits - dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) ops = [qml.RX(1.5708, wires=[0]), qml.RX(1.5708, wires=[1])] @@ -697,14 +613,14 @@ def test_sample_dimensions(self, C_DTYPE): assert np.array_equal(s3.shape, (shots,)) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sample_values(self, tol, C_DTYPE): + 
@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sample_values(self, tol, c_dtype): """Tests if the samples returned by sample have the correct values """ num_wires = numQubits - dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype) shots = qml.measurements.Shots(1000) ops = [qml.RX(1.5708, wires=[0])] @@ -716,17 +632,17 @@ def test_sample_values(self, tol, C_DTYPE): # they square to 1 assert np.allclose(s1**2, 1, atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_sample_values_qnode(self, tol, C_DTYPE): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_sample_values_qnode(self, tol, c_dtype): """Tests if the samples returned by sample have the correct values """ num_wires = numQubits dev_mpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) - dev_mpi.reset() + dev_mpi._statevector.reset_state() @qml.qnode(dev_mpi) def circuit(): @@ -737,15 +653,15 @@ def circuit(): # they square to 1 assert np.allclose(circuit() ** 2, 1, atol=tol, rtol=0) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_multi_samples_return_correlated_results(self, C_DTYPE): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_multi_samples_return_correlated_results(self, c_dtype): """Tests if the samples returned by the sample function have the correct dimensions """ num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) @qml.qnode(dev_gpumpi) @@ -758,13 +674,13 @@ def circuit(): assert np.array_equal(outcomes[0], outcomes[1]) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, 
np.complex64]) - def test_paulix_pauliy(self, C_DTYPE, tol=TOL_STOCHASTIC): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_paulix_pauliy(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliX and PauliY works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 @@ -800,13 +716,13 @@ def circuit(): ) / 16 assert np.allclose(var, expected, atol=tol) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_pauliz_hadamard(self, C_DTYPE, tol=TOL_STOCHASTIC): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_pauliz_hadamard(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliZ and PauliY and hadamard works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 @@ -846,13 +762,13 @@ def circuit(): class TestTensorVar: """Test tensor variance measurements.""" - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_paulix_pauliy(self, C_DTYPE, tol=TOL_STOCHASTIC): + @pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_paulix_pauliy(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliX and PauliY works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 @@ -880,12 +796,12 @@ def circuit(): ) / 16 assert np.allclose(res, expected, atol=tol) - @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) - def test_pauliz_hadamard(self, C_DTYPE, tol=TOL_STOCHASTIC): + 
@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) + def test_pauliz_hadamard(self, c_dtype, tol=TOL_STOCHASTIC): """Test that a tensor product involving PauliZ and PauliY and hadamard works correctly""" num_wires = 3 dev_gpumpi = qml.device( - "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=c_dtype ) theta = 0.432 diff --git a/mpitests/test_device.py b/mpitests/test_device.py index 03a1880114..dd783dbee7 100644 --- a/mpitests/test_device.py +++ b/mpitests/test_device.py @@ -38,13 +38,13 @@ def test_create_device(): def test_unsupported_mpi_buf_size(): - with pytest.raises(TypeError, match="Unsupported mpi_buf_size value"): + with pytest.raises(ValueError, match="Unsupported mpi_buf_size value"): dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=-1) - with pytest.raises(TypeError, match="Unsupported mpi_buf_size value"): + with pytest.raises(ValueError, match="Unsupported mpi_buf_size value"): dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=3) - with pytest.warns( - RuntimeWarning, - match="The MPI buffer size is larger than the local state vector size", + with pytest.raises( + RuntimeError, + match="The MPI buffer size is larger than the local state vector size.", ): dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=2**4) with pytest.raises( diff --git a/mpitests/test_expval.py b/mpitests/test_expval.py index d020471c03..3ca73cd82e 100644 --- a/mpitests/test_expval.py +++ b/mpitests/test_expval.py @@ -22,114 +22,260 @@ from conftest import PHI, THETA, VARPHI, device_name from mpi4py import MPI +numQubits = 8 -@pytest.mark.parametrize("theta, phi", list(zip(THETA, PHI))) -class TestExpval: - """Test expectation values""" - def test_identity_expectation(self, theta, phi, tol): - """Test that identity expectation value (i.e. 
the trace) is 1""" - dev = qml.device(device_name, mpi=True, wires=3) +def create_random_init_state(numWires, c_dtype, seed_value=48): + """Returns a random initial state of a certain type.""" + np.random.seed(seed_value) - O1 = qml.Identity(wires=[0]) - O2 = qml.Identity(wires=[1]) + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 - dev.apply( - [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], - ) + num_elements = 2**numWires + init_state = np.random.rand(num_elements).astype(r_dtype) + 1j * np.random.rand( + num_elements + ).astype(r_dtype) - res = np.array([dev.expval(O1), dev.expval(O2)]) - assert np.allclose(res, np.array([1, 1]), tol) + init_state = init_state / np.linalg.norm(init_state) + return init_state - def test_pauliz_expectation(self, theta, phi, tol): - """Test that PauliZ expectation value is correct""" - dev = qml.device(device_name, mpi=True, wires=3) - O1 = qml.PauliZ(wires=[0]) - O2 = qml.PauliZ(wires=[1]) +def apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires): + """Wrapper applying a parametric gate with QNode function.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires - dev.apply( - [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], - ) + c_dtype = dev_mpi.c_dtype - res = np.array([dev.expval(O1), dev.expval(O2)]) - assert np.allclose(res, np.array([np.cos(theta), np.cos(theta) * np.cos(phi)]), tol) + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) - def test_paulix_expectation(self, theta, phi, tol): - """Test that PauliX expectation value is 
correct""" - dev = qml.device(device_name, mpi=True, wires=3) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) + comm.Bcast(state_vector, root=0) - O1 = qml.PauliX(wires=[0]) - O2 = qml.PauliX(wires=[1]) + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - dev.apply( - [qml.RY(theta, wires=[0]), qml.RY(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], - ) + def circuit(*params): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(*params, wires=Wires) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode(*par).astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_state_vector = mpi_qnode(*par) + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +def apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires): + """Wrapper applying a non-parametric gate with QNode function.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + c_dtype = dev_mpi.c_dtype + + expected_output_cpu = np.zeros(2**num_wires).astype(c_dtype) + local_state_vector = np.zeros(2**num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(2**num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) + comm.Bcast(state_vector, root=0) - res = np.array([dev.expval(O1), dev.expval(O2)], dtype=dev.C_DTYPE) - assert np.allclose( - res, - np.array([np.sin(theta) * np.sin(phi), np.sin(phi)], dtype=dev.C_DTYPE), - tol * 10, + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + def circuit(): 
+ qml.StatePrep(state_vector, wires=range(num_wires)) + operation(wires=Wires) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode().astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_state_vector = mpi_qnode() + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +@pytest.mark.parametrize("c_dtype", [np.complex128, np.complex64]) +@pytest.mark.parametrize("batch_obs", [True, False]) +class TestExpval: + """Tests that expectation values are properly calculated or that the proper errors are raised.""" + + @pytest.mark.parametrize( + "operation", + [ + qml.PauliX, + qml.PauliY, + qml.PauliZ, + qml.Hadamard, + qml.Identity, + ], + ) + @pytest.mark.parametrize("wires", [0, 1, 2, numQubits - 2, numQubits - 1]) + def test_expval_single_wire_no_parameters(self, tol, operation, wires, c_dtype, batch_obs): + """Tests that expectation values are properly calculated for single-wire observables without parameters.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + + dev_mpi = qml.device( + "lightning.gpu", wires=numQubits, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs ) - def test_pauliy_expectation(self, theta, phi, tol): - """Test that PauliY expectation value is correct""" - dev = qml.device(device_name, mpi=True, wires=3) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) - O1 = qml.PauliY(wires=[0]) - O2 = qml.PauliY(wires=[1]) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) + comm.Bcast(state_vector, root=0) - dev.apply( - [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + def circuit(): + qml.StatePrep(state_vector, wires=range(num_wires)) + return qml.expval(operation(wires)) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode() + 
comm.Bcast(np.array(expected_output_cpu), root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + expected_output_mpi = mpi_qnode() + + assert np.allclose(expected_output_mpi, expected_output_cpu, atol=tol, rtol=0) + + @pytest.mark.parametrize( + "obs", + [ + qml.PauliX(0) @ qml.PauliZ(1), + qml.PauliX(0) @ qml.PauliZ(numQubits - 1), + qml.PauliX(numQubits - 2) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + qml.PauliZ(0) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(numQubits - 2) @ qml.PauliZ(numQubits - 1), + ], + ) + def test_expval_multiple_obs(self, obs, tol, c_dtype, batch_obs): + """Test expval with tensor products of Pauli observables""" + num_wires = numQubits + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs ) - res = np.array([dev.expval(O1), dev.expval(O2)]) - assert np.allclose(res, np.array([0, -np.cos(theta) * np.sin(phi)]), tol) + def circuit(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[num_wires - 1]) + return qml.expval(obs) - def test_hadamard_expectation(self, theta, phi, tol): - """Test that Hadamard expectation value is correct""" - dev = qml.device(device_name, mpi=True, wires=3) + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) + + @pytest.mark.parametrize( + "obs, coeffs", + [ + ([qml.PauliX(0) @ qml.PauliZ(1)], [0.314]), + ([qml.PauliX(0) @ qml.PauliZ(numQubits - 1)], [0.314]), + ([qml.PauliZ(0) @ qml.PauliZ(1)], [0.314]), + ([qml.PauliZ(0) @ qml.PauliZ(numQubits - 1)], [0.314]), + ( + [qml.PauliX(0) @ qml.PauliZ(1), qml.PauliZ(0) @ qml.PauliZ(1)], + [0.314, 0.2], + ), + ( + [ + qml.PauliX(0) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + ], + [0.314, 0.2], + ), + ( + [ + qml.PauliX(numQubits - 2) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + ], + [0.314, 0.2], + ), + ], + ) + def 
test_expval_hamiltonian(self, obs, coeffs, tol, c_dtype, batch_obs): + """Test expval with Hamiltonian""" + num_wires = numQubits + + ham = qml.Hamiltonian(coeffs, obs) + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs + ) + + def circuit(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[numQubits - 1]) + return qml.expval(ham) - O1 = qml.Hadamard(wires=[0]) - O2 = qml.Hadamard(wires=[1]) + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) - dev.apply( - [qml.RY(theta, wires=[0]), qml.RY(phi, wires=[1]), qml.CNOT(wires=[0, 1])], - rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) + + def test_expval_non_pauli_word_hamiltionian(self, tol, c_dtype, batch_obs): + """Tests expectation values of non-Pauli word Hamiltonians.""" + dev_mpi = qml.device( + "lightning.gpu", wires=3, mpi=True, c_dtype=c_dtype, batch_obs=batch_obs ) + dev_cpu = qml.device("lightning.qubit", wires=3) + + theta = 0.432 + phi = 0.123 + varphi = -0.543 + + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.expval(0.5 * qml.Hadamard(2)) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) - res = np.array([dev.expval(O1), dev.expval(O2)]) - expected = np.array( - [ - np.sin(theta) * np.sin(phi) + np.cos(theta), - np.cos(theta) * np.cos(phi) + np.sin(phi), - ] - ) / np.sqrt(2) - assert np.allclose(res, expected, tol) - - @pytest.mark.parametrize("n_wires", range(1, 8)) - def test_hermitian_expectation(self, n_wires, theta, phi, tol): + @pytest.mark.parametrize("theta, phi", list(zip(THETA, PHI))) + @pytest.mark.parametrize("n_wires", range(1, 
numQubits)) + def test_hermitian_expectation(self, n_wires, theta, phi, tol, c_dtype, batch_obs): """Test that Hadamard expectation value is correct""" - n_qubits = 7 + n_qubits = numQubits - 1 dev_def = qml.device("default.qubit", wires=n_qubits) - dev = qml.device(device_name, mpi=True, wires=n_qubits) + dev = qml.device( + device_name, mpi=True, wires=n_qubits, c_dtype=c_dtype, batch_obs=batch_obs + ) comm = MPI.COMM_WORLD m = 2**n_wires U = np.random.rand(m, m) + 1j * np.random.rand(m, m) U = U + np.conj(U.T) - U = U.astype(dev.C_DTYPE) + U = U.astype(dev.c_dtype) comm.Bcast(U, root=0) obs = qml.Hermitian(U, wires=range(n_wires)) init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) - init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) - init_state = init_state.astype(dev.C_DTYPE) + init_state = init_state / np.linalg.norm(init_state) + init_state = init_state.astype(dev.c_dtype) comm.Bcast(init_state, root=0) def circuit(): @@ -250,69 +396,39 @@ def circuit(x, y): class TestTensorExpval: """Test tensor expectation values""" - def test_paulix_pauliy(self, theta, phi, varphi, tol): + @pytest.mark.parametrize( + "obs,expected", + [ + (qml.PauliX(0) @ qml.PauliY(2), "PXPY"), + (qml.PauliZ(0) @ qml.Identity(1) @ qml.PauliZ(2), "PZIPZ"), + (qml.PauliZ(0) @ qml.Hadamard(1) @ qml.PauliY(2), "PZHPY"), + ], + ) + def test_tensor(self, theta, phi, varphi, obs, expected, tol): """Test that a tensor product involving PauliX and PauliY works correctly""" dev = qml.device(device_name, mpi=True, wires=3) - obs = qml.PauliX(0) @ qml.PauliY(2) - - dev.apply( - [ - qml.RX(theta, wires=[0]), - qml.RX(phi, wires=[1]), - qml.RX(varphi, wires=[2]), - qml.CNOT(wires=[0, 1]), - qml.CNOT(wires=[1, 2]), - ], - rotations=obs.diagonalizing_gates(), - ) - res = dev.expval(obs) - - expected = np.sin(theta) * np.sin(phi) * np.sin(varphi) - - assert np.allclose(res, expected, atol=tol) - - def test_pauliz_identity(self, theta, phi, varphi, tol): - """Test that 
a tensor product involving PauliZ and Identity works - correctly""" - dev = qml.device(device_name, mpi=True, wires=3) - obs = qml.PauliZ(0) @ qml.Identity(1) @ qml.PauliZ(2) - - dev.apply( - [ - qml.RX(theta, wires=[0]), - qml.RX(phi, wires=[1]), - qml.RX(varphi, wires=[2]), - qml.CNOT(wires=[0, 1]), - qml.CNOT(wires=[1, 2]), - ], - rotations=obs.diagonalizing_gates(), - ) - - res = dev.expval(obs) - - expected = np.cos(varphi) * np.cos(phi) - assert np.allclose(res, expected, tol) - - def test_pauliz_hadamard_pauliy(self, theta, phi, varphi, tol): - """Test that a tensor product involving PauliZ and PauliY and Hadamard - works correctly""" - dev = qml.device(device_name, mpi=True, wires=3) - obs = qml.PauliZ(0) @ qml.Hadamard(1) @ qml.PauliY(2) - - dev.apply( - [ - qml.RX(theta, wires=[0]), - qml.RX(phi, wires=[1]), - qml.RX(varphi, wires=[2]), - qml.CNOT(wires=[0, 1]), - qml.CNOT(wires=[1, 2]), - ], - rotations=obs.diagonalizing_gates(), - ) + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.expval(obs) - res = dev.expval(obs) - expected = -(np.cos(varphi) * np.sin(phi) + np.sin(varphi) * np.cos(theta)) / np.sqrt(2) + mpi_qnode = qml.QNode(circuit, dev) + res = mpi_qnode() + + if expected == "PXPY": + expected_val = np.sin(theta) * np.sin(phi) * np.sin(varphi) + elif expected == "PZIPZ": + expected_val = np.cos(varphi) * np.cos(phi) + elif expected == "PZHPY": + expected_val = -( + np.cos(varphi) * np.sin(phi) + np.sin(varphi) * np.cos(theta) + ) / np.sqrt(2) + else: + expected_val = 0 - assert np.allclose(res, expected, tol) + assert np.allclose(res, expected_val, atol=tol) diff --git a/mpitests/test_native_mcm.py b/mpitests/test_native_mcm.py new file mode 100644 index 0000000000..78bde9872a --- /dev/null +++ b/mpitests/test_native_mcm.py @@ -0,0 +1,43 @@ +# Copyright 2024 Xanadu Quantum Technologies Inc. 
+ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for unsupported mid-circuit measurements on Lightning-GPU-MPI.""" +import numpy as np +import pennylane as qml +import pytest +from conftest import LightningDevice, device_name +from mpi4py import MPI + +if not LightningDevice._CPP_BINARY_AVAILABLE: # pylint: disable=protected-access + pytest.skip("No binary module found. Skipping.", allow_module_level=True) + + +def test_unspported_mid_measurement(): + """Test unsupported mid_measurement for Lightning-GPU-MPI.""" + comm = MPI.COMM_WORLD + dev = qml.device(device_name, wires=2, mpi=True, shots=1000) + params = np.pi / 4 * np.ones(2) + + @qml.qnode(dev) + def func(x, y): + qml.RX(x, wires=0) + m0 = qml.measure(0) + qml.cond(m0, qml.RY)(y, wires=1) + return qml.probs(wires=0) + + comm.Barrier() + + with pytest.raises( + qml.DeviceError, match="Lightning-GPU-MPI does not support Mid-circuit measurements." 
+ ): + func(*params) diff --git a/mpitests/test_probs.py b/mpitests/test_probs.py index b2f57f733a..ed9ab9b9c8 100644 --- a/mpitests/test_probs.py +++ b/mpitests/test_probs.py @@ -23,27 +23,31 @@ numQubits = 8 -def create_random_init_state(numWires, R_DTYPE, seed_value=48): +def create_random_init_state(numWires, c_dtype, seed_value=48): + """Returns a random initial state of a certain type.""" np.random.seed(seed_value) - num_elements = 1 << numWires - init_state = np.random.rand(num_elements).astype(R_DTYPE) + 1j * np.random.rand( + + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + + num_elements = 2**numWires + init_state = np.random.rand(num_elements).astype(r_dtype) + 1j * np.random.rand( num_elements - ).astype(R_DTYPE) - scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(R_DTYPE) - init_state = init_state / scale_sum + ).astype(r_dtype) + + init_state = init_state / np.linalg.norm(init_state) return init_state -def apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE): +def apply_probs_nonparam(tol, operation, GateWires, Wires, c_dtype): num_wires = numQubits comm = MPI.COMM_WORLD rank = comm.Get_rank() commSize = comm.Get_size() - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) def circuit(): @@ -58,15 +62,16 @@ def circuit(): local_probs = mpi_qnode() recv_counts = comm.gather(len(local_probs), root=0) - comm.Barrier() + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + if rank == 0: - probs_mpi = np.zeros(1 << len(Wires)).astype(dev_mpi.R_DTYPE) - displacements = [i for i in 
range(commSize)] + probs_mpi = np.zeros(2 ** len(Wires)).astype(r_dtype) else: probs_mpi = None probs_cpu = None + comm.Barrier() comm.Gatherv(local_probs, [probs_mpi, recv_counts], root=0) @@ -75,16 +80,16 @@ def circuit(): comm.Barrier() -def apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE): +def apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype): num_wires = numQubits comm = MPI.COMM_WORLD rank = comm.Get_rank() commSize = comm.Get_size() - dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) - dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=C_DTYPE) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=c_dtype) - state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + state_vector = create_random_init_state(num_wires, dev_mpi.c_dtype) comm.Bcast(state_vector, root=0) def circuit(): @@ -102,8 +107,10 @@ def circuit(): comm.Barrier() + r_dtype = np.float64 if c_dtype == np.complex128 else np.float32 + if rank == 0: - probs_mpi = np.zeros(1 << len(Wires)).astype(dev_mpi.R_DTYPE) + probs_mpi = np.zeros(2 ** len(Wires)).astype(r_dtype) else: probs_mpi = None probs_cpu = None @@ -116,6 +123,19 @@ def circuit(): comm.Barrier() +@pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], +) +@pytest.mark.parametrize("c_dtype", [np.complex128]) class TestProbs: """Tests for the probability method.""" @@ -123,41 +143,15 @@ class TestProbs: "operation", [qml.PauliX, qml.PauliY, qml.PauliZ, qml.Hadamard, qml.S, qml.T] ) @pytest.mark.parametrize("GateWires", [[0], [numQubits - 1]]) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def 
test_prob_single_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): - apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + def test_prob_single_wire_nonparam(self, tol, operation, GateWires, Wires, c_dtype): + apply_probs_nonparam(tol, operation, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.CNOT, qml.SWAP, qml.CY, qml.CZ]) @pytest.mark.parametrize( "GateWires", [[0, 1], [numQubits - 2, numQubits - 1], [0, numQubits - 1]] ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_two_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): - apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + def test_prob_two_wire_nonparam(self, tol, operation, GateWires, Wires, c_dtype): + apply_probs_nonparam(tol, operation, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) @pytest.mark.parametrize( @@ -169,80 +163,28 @@ def test_prob_two_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE) [0, numQubits - 2, numQubits - 1], ], ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_three_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): - apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + def test_prob_three_wire_nonparam(self, tol, operation, GateWires, Wires, c_dtype): + apply_probs_nonparam(tol, operation, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.PhaseShift, qml.RX, qml.RY, qml.RZ]) @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) @pytest.mark.parametrize("GateWires", [0, numQubits - 1]) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], 
- [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_single_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_single_wire_param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.Rot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @pytest.mark.parametrize("GateWires", [0, numQubits - 1]) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_single_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_single_wire_3param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize("operation", [qml.CRot]) @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) @pytest.mark.parametrize( "GateWires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]] ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_two_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_two_wire_3param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize( "operation", @@ -263,21 +205,8 @@ def 
test_prob_two_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTY @pytest.mark.parametrize( "GateWires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]] ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_two_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_two_wire_param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) @pytest.mark.parametrize( "operation", @@ -292,18 +221,5 @@ def test_prob_two_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYP [numQubits - 4, numQubits - 3, numQubits - 2, numQubits - 1], ], ) - @pytest.mark.parametrize( - "Wires", - [ - [0], - [1], - [0, 1], - [0, 2], - [0, numQubits - 1], - [numQubits - 2, numQubits - 1], - range(numQubits), - ], - ) - @pytest.mark.parametrize("C_DTYPE", [np.complex128]) - def test_prob_four_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): - apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + def test_prob_four_wire_param(self, tol, operation, par, GateWires, Wires, c_dtype): + apply_probs_param(tol, operation, par, GateWires, Wires, c_dtype) diff --git a/pennylane_lightning/core/_adjoint_jacobian_base.py b/pennylane_lightning/core/_adjoint_jacobian_base.py index 50046d5f94..a779c0cc4c 100644 --- a/pennylane_lightning/core/_adjoint_jacobian_base.py +++ b/pennylane_lightning/core/_adjoint_jacobian_base.py @@ -111,6 +111,7 @@ def _process_jacobian_tape(self, tape: QuantumTape, split_obs: bool = False): self._qubit_state.device_name, use_csingle, use_mpi, split_obs ).serialize_ops(tape) + # pylint: disable=not-callable ops_serialized = self._create_ops_list_lightning(*ops_serialized) # We need 
to filter out indices in trainable_params which do not diff --git a/pennylane_lightning/core/_measurements_base.py b/pennylane_lightning/core/_measurements_base.py index 06ae878899..1e4f54b4ef 100644 --- a/pennylane_lightning/core/_measurements_base.py +++ b/pennylane_lightning/core/_measurements_base.py @@ -56,6 +56,7 @@ def __init__( ) -> None: self._qubit_state = qubit_state + self._use_mpi = False # Dummy for the C++ bindings self._measurement_lightning = None @@ -94,7 +95,6 @@ def state_diagonalizing_gates(self, measurementprocess: StateMeasurement) -> Ten self._qubit_state.apply_operations([qml.adjoint(g) for g in reversed(diagonalizing_gates)]) return result - # pylint: disable=protected-access def expval(self, measurementprocess: MeasurementProcess): """Expectation value of the supplied observable contained in the MeasurementProcess. @@ -121,8 +121,9 @@ def expval(self, measurementprocess: MeasurementProcess): or (measurementprocess.obs.arithmetic_depth > 0) or isinstance(measurementprocess.obs.name, List) ): + # pylint: disable=protected-access ob_serialized = QuantumScriptSerializer( - self._qubit_state.device_name, self.dtype == np.complex64 + self._qubit_state.device_name, self.dtype == np.complex64, self._use_mpi )._ob(measurementprocess.obs) return self._measurement_lightning.expval(ob_serialized) @@ -134,19 +135,23 @@ def probs(self, measurementprocess: MeasurementProcess): """Probabilities of the supplied observable or wires contained in the MeasurementProcess. Args: - measurementprocess (StateMeasurement): measurement to apply to the state + measurementprocess (StateMeasurement): measurement to apply to the state. Returns: - Probabilities of the supplied observable or wires + Probabilities of the supplied observable or wires. 
""" diagonalizing_gates = measurementprocess.diagonalizing_gates() + if diagonalizing_gates: self._qubit_state.apply_operations(diagonalizing_gates) + results = self._measurement_lightning.probs(measurementprocess.wires.tolist()) + if diagonalizing_gates: self._qubit_state.apply_operations( [qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)] ) + return results def var(self, measurementprocess: MeasurementProcess): @@ -175,8 +180,9 @@ def var(self, measurementprocess: MeasurementProcess): or (measurementprocess.obs.arithmetic_depth > 0) or isinstance(measurementprocess.obs.name, List) ): + # pylint: disable=protected-access ob_serialized = QuantumScriptSerializer( - self._qubit_state.device_name, self.dtype == np.complex64 + self._qubit_state.device_name, self.dtype == np.complex64, self._use_mpi )._ob(measurementprocess.obs) return self._measurement_lightning.var(ob_serialized) @@ -187,6 +193,7 @@ def var(self, measurementprocess: MeasurementProcess): def get_measurement_function( self, measurementprocess: MeasurementProcess ) -> Callable[[MeasurementProcess, TensorLike], TensorLike]: + # pylint: disable=too-many-return-statements """Get the appropriate method for performing a measurement. 
Args: @@ -197,16 +204,24 @@ def get_measurement_function( """ if isinstance(measurementprocess, StateMeasurement): if isinstance(measurementprocess, ExpectationMP): - if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): - return self.state_diagonalizing_gates + if self._use_mpi: + if isinstance(measurementprocess.obs, (qml.Projector)): + return self.state_diagonalizing_gates + else: + if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): + return self.state_diagonalizing_gates return self.expval if isinstance(measurementprocess, ProbabilityMP): return self.probs if isinstance(measurementprocess, VarianceMP): - if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): - return self.state_diagonalizing_gates + if self._use_mpi: + if isinstance(measurementprocess.obs, (qml.Projector)): + return self.state_diagonalizing_gates + else: + if isinstance(measurementprocess.obs, (qml.Identity, qml.Projector)): + return self.state_diagonalizing_gates return self.var if measurementprocess.obs is None or measurementprocess.obs.has_diagonalizing_gates: return self.state_diagonalizing_gates diff --git a/pennylane_lightning/core/_state_vector_base.py b/pennylane_lightning/core/_state_vector_base.py index 3e08a5ab40..8815e13a04 100644 --- a/pennylane_lightning/core/_state_vector_base.py +++ b/pennylane_lightning/core/_state_vector_base.py @@ -16,7 +16,7 @@ """ from abc import ABC, abstractmethod -from typing import Union +from typing import Optional, Union import numpy as np from pennylane import BasisState, StatePrep @@ -101,7 +101,7 @@ def reset_state(self): self._qubit_state.resetStateVector() @abstractmethod - def _apply_state_vector(self, state, device_wires: Wires): + def _apply_state_vector(self, state, device_wires: Wires, sync: Optional[bool] = None): """Initialize the internal state vector in a specified state. 
Args: state (array[complex]): normalized input state of length ``2**len(wires)`` @@ -117,6 +117,7 @@ def _apply_basis_state(self, state, wires): consisting of 0s and 1s. wires (Wires): wires that the provided computational state should be initialized on + use_async(Optional[bool]): immediately sync with host-sv after applying operation. Note: This function does not support broadcasted inputs yet. """ diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index 37f7ba1bbf..243d1c7ad5 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -15,5 +15,4 @@ """Version information. Version number (major.minor.patch[-label]) """ - __version__ = "0.39.0-dev46" diff --git a/pennylane_lightning/core/lightning_newAPI_base.py b/pennylane_lightning/core/lightning_newAPI_base.py index dcee73fd5c..12cdf98b4e 100644 --- a/pennylane_lightning/core/lightning_newAPI_base.py +++ b/pennylane_lightning/core/lightning_newAPI_base.py @@ -90,6 +90,49 @@ def c_dtype(self): def _set_lightning_classes(self): """Load the LightningStateVector, LightningMeasurements, LightningAdjointJacobian as class attribute""" + @abstractmethod + def _setup_execution_config(self, config): + """ + Update the execution config with choices for how the device should be used and the device options. + """ + + @abstractmethod + def preprocess(self, execution_config: ExecutionConfig = DefaultExecutionConfig): + """This function defines the device transform program to be applied and an updated device configuration. + + Args: + execution_config (Union[ExecutionConfig, Sequence[ExecutionConfig]]): A data structure describing the + parameters needed to fully describe the execution. 
+ + Returns: + TransformProgram, ExecutionConfig: A transform program that when called returns :class:`~.QuantumTape`'s that the + device can natively execute as well as a postprocessing function to be called after execution, and a configuration + with unset specifications filled in. + + This device: + + * Supports any qubit operations that provide a matrix + * Currently does not support finite shots + * Currently does not intrinsically support parameter broadcasting + + """ + + @abstractmethod + def execute( + self, + circuits: QuantumTape_or_Batch, + execution_config: ExecutionConfig = DefaultExecutionConfig, + ) -> Result_or_ResultBatch: + """Execute a circuit or a batch of circuits and turn it into results. + + Args: + circuits (Union[QuantumTape, Sequence[QuantumTape]]): the quantum circuits to be executed + execution_config (ExecutionConfig): a datastructure with additional information required for execution + + Returns: + TensorLike, tuple[TensorLike], tuple[tuple[TensorLike]]: A numeric result of the computation. + """ + @abstractmethod def simulate( self, @@ -112,6 +155,25 @@ def simulate( Note that this function can return measurements for non-commuting observables simultaneously. """ + @abstractmethod + def supports_derivatives( + self, + execution_config: Optional[ExecutionConfig] = None, + circuit: Optional[qml.tape.QuantumTape] = None, + ) -> bool: + """Check whether or not derivatives are available for a given configuration and circuit. + + ``LightningGPU`` supports adjoint differentiation with analytic results. + + Args: + execution_config (ExecutionConfig): The configuration of the desired derivative calculation + circuit (QuantumTape): An optional circuit to check derivatives support for. 
+ + Returns: + Bool: Whether or not a derivative can be calculated provided the given information + + """ + def jacobian( self, circuit: QuantumTape, @@ -135,6 +197,7 @@ def jacobian( [circuit], _ = qml.map_wires(circuit, wire_map) state.reset_state() final_state = state.get_final_state(circuit) + # pylint: disable=not-callable return self.LightningAdjointJacobian(final_state, batch_obs=batch_obs).calculate_jacobian( circuit ) @@ -163,6 +226,7 @@ def simulate_and_jacobian( if wire_map is not None: [circuit], _ = qml.map_wires(circuit, wire_map) res = self.simulate(circuit, state) + # pylint: disable=not-callable jac = self.LightningAdjointJacobian(state, batch_obs=batch_obs).calculate_jacobian(circuit) return res, jac @@ -193,6 +257,7 @@ def vjp( # pylint: disable=too-many-arguments [circuit], _ = qml.map_wires(circuit, wire_map) state.reset_state() final_state = state.get_final_state(circuit) + # pylint: disable=not-callable return self.LightningAdjointJacobian(final_state, batch_obs=batch_obs).calculate_vjp( circuit, cotangents ) @@ -224,6 +289,7 @@ def simulate_and_vjp( # pylint: disable=too-many-arguments if wire_map is not None: [circuit], _ = qml.map_wires(circuit, wire_map) res = self.simulate(circuit, state) + # pylint: disable=not-callable _vjp = self.LightningAdjointJacobian(state, batch_obs=batch_obs).calculate_vjp( circuit, cotangents ) diff --git a/pennylane_lightning/core/src/algorithms/tests/mpi/Test_AdjointJacobianMPI.cpp b/pennylane_lightning/core/src/algorithms/tests/mpi/Test_AdjointJacobianMPI.cpp index b4e617eec4..9953b218f0 100644 --- a/pennylane_lightning/core/src/algorithms/tests/mpi/Test_AdjointJacobianMPI.cpp +++ b/pennylane_lightning/core/src/algorithms/tests/mpi/Test_AdjointJacobianMPI.cpp @@ -87,7 +87,6 @@ template void testAdjointJacobian() { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -138,7 +137,6 @@ 
template void testAdjointJacobian() { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -189,7 +187,6 @@ template void testAdjointJacobian() { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs = std::make_shared>( std::make_shared>( @@ -240,7 +237,6 @@ template void testAdjointJacobian() { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs = std::make_shared>( std::make_shared>( @@ -310,7 +306,6 @@ template void testAdjointJacobian() { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -362,7 +357,6 @@ template void testAdjointJacobian() { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); auto obs1 = std::make_shared>( std::make_shared>( diff --git a/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp b/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp index 50a76610dc..62fd82e1ab 100644 --- a/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp +++ b/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp @@ -77,7 +77,6 @@ template class MeasurementsBase { /** * @brief Randomly set the seed of the internal random generator * - * @param seed Seed */ void setRandomSeed() { std::random_device rd; diff --git a/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp b/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp index 674659a9cc..f32a44a363 100644 --- a/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp +++ b/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp @@ -20,6 +20,7 @@ using 
Pennylane::Util::isApproxEqual; } // namespace /// @endcond #include +#include #include #ifdef _ENABLE_PLQUBIT @@ -84,44 +85,33 @@ template void testProbabilities() { // Expected results calculated with Pennylane default.qubit: std::vector< std::pair, std::vector>> - input = { -#if defined(_ENABLE_PLGPU) - // Bit index reodering conducted in the python layer - // for L-GPU. Also L-GPU backend doesn't support - // out of order wires for probability calculation - {{2, 1, 0}, - {0.67078706, 0.03062806, 0.0870997, 0.00397696, 0.17564072, - 0.00801973, 0.02280642, 0.00104134}} -#else - // LightningQubit currently supports arbitrary wire index - // ordering. - {{0, 2, 1}, - {0.67078706, 0.0870997, 0.03062806, 0.00397696, 0.17564072, - 0.02280642, 0.00801973, 0.00104134}}, - {{1, 0, 2}, - {0.67078706, 0.03062806, 0.17564072, 0.00801973, 0.0870997, - 0.00397696, 0.02280642, 0.00104134}}, - {{1, 2, 0}, - {0.67078706, 0.17564072, 0.03062806, 0.00801973, 0.0870997, - 0.02280642, 0.00397696, 0.00104134}}, - {{2, 0, 1}, - {0.67078706, 0.0870997, 0.17564072, 0.02280642, 0.03062806, - 0.00397696, 0.00801973, 0.00104134}}, - {{2, 1, 0}, - {0.67078706, 0.17564072, 0.0870997, 0.02280642, 0.03062806, - 0.00801973, 0.00397696, 0.00104134}}, - {{2, 1}, {0.84642778, 0.10990612, 0.0386478, 0.0050183}}, - {{0, 1, 2}, - {0.67078706, 0.03062806, 0.0870997, 0.00397696, 0.17564072, - 0.00801973, 0.02280642, 0.00104134}}, - {{0, 1}, {0.70141512, 0.09107666, 0.18366045, 0.02384776}}, - {{0, 2}, {0.75788676, 0.03460502, 0.19844714, 0.00906107}}, - {{1, 2}, {0.84642778, 0.0386478, 0.10990612, 0.0050183}}, - {{0}, {0.79249179, 0.20750821}}, - {{1}, {0.88507558, 0.11492442}}, - {{2}, {0.9563339, 0.0436661}} -#endif - }; + input = {// LightningQubit currently supports arbitrary wire index + // ordering. 
+ {{0, 2, 1}, + {0.67078706, 0.0870997, 0.03062806, 0.00397696, + 0.17564072, 0.02280642, 0.00801973, 0.00104134}}, + {{1, 0, 2}, + {0.67078706, 0.03062806, 0.17564072, 0.00801973, + 0.0870997, 0.00397696, 0.02280642, 0.00104134}}, + {{1, 2, 0}, + {0.67078706, 0.17564072, 0.03062806, 0.00801973, + 0.0870997, 0.02280642, 0.00397696, 0.00104134}}, + {{2, 0, 1}, + {0.67078706, 0.0870997, 0.17564072, 0.02280642, + 0.03062806, 0.00397696, 0.00801973, 0.00104134}}, + {{2, 1, 0}, + {0.67078706, 0.17564072, 0.0870997, 0.02280642, + 0.03062806, 0.00801973, 0.00397696, 0.00104134}}, + {{2, 1}, {0.84642778, 0.10990612, 0.0386478, 0.0050183}}, + {{0, 1, 2}, + {0.67078706, 0.03062806, 0.0870997, 0.00397696, + 0.17564072, 0.00801973, 0.02280642, 0.00104134}}, + {{0, 1}, {0.70141512, 0.09107666, 0.18366045, 0.02384776}}, + {{0, 2}, {0.75788676, 0.03460502, 0.19844714, 0.00906107}}, + {{1, 2}, {0.84642778, 0.0386478, 0.10990612, 0.0050183}}, + {{0}, {0.79249179, 0.20750821}}, + {{1}, {0.88507558, 0.11492442}}, + {{2}, {0.9563339, 0.0436661}}}; // Defining the Statevector that will be measured. 
auto statevector_data = createNonTrivialState(); @@ -403,11 +393,7 @@ template void testProbabilitiesObsShots() { std::size_t num_shots = 10000; auto prob_obs_shots = Measurer_obs_shots.probs(*obs, num_shots); -#ifdef _ENABLE_PLGPU - auto prob = Measurer.probs(std::vector({2, 1, 0})); -#else auto prob = Measurer.probs(std::vector({0, 1, 2})); -#endif REQUIRE_THAT(prob_obs_shots, Catch::Approx(prob).margin(5e-2)); } @@ -433,11 +419,7 @@ template void testProbabilitiesObsShots() { std::size_t num_shots = 10000; auto prob_obs_shots = Measurer_obs_shots.probs(*obs, num_shots); -#ifdef _ENABLE_PLGPU - auto prob = Measurer.probs(std::vector({2, 1, 0})); -#else auto prob = Measurer.probs(std::vector({0, 1, 2})); -#endif REQUIRE_THAT(prob_obs_shots, Catch::Approx(prob).margin(5e-2)); } @@ -1251,7 +1233,9 @@ TEST_CASE("Var Shot- TensorProdObs", "[MeasurementsBase][Observables]") { testTensorProdObsVarShot(); } } -template void testSamples() { + +template +void testSamples(const std::optional &seed = std::nullopt) { if constexpr (!std::is_same_v) { using StateVectorT = typename TypeList::Type; using PrecisionT = typename StateVectorT::PrecisionT; @@ -1281,7 +1265,10 @@ template void testSamples() { std::size_t num_qubits = 3; std::size_t N = std::pow(2, num_qubits); std::size_t num_samples = 100000; - auto &&samples = Measurer.generate_samples(num_samples); + auto &&samples = + seed.has_value() + ? 
Measurer.generate_samples(num_samples, seed.value()) + : Measurer.generate_samples(num_samples); std::vector counts(N, 0); std::vector samples_decimal(num_samples, 0); @@ -1307,7 +1294,7 @@ template void testSamples() { REQUIRE_THAT(probabilities, Catch::Approx(expected_probabilities).margin(.05)); } - testSamples(); + testSamples(seed); } } @@ -1317,6 +1304,12 @@ TEST_CASE("Samples", "[MeasurementsBase]") { } } +TEST_CASE("Seeded samples", "[MeasurementsBase]") { + if constexpr (BACKEND_FOUND) { + testSamples(37); + } +} + template void testSamplesCountsObs() { if constexpr (!std::is_same_v) { using StateVectorT = typename TypeList::Type; @@ -1729,4 +1722,4 @@ TEST_CASE("Measure Shot - SparseHObs ", "[MeasurementsBase][Observables]") { if constexpr (BACKEND_FOUND) { testSparseHObsMeasureShot(); } -} \ No newline at end of file +} diff --git a/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp b/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp index 4a90d8849d..7f6411263e 100644 --- a/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp +++ b/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp @@ -58,7 +58,7 @@ template void testProbabilities() { input = {// Bit index reodering conducted in the python layer // for L-GPU. 
Also L-GPU backend doesn't support // out of order wires for probability calculation - {{2, 1, 0}, + {{0, 1, 2}, {0.67078706, 0.03062806, 0.0870997, 0.00397696, 0.17564072, 0.00801973, 0.02280642, 0.00104134}}}; @@ -386,7 +386,7 @@ template void testProbabilitiesObsShots() { std::size_t num_shots = 10000; auto prob_obs_shots = Measurer_obs_shots.probs(*obs, num_shots); - auto prob = Measurer.probs(std::vector({2, 1, 0})); + auto prob = Measurer.probs(std::vector({0, 1, 2})); auto prob_all = mpi_manager.allgather(prob); REQUIRE_THAT(prob_obs_shots, Catch::Approx(prob_all).margin(5e-2)); } diff --git a/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp b/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp index eb39b57f5c..30f7262349 100644 --- a/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp +++ b/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp @@ -522,7 +522,6 @@ template void testHamiltonianBase() { StateVectorT sv_mpi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv_mpi.initSV(); REQUIRE_THROWS_AS(ham->applyInPlace(sv_mpi), LightningException); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp index 90b03961e7..772d1b6a2c 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp @@ -198,16 +198,6 @@ class StateVectorCudaBase : public StateVectorBase { data_buffer_ = std::move(other); } - /** - * @brief Initialize the statevector data to the |0...0> state - * - */ - void initSV(bool async = false) { - std::size_t index = 0; - const std::complex value(1, 0); - static_cast(this)->setBasisState(value, index, async); - }; - protected: using ParFunc = std::function &, bool, const std::vector &)>; diff 
--git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp index 3753f792fd..964c5e69ce 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp @@ -119,6 +119,7 @@ class StateVectorCudaMPI final handle_.get(), mpi_manager_, mpi_buf_size, BaseType::getData(), num_local_qubits, localStream_.get())), gate_cache_(true, dev_tag) { + resetStateVector(); PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); mpi_manager_.Barrier(); }; @@ -137,6 +138,7 @@ class StateVectorCudaMPI final handle_.get(), mpi_manager_, mpi_buf_size, BaseType::getData(), num_local_qubits, localStream_.get())), gate_cache_(true, dev_tag) { + resetStateVector(); PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); mpi_manager_.Barrier(); }; @@ -155,6 +157,7 @@ class StateVectorCudaMPI final handle_.get(), mpi_manager_, mpi_buf_size, BaseType::getData(), num_local_qubits, localStream_.get())), gate_cache_(true, dev_tag) { + resetStateVector(); PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); mpi_manager_.Barrier(); }; @@ -193,7 +196,7 @@ class StateVectorCudaMPI final handle_.get(), mpi_manager_, 0, BaseType::getData(), num_local_qubits, localStream_.get())), gate_cache_(true, dev_tag) { - BaseType::initSV(); + resetStateVector(); PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); mpi_manager_.Barrier(); } @@ -251,92 +254,83 @@ class StateVectorCudaMPI final } /** - * @brief Set value for a single element of the state-vector on device. This - * method is implemented by cudaMemcpy. - * - * @param value Value to be set for the target element. - * @param index Index of the target element. - * @param async Use an asynchronous memory copy. + * @brief Reset the statevector data to the |0...0> state. + * @param use_async Use an asynchronous memory copy or not. Default is + * false. 
*/ - void setBasisState(const std::complex &value, - const std::size_t index, const bool async = false) { - std::size_t rankId = index >> BaseType::getNumQubits(); - - std::size_t local_index = - static_cast( - rankId * std::pow(2.0, static_cast( - BaseType::getNumQubits()))) ^ - index; + void resetStateVector(bool use_async = false) { BaseType::getDataBuffer().zeroInit(); + std::size_t index = 0; + ComplexT value(1.0, 0.0); + setBasisState_(value, index, use_async); + }; - CFP_t value_cu = cuUtil::complexToCu>(value); - auto stream_id = localStream_.get(); + /** + * @brief Prepare a single computational basis state. + * + * @param state Binary number representing the index + * @param wires Wires. + * @param use_async Use an asynchronous memory copy. + */ + void setBasisState(const std::vector &state, + const std::vector &wires, + const bool use_async) { + PL_ABORT_IF_NOT(state.size() == wires.size(), + "state and wires must have equal dimensions."); - if (mpi_manager_.getRank() == rankId) { - setBasisState_CUDA(BaseType::getData(), value_cu, local_index, - async, stream_id); + const auto n_wires = this->getTotalNumQubits(); + + std::size_t index{0U}; + for (std::size_t k = 0; k < n_wires; k++) { + index |= state[k] << (n_wires - 1 - wires[k]); } - PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); - mpi_manager_.Barrier(); + + const std::complex value(1.0, 0.0); + BaseType::getDataBuffer().zeroInit(); + setBasisState_(value, index, use_async); } /** - * @brief Set values for a batch of elements of the state-vector. This - * method is implemented by the customized CUDA kernel defined in the - * DataBuffer class. + * @brief Set values for a batch of elements of the state-vector. * - * @param num_indices Number of elements to be passed to the state vector. - * @param values Pointer to values to be set for the target elements. - * @param indices Pointer to indices of the target elements. - * @param async Use an asynchronous memory copy. 
+ * @param state_ptr Pointer to initial state data. + * @param num_states Length of initial state data. + * @param wires Wires. + * @param use_async Use an asynchronous memory copy. Default is false. */ - template - void setStateVector(const index_type num_indices, - const std::complex *values, - const index_type *indices, const bool async = false) { - BaseType::getDataBuffer().zeroInit(); - - std::vector indices_local; - std::vector> values_local; - - for (std::size_t i = 0; i < static_cast(num_indices); - i++) { - int index = indices[i]; - PL_ASSERT(index >= 0); - std::size_t rankId = - static_cast(index) >> BaseType::getNumQubits(); - - if (rankId == mpi_manager_.getRank()) { - int local_index = - static_cast( - rankId * std::pow(2.0, static_cast( - BaseType::getNumQubits()))) ^ - index; - indices_local.push_back(local_index); - values_local.push_back(values[i]); + void setStateVector(const ComplexT *state_ptr, const std::size_t num_states, + const std::vector &wires, + bool use_async = false) { + PL_ABORT_IF_NOT(num_states == Pennylane::Util::exp2(wires.size()), + "Inconsistent state and wires dimensions."); + + const auto num_qubits = this->getTotalNumQubits(); + + PL_ABORT_IF_NOT(std::find_if(wires.begin(), wires.end(), + [&num_qubits](const auto i) { + return i >= num_qubits; + }) == wires.end(), + "Invalid wire index."); + + using index_type = + typename std::conditional::value, + int32_t, int64_t>::type; + + // Calculate the indices of the state-vector to be set. + // TODO: Could move to GPU/MPI calculation if the state size is large. 
+ std::vector indices(num_states); + const std::size_t num_wires = wires.size(); + constexpr std::size_t one{1U}; + for (std::size_t i = 0; i < num_states; i++) { + std::size_t index{0U}; + for (std::size_t j = 0; j < num_wires; j++) { + const std::size_t bit = (i & (one << j)) >> j; + index |= bit << (num_qubits - 1 - wires[num_wires - 1 - j]); } + indices[i] = static_cast(index); } - - auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); - auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); - - index_type num_elements = indices_local.size(); - - DataBuffer d_indices{ - static_cast(num_elements), device_id, stream_id, true}; - - DataBuffer d_values{static_cast(num_elements), - device_id, stream_id, true}; - - d_indices.CopyHostDataToGpu(indices_local.data(), d_indices.getLength(), - async); - d_values.CopyHostDataToGpu(values_local.data(), d_values.getLength(), - async); - - setStateVector_CUDA(BaseType::getData(), num_elements, - d_values.getData(), d_indices.getData(), - thread_per_block, stream_id); - PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + setStateVector_(num_states, state_ptr, indices.data(), + use_async); mpi_manager_.Barrier(); } @@ -405,6 +399,19 @@ class StateVectorCudaMPI final cuGates::getRot(params[0], params[1], params[2]); applyDeviceMatrixGate(rot_matrix.data(), ctrls, tgts, false); } + } else if (opName == "Matrix") { + DataBuffer d_matrix{ + gate_matrix.size(), BaseType::getDataBuffer().getDevTag(), + true}; + d_matrix.CopyHostDataToGpu(gate_matrix.data(), d_matrix.getLength(), + false); + // ensure wire indexing correctly preserved for tensor-observables + const std::vector ctrls_local{ctrls.rbegin(), + ctrls.rend()}; + const std::vector tgts_local{tgts.rbegin(), + tgts.rend()}; + applyDeviceMatrixGate(d_matrix.getData(), ctrls_local, tgts_local, + adjoint); } else if (par_gates_.find(opName) != par_gates_.end()) { par_gates_.at(opName)(wires, adjoint, params); } else { // No offloadable function call; 
defer to matrix passing @@ -484,7 +491,7 @@ class StateVectorCudaMPI final const std::vector &wires, bool adjoint = false) { PL_ABORT_IF(wires.empty(), "Number of wires must be larger than 0"); - const std::string opName = {}; + const std::string opName = "Matrix"; std::size_t n = std::size_t{1} << wires.size(); const std::vector> matrix(gate_matrix, gate_matrix + n * n); @@ -1528,6 +1535,88 @@ class StateVectorCudaMPI final return t_indices; } + /** + * @brief Set values for a batch of elements of the state-vector. This + * method is implemented by the customized CUDA kernel defined in the + * DataBuffer class. + * + * @param num_indices Number of elements to be passed to the state vector. + * @param values Pointer to values to be set for the target elements. + * @param indices Pointer to indices of the target elements. + * @param async Use an asynchronous memory copy. + */ + template + void setStateVector_(const index_type num_indices, + const std::complex *values, + const index_type *indices, const bool async = false) { + BaseType::getDataBuffer().zeroInit(); + + std::vector indices_local; + std::vector> values_local; + + for (std::size_t i = 0; i < static_cast(num_indices); + i++) { + int index = indices[i]; + PL_ASSERT(index >= 0); + std::size_t rankId = + static_cast(index) >> BaseType::getNumQubits(); + + if (rankId == mpi_manager_.getRank()) { + int local_index = static_cast( + compute_local_index(static_cast(index), + this->getNumLocalQubits())); + indices_local.push_back(local_index); + values_local.push_back(values[i]); + } + } + + auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); + auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); + + index_type num_elements = indices_local.size(); + + DataBuffer d_indices{ + static_cast(num_elements), device_id, stream_id, true}; + + DataBuffer d_values{static_cast(num_elements), + device_id, stream_id, true}; + + d_indices.CopyHostDataToGpu(indices_local.data(), 
d_indices.getLength(), + async); + d_values.CopyHostDataToGpu(values_local.data(), d_values.getLength(), + async); + + setStateVector_CUDA(BaseType::getData(), num_elements, + d_values.getData(), d_indices.getData(), + thread_per_block, stream_id); + } + + /** + * @brief Set value for a single element of the state-vector on device. This + * method is implemented by cudaMemcpy. + * + * @param value Value to be set for the target element. + * @param index Index of the target element. + * @param async Use an asynchronous memory copy. + */ + void setBasisState_(const std::complex &value, + const std::size_t index, const bool async = false) { + const std::size_t rankId = index >> this->getNumLocalQubits(); + + const std::size_t local_index = + compute_local_index(index, this->getNumLocalQubits()); + + CFP_t value_cu = cuUtil::complexToCu>(value); + auto stream_id = localStream_.get(); + + if (mpi_manager_.getRank() == rankId) { + setBasisState_CUDA(BaseType::getData(), value_cu, local_index, + async, stream_id); + } + PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + mpi_manager_.Barrier(); + } + /** * @brief Get expectation value for a sum of Pauli words. * @@ -1591,8 +1680,8 @@ class StateVectorCudaMPI final } /** - * @brief Apply parametric Pauli gates to local statevector using custateVec - * calls. + * @brief Apply parametric Pauli gates to local statevector using + * custateVec calls. * * @param pauli_words List of Pauli words representing operation. * @param ctrls Control wires @@ -1662,7 +1751,8 @@ class StateVectorCudaMPI final }); // Initialize a vector to store the status of wires and default its - // elements as zeros, which assumes there is no target and control wire. + // elements as zeros, which assumes there is no target and control + // wire. 
std::vector statusWires(this->getTotalNumQubits(), WireStatus::Default); @@ -1822,7 +1912,8 @@ class StateVectorCudaMPI final }); // Initialize a vector to store the status of wires and default its - // elements as zeros, which assumes there is no target and control wire. + // elements as zeros, which assumes there is no target and control + // wire. std::vector statusWires(this->getTotalNumQubits(), WireStatus::Default); @@ -1963,7 +2054,8 @@ class StateVectorCudaMPI final }); // Initialize a vector to store the status of wires and default its - // elements as zeros, which assumes there is no target and control wire. + // elements as zeros, which assumes there is no target and control + // wire. std::vector statusWires(this->getTotalNumQubits(), WireStatus::Default); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp index 716d95c89f..f5aeb4abb6 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp @@ -111,7 +111,10 @@ class StateVectorCudaManaged : StateVectorCudaBase>( num_qubits), handle_(make_shared_cusv_handle()), - cublascaller_(make_shared_cublas_caller()), gate_cache_(true){}; + cublascaller_(make_shared_cublas_caller()), gate_cache_(true) { + resetStateVector(); + PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + }; StateVectorCudaManaged( std::size_t num_qubits, const DevTag &dev_tag, bool alloc = true, @@ -124,7 +127,8 @@ class StateVectorCudaManaged cublascaller_(std::move(cublascaller_in)), cusparsehandle_(std::move(cusparsehandle_in)), gate_cache_(true, dev_tag) { - BaseType::initSV(); + resetStateVector(); + PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); }; StateVectorCudaManaged(const CFP_t *gpu_data, std::size_t length) @@ -167,54 +171,90 @@ class StateVectorCudaManaged ~StateVectorCudaManaged() = 
default; /** - * @brief Set value for a single element of the state-vector on device. This - * method is implemented by cudaMemcpy. - * - * @param value Value to be set for the target element. - * @param index Index of the target element. - * @param async Use an asynchronous memory copy. + * @brief Reset the statevector data to the |0...0> state. + * @param use_async Use an asynchronous memory copy or not. Default is + * false. */ - void setBasisState(const std::complex &value, - const std::size_t index, const bool async = false) { + void resetStateVector(bool use_async = false) { BaseType::getDataBuffer().zeroInit(); - - CFP_t value_cu = cuUtil::complexToCu>(value); - auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); - setBasisState_CUDA(BaseType::getData(), value_cu, index, async, - stream_id); - } + std::size_t index = 0; + ComplexT value(1.0, 0.0); + setBasisState_(value, index, use_async); + }; /** - * @brief Set values for a batch of elements of the state-vector. This - * method is implemented by the customized CUDA kernel defined in the - * DataBuffer class. + * @brief Prepare a single computational basis state. * - * @param num_indices Number of elements to be passed to the state vector. - * @param values Pointer to values to be set for the target elements. - * @param indices Pointer to indices of the target elements. - * @param async Use an asynchronous memory copy. + * @param state Binary number representing the index + * @param wires Wires. + * @param use_async Use an asynchronous memory copy. Default is + * false. 
*/ - template - void setStateVector(const index_type num_indices, - const std::complex *values, - const index_type *indices, const bool async = false) { - BaseType::getDataBuffer().zeroInit(); + void setBasisState(const std::vector &state, + const std::vector &wires, + const bool use_async = false) { + PL_ABORT_IF_NOT(state.size() == wires.size(), + "state and wires must have equal dimensions."); + const auto num_qubits = BaseType::getNumQubits(); + PL_ABORT_IF_NOT( + std::find_if(wires.begin(), wires.end(), + [&num_qubits](const auto i) { + return i >= num_qubits; + }) == wires.end(), + "wires must take values lower than the number of qubits."); + const auto n_wires = wires.size(); + std::size_t index{0U}; + for (std::size_t k = 0; k < n_wires; k++) { + index |= state[k] << (num_qubits - 1 - wires[k]); + } - auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); - auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); + const std::complex value(1.0, 0.0); - index_type num_elements = num_indices; - DataBuffer d_indices{ - static_cast(num_elements), device_id, stream_id, true}; - DataBuffer d_values{static_cast(num_elements), - device_id, stream_id, true}; - - d_indices.CopyHostDataToGpu(indices, d_indices.getLength(), async); - d_values.CopyHostDataToGpu(values, d_values.getLength(), async); + BaseType::getDataBuffer().zeroInit(); + setBasisState_(value, index, use_async); + } - setStateVector_CUDA(BaseType::getData(), num_elements, - d_values.getData(), d_indices.getData(), - thread_per_block, stream_id); + /** + * @brief Set values for a batch of elements of the state-vector. + * + * @param state_ptr Pointer to the initial state data. + * @param num_states Length of the initial state data. + * @param wires Wires. + * @param use_async Use an asynchronous memory copy. Default is false. 
+ */ + void setStateVector(const ComplexT *state_ptr, const std::size_t num_states, + const std::vector &wires, + bool use_async = false) { + PL_ABORT_IF_NOT(num_states == Pennylane::Util::exp2(wires.size()), + "Inconsistent state and wires dimensions."); + + const auto num_qubits = BaseType::getNumQubits(); + + PL_ABORT_IF_NOT(std::find_if(wires.begin(), wires.end(), + [&num_qubits](const auto i) { + return i >= num_qubits; + }) == wires.end(), + "Invalid wire index."); + + using index_type = + typename std::conditional::value, + int32_t, int64_t>::type; + + // Calculate the indices of the state-vector to be set. + // TODO: Could move to GPU calculation if the state size is large. + std::vector indices(num_states); + const std::size_t num_wires = wires.size(); + constexpr std::size_t one{1U}; + for (std::size_t i = 0; i < num_states; i++) { + std::size_t index{0U}; + for (std::size_t j = 0; j < num_wires; j++) { + const std::size_t bit = (i & (one << j)) >> j; + index |= bit << (num_qubits - 1 - wires[num_wires - 1 - j]); + } + indices[i] = static_cast(index); + } + setStateVector_(num_states, state_ptr, indices.data(), + use_async); } /** @@ -324,6 +364,19 @@ class StateVectorCudaManaged cuGates::getRot(params[0], params[1], params[2]); applyDeviceMatrixGate(rot_matrix.data(), ctrls, tgts, false); } + } else if (opName == "Matrix") { + DataBuffer d_matrix{ + gate_matrix.size(), BaseType::getDataBuffer().getDevTag(), + true}; + d_matrix.CopyHostDataToGpu(gate_matrix.data(), d_matrix.getLength(), + false); + // ensure wire indexing correctly preserved for tensor-observables + const std::vector ctrls_local{ctrls.rbegin(), + ctrls.rend()}; + const std::vector tgts_local{tgts.rbegin(), + tgts.rend()}; + applyDeviceMatrixGate(d_matrix.getData(), ctrls_local, tgts_local, + adjoint); } else if (par_gates_.find(opName) != par_gates_.end()) { par_gates_.at(opName)(wires, adjoint, params); } else { // No offloadable function call; defer to matrix passing @@ -403,7 +456,7 @@ 
class StateVectorCudaManaged const std::vector &wires, bool adjoint = false) { PL_ABORT_IF(wires.empty(), "Number of wires must be larger than 0"); - const std::string opName = {}; + const std::string opName = "Matrix"; std::size_t n = std::size_t{1} << wires.size(); const std::vector> matrix(gate_matrix, gate_matrix + n * n); @@ -434,6 +487,56 @@ class StateVectorCudaManaged applyMatrix(gate_matrix.data(), wires, adjoint); } + /** + * @brief Collapse the state vector after having measured one of the qubit. + * + * Note: The branch parameter imposes the measurement result on the given + * wire. + * + * @param wire Wire to measure. + * @param branch Branch 0 or 1. + */ + void collapse(std::size_t wire, bool branch) { + PL_ABORT_IF_NOT(wire < BaseType::getNumQubits(), "Invalid wire index."); + cudaDataType_t data_type; + + if constexpr (std::is_same_v || + std::is_same_v) { + data_type = CUDA_C_64F; + } else { + data_type = CUDA_C_32F; + } + + std::vector basisBits(1, BaseType::getNumQubits() - 1 - wire); + + double abs2sum0; + double abs2sum1; + + PL_CUSTATEVEC_IS_SUCCESS(custatevecAbs2SumOnZBasis( + /* custatevecHandle_t */ handle_.get(), + /* void *sv */ BaseType::getData(), + /* cudaDataType_t */ data_type, + /* const uint32_t nIndexBits */ BaseType::getNumQubits(), + /* double * */ &abs2sum0, + /* double * */ &abs2sum1, + /* const int32_t * */ basisBits.data(), + /* const uint32_t nBasisBits */ basisBits.size())); + + const double norm = branch ? 
abs2sum1 : abs2sum0; + + const int parity = static_cast(branch); + + PL_CUSTATEVEC_IS_SUCCESS(custatevecCollapseOnZBasis( + /* custatevecHandle_t */ handle_.get(), + /* void *sv */ BaseType::getData(), + /* cudaDataType_t */ data_type, + /* const uint32_t nIndexBits */ BaseType::getNumQubits(), + /* const int32_t parity */ parity, + /* const int32_t *basisBits */ basisBits.data(), + /* const uint32_t nBasisBits */ basisBits.size(), + /* double norm */ norm)); + } + //****************************************************************************// // Explicit gate calls for bindings //****************************************************************************// @@ -1303,6 +1406,55 @@ class StateVectorCudaManaged return t_indices; } + /** @brief Set value for a single element of the state-vector on device. + * This method is implemented by cudaMemcpy. + * + * @param value Value to be set for the target element. + * @param index Index of the target element. + * @param async Use an asynchronous memory copy. + */ + void setBasisState_(const std::complex &value, + const std::size_t index, const bool async = false) { + CFP_t value_cu = cuUtil::complexToCu>(value); + auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); + setBasisState_CUDA(BaseType::getData(), value_cu, index, async, + stream_id); + } + + /** + * @brief Set values for a batch of elements of the state-vector. This + * method is implemented by the customized CUDA kernel defined in the + * DataBuffer class. + * + * @param num_indices Number of elements to be passed to the state vector. + * @param values Pointer to values to be set for the target elements. + * @param indices Pointer to indices of the target elements. + * @param async Use an asynchronous memory copy. 
+ */ + template + void setStateVector_(const index_type num_indices, + const std::complex *values, + const index_type *indices, const bool async = false) { + BaseType::getDataBuffer().zeroInit(); + + auto device_id = BaseType::getDataBuffer().getDevTag().getDeviceID(); + auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID(); + + index_type num_elements = num_indices; + DataBuffer d_indices{ + static_cast(num_elements), device_id, stream_id, true}; + DataBuffer d_values{static_cast(num_elements), + device_id, stream_id, true}; + + d_indices.CopyHostDataToGpu(indices, d_indices.getLength(), async); + d_values.CopyHostDataToGpu(values, d_values.getLength(), async); + + setStateVector_CUDA(BaseType::getData(), num_elements, + d_values.getData(), d_indices.getData(), + thread_per_block, stream_id); + PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + } + /** * @brief Apply parametric Pauli gates using custateVec calls. * diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/Test_AdjointJacobianGPU.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/Test_AdjointJacobianGPU.cpp index 30109d64e2..ccdcedaea2 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/Test_AdjointJacobianGPU.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/Test_AdjointJacobianGPU.cpp @@ -50,7 +50,6 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Op=RX, Obs=Z", auto ops = OpsData({"RX"}, {{p}}, {{0}}, {false}); StateVectorT psi(num_qubits); - psi.initSV(); JacobianData tape{ param.size(), psi.getLength(), psi.getData(), {obs}, ops, tp}; @@ -80,7 +79,6 @@ TEST_CASE("AdjointJacobianGPU::adjointJacobian Op=RY, Obs=X", auto ops = OpsData({"RY"}, {{p}}, {{0}}, {false}); StateVectorT psi(num_qubits); - psi.initSV(); JacobianData tape{ param.size(), psi.getLength(), psi.getData(), {obs}, ops, tp}; @@ -112,7 +110,6 @@ TEST_CASE("AdjointJacobianGPU::adjointJacobian 
Op=[QubitStateVector, " {0.0, 0.0}, {1.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}}; StateVectorT psi(num_qubits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -150,7 +147,6 @@ TEST_CASE("AdjointJacobianGPU::adjointJacobian Op=RX, Obs=[Z,Z]", std::vector jacobian(num_obs * tp.size(), 0); StateVectorT psi(num_qubits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -183,7 +179,6 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Op=[RX,RX,RX], Obs=[Z,Z,Z]", std::vector jacobian(num_obs * tp.size(), 0); StateVectorT psi(num_qubits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -225,7 +220,6 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Op=[RX,RX,RX], Obs=[Z,Z,Z]," std::vector jacobian(num_obs * tp.size(), 0); StateVectorT psi(num_qubits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -265,7 +259,6 @@ TEST_CASE("Algorithms::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]", std::vector jacobian(num_obs * tp.size(), 0); StateVectorT psi(num_qubits); - psi.initSV(); const auto obs = std::make_shared>( std::make_shared>( @@ -304,7 +297,6 @@ TEST_CASE("AdjointJacobianGPU::adjointJacobian Op=Mixed, Obs=[XXX]", std::vector jacobian(num_obs * tp.size(), 0); StateVectorT psi(num_qubits); - psi.initSV(); const auto obs = std::make_shared>( std::make_shared>( @@ -553,7 +545,6 @@ TEST_CASE("Algorithms::adjointJacobian Op=RX, Obs=Ham[Z0+Z1]", "[Algorithms]") { std::vector jacobian(num_obs * tp.size(), 0); StateVectorT psi(num_qubits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -588,7 +579,6 @@ TEST_CASE( std::vector jacobian(num_obs * tp.size(), 0); StateVectorT psi(num_qubits); - psi.initSV(); auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -630,7 +620,6 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Test HermitianObs", std::vector jacobian2(num_obs * tp.size(), 0); StateVectorT 
psi(num_qubits); - psi.initSV(); auto obs1 = std::make_shared>( std::make_shared>( diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/mpi/Test_AdjointJacobianGPUMPI.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/mpi/Test_AdjointJacobianGPUMPI.cpp index 8cf5a487ee..3d0e6cab7c 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/mpi/Test_AdjointJacobianGPUMPI.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/tests/mpi/Test_AdjointJacobianGPUMPI.cpp @@ -84,7 +84,6 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=RX, Obs=[Z,Z]", { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -144,7 +143,6 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobianMPI Op=[QubitStateVector, " { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -200,7 +198,6 @@ TEST_CASE( { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -269,7 +266,6 @@ TEST_CASE( { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); @@ -334,7 +330,6 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=[RX,RX,RX], Obs=[ZZZ]", { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs = std::make_shared>( std::make_shared>( @@ -397,7 +392,6 @@ TEST_CASE("AdjointJacobianGPUMPI::adjointJacobian Op=Mixed, Obs=[XXX]", { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); const auto obs = 
std::make_shared>( std::make_shared>( @@ -478,8 +472,6 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPUMPI Op=[RX,RX,RX], " { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); - auto obs1 = std::make_shared>( "PauliZ", std::vector{0}); auto obs2 = std::make_shared>( @@ -548,7 +540,6 @@ TEST_CASE("AdjointJacobianGPU::AdjointJacobianGPU Test HermitianObs", { StateVectorT psi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - psi.initSV(); auto obs1 = std::make_shared>( std::make_shared>( diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp index 5bd92b5520..145097b30e 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp @@ -63,10 +63,6 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) { using ParamT = PrecisionT; // Parameter's data precision using np_arr_c = py::array_t, py::array::c_style | py::array::forcecast>; - using np_arr_sparse_ind = typename std::conditional< - std::is_same::value, - py::array_t, - py::array_t>::type; registerGatesForStateVector(pyclass); @@ -83,28 +79,23 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) { })) .def( "setBasisState", - [](StateVectorT &sv, const std::size_t index, - const bool use_async) { - const std::complex value(1, 0); - sv.setBasisState(value, index, use_async); - }, - "Create Basis State on GPU.") + [](StateVectorT &sv, const std::vector &state, + const std::vector &wires, + const bool async) { sv.setBasisState(state, wires, async); }, + py::arg("state") = nullptr, py::arg("wires") = nullptr, + py::arg("async") = false, + "Set the state vector to a basis state on GPU.") .def( "setStateVector", - [](StateVectorT &sv, const np_arr_sparse_ind &indices, - const 
np_arr_c &state, const bool use_async) { - using index_type = typename std::conditional< - std::is_same::value, int32_t, int64_t>::type; - - sv.template setStateVector( - static_cast(indices.request().size), - static_cast *>( - state.request().ptr), - static_cast(indices.request().ptr), - use_async); + [](StateVectorT &sv, const np_arr_c &state, + const std::vector &wires, const bool async) { + const auto state_buffer = state.request(); + const auto state_ptr = + static_cast *>(state_buffer.ptr); + sv.setStateVector(state_ptr, state_buffer.size, wires, async); }, - "Set State Vector on GPU with values and their corresponding " - "indices for the state vector on device") + "Set State Vector on GPU with values for the state vector and " + "wires on the host memory.") .def( "DeviceToDevice", [](StateVectorT &sv, const StateVectorT &other, bool async) { @@ -152,7 +143,15 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) { "Get the GPU index for the statevector data.") .def("numQubits", &StateVectorT::getNumQubits) .def("dataLength", &StateVectorT::getLength) - .def("resetGPU", &StateVectorT::initSV) + .def( + "resetStateVector", + [](StateVectorT &gpu_sv, bool async) { + gpu_sv.resetStateVector(async); + }, + py::arg("async") = false, + "Initialize the statevector data to the |0...0> state") + .def("collapse", &StateVectorT::collapse, + "Collapse the statevector onto the 0 or 1 branch of a given wire.") .def( "apply", [](StateVectorT &sv, const std::string &str, diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp index 620fd93868..2d3313f694 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp @@ -63,10 +63,6 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { using ParamT = 
PrecisionT; // Parameter's data precision using np_arr_c = py::array_t, py::array::c_style | py::array::forcecast>; - using np_arr_sparse_ind = typename std::conditional< - std::is_same::value, - py::array_t, - py::array_t>::type; registerGatesForStateVector(pyclass); @@ -86,28 +82,24 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { })) // qubits, device .def( "setBasisState", - [](StateVectorT &sv, const std::size_t index, - const bool use_async) { - const std::complex value(1, 0); - sv.setBasisState(value, index, use_async); + [](StateVectorT &sv, const std::vector &state, + const std::vector &wires, const bool use_async) { + sv.setBasisState(state, wires, use_async); }, - "Create Basis State on GPU.") + py::arg("state") = nullptr, py::arg("wires") = nullptr, + py::arg("async") = false, + "Set the state vector to a basis state on GPU.") .def( "setStateVector", - [](StateVectorT &sv, const np_arr_sparse_ind &indices, - const np_arr_c &state, const bool use_async) { - using index_type = typename std::conditional< - std::is_same::value, int32_t, int64_t>::type; - - sv.template setStateVector( - static_cast(indices.request().size), - static_cast *>( - state.request().ptr), - static_cast(indices.request().ptr), - use_async); + [](StateVectorT &sv, const np_arr_c &state, + const std::vector &wires, const bool async) { + const auto state_buffer = state.request(); + const auto state_ptr = + static_cast *>(state_buffer.ptr); + sv.setStateVector(state_ptr, state_buffer.size, wires, async); }, - "Set State Vector on GPU with values and their corresponding " - "indices for the state vector on device") + "Set State Vector on GPU with values for the state vector and " + "wires on the host memory.") .def( "DeviceToDevice", [](StateVectorT &sv, const StateVectorT &other, bool async) { @@ -155,7 +147,13 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { "Get the GPU index for the statevector data.") .def("numQubits", 
&StateVectorT::getNumQubits) .def("dataLength", &StateVectorT::getLength) - .def("resetGPU", &StateVectorT::initSV) + .def( + "resetStateVector", + [](StateVectorT &gpu_sv, bool use_async) { + gpu_sv.resetStateVector(use_async); + }, + py::arg("async") = false, + "Initialize the statevector data to the |0...0> state") .def( "apply", [](StateVectorT &sv, const std::string &str, diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp index b552ef5f01..af864d8b01 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -74,7 +75,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::StateVectorCudaManaged", {0, 0}, {0, 0}, {0, 0}, {0, 0}}; SECTION("GPU <-> host data: std::complex") { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); std::vector out_data(Pennylane::Util::exp2(num_qubits), {0.5, 0.5}); std::vector ref_data(Pennylane::Util::exp2(num_qubits), @@ -100,7 +100,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyHadamard", SECTION("Apply directly") { for (std::size_t index = 0; index < num_qubits; index++) { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); CHECK(sv.getDataVector()[0] == cp_t{1, 0}); sv.applyHadamard({index}, inverse); CAPTURE(sv.getDataVector()); @@ -120,7 +119,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyHadamard", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < num_qubits; index++) { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); CHECK(sv.getDataVector()[0] == cp_t{1, 0}); sv.applyOperation("Hadamard", {index}, inverse); @@ -148,7 +146,6 @@ 
TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyPauliX", SECTION("Apply directly") { for (std::size_t index = 0; index < num_qubits; index++) { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); CHECK(sv.getDataVector()[0] == cuUtil::ONE>()); sv.applyPauliX({index}, inverse); @@ -161,7 +158,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyPauliX", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < num_qubits; index++) { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); CHECK(sv.getDataVector()[0] == cuUtil::ONE>()); sv.applyOperation("PauliX", {index}, inverse); @@ -181,7 +177,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyPauliY", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+++> state sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}); @@ -229,7 +224,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyPauliZ", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+++> state sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}); @@ -274,7 +268,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyS", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+++> state sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}); @@ -323,7 +316,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyT", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+++> state sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}); @@ -372,7 +364,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyCNOT", { const 
std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+00> state to generate 3-qubit GHZ state sv.applyOperation("Hadamard", {0}); @@ -414,7 +405,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applySWAP", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+10> state sv.applyOperations({{"Hadamard"}, {"PauliX"}}, {{0}, {1}}, @@ -593,7 +583,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyCY", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+10> state sv.applyOperations({{"Hadamard"}, {"PauliX"}}, {{0}, {1}}, @@ -762,7 +751,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyCZ", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+10> state sv.applyOperations({{"Hadamard"}, {"PauliX"}}, {{0}, {1}}, @@ -876,7 +864,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyToffoli", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+10> state sv.applyOperations({{"Hadamard"}, {"PauliX"}}, {{0}, {1}}, @@ -983,7 +970,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyCSWAP", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+10> state sv.applyOperations({{"Hadamard"}, {"PauliX"}}, {{0}, {1}}, @@ -1083,68 +1069,15 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetStateVector", } StateVectorCudaManaged sv{num_qubits}; - sv.CopyHostDataToGpu(init_state.data(), init_state.size()); - - using index_type = - typename std::conditional::value, - int32_t, int64_t>::type; - // The setStates will shuffle the state vector values on the device with - // the following indices and values setting on host. 
For example, the - // values[i] is used to set the indices[i] th element of state vector on - // the device. For example, values[2] (init_state[5]) will be copied to - // indices[2]th or (4th) element of the state vector. - std::vector indices = {0, 2, 4, 6, 1, 3, 5, 7}; - - std::vector> values = { - init_state[1], init_state[3], init_state[5], init_state[7], - init_state[0], init_state[2], init_state[4], init_state[6]}; - - sv.template setStateVector(values.size(), values.data(), - indices.data(), false); - - CHECK(expected_state == Pennylane::Util::approx(sv.getDataVector())); - } -} -// LCOV_EXCL_START -TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetStateVectorwith_thread_setting", - "[StateVectorCudaManaged_Nonparam]", float, double) { - using PrecisionT = TestType; - const std::size_t num_qubits = 3; - std::mt19937 re{1337}; - - SECTION("SetStates with a non-default GPU thread setting") { - auto init_state = - createRandomStateVectorData(re, num_qubits); - auto expected_state = init_state; - for (std::size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1); - i++) { - std::swap(expected_state[i * 2], expected_state[i * 2 + 1]); - } - - StateVectorCudaManaged sv{num_qubits}; - sv.CopyHostDataToGpu(init_state.data(), init_state.size()); - - using index_type = - typename std::conditional::value, - int32_t, int64_t>::type; + std::vector> values(init_state.begin(), + init_state.end()); - std::vector indices = {0, 2, 4, 6, 1, 3, 5, 7}; - - std::vector> values = { - init_state[1], init_state[3], init_state[5], init_state[7], - init_state[0], init_state[2], init_state[4], init_state[6]}; - - // default setting of the number of threads in a block is 256. 
- const std::size_t threads_per_block = 1024; - - sv.template setStateVector( - values.size(), values.data(), indices.data(), false); - - CHECK(expected_state == Pennylane::Util::approx(sv.getDataVector())); + sv.setStateVector(values.data(), values.size(), + std::vector{0, 1, 2}); + CHECK(init_state == Pennylane::Util::approx(sv.getDataVector())); } } -// LCOV_EXCL_STOP TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetIthStates", "[StateVectorCudaManaged_Nonparam]", float, double) { @@ -1156,21 +1089,19 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetIthStates", "Set Ith element of the state state on device with data on the host") { auto init_state = createRandomStateVectorData(re, num_qubits); - auto expected_state = init_state; + std::vector> expected_state(init_state.size(), + {0, 0}); - expected_state[0] = expected_state[1]; - - for (std::size_t i = 1; i < Pennylane::Util::exp2(num_qubits); i++) { - expected_state[i] = {0, 0}; - } + expected_state[expected_state.size() - 1] = {1.0, 0}; StateVectorCudaManaged sv{num_qubits}; sv.CopyHostDataToGpu(init_state.data(), init_state.size()); - std::size_t index = 0; - std::complex values = init_state[1]; + std::vector state(num_qubits, 1); + std::vector wires(num_qubits, 0); + std::iota(wires.begin(), wires.end(), 0); - sv.setBasisState(values, index, false); + sv.setBasisState(state, wires, false); CHECK(expected_state == Pennylane::Util::approx(sv.getDataVector())); } diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp index c93eba882e..e2485910d9 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp @@ -43,7 +43,6 @@ TEMPLATE_TEST_CASE("LightningGPU:applyOperation", 
"[LightningGPU_Param]", double) { const std::size_t num_qubits = 1; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); SECTION("Catch failures caused by unsupported named gates") { std::string obs = "paulix"; @@ -56,7 +55,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRX", "[LightningGPU_Param]", double) { using cp_t = std::complex; const std::size_t num_qubits = 1; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{{0.1}, {0.6}}; @@ -188,7 +186,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRZ", "[LightningGPU_Param]", float, using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+++> state sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, @@ -250,7 +247,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyPhaseShift", "[LightningGPU_Param]", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+++> state sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, @@ -313,7 +309,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyControlledPhaseShift", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); // Test using |+++> state sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, @@ -387,7 +382,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRot", "[LightningGPU_Param]", float, SECTION("Apply directly") { for (std::size_t index = 0; index < num_qubits; index++) { StateVectorCudaManaged sv_direct{num_qubits}; - sv_direct.initSV(); sv_direct.applyRot({index}, adjoint, angles[index][0], angles[index][1], angles[index][2]); @@ -396,7 +390,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRot", "[LightningGPU_Param]", float, } for (std::size_t index = 0; index < num_qubits; index++) { StateVectorCudaManaged sv_direct{num_qubits}; - sv_direct.initSV(); sv_direct.applyRot({index}, adjoint, angles[index]); 
CHECK(sv_direct.getDataVector() == @@ -406,7 +399,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRot", "[LightningGPU_Param]", float, SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < num_qubits; index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("Rot", {index}, adjoint, angles[index]); CHECK(sv_dispatch.getDataVector() == @@ -422,7 +414,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyCRot", "[LightningGPU_Param]", float, using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8, 2.4}; @@ -441,7 +432,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyCRot", "[LightningGPU_Param]", float, SECTION("CRot0,1 |000> -> |000>") { { StateVectorCudaManaged sv_direct{num_qubits}; - sv_direct.initSV(); sv_direct.applyCRot({0, 1}, adjoint, angles[0], angles[1], angles[2]); @@ -451,7 +441,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyCRot", "[LightningGPU_Param]", float, } { StateVectorCudaManaged sv_direct{num_qubits}; - sv_direct.initSV(); sv_direct.applyCRot({0, 1}, adjoint, angles); @@ -461,7 +450,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyCRot", "[LightningGPU_Param]", float, } SECTION("CRot0,1 |100> -> |1>(a|0>+b|1>)|0>") { StateVectorCudaManaged sv_direct{num_qubits}; - sv_direct.initSV(); sv_direct.applyOperation("PauliX", {0}); sv_direct.applyCRot({0, 1}, adjoint, angles[0], angles[1], @@ -473,7 +461,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyCRot", "[LightningGPU_Param]", float, SECTION("Apply using dispatcher") { SECTION("CRot0,1 |100> -> |1>(a|0>+b|1>)|0>") { StateVectorCudaManaged sv_direct{num_qubits}; - sv_direct.initSV(); sv_direct.applyOperation("PauliX", {0}); sv_direct.applyOperation("CRot", {0, 1}, adjoint, angles); @@ -489,7 +476,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingXX", "[LightningGPU_Param]", float, using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged 
sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -642,7 +628,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingYY", "[LightningGPU_Param]", float, using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -722,8 +707,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingYY", "[LightningGPU_Param]", float, SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); - + sv_dispatch.resetStateVector(); sv_dispatch.applyOperation("IsingYY", {0, 1}, true, {angles[index]}); CHECK(sv_dispatch.getDataVector() == @@ -737,7 +721,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingZZ", "[LightningGPU_Param]", float, using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -796,7 +779,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingZZ", "[LightningGPU_Param]", float, SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("IsingZZ", {0, 1}, true, {angles[index]}); @@ -988,7 +970,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitation", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -1021,7 +1002,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitation", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("SingleExcitation", {0, 1}, false, {angles[index]}); CHECK(sv_dispatch.getDataVector() == @@ -1035,7 +1015,6 @@ 
TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationMinus", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -1101,7 +1080,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationMinus", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("SingleExcitationMinus", {0, 1}, true, {angles[index]}); @@ -1116,7 +1094,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationPlus", using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -1182,7 +1159,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationPlus", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("SingleExcitationPlus", {0, 1}, true, {angles[index]}); @@ -1197,7 +1173,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitation", using cp_t = std::complex; const std::size_t num_qubits = 4; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8, 2.4}; @@ -1221,7 +1196,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitation", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("DoubleExcitation", {0, 1, 2, 3}, false, {angles[index]}); @@ -1236,7 +1210,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationMinus", using cp_t = std::complex; const std::size_t num_qubits = 4; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -1278,7 +1251,6 @@ 
TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationMinus", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("DoubleExcitationMinus", {0, 1, 2, 3}, true, {angles[index]}); @@ -1293,7 +1265,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationPlus", using cp_t = std::complex; const std::size_t num_qubits = 4; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -1336,7 +1307,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationPlus", SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("DoubleExcitationPlus", {0, 1, 2, 3}, true, {angles[index]}); CHECK(sv_dispatch.getDataVector() == @@ -1350,7 +1320,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyMultiRZ", "[LightningGPU_Param]", float, using cp_t = std::complex; const std::size_t num_qubits = 3; StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); const std::vector angles{0.3, 0.8}; @@ -1412,7 +1381,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyMultiRZ", "[LightningGPU_Param]", float, SECTION("Apply using dispatcher") { for (std::size_t index = 0; index < angles.size(); index++) { StateVectorCudaManaged sv_dispatch{num_qubits}; - sv_dispatch.initSV(); sv_dispatch.applyOperation("MultiRZ", {0, 1}, true, {angles[index]}); @@ -1437,10 +1405,8 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire", SECTION("Apply using dispatcher") { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); StateVectorCudaManaged sv_expected{num_qubits}; - sv_expected.initSV(); for (std::size_t index = 0; index < num_qubits; index++) { sv_expected.applyOperations({{"PauliX"}, {"PauliZ"}}, @@ -1459,9 +1425,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire", SECTION("Apply using 
dispatcher") { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); StateVectorCudaManaged sv_expected{num_qubits}; - sv_expected.initSV(); for (std::size_t index = 0; index < num_qubits; index++) { sv_expected.applyOperations({{"PauliZ"}, {"PauliX"}}, @@ -1478,9 +1442,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire", SECTION("Apply using dispatcher") { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); StateVectorCudaManaged sv_expected{num_qubits}; - sv_expected.initSV(); for (std::size_t index = 0; index < num_qubits; index++) { sv_expected.applyOperations({{"PauliX"}, {"PauliY"}}, @@ -1497,9 +1459,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire", SECTION("Apply using dispatcher") { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); StateVectorCudaManaged sv_expected{num_qubits}; - sv_expected.initSV(); for (std::size_t index = 0; index < num_qubits; index++) { sv_expected.applyOperations({{"PauliY"}, {"PauliX"}}, @@ -1517,9 +1477,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire", SECTION("Apply using dispatcher") { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); StateVectorCudaManaged sv_expected{num_qubits}; - sv_expected.initSV(); for (std::size_t index = 0; index < num_qubits; index++) { sv_expected.applyOperations({{"PauliY"}, {"PauliZ"}}, @@ -1537,9 +1495,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire", SECTION("Apply using dispatcher") { StateVectorCudaManaged sv{num_qubits}; - sv.initSV(); StateVectorCudaManaged sv_expected{num_qubits}; - sv_expected.initSV(); for (std::size_t index = 0; index < num_qubits; index++) { sv_expected.applyOperations({{"PauliZ"}, {"PauliY"}}, @@ -1557,7 +1513,6 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation multiple wires", const std::size_t num_qubits = 3; StateVectorCudaManaged sv_init{num_qubits}; - sv_init.initSV(); sv_init.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, {{0}, {1}, {2}}, {false, false, false}); diff --git 
a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp index 9b88afa388..968badd4dc 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -158,21 +159,17 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetStateVector", "the host") { StateVectorCudaMPI sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - // The setStates will shuffle the state vector values on the device with - // the following indices and values setting on host. For example, the - // values[i] is used to set the indices[i] th element of state vector on - // the device. For example, values[2] (init_state[5]) will be copied to - // indices[2]th or (4th) element of the state vector. 
- sv.template setStateVector( - init_state.size(), init_state.data(), indices.data(), false); + std::vector> values(init_state.begin(), + init_state.end()); + std::vector wires(num_qubits); + std::iota(wires.begin(), wires.end(), 0); + sv.setStateVector(values.data(), values.size(), wires); - mpi_manager.Barrier(); - sv.CopyGpuDataToHost(local_state.data(), - static_cast(subSvLength)); - mpi_manager.Barrier(); + auto expected_local_state_vector = mpi_manager.scatter(values, 0); - CHECK(expected_local_state == Pennylane::Util::approx(local_state)); + CHECK(expected_local_state_vector == + Pennylane::Util::approx(sv.getDataVector())); } } @@ -189,20 +186,10 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates", std::bit_width(static_cast(mpi_manager.getSize())) - 1; std::size_t nLocalIndexBits = num_qubits - nGlobalIndexBits; std::size_t subSvLength = 1 << nLocalIndexBits; - mpi_manager.Barrier(); - - int index; - if (mpi_manager.getRank() == 0) { - std::mt19937 re{1337}; - std::uniform_int_distribution<> distr( - 0, Pennylane::Util::exp2(num_qubits) - 1); - index = distr(re); - } - mpi_manager.Bcast(index, 0); std::vector expected_state(Pennylane::Util::exp2(num_qubits), {0, 0}); if (mpi_manager.getRank() == 0) { - expected_state[index] = {1.0, 0}; + expected_state[expected_state.size() - 1] = {1.0, 0}; } auto expected_local_state = mpi_manager.scatter(expected_state, 0); @@ -219,8 +206,10 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetIthStates", "Set Ith element of the state state on device with data on the host") { StateVectorCudaMPI sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - std::complex values = {1.0, 0}; - sv.setBasisState(values, index, false); + std::vector state(num_qubits, 1); + std::vector wires(num_qubits); + std::iota(wires.begin(), wires.end(), 0); + sv.setBasisState(state, wires, false); std::vector h_sv0(subSvLength, {0.0, 0.0}); sv.CopyGpuDataToHost(h_sv0.data(), diff --git 
a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_Param.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_Param.cpp index a9d5ec106d..17cf43e842 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_Param.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_Param.cpp @@ -380,7 +380,6 @@ TEMPLATE_TEST_CASE("LightningGPUMPI:applyOperation", "[LightningGPUMPI_Param]", std::string obs = "paulix"; StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); PL_CHECK_THROWS_MATCHES(sv.applyOperation(obs, {0}), LightningException, "Currently unsupported gate: paulix"); } diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/initSV.cu b/pennylane_lightning/core/src/simulators/lightning_gpu/initSV.cu index 4e3e93ea79..8a62e89e84 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/initSV.cu +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/initSV.cu @@ -59,7 +59,7 @@ void setBasisState_CUDA(cuDoubleComplex *sv, cuDoubleComplex &value, cudaStream_t stream_id); /** - * @brief The CUDA kernel that setS state vector data on GPU device from the + * @brief The CUDA kernel that sets state vector data on GPU device from the * input values (on device) and their corresponding indices (on device) * information. 
* diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp index 460a4fa8cb..bcfdd3944c 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp @@ -25,6 +25,7 @@ #include #include #include // custatevecApplyMatrix +#include #include #include #include @@ -93,16 +94,10 @@ class Measurements final */ auto probs(const std::vector &wires) -> std::vector { - PL_ABORT_IF_NOT(std::is_sorted(wires.cbegin(), wires.cend()) || - std::is_sorted(wires.rbegin(), wires.rend()), - "LightningGPU does not currently support out-of-order " - "wire indices with probability calculations"); - // Data return type fixed as double in custatevec function call std::vector probabilities(Pennylane::Util::exp2(wires.size())); // this should be built upon by the wires not participating - int maskLen = - 0; // static_cast(BaseType::getNumQubits() - wires.size()); + int maskLen = 0; int *maskBitString = nullptr; // int *maskOrdering = nullptr; @@ -124,6 +119,8 @@ class Measurements final this->_statevector.getNumQubits() - 1 - x); }); + std::reverse(wires_int.begin(), wires_int.end()); + PL_CUSTATEVEC_IS_SUCCESS(custatevecAbs2SumArray( /* custatevecHandle_t */ this->_statevector.getCusvHandle(), /* const void* */ this->_statevector.getData(), @@ -218,7 +215,9 @@ class Measurements final * be accessed using the stride sample_id*num_qubits, where sample_id is a * number between 0 and num_samples-1. 
*/ - auto generate_samples(std::size_t num_samples) -> std::vector { + auto generate_samples(std::size_t num_samples, + const std::optional &seed = std::nullopt) + -> std::vector { std::vector rand_nums(num_samples); custatevecSamplerDescriptor_t sampler; @@ -238,7 +237,11 @@ class Measurements final data_type = CUDA_C_32F; } - this->setRandomSeed(); + if (seed.has_value()) { + this->setSeed(seed.value()); + } else { + this->setRandomSeed(); + } std::uniform_real_distribution dis(0.0, 1.0); for (std::size_t n = 0; n < num_samples; n++) { rand_nums[n] = dis(this->rng); @@ -273,7 +276,7 @@ class Measurements final PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerSample( this->_statevector.getCusvHandle(), sampler, bitStrings.data(), bitOrdering.data(), bitStringLen, rand_nums.data(), num_samples, - CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER)); + CUSTATEVEC_SAMPLER_OUTPUT_RANDNUM_ORDER)); PL_CUDA_IS_SUCCESS(cudaStreamSynchronize( this->_statevector.getDataBuffer().getDevTag().getStreamID())); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp index 126ce2e686..6fee1711d2 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp @@ -130,6 +130,8 @@ class MeasurementsMPI final } } + std::reverse(wires_local.begin(), wires_local.end()); + std::vector local_probabilities( Pennylane::Util::exp2(wires_local.size())); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp index 3de2f6aab6..28a04d6d72 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp 
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp @@ -50,7 +50,6 @@ TEMPLATE_TEST_CASE("[Identity]", "[StateVectorCudaManaged_Expval]", float, const std::size_t num_qubits = 3; auto ONE = TestType(1); StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); SECTION("Using expval") { @@ -73,7 +72,6 @@ TEMPLATE_TEST_CASE("[PauliX]", "[StateVectorCudaManaged_Expval]", float, SECTION("Using expval") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations({{"Hadamard"}, {"CNOT"}, {"CNOT"}}, {{0}, {0, 1}, {1, 2}}, @@ -85,7 +83,6 @@ TEMPLATE_TEST_CASE("[PauliX]", "[StateVectorCudaManaged_Expval]", float, SECTION("Using expval: Plus states") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}); @@ -96,7 +93,6 @@ TEMPLATE_TEST_CASE("[PauliX]", "[StateVectorCudaManaged_Expval]", float, SECTION("Using expval: Minus states") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations( {{"PauliX"}, @@ -126,7 +122,6 @@ TEMPLATE_TEST_CASE("[PauliY]", "[StateVectorCudaManaged_Expval]", float, SECTION("Using expval") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations({{"Hadamard"}, {"CNOT"}, {"CNOT"}}, {{0}, {0, 1}, {1, 2}}, @@ -138,7 +133,6 @@ TEMPLATE_TEST_CASE("[PauliY]", "[StateVectorCudaManaged_Expval]", float, SECTION("Using expval: Plus i states") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations({{"RX"}, {"RX"}, {"RX"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}, @@ -150,7 +144,6 @@ TEMPLATE_TEST_CASE("[PauliY]", "[StateVectorCudaManaged_Expval]", float, SECTION("Using expval: Minus i states") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations({{"RX"}, {"RX"}, {"RX"}}, {{0}, 
{1}, {2}}, {{false}, {false}, {false}}, @@ -191,7 +184,6 @@ TEMPLATE_TEST_CASE("[Hadamard]", "[StateVectorCudaManaged_Expval]", float, SECTION("Using expval") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperation("PauliX", {0}); auto ob = NamedObs("Hadamard", {0}); @@ -209,7 +201,6 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::Hamiltonian_expval", SECTION("GetExpectationIdentity") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); std::vector wires{0, 1, 2}; diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp index f23497f0c7..4f3efaade5 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp @@ -257,7 +257,7 @@ TEMPLATE_TEST_CASE("Probabilities", "[Measures]", float, double) { using StateVectorT = StateVectorCudaManaged; // Probabilities calculated with Pennylane default.qubit: std::vector, std::vector>> - input = {{{2, 1, 0}, + input = {{{0, 1, 2}, {0.67078706, 0.03062806, 0.0870997, 0.00397696, 0.17564072, 0.00801973, 0.02280642, 0.00104134}}}; diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Var.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Var.cpp index 3b40d093be..deccedee0c 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Var.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Var.cpp @@ -38,7 +38,6 @@ TEMPLATE_TEST_CASE("Test variance of NamedObs", "[StateVectorCudaManaged_Var]", 
const std::size_t num_qubits = 2; SECTION("var(PauliX[0])") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations( @@ -53,7 +52,6 @@ TEMPLATE_TEST_CASE("Test variance of NamedObs", "[StateVectorCudaManaged_Var]", SECTION("var(PauliY[0])") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations( @@ -68,7 +66,6 @@ TEMPLATE_TEST_CASE("Test variance of NamedObs", "[StateVectorCudaManaged_Var]", SECTION("var(PauliZ[1])") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations( @@ -89,7 +86,6 @@ TEMPLATE_TEST_CASE("Test variance of HermitianObs", using ComplexT = typename StateVectorT::ComplexT; SECTION("Using var") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations( @@ -122,7 +118,6 @@ TEMPLATE_TEST_CASE("Test variance of TensorProdObs", const std::size_t num_qubits = 3; SECTION("Using var") { StateVectorT sv{num_qubits}; - sv.initSV(); auto m = Measurements(sv); sv.applyOperations( diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp index d3c55ff7ae..bbc1dba860 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp @@ -71,7 +71,6 @@ TEMPLATE_TEST_CASE("[Identity]", "[StateVectorCudaMPI_Expval]", float, double) { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); @@ -112,7 +111,6 @@ TEMPLATE_TEST_CASE("[PauliX]", "[StateVectorCudaMPI_Expval]", float, double) { SECTION("Using expval") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, 
nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); sv.applyOperations({{"Hadamard"}, {"CNOT"}, {"CNOT"}}, @@ -126,7 +124,6 @@ TEMPLATE_TEST_CASE("[PauliX]", "[StateVectorCudaMPI_Expval]", float, double) { SECTION("Using expval: Plus states") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); sv.applyOperations({{"Hadamard"}, {"Hadamard"}, {"Hadamard"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}); @@ -138,7 +135,6 @@ TEMPLATE_TEST_CASE("[PauliX]", "[StateVectorCudaMPI_Expval]", float, double) { SECTION("Using expval: Minus states") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); sv.applyOperations( {{"PauliX"}, @@ -185,7 +181,6 @@ TEMPLATE_TEST_CASE("[PauliY]", "[StateVectorCudaMPI_Expval]", float, double) { SECTION("Using expval") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); sv.applyOperations({{"Hadamard"}, {"CNOT"}, {"CNOT"}}, {{0}, {0, 1}, {1, 2}}, @@ -198,7 +193,6 @@ TEMPLATE_TEST_CASE("[PauliY]", "[StateVectorCudaMPI_Expval]", float, double) { SECTION("Using expval: Plus i states") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); sv.applyOperations({{"RX"}, {"RX"}, {"RX"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}, @@ -211,7 +205,6 @@ TEMPLATE_TEST_CASE("[PauliY]", "[StateVectorCudaMPI_Expval]", float, double) { SECTION("Using expval: Minus i states") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); sv.applyOperations({{"RX"}, {"RX"}, {"RX"}}, {{0}, {1}, {2}}, {{false}, {false}, {false}}, @@ -293,7 +286,6 @@ TEMPLATE_TEST_CASE("[Hadamard]", "[StateVectorCudaMPI_Expval]", float, double) { SECTION("Using 
expval") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); sv.applyOperation("PauliX", {0}); auto ob = NamedObsMPI("Hadamard", {0}); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Measure.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Measure.cpp index c77f4e2215..7bdc578f77 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Measure.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Measure.cpp @@ -411,7 +411,7 @@ TEMPLATE_TEST_CASE("Probabilities", "[MeasuresMPI]", double) { using StateVectorT = StateVectorCudaMPI; // Probabilities calculated with Pennylane default.qubit: std::vector, std::vector>> - input = {{{2, 1, 0}, + input = {{{0, 1, 2}, {0.67078706, 0.03062806, 0.0870997, 0.00397696, 0.17564072, 0.00801973, 0.02280642, 0.00104134}}}; diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Var.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Var.cpp index 0a9ed9c33b..cfe9675d0d 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Var.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Var.cpp @@ -60,7 +60,6 @@ TEMPLATE_TEST_CASE("Test variance of NamedObs", "[StateVectorCudaMPI_Var]", SECTION("var(PauliX[0])") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); @@ -77,7 +76,6 @@ TEMPLATE_TEST_CASE("Test variance of NamedObs", "[StateVectorCudaMPI_Var]", SECTION("var(PauliY[0])") { StateVectorT sv(mpi_manager, 
dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); @@ -94,7 +92,6 @@ TEMPLATE_TEST_CASE("Test variance of NamedObs", "[StateVectorCudaMPI_Var]", SECTION("var(PauliZ[1])") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); @@ -135,7 +132,6 @@ TEMPLATE_TEST_CASE("Test variance of HermitianObs", "[StateVectorCudaMPI_Var]", SECTION("Using var") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); @@ -188,7 +184,6 @@ TEMPLATE_TEST_CASE("Test variance of TensorProdObs", "[StateVectorCudaMPI_Var]", SECTION("Using var") { StateVectorT sv(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, nLocalIndexBits); - sv.initSV(); auto m = MeasurementsMPI(sv); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp index 4003395b53..841074474b 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp @@ -266,3 +266,45 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::StateVectorCudaManaged", REQUIRE(std::is_constructible_v); } } + +TEMPLATE_TEST_CASE("StateVectorCudaManaged::collapse", + "[StateVectorCudaManaged]", float, double) { + using PrecisionT = TestType; + using ComplexT = typename StateVectorCudaManaged::ComplexT; + using CFP_t = typename StateVectorCudaManaged::CFP_t; + using TestVectorT = TestVector; + + std::size_t wire = GENERATE(0, 1, 2); + std::size_t branch = GENERATE(0, 1); + constexpr std::size_t num_qubits = 3; + + // TODO @tomlqc use same template for testing all Lightning flavours? 
+ + SECTION("Collapse the state vector after having measured one of the " + "qubits.") { + TestVectorT init_state = createPlusState_(num_qubits); + + const ComplexT coef{0.5, PrecisionT{0.0}}; + const ComplexT zero{PrecisionT{0.0}, PrecisionT{0.0}}; + + std::vector>> expected_state = { + {{coef, coef, coef, coef, zero, zero, zero, zero}, + {coef, coef, zero, zero, coef, coef, zero, zero}, + {coef, zero, coef, zero, coef, zero, coef, zero}}, + {{zero, zero, zero, zero, coef, coef, coef, coef}, + {zero, zero, coef, coef, zero, zero, coef, coef}, + {zero, coef, zero, coef, zero, coef, zero, coef}}, + }; + + StateVectorCudaManaged sv( + reinterpret_cast(init_state.data()), init_state.size()); + + sv.collapse(wire, branch); + + PrecisionT eps = std::numeric_limits::epsilon() * 1e2; + REQUIRE(isApproxEqual(sv.getDataVector().data(), + sv.getDataVector().size(), + expected_state[branch][wire].data(), + expected_state[branch][wire].size(), eps)); + } +} diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp index 6dd5a01590..0c119409fd 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp @@ -36,6 +36,7 @@ namespace { using namespace Pennylane::LightningGPU; using namespace Pennylane::LightningGPU::MPI; +using namespace Pennylane::LightningGPU::Util; using namespace Pennylane::Util; using Pennylane::Util::isApproxEqual; @@ -52,6 +53,23 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::Constructibility", } } +TEMPLATE_TEST_CASE("cuStateVec_helper::compute_local_index", + "[Default Constructibility]", StateVectorCudaMPI<>) { + const std::size_t local_num_qubits = 4; + + SECTION("compute_local_index, index inside the current qubits set") { + const std::size_t index = 2; // 0b00010 + std::size_t 
local_index = compute_local_index(index, local_num_qubits); + REQUIRE(local_index == index); + } + + SECTION("compute_local_index, index outside the current qubits set") { + const std::size_t index = 16; // 0b10000 + std::size_t local_index = compute_local_index(index, local_num_qubits); + REQUIRE(local_index == 0); + } +} + TEMPLATE_PRODUCT_TEST_CASE("StateVectorCudaMPI::Constructibility", "[General Constructibility]", (StateVectorCudaMPI), (float, double)) { @@ -299,4 +317,4 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVectorCudaMPI::applyOperations", {false, false}, {{0.0}}), LightningException, "must all be equal"); // invalid parameters } -} \ No newline at end of file +} diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp index 8bd27c2dc8..ffdefe3e25 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/cuStateVec_helpers.hpp @@ -101,4 +101,22 @@ inline SharedCusvHandle make_shared_cusv_handle() { PL_CUSTATEVEC_IS_SUCCESS(custatevecCreate(&h)); return {h, handleDeleter()}; } + +/** + * @brief Compute the local index from a given index in multi-gpu workflow + * + * @param index Global index of the target element. + * @param num_qubits Number of wires within the local devices. + * + * @return local_index Local index of the target element. 
+ */ +inline std::size_t compute_local_index(const std::size_t index, + const std::size_t num_qubits) { + // TODO: bound check for the left shift operation here + constexpr std::size_t one{1U}; + const std::size_t local_index = + (index >> num_qubits) * (one << num_qubits) ^ index; + return local_index; +} + } // namespace Pennylane::LightningGPU::Util diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.cpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.cpp index d01c0340d2..04fab62ac5 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.cpp @@ -338,13 +338,22 @@ void LightningKokkosSimulator::PartialProbs( std::move(dv_probs.begin(), dv_probs.end(), probs.begin()); } -void LightningKokkosSimulator::Sample(DataView &samples, - std::size_t shots) { +std::vector LightningKokkosSimulator::GenerateSamples(size_t shots) { + // generate_samples is a member function of the Measures class. Pennylane::LightningKokkos::Measures::Measurements m{ *(this->device_sv)}; + // PL-Lightning-Kokkos generates samples using the alias method. // Reference: https://en.wikipedia.org/wiki/Inverse_transform_sampling - auto li_samples = m.generate_samples(shots); + if (this->gen) { + return m.generate_samples(shots, (*(this->gen))()); + } + return m.generate_samples(shots); +} + +void LightningKokkosSimulator::Sample(DataView &samples, + std::size_t shots) { + auto li_samples = this->GenerateSamples(shots); RT_FAIL_IF(samples.size() != li_samples.size(), "Invalid size for the pre-allocated samples"); @@ -377,13 +386,7 @@ void LightningKokkosSimulator::PartialSample( // get device wires auto &&dev_wires = getDeviceWires(wires); - // generate_samples is a member function of the MeasuresKokkos class. 
- Pennylane::LightningKokkos::Measures::Measurements m{ - *(this->device_sv)}; - - // PL-Lightning-Kokkos generates samples using the alias method. - // Reference: https://en.wikipedia.org/wiki/Inverse_transform_sampling - auto li_samples = m.generate_samples(shots); + auto li_samples = this->GenerateSamples(shots); // The lightning samples are layed out as a single vector of size // shots*qubits, where each element represents a single bit. The @@ -407,13 +410,7 @@ void LightningKokkosSimulator::Counts(DataView &eigvals, RT_FAIL_IF(eigvals.size() != numElements || counts.size() != numElements, "Invalid size for the pre-allocated counts"); - // generate_samples is a member function of the MeasuresKokkos class. - Pennylane::LightningKokkos::Measures::Measurements m{ - *(this->device_sv)}; - - // PL-Lightning-Kokkos generates samples using the alias method. - // Reference: https://en.wikipedia.org/wiki/Inverse_transform_sampling - auto li_samples = m.generate_samples(shots); + auto li_samples = this->GenerateSamples(shots); // Fill the eigenvalues with the integer representation of the corresponding // computational basis bitstring. In the future, eigenvalues can also be @@ -451,13 +448,7 @@ void LightningKokkosSimulator::PartialCounts( // get device wires auto &&dev_wires = getDeviceWires(wires); - // generate_samples is a member function of the MeasuresKokkos class. - Pennylane::LightningKokkos::Measures::Measurements m{ - *(this->device_sv)}; - - // PL-Lightning-Kokkos generates samples using the alias method. - // Reference: https://en.wikipedia.org/wiki/Inverse_transform_sampling - auto li_samples = m.generate_samples(shots); + auto li_samples = this->GenerateSamples(shots); // Fill the eigenvalues with the integer representation of the corresponding // computational basis bitstring. 
In the future, eigenvalues can also be diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp index 890c3a267f..d28959f7c3 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp @@ -96,6 +96,8 @@ class LightningKokkosSimulator final : public Catalyst::Runtime::QuantumDevice { return res; } + auto GenerateSamples(size_t shots) -> std::vector; + public: explicit LightningKokkosSimulator(const std::string &kwargs = "{}") { auto &&args = Catalyst::Runtime::parse_kwargs(kwargs); diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp index 7208732a3b..d32e6100ef 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp @@ -1754,26 +1754,71 @@ TEST_CASE("Counts and PartialCounts tests with numWires=0-4 shots=100", } TEST_CASE("Measurement with a seeded device", "[Measures]") { - for (std::size_t _ = 0; _ < 5; _++) { - std::unique_ptr sim = std::make_unique(); - std::unique_ptr sim1 = std::make_unique(); + std::array, 2> sims; + std::vector gens{std::mt19937{37}, std::mt19937{37}}; - std::mt19937 gen(37); - sim->SetDevicePRNG(&gen); + auto circuit = [](LKSimulator &sim, std::mt19937 &gen) { + sim.SetDevicePRNG(&gen); std::vector Qs; Qs.reserve(1); - Qs.push_back(sim->AllocateQubit()); - sim->NamedOperation("Hadamard", {}, {Qs[0]}, false); - auto m = sim->Measure(Qs[0]); - - std::mt19937 gen1(37); - sim1->SetDevicePRNG(&gen1); - std::vector 
Qs1; - Qs1.reserve(1); - Qs1.push_back(sim1->AllocateQubit()); - sim1->NamedOperation("Hadamard", {}, {Qs1[0]}, false); - auto m1 = sim1->Measure(Qs1[0]); - - CHECK(*m == *m1); + Qs.push_back(sim.AllocateQubit()); + sim.NamedOperation("Hadamard", {}, {Qs[0]}, false); + auto m = sim.Measure(Qs[0]); + return m; + }; + + for (std::size_t trial = 0; trial < 5; trial++) { + sims[0] = std::make_unique(); + sims[1] = std::make_unique(); + + auto m0 = circuit(*(sims[0]), gens[0]); + auto m1 = circuit(*(sims[1]), gens[1]); + + CHECK(*m0 == *m1); + } +} + +TEST_CASE("Sample with a seeded device", "[Measures]") { + std::size_t shots = 100; + std::array, 2> sims; + std::vector> sample_vec(2, + std::vector(shots * 4)); + + std::vector> buffers{ + MemRefT{ + sample_vec[0].data(), sample_vec[0].data(), 0, {shots, 1}, {1, 1}}, + MemRefT{ + sample_vec[1].data(), sample_vec[1].data(), 0, {shots, 1}, {1, 1}}, + }; + std::vector> views{ + DataView(buffers[0].data_aligned, buffers[0].offset, + buffers[0].sizes, buffers[0].strides), + DataView(buffers[1].data_aligned, buffers[1].offset, + buffers[1].sizes, buffers[1].strides)}; + + std::vector gens{std::mt19937{37}, std::mt19937{37}}; + + auto circuit = [shots](LKSimulator &sim, DataView &view, + std::mt19937 &gen) { + sim.SetDevicePRNG(&gen); + std::vector Qs; + Qs.reserve(1); + Qs.push_back(sim.AllocateQubit()); + sim.NamedOperation("Hadamard", {}, {Qs[0]}, false); + sim.NamedOperation("RX", {0.5}, {Qs[0]}, false); + sim.Sample(view, shots); + }; + + for (std::size_t trial = 0; trial < 5; trial++) { + sims[0] = std::make_unique(); + sims[1] = std::make_unique(); + + for (std::size_t sim_idx = 0; sim_idx < sims.size(); sim_idx++) { + circuit(*(sims[sim_idx]), views[sim_idx], gens[sim_idx]); + } + + for (std::size_t i = 0; i < sample_vec[0].size(); i++) { + CHECK((sample_vec[0][i] == sample_vec[1][i])); + } } } diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp 
b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp index 28449e5015..ee8684e814 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp @@ -14,6 +14,7 @@ #pragma once #include #include +#include #include #include @@ -649,13 +650,16 @@ class Measurements final * Reference https://en.wikipedia.org/wiki/Inverse_transform_sampling * * @param num_samples Number of Samples + * @param seed Seed to generate the samples from * * @return std::vector to the samples. * Each sample has a length equal to the number of qubits. Each sample can * be accessed using the stride sample_id*num_qubits, where sample_id is a * number between 0 and num_samples-1. */ - auto generate_samples(std::size_t num_samples) -> std::vector { + auto generate_samples(std::size_t num_samples, + const std::optional &seed = std::nullopt) + -> std::vector { const std::size_t num_qubits = this->_statevector.getNumQubits(); const std::size_t N = this->_statevector.getLength(); Kokkos::View samples("num_samples", @@ -674,10 +678,12 @@ class Measurements final }); // Sampling using Random_XorShift64_Pool - Kokkos::Random_XorShift64_Pool<> rand_pool( - std::chrono::high_resolution_clock::now() - .time_since_epoch() - .count()); + auto rand_pool = seed.has_value() + ? 
Kokkos::Random_XorShift64_Pool<>(seed.value()) + : Kokkos::Random_XorShift64_Pool<>( + std::chrono::high_resolution_clock::now() + .time_since_epoch() + .count()); Kokkos::parallel_for( Kokkos::RangePolicy(0, num_samples), diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp index 4bf72e332b..d57bd70631 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -573,14 +574,17 @@ class Measurements final * Reference: https://en.wikipedia.org/wiki/Alias_method * * @param num_samples The number of samples to generate. + * @param seed Seed to generate the samples from * @return 1-D vector of samples in binary, each sample is * separated by a stride equal to the number of qubits. */ - std::vector generate_samples(const std::size_t num_samples) { + std::vector + generate_samples(const std::size_t num_samples, + const std::optional &seed = std::nullopt) { const std::size_t num_qubits = this->_statevector.getNumQubits(); std::vector wires(num_qubits); std::iota(wires.begin(), wires.end(), 0); - return generate_samples(wires, num_samples); + return generate_samples(wires, num_samples, seed); } /** @@ -588,15 +592,21 @@ class Measurements final * * @param wires Sample are generated for the specified wires. * @param num_samples The number of samples to generate. + * @param seed Seed to generate the samples from * @return 1-D vector of samples in binary, each sample is * separated by a stride equal to the number of qubits. 
*/ std::vector generate_samples(const std::vector &wires, - const std::size_t num_samples) { + const std::size_t num_samples, + const std::optional &seed = std::nullopt) { const std::size_t n_wires = wires.size(); std::vector samples(num_samples * n_wires); - this->setRandomSeed(); + if (seed.has_value()) { + this->setSeed(seed.value()); + } else { + this->setRandomSeed(); + } DiscreteRandomVariable drv{this->rng, probs(wires)}; // The Python layer expects a 2D array with dimensions (n_samples x // n_wires) and hence the linear index is `s * n_wires + (n_wires - 1 - diff --git a/pennylane_lightning/core/src/utils/Util.hpp b/pennylane_lightning/core/src/utils/Util.hpp index e0d3a1170e..5478cdbdcb 100644 --- a/pennylane_lightning/core/src/utils/Util.hpp +++ b/pennylane_lightning/core/src/utils/Util.hpp @@ -21,6 +21,7 @@ #include #include #include +#include // integral, floating_point #include #include // transform_reduce #include @@ -41,6 +42,7 @@ namespace Pennylane::Util { * @return constexpr std::complex */ template + requires std::integral || std::floating_point inline static constexpr auto ConstMult(U a, std::complex b) -> std::complex { return {a * b.real(), a * b.imag()}; diff --git a/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py b/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py new file mode 100644 index 0000000000..50f9acef38 --- /dev/null +++ b/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py @@ -0,0 +1,248 @@ +# Copyright 2018-2024 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class LightningGPUAdjointJacobian(LightningBaseAdjointJacobian):
    """Check and execute the adjoint Jacobian differentiation method.

    Args:
        qubit_state(LightningGPUStateVector): State Vector to calculate the adjoint Jacobian with.
        batch_obs(bool): If serialized tape is to be batched or not.
            For Lightning GPU, distribute the observations across GPUs in the same node. Defaults to False.
            For Lightning GPU-MPI, if `batch_obs=False` the computation requires more memory and is faster,
            while `batch_obs=True` allows a larger number of qubits simulation
            at the expense of high computational cost. Defaults to False.
    """

    # pylint: disable=too-few-public-methods

    def __init__(
        self,
        qubit_state: "LightningGPUStateVector",  # pylint: disable=undefined-variable
        batch_obs: bool = False,
    ) -> None:
        """Bind the state vector and select the matching C++ adjoint-Jacobian bindings."""

        super().__init__(qubit_state, batch_obs)

        self._dp = DevPool()

        self._use_mpi = qubit_state._mpi_handler.use_mpi

        if self._use_mpi:
            self._mpi_handler = qubit_state._mpi_handler

        # Initialize the C++ binds
        self._jacobian_lightning, self._create_ops_list_lightning = self._adjoint_jacobian_dtype()

        # Warning about performance with MPI and batch observation
        if self._use_mpi and not self._batch_obs:
            warn(
                "Using LightningGPU with `batch_obs=False` and `use_mpi=True` has the limitation "
                "of requiring more memory. If you want to allocate larger number of qubits use "
                "the option `batch_obs=True`. "
                "For more information Check out the section `Parallel adjoint differentiation "
                "support` in our website "
                "https://docs.pennylane.ai/projects/lightning/en/stable/lightning_gpu/device.html "
                "for more details.",
                RuntimeWarning,
            )

    def _adjoint_jacobian_dtype(self):
        """Select the Lightning GPU adjoint-Jacobian C++ bindings.

        The choice depends on whether MPI is in use and on ``self.dtype``
        (``np.complex64`` selects the ``C64`` bindings, anything else ``C128``).

        Returns:
            tuple: ``(jacobian, create_ops_list)`` where ``jacobian`` is an *instance* of the
            selected ``AdjointJacobian*`` class and ``create_ops_list`` is the matching
            ``create_ops_list*`` callable.
        """
        single_precision = self.dtype == np.complex64

        if self._use_mpi:
            if not MPI_SUPPORT:
                # Only a warning is emitted here; the MPI names below are then unbound.
                warn(str(mpi_error), UserWarning)

            jacobian_lightning = (
                AdjointJacobianMPIC64() if single_precision else AdjointJacobianMPIC128()
            )
            create_ops_list_lightning = (
                create_ops_listMPIC64 if single_precision else create_ops_listMPIC128
            )
            return jacobian_lightning, create_ops_list_lightning

        # without MPI
        jacobian_lightning = AdjointJacobianC64() if single_precision else AdjointJacobianC128()
        create_ops_list_lightning = create_ops_listC64 if single_precision else create_ops_listC128
        return jacobian_lightning, create_ops_list_lightning

    def _process_jacobian_tape(
        self, tape: QuantumTape, split_obs: "bool | int" = False, use_mpi: bool = False
    ):
        """Process a tape, serializing and building a dictionary proper for
        the adjoint Jacobian calculation in the C++ layer.

        Args:
            tape (QuantumTape): Operations and measurements that represent instructions for execution on Lightning.
            split_obs (bool or int, optional): Forwarded unchanged to ``QuantumScriptSerializer``.
                ``calculate_jacobian`` passes either ``False`` or the number of devices
                reported by the device pool. Defaults to False.
            use_mpi (bool, optional): If distributing computation with MPI. Defaults to False.

        Returns:
            dict or None: dictionary providing serialized data for Jacobian calculation, or
            ``None`` when the tape has no trainable parameters.
        """
        # Nothing to differentiate: bail out before paying for serialization.
        trainable_params = sorted(tape.trainable_params)
        if not trainable_params:
            return None

        use_csingle = self._qubit_state.dtype == np.complex64

        obs_serialized, obs_indices = QuantumScriptSerializer(
            self._qubit_state.device_name, use_csingle, use_mpi, split_obs
        ).serialize_observables(tape)

        ops_serialized, use_sp = QuantumScriptSerializer(
            self._qubit_state.device_name, use_csingle, use_mpi, split_obs
        ).serialize_ops(tape)

        ops_serialized = self._create_ops_list_lightning(*ops_serialized)

        # We need to filter out indices in trainable_params which do not
        # correspond to operators.
        tp_shift = []
        record_tp_rows = []
        all_params = 0

        for op_idx, trainable_param in enumerate(trainable_params):
            # get op_idx-th operator among differentiable operators
            operation, _, _ = tape.get_operation(op_idx)
            if isinstance(operation, Operation) and not isinstance(
                operation, (BasisState, StatePrep)
            ):
                # We now just ignore non-op or state preps
                tp_shift.append(trainable_param)
                record_tp_rows.append(all_params)
            # NOTE(review): counted for every trainable parameter, kept or not, so that
            # `record_tp_rows` indexes columns of the full-width Jacobian built in
            # `calculate_jacobian`. Confirm this nesting against the upstream file.
            all_params += 1

        if use_sp:
            # When the first element of the tape is state preparation. Still, I am not sure
            # whether there must be only one state preparation...
            tp_shift = [i - 1 for i in tp_shift]

        return {
            "state_vector": self.state,
            "obs_serialized": obs_serialized,
            "ops_serialized": ops_serialized,
            "tp_shift": tp_shift,
            "record_tp_rows": record_tp_rows,
            "all_params": all_params,
            "obs_indices": obs_indices,
        }

    def calculate_jacobian(self, tape: QuantumTape):
        """Computes the Jacobian with the adjoint method.

        .. code-block:: python

            statevector = LightningGPUStateVector(num_wires=num_wires)
            statevector = statevector.get_final_state(tape)
            jacobian = LightningGPUAdjointJacobian(statevector).calculate_jacobian(tape)

        Args:
            tape (QuantumTape): Operations and measurements that represent instructions for execution on Lightning.

        Returns:
            The Jacobian of a tape.
        """

        empty_array = self._handle_raises(tape, is_jacobian=True)

        if empty_array:
            return np.array([], dtype=self.dtype)

        if self._use_mpi:
            # with MPI batched means compute Jacobian one observables at a time,
            # no point splitting linear combinations
            split_obs = False
        else:
            split_obs = self._dp.getTotalDevices() if self._batch_obs else False

        processed_data = self._process_jacobian_tape(tape, split_obs, self._use_mpi)

        if not processed_data:  # training_params is empty
            return np.array([], dtype=self.dtype)

        trainable_params = processed_data["tp_shift"]

        # Batching of Measurements uses the `.batched` entry point of the binding
        compute = self._jacobian_lightning.batched if self._batch_obs else self._jacobian_lightning
        jac = compute(
            processed_data["state_vector"],
            processed_data["obs_serialized"],
            processed_data["ops_serialized"],
            trainable_params,
        )

        jac = np.array(jac)
        has_shape0 = bool(len(jac))

        # Build a 0/1 reduction matrix with one column per serialized observable and one row
        # per distinct observable index; multiplying by it sums the Jacobian rows that share
        # an index.
        rows = processed_data["obs_indices"]
        num_obs = len(np.unique(rows))
        cols = np.arange(len(rows), dtype=int)
        data = np.ones(len(rows))
        red_mat = csr_matrix((data, (rows, cols)), shape=(num_obs, len(rows)))
        jac = red_mat @ jac.reshape((len(rows), -1))
        jac = jac.reshape(-1, len(trainable_params)) if has_shape0 else jac

        # Scatter the computed columns into a zero matrix that has one column per trainable
        # parameter; columns of skipped parameters stay zero.
        jac_r = np.zeros((jac.shape[0], processed_data["all_params"]))
        jac_r[:, processed_data["record_tp_rows"]] = jac
        return self._adjoint_jacobian_processing(jac_r)
class LightningGPUMeasurements(LightningBaseMeasurements):  # pylint: disable=too-few-public-methods
    """Lightning GPU Measurements class

    Measures the state provided by the LightningGPUStateVector class.

    Args:
        qubit_state(LightningGPUStateVector): Lightning state-vector class containing the state vector to be measured.
    """

    def __init__(
        self,
        qubit_state: "LightningGPUStateVector",  # pylint: disable=undefined-variable
    ) -> None:
        """Bind the state vector and instantiate the matching C++ measurements object."""

        super().__init__(qubit_state)

        self._use_mpi = qubit_state._mpi_handler.use_mpi

        if self._use_mpi:
            self._mpi_handler = qubit_state._mpi_handler
            self._num_local_wires = qubit_state._mpi_handler.num_local_wires

        self._measurement_lightning = self._measurement_dtype()(qubit_state.state_vector)

    def _measurement_dtype(self):
        """Binding to Lightning GPU Measurements C++ class.

        Returns: the Measurements class (MPI or single-GPU variant, chosen by ``self.dtype``)
        """
        double_precision = self.dtype == np.complex128

        if self._use_mpi:
            if not MPI_SUPPORT:
                # Only a warning is emitted here; the MPI names below are then unbound.
                warn(str(mpi_error), UserWarning)

            return MeasurementsMPIC128 if double_precision else MeasurementsMPIC64

        # without MPI
        return MeasurementsC128 if double_precision else MeasurementsC64

    def _measure_with_samples_diagonalizing_gates(
        self,
        mps: List[SampleMeasurement],
        shots: Shots,
    ) -> TensorLike:
        """
        Returns the samples of the measurement process performed on the given state,
        by rotating the state into the measurement basis using the diagonalizing gates
        given by the measurement process.

        Args:
            mps (List[~.measurements.SampleMeasurement]): The sample measurements to perform
            shots (~.measurements.Shots): The number of samples to take

        Returns:
            TensorLike[Any]: Sample measurement results
        """
        # apply diagonalizing gates
        self._apply_diagonalizing_gates(mps)

        # Specific for LGPU:
        total_indices = self._qubit_state.num_wires
        wires = qml.wires.Wires(range(total_indices))

        def _process_single_shot(samples):
            processed = []
            for mp in mps:
                res = mp.process_samples(samples, wires)
                if not isinstance(mp, CountsMP):
                    res = qml.math.squeeze(res)

                processed.append(res)

            return tuple(processed)

        try:
            samples = self._measurement_lightning.generate_samples(
                len(wires), shots.total_shots
            ).astype(int, copy=False)

        except ValueError as ex:
            if str(ex) != "probabilities contain NaN":
                # Unrelated failure: re-raise with the original traceback intact.
                raise
            # NaN probabilities: fall back to an all-zero sample array of the expected shape.
            samples = qml.math.full((shots.total_shots, len(wires)), 0)

        # Undo the basis rotation applied at the top of this method.
        self._apply_diagonalizing_gates(mps, adjoint=True)

        # if there is a shot vector, use the shots.bins generator to
        # split samples w.r.t. the shots
        processed_samples = [
            _process_single_shot(samples[..., lower:upper, :]) for lower, upper in shots.bins()
        ]

        return (
            tuple(zip(*processed_samples)) if shots.has_partitioned_shots else processed_samples[0]
        )

    def expval(self, measurementprocess: MeasurementProcess):
        """Expectation value of the supplied observable contained in the MeasurementProcess.

        Args:
            measurementprocess (StateMeasurement): measurement to apply to the state

        Returns:
            Expectation value of the observable

        Raises:
            RuntimeError: with MPI, if a ``Hermitian`` observable acts on more wires than
                the number of local wires.
        """
        obs = measurementprocess.obs

        if isinstance(obs, qml.SparseHamiltonian):
            # ensuring CSR sparse representation.

            if self._use_mpi:
                if self._mpi_handler.mpi_manager.getRank() == 0:
                    # CSR_SparseHamiltonian for rank == 0
                    CSR_SparseHamiltonian = obs.sparse_matrix().tocsr()
                else:
                    # Identity for CSR_SparseHamiltonian to pass to processes with rank != 0
                    # to reduce host(cpu) memory requirements
                    Hmat = qml.Hamiltonian([1.0], [qml.Identity(0)]).sparse_matrix()
                    H_sparse = qml.SparseHamiltonian(Hmat, wires=range(1))
                    CSR_SparseHamiltonian = H_sparse.sparse_matrix().tocsr()
            else:
                CSR_SparseHamiltonian = obs.sparse_matrix(
                    wire_order=list(range(self._qubit_state.num_wires))
                ).tocsr(copy=False)

            return self._measurement_lightning.expval(
                CSR_SparseHamiltonian.indptr,
                CSR_SparseHamiltonian.indices,
                CSR_SparseHamiltonian.data,
            )

        # use specialized functors to compute expval(Hermitian)
        if isinstance(obs, qml.Hermitian):
            observable_wires = obs.wires
            if self._use_mpi and len(observable_wires) > self._num_local_wires:
                raise RuntimeError(
                    "MPI backend does not support Hermitian with number of target wires larger than local wire number."
                )
            matrix = obs.matrix()
            return self._measurement_lightning.expval(matrix, observable_wires)

        if (
            isinstance(obs, qml.ops.Hamiltonian)
            or (obs.arithmetic_depth > 0)
            or isinstance(obs.name, list)
        ):
            # Composite observables go through the serializer.
            # pylint: disable=protected-access
            ob_serialized = QuantumScriptSerializer(
                self._qubit_state.device_name, self.dtype == np.complex64, self._use_mpi
            )._ob(obs)
            return self._measurement_lightning.expval(ob_serialized)

        # Remaining observables are dispatched by name and wires.
        return self._measurement_lightning.expval(obs.name, obs.wires)
# MPI options
class MPIHandler:  # pylint: disable=too-few-public-methods
    """MPI handler for PennyLane Lightning GPU device.

    MPI handler to use a GPU-backed Lightning device using NVIDIA cuQuantum SDK with parallel capabilities.

    When MPI is used, this class initializes the variables and helpers needed to handle the
    data across nodes and performs checks for memory allocation on each device.

    Args:
        mpi (bool): declare if the device will use the MPI support.
        mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB.
        num_wires (int): the number of wires to initialize the device with.
        c_dtype (np.complex64, np.complex128): Datatypes for statevector representation.

    Raises:
        ImportError: if ``mpi`` is true but the MPI bindings could not be imported.
        ValueError: if ``mpi_buf_size`` is negative or not a power of two, if there are fewer
            devices than processes on the node, or if there are too many processes for
            ``num_wires``.
        RuntimeError: if the MPI buffer is larger than the local state vector.
    """

    def __init__(
        self,
        mpi: bool,
        mpi_buf_size: int,
        num_wires: int,
        c_dtype: Union[np.complex64, np.complex128],
    ) -> None:

        self.use_mpi = mpi
        self.mpi_buf_size = mpi_buf_size

        if self.use_mpi:
            # Validate before touching any binding object. `DevPool` comes from the same
            # guarded import as the MPI names, so checking MPI_SUPPORT first is what lets
            # the explanatory ImportError surface instead of a NameError.
            # NOTE(review): the buffer-size checks are assumed to apply only when MPI is
            # requested; confirm this nesting against the upstream file.
            if not MPI_SUPPORT:
                raise ImportError(
                    "Pre-compiled binaries for lightning.gpu with MPI support are not available. "
                    "To manually compile from source, follow the instructions at "
                    "https://docs.pennylane.ai/projects/lightning/en/stable/dev/installation.html."
                )

            if mpi_buf_size < 0:
                raise ValueError(f"Unsupported mpi_buf_size value: {mpi_buf_size}, should be >= 0")

            # x & (x - 1) is zero only for powers of two (and for zero, which is allowed).
            if mpi_buf_size > 0 and (mpi_buf_size & (mpi_buf_size - 1)):
                raise ValueError(
                    f"Unsupported mpi_buf_size value: {mpi_buf_size}. mpi_buf_size should be power of 2."
                )

        self._dp = DevPool()

        if self.use_mpi:
            # After check if all MPI parameters are ok
            self.mpi_manager, self.devtag = self._mpi_init_helper(num_wires)

            # set the number of global and local wires
            commSize = self.mpi_manager.getSize()
            self.num_global_wires = commSize.bit_length() - 1
            self.num_local_wires = num_wires - self.num_global_wires

            self._check_memory_size(c_dtype, mpi_buf_size)
        else:
            self.num_local_wires = num_wires
            # NOTE(review): without MPI the global wire count is set to `num_wires`, not 0.
            # Kept as in the original; confirm this is what consumers expect.
            self.num_global_wires = num_wires

    def _mebibytesToBytes(self, mebibytes):
        """Convert a size in MiB to bytes."""
        return mebibytes * 1024 * 1024

    def _check_memory_size(self, c_dtype, mpi_buf_size):
        """Raise ``RuntimeError`` if the MPI buffer (in MiB) exceeds the local state-vector size."""
        # Memory size in bytes
        sv_memsize = np.dtype(c_dtype).itemsize * (1 << self.num_local_wires)
        if self._mebibytesToBytes(mpi_buf_size) > sv_memsize:
            raise RuntimeError("The MPI buffer size is larger than the local state vector size.")

    def _mpi_init_helper(self, num_wires):
        """Set up MPI checks and initializations.

        Returns:
            tuple: ``(mpi_manager, devtag)`` for the device selected for this process.
        """

        # initialize MPIManager and config check in the MPIManager ctor
        mpi_manager = MPIManager()

        # check if number of GPUs per node is larger than number of processes per node
        numDevices = self._dp.getTotalDevices()
        numProcsNode = mpi_manager.getSizeNode()

        if numDevices < numProcsNode:
            raise ValueError(
                "Number of devices should be larger than or equal to the number of processes on each node."
            )

        # check if the process number is larger than number of statevector elements
        if mpi_manager.getSize() > (1 << (num_wires - 1)):
            raise ValueError(
                "Number of processes should be smaller than the number of statevector elements."
            )

        # set GPU device: processes on a node are mapped to devices by rank modulo node size
        rank = mpi_manager.getRank()
        deviceid = rank % numProcsNode
        self._dp.setDeviceID(deviceid)
        devtag = DevTag(deviceid)

        return (mpi_manager, devtag)
+""" +from warnings import warn + +try: + from pennylane_lightning.lightning_gpu_ops import StateVectorC64, StateVectorC128 + + try: # Try to import the MPI modules + from pennylane_lightning.lightning_gpu_ops import StateVectorMPIC64, StateVectorMPIC128 + + mpi_error = None + MPI_SUPPORT = True + except ImportError as ex_mpi: + mpi_error = ex_mpi + MPI_SUPPORT = False + +except ImportError as ex: + warn(str(ex), UserWarning) + +from typing import Union + +import numpy as np +import pennylane as qml +from pennylane import DeviceError +from pennylane.measurements import MidMeasureMP +from pennylane.ops import Conditional +from pennylane.ops.op_math import Adjoint +from pennylane.tape import QuantumScript +from pennylane.wires import Wires + +# pylint: disable=ungrouped-imports +from pennylane_lightning.core._serialize import global_phase_diagonal +from pennylane_lightning.core._state_vector_base import LightningBaseStateVector + +from ._measurements import LightningGPUMeasurements +from ._mpi_handler import MPIHandler + +gate_cache_needs_hash = ( + qml.BlockEncode, + qml.ControlledQubitUnitary, + qml.DiagonalQubitUnitary, + qml.MultiControlledX, + qml.OrbitalRotation, + qml.PSWAP, + qml.QubitUnitary, +) + + +class LightningGPUStateVector(LightningBaseStateVector): + """Lightning GPU state-vector class. + + Interfaces with C++ python binding methods for state-vector manipulation. + + Args: + num_wires(int): the number of wires to initialize the device with + dtype: Datatypes for state-vector representation. Must be one of + ``np.complex64`` or ``np.complex128``. Default is ``np.complex128`` + device_name(string): state vector device name. Options: ["lightning.gpu"] + mpi_handler(MPIHandler): MPI handler for PennyLane Lightning GPU device. + Provides functionality to distribute the state-vector to multiple devices. + use_async (bool): is host-device data copy asynchronized or not. 
+ """ + + def __init__( + self, + num_wires: int, + dtype: Union[np.complex128, np.complex64] = np.complex128, + mpi_handler: MPIHandler = None, + use_async: bool = False, + ): + + super().__init__(num_wires, dtype) + + self._device_name = "lightning.gpu" + + # Initialize GPU and MPI variables + if mpi_handler is None: + mpi_handler = MPIHandler(False, 0, num_wires, dtype) + + self._num_global_wires = mpi_handler.num_global_wires + self._num_local_wires = mpi_handler.num_local_wires + + self._mpi_handler = mpi_handler + self._use_async = use_async + + # Initialize the state vector + if self._mpi_handler.use_mpi: # using MPI + self._qubit_state = self._state_dtype()( + self._mpi_handler.mpi_manager, + self._mpi_handler.devtag, + self._mpi_handler.mpi_buf_size, + self._mpi_handler.num_global_wires, + self._mpi_handler.num_local_wires, + ) + else: # without MPI + self._qubit_state = self._state_dtype()(self.num_wires) + + def _state_dtype(self): + """Binding to Lightning Managed state vector C++ class. + + Returns: the state vector class + """ + if self._mpi_handler.use_mpi: + if not MPI_SUPPORT: + warn(str(mpi_error), UserWarning) + + return StateVectorMPIC128 if self.dtype == np.complex128 else StateVectorMPIC64 + + # without MPI + return StateVectorC128 if self.dtype == np.complex128 else StateVectorC64 + + def syncD2H(self, state_vector, use_async: bool = False): + """Copy the state vector data on device to a state vector on the host provided by the user. + Args: + state_vector(array[complex]): the state vector array on host. + use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. + Note: This function only supports synchronized memory copy. 
+ + **Example** + + >>> dev = qml.device('lightning.gpu', wires=1) + >>> dev.apply([qml.PauliX(wires=[0])]) + >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.c_type) + >>> dev.syncD2H(state_vector) + >>> print(state_vector) + [0.+0.j 1.+0.j] + """ + self._qubit_state.DeviceToHost(state_vector.ravel(order="C"), use_async) + + @property + def state(self): + """Copy the state vector data from the device to the host. + + A state vector Numpy array is explicitly allocated on the host to store and return the data. + + **Example** + + >>> dev = qml.device('lightning.gpu', wires=1) + >>> dev.apply([qml.PauliX(wires=[0])]) + >>> print(dev.state) + [0.+0.j 1.+0.j] + """ + state = np.zeros(2**self._num_local_wires, dtype=self.dtype) + self.syncD2H(state) + return state + + def syncH2D(self, state_vector, use_async: bool = False): + """Copy the state vector data on host provided by the user to the state vector on the device + Args: + state_vector(array[complex]): the state vector array on host. + use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. + Note: This function only supports synchronized memory copy. 
+ + **Example** + + >>> dev = qml.device('lightning.gpu', wires=3) + >>> obs = qml.Identity(0) @ qml.PauliX(1) @ qml.PauliY(2) + >>> obs1 = qml.Identity(1) + >>> H = qml.Hamiltonian([1.0, 1.0], [obs1, obs]) + >>> state_vector = np.array([0.0 + 0.0j, 0.0 + 0.1j, 0.1 + 0.1j, 0.1 + 0.2j, + 0.2 + 0.2j, 0.3 + 0.3j, 0.3 + 0.4j, 0.4 + 0.5j,], dtype=np.complex64,) + >>> dev.syncH2D(state_vector) + >>> res = dev.expval(H) + >>> print(res) + 1.0 + """ + self._qubit_state.HostToDevice(state_vector.ravel(order="C"), use_async) + + @staticmethod + def _asarray(arr, dtype=None): + arr = np.asarray(arr) # arr is not copied + + if arr.dtype.kind not in ["f", "c"]: + return arr + + if not dtype: + dtype = arr.dtype + + return arr + + def _apply_state_vector(self, state, device_wires, use_async: bool = False): + """Initialize the state vector on GPU with a specified state on host. + Note that any use of this method will introduce host-overheads. + Args: + state (array[complex]): normalized input state (on host) of length ``2**len(wires)`` + or broadcasted state of shape ``(batch_size, 2**len(wires))`` + device_wires (Wires): wires that get initialized in the state + use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. + Note: This function only supports synchronized memory copy from host to device. + """ + + if isinstance(state, self._qubit_state.__class__): + raise DeviceError("LightningGPU does not support allocate external state_vector.") + + # TODO + # Create an implementation in the C++ backend and binding to be able + # to allocate memory for a new statevector and copy the data + # from an external state vector. 
+ # state_data = allocate_aligned_array(state.size, np.dtype(self.dtype), True) + # state.getState(state_data) + # state = state_data + + state = self._asarray(state, dtype=self.dtype) # this operation on host + output_shape = [2] * self._num_local_wires + + if len(device_wires) == self.num_wires and Wires(sorted(device_wires)) == device_wires: + # Initialize the entire device state with the input state + if self.num_wires == self._num_local_wires: + self.syncH2D(np.reshape(state, output_shape)) + return + local_state = np.zeros(2**self._num_local_wires, dtype=self._dtype) + self._mpi_handler.mpi_manager.Scatter(state, local_state, 0) + self.syncH2D(np.reshape(local_state, output_shape)) + return + + # set the state vector on GPU with provided state and their corresponding wires + self._qubit_state.setStateVector(state, list(device_wires), use_async) + + def _apply_lightning_controlled(self, operation): + """Apply an arbitrary controlled operation to the state tensor. + + Args: + operation (~pennylane.operation.Operation): controlled operation to apply + + Returns: + None + """ + state = self.state_vector + + control_wires = list(operation.control_wires) + control_values = operation.control_values + name = operation.name + # Apply GlobalPhase + inv = False + param = operation.parameters[0] + wires = self.wires.indices(operation.wires) + matrix = global_phase_diagonal(param, self.wires, control_wires, control_values) + state.apply(name, wires, inv, [[param]], matrix) + + def _apply_lightning_midmeasure( + self, operation: MidMeasureMP, mid_measurements: dict, postselect_mode: str + ): + """Execute a MidMeasureMP operation and return the sample in mid_measurements. + + Args: + operation (~pennylane.operation.Operation): mid-circuit measurement + mid_measurements (None, dict): Dictionary of mid-circuit measurements + postselect_mode (str): Configuration for handling shots with mid-circuit measurement + postselection. 
Use ``"hw-like"`` to discard invalid shots and ``"fill-shots"`` to + keep the same number of shots. + + Returns: + None + """ + wires = self.wires.indices(operation.wires) + wire = list(wires)[0] + if postselect_mode == "fill-shots" and operation.postselect is not None: + sample = operation.postselect + else: + circuit = QuantumScript([], [qml.sample(wires=operation.wires)], shots=1) + sample = LightningGPUMeasurements(self).measure_final_state(circuit) + sample = np.squeeze(sample) + mid_measurements[operation] = sample + getattr(self.state_vector, "collapse")(wire, bool(sample)) + if operation.reset and bool(sample): + self.apply_operations([qml.PauliX(operation.wires)], mid_measurements=mid_measurements) + + # pylint: disable=unused-argument + def _apply_lightning( + self, operations, mid_measurements: dict = None, postselect_mode: str = None + ): + """Apply a list of operations to the state vector. + + Args: + operations (list[~pennylane.operation.Operation]): operations to apply + mid_measurements (None, dict): Dictionary of mid-circuit measurements + postselect_mode (str): Configuration for handling shots with mid-circuit measurement + postselection. Use ``"hw-like"`` to discard invalid shots and ``"fill-shots"`` to + keep the same number of shots. Default is ``None``. + + Returns: + None + """ + state = self.state_vector + + # Skip over identity operations instead of performing + # matrix multiplication with it. 
+ for operation in operations: + if isinstance(operation, qml.Identity): + continue + if isinstance(operation, Adjoint): + name = operation.base.name + invert_param = True + else: + name = operation.name + invert_param = False + method = getattr(state, name, None) + wires = list(operation.wires) + + if isinstance(operation, Conditional): + if operation.meas_val.concretize(mid_measurements): + self._apply_lightning([operation.base]) + elif isinstance(operation, MidMeasureMP): + self._apply_lightning_midmeasure( + operation, mid_measurements, postselect_mode=postselect_mode + ) + elif method is not None: # apply specialized gate + param = operation.parameters + method(wires, invert_param, param) + elif isinstance(operation, qml.ops.Controlled) and isinstance( + operation.base, qml.GlobalPhase + ): # apply n-controlled gate + # LGPU do not support the controlled gates except for GlobalPhase + self._apply_lightning_controlled(operation) + else: # apply gate as a matrix + try: + mat = qml.matrix(operation) + except AttributeError: # pragma: no cover + # To support older versions of PL + mat = operation.matrix + + r_dtype = np.float32 if self.dtype == np.complex64 else np.float64 + param = ( + [[r_dtype(operation.hash)]] + if isinstance(operation, gate_cache_needs_hash) + else [] + ) + if len(mat) == 0: + raise ValueError("Unsupported operation") + + self._qubit_state.apply( + name, + wires, + False, + param, + mat.ravel(order="C"), # inv = False: Matrix already in correct form; + ) # Parameters can be ignored for explicit matrices; F-order for cuQuantum diff --git a/pennylane_lightning/lightning_gpu/lightning_gpu.py b/pennylane_lightning/lightning_gpu/lightning_gpu.py index 2894b999f3..56454613cc 100644 --- a/pennylane_lightning/lightning_gpu/lightning_gpu.py +++ b/pennylane_lightning/lightning_gpu/lightning_gpu.py @@ -16,921 +16,524 @@ This module contains the :class:`~.LightningGPU` class, a PennyLane simulator device that interfaces with the NVIDIA cuQuantum 
cuStateVec simulator library for GPU-enabled calculations. """ +from __future__ import annotations from ctypes.util import find_library +from dataclasses import replace from importlib import util as imp_util -from itertools import product from pathlib import Path -from typing import List, Union +from typing import List, Optional, Union from warnings import warn import numpy as np import pennylane as qml -from pennylane import BasisState, DeviceError, QuantumFunctionError, Rot, StatePrep, math -from pennylane.measurements import Expectation, State -from pennylane.ops.op_math import Adjoint -from pennylane.wires import Wires -from scipy.sparse import csr_matrix - -from pennylane_lightning.core._serialize import QuantumScriptSerializer, global_phase_diagonal -from pennylane_lightning.core._version import __version__ - -# pylint: disable=import-error, no-name-in-module, ungrouped-imports -from pennylane_lightning.core.lightning_base import LightningBase +from pennylane.devices import DefaultExecutionConfig, ExecutionConfig +from pennylane.devices.default_qubit import adjoint_ops +from pennylane.devices.modifiers import simulator_tracking, single_tape_support +from pennylane.devices.preprocess import ( + decompose, + mid_circuit_measurements, + no_sampling, + validate_adjoint_trainable_params, + validate_device_wires, + validate_measurements, + validate_observables, +) +from pennylane.measurements import MidMeasureMP +from pennylane.operation import DecompositionUndefinedError, Operator, Tensor +from pennylane.ops import Prod, SProd, Sum +from pennylane.tape import QuantumScript +from pennylane.transforms.core import TransformProgram +from pennylane.typing import Result + +from pennylane_lightning.core.lightning_newAPI_base import ( + LightningBase, + QuantumTape_or_Batch, + Result_or_ResultBatch, +) try: from pennylane_lightning.lightning_gpu_ops import ( DevPool, - MeasurementsC64, - MeasurementsC128, - StateVectorC64, - StateVectorC128, backend_info, get_gpu_arch, 
is_gpu_supported, ) - from pennylane_lightning.lightning_gpu_ops.algorithms import ( - AdjointJacobianC64, - AdjointJacobianC128, - create_ops_listC64, - create_ops_listC128, - ) - - try: - # pylint: disable=no-name-in-module - from pennylane_lightning.lightning_gpu_ops import ( - DevTag, - MeasurementsMPIC64, - MeasurementsMPIC128, - MPIManager, - StateVectorMPIC64, - StateVectorMPIC128, - ) - from pennylane_lightning.lightning_gpu_ops.algorithmsMPI import ( - AdjointJacobianMPIC64, - AdjointJacobianMPIC128, - create_ops_listMPIC64, - create_ops_listMPIC128, - ) - - MPI_SUPPORT = True - except ImportError as ex: - warn(str(ex), UserWarning) - MPI_SUPPORT = False - - if find_library("custatevec") is None and not imp_util.find_spec( - "cuquantum" - ): # pragma: no cover - raise ImportError( - "custatevec libraries not found. Please pip install the appropriate custatevec library in a virtual environment." - ) - if not DevPool.getTotalDevices(): # pragma: no cover - raise ValueError("No supported CUDA-capable device found") - - if not is_gpu_supported(): # pragma: no cover - raise ValueError(f"CUDA device is an unsupported version: {get_gpu_arch()}") LGPU_CPP_BINARY_AVAILABLE = True + except (ImportError, ValueError) as ex: warn(str(ex), UserWarning) - backend_info = None LGPU_CPP_BINARY_AVAILABLE = False + backend_info = None - -def _gpu_dtype(dtype, mpi=False): - if dtype not in [np.complex128, np.complex64]: # pragma: no cover - raise ValueError(f"Data type is not supported for state-vector computation: {dtype}") - if mpi: - return StateVectorMPIC128 if dtype == np.complex128 else StateVectorMPIC64 - return StateVectorC128 if dtype == np.complex128 else StateVectorC64 - - -def _adj_dtype(use_csingle, mpi=False): - if mpi: - return AdjointJacobianMPIC64 if use_csingle else AdjointJacobianMPIC128 - return AdjointJacobianC64 if use_csingle else AdjointJacobianC128 - - -def _mebibytesToBytes(mebibytes): - return mebibytes * 1024 * 1024 - - -allowed_operations = { - 
"Identity", - "BasisState", - "QubitStateVector", - "StatePrep", - "QubitUnitary", - "ControlledQubitUnitary", - "MultiControlledX", - "DiagonalQubitUnitary", - "PauliX", - "PauliY", - "PauliZ", - "MultiRZ", - "GlobalPhase", - "C(GlobalPhase)", - "Hadamard", - "S", - "Adjoint(S)", - "T", - "Adjoint(T)", - "SX", - "Adjoint(SX)", - "CNOT", - "SWAP", - "ISWAP", - "PSWAP", - "Adjoint(ISWAP)", - "SISWAP", - "Adjoint(SISWAP)", - "SQISW", - "CSWAP", - "Toffoli", - "CY", - "CZ", - "PhaseShift", - "ControlledPhaseShift", - "RX", - "RY", - "RZ", - "Rot", - "CRX", - "CRY", - "CRZ", - "CRot", - "IsingXX", - "IsingYY", - "IsingZZ", - "IsingXY", - "SingleExcitation", - "SingleExcitationPlus", - "SingleExcitationMinus", - "DoubleExcitation", - "DoubleExcitationPlus", - "DoubleExcitationMinus", - "QubitCarry", - "QubitSum", - "OrbitalRotation", - "ECR", - "BlockEncode", - "C(BlockEncode)", -} - -allowed_observables = { - "PauliX", - "PauliY", - "PauliZ", - "Hadamard", - "SparseHamiltonian", - "Hamiltonian", - "LinearCombination", - "Hermitian", - "Identity", - "Projector", - "Sum", - "Prod", - "SProd", -} - -gate_cache_needs_hash = ( - qml.BlockEncode, - qml.ControlledQubitUnitary, - qml.DiagonalQubitUnitary, - qml.MultiControlledX, - qml.OrbitalRotation, - qml.PSWAP, - qml.QubitUnitary, +from ._adjoint_jacobian import LightningGPUAdjointJacobian +from ._measurements import LightningGPUMeasurements +from ._mpi_handler import MPIHandler +from ._state_vector import LightningGPUStateVector + +# The set of supported operations. 
+_operations = frozenset( + { + "Identity", + "QubitStateVector", + "QubitUnitary", + "ControlledQubitUnitary", + "MultiControlledX", + "DiagonalQubitUnitary", + "PauliX", + "PauliY", + "PauliZ", + "MultiRZ", + "GlobalPhase", + "C(GlobalPhase)", + "Hadamard", + "S", + "Adjoint(S)", + "T", + "Adjoint(T)", + "SX", + "Adjoint(SX)", + "CNOT", + "SWAP", + "ISWAP", + "PSWAP", + "Adjoint(ISWAP)", + "SISWAP", + "Adjoint(SISWAP)", + "SQISW", + "CSWAP", + "Toffoli", + "CY", + "CZ", + "PhaseShift", + "ControlledPhaseShift", + "RX", + "RY", + "RZ", + "Rot", + "CRX", + "CRY", + "CRZ", + "CRot", + "IsingXX", + "IsingYY", + "IsingZZ", + "IsingXY", + "SingleExcitation", + "SingleExcitationPlus", + "SingleExcitationMinus", + "DoubleExcitation", + "DoubleExcitationPlus", + "DoubleExcitationMinus", + "QubitCarry", + "QubitSum", + "OrbitalRotation", + "ECR", + "BlockEncode", + "C(BlockEncode)", + } +) +# End the set of supported operations. + +# The set of supported observables. +_observables = frozenset( + { + "PauliX", + "PauliY", + "PauliZ", + "Hadamard", + "SparseHamiltonian", + "Hamiltonian", + "LinearCombination", + "Hermitian", + "Identity", + "Projector", + "Sum", + "Prod", + "SProd", + } ) -class LightningGPU(LightningBase): # pylint: disable=too-many-instance-attributes - """PennyLane Lightning GPU device. +def stopping_condition(op: Operator) -> bool: + """A function that determines whether or not an operation is supported by ``lightning.gpu``.""" + # To avoid building matrices beyond the given thresholds. + # This should reduce runtime overheads for larger systems. + if isinstance(op, qml.QFT): + return len(op.wires) < 10 + if isinstance(op, qml.GroverOperator): + return len(op.wires) < 13 + if isinstance(op, qml.PauliRot): + return False - A GPU-backed Lightning device using NVIDIA cuQuantum SDK. + return op.name in _operations - Use of this device requires pre-built binaries or compilation from source. 
Check out the - :doc:`/lightning_gpu/installation` guide for more details. - Args: - wires (int): the number of wires to initialize the device with - mpi (bool): enable MPI support. MPI support will be enabled if ``mpi`` is set as``True``. - mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB. - sync (bool): immediately sync with host-sv after applying operations - c_dtype: Datatypes for statevector representation. Must be one of ``np.complex64`` or ``np.complex128``. - shots (int): How many times the circuit should be evaluated (or sampled) to estimate - the expectation values. Defaults to ``None`` if not specified. Setting - to ``None`` results in computing statistics like expectation values and - variances analytically. - batch_obs (Union[bool, int]): determine whether to use multiple GPUs within the same node or not - """ - - name = "Lightning GPU PennyLane plugin" - short_name = "lightning.gpu" +def stopping_condition_shots(op: Operator) -> bool: + """A function that determines whether or not an operation is supported by ``lightning.gpu`` + with finite shots.""" + return stopping_condition(op) or isinstance(op, (MidMeasureMP, qml.ops.op_math.Conditional)) - operations = allowed_operations - observables = allowed_observables - _backend_info = backend_info - config = Path(__file__).parent / "lightning_gpu.toml" - _CPP_BINARY_AVAILABLE = LGPU_CPP_BINARY_AVAILABLE - def __init__( - self, - wires, - *, - mpi: bool = False, - mpi_buf_size: int = 0, - sync=False, - c_dtype=np.complex128, - shots=None, - batch_obs: Union[bool, int] = False, - ): # pylint: disable=too-many-arguments - if c_dtype is np.complex64: - self.use_csingle = True - elif c_dtype is np.complex128: - self.use_csingle = False - else: - raise TypeError(f"Unsupported complex type: {c_dtype}") - - super().__init__(wires, shots=shots, c_dtype=c_dtype) +def accepted_observables(obs: Operator) -> bool: + """A function that determines whether or not an 
observable is supported by ``lightning.gpu``.""" + return obs.name in _observables - self._dp = DevPool() - if not mpi: - self._mpi = False - self._num_local_wires = self.num_wires - self._gpu_state = _gpu_dtype(c_dtype)(self._num_local_wires) - else: - self._mpi = True - self._mpi_init_helper(self.num_wires) - - if mpi_buf_size < 0: - raise TypeError(f"Unsupported mpi_buf_size value: {mpi_buf_size}") - - if mpi_buf_size: - if mpi_buf_size & (mpi_buf_size - 1): - raise TypeError( - f"Unsupported mpi_buf_size value: {mpi_buf_size}. mpi_buf_size should be power of 2." - ) - # Memory size in bytes - sv_memsize = np.dtype(c_dtype).itemsize * (1 << self._num_local_wires) - if _mebibytesToBytes(mpi_buf_size) > sv_memsize: - w_msg = "The MPI buffer size is larger than the local state vector size." - warn( - w_msg, - RuntimeWarning, - ) +def adjoint_observables(obs: Operator) -> bool: + """A function that determines whether or not an observable is supported by ``lightning.gpu`` + when using the adjoint differentiation method.""" + if isinstance(obs, qml.Projector): + return False - self._gpu_state = _gpu_dtype(c_dtype, mpi)( - self._mpi_manager, - self._devtag, - mpi_buf_size, - self._num_global_wires, - self._num_local_wires, - ) + if isinstance(obs, Tensor): + if any(isinstance(o, qml.Projector) for o in obs.non_identity_obs): + return False + return True - self._sync = sync - self._batch_obs = batch_obs - self._create_basis_state(0) - - def _mpi_init_helper(self, num_wires): - """Set up MPI checks.""" - if not MPI_SUPPORT: - raise ImportError("MPI related APIs are not found.") - # initialize MPIManager and config check in the MPIManager ctor - self._mpi_manager = MPIManager() - # check if number of GPUs per node is larger than - # number of processes per node - numDevices = self._dp.getTotalDevices() - numProcsNode = self._mpi_manager.getSizeNode() - if numDevices < numProcsNode: - raise ValueError( - "Number of devices should be larger than or equal to the number of 
processes on each node." - ) - # check if the process number is larger than number of statevector elements - if self._mpi_manager.getSize() > (1 << (num_wires - 1)): - raise ValueError( - "Number of processes should be smaller than the number of statevector elements." - ) - # set the number of global and local wires - commSize = self._mpi_manager.getSize() - self._num_global_wires = commSize.bit_length() - 1 - self._num_local_wires = num_wires - self._num_global_wires - # set GPU device - rank = self._mpi_manager.getRank() - deviceid = rank % numProcsNode - self._dp.setDeviceID(deviceid) - self._devtag = DevTag(deviceid) - - @staticmethod - def _asarray(arr, dtype=None): - arr = np.asarray(arr) # arr is not copied - - if arr.dtype.kind not in ["f", "c"]: - return arr - - if not dtype: - dtype = arr.dtype - - return arr - - # pylint disable=missing-function-docstring - def reset(self): - """Reset the device""" - super().reset() - # init the state vector to |00..0> - self._gpu_state.resetGPU(False) # Sync reset + if isinstance(obs, SProd): + return adjoint_observables(obs.base) - @property - def state(self): - # pylint disable=missing-function-docstring - """Copy the state vector data from the device to the host. + if isinstance(obs, (Sum, Prod)): + return all(adjoint_observables(o) for o in obs) - A state vector Numpy array is explicitly allocated on the host to store and return the data. 
+ return obs.name in _observables - **Example** - >>> dev = qml.device('lightning.gpu', wires=1) - >>> dev.apply([qml.PauliX(wires=[0])]) - >>> print(dev.state) - [0.+0.j 1.+0.j] - """ - state = np.zeros(1 << self._num_local_wires, dtype=self.C_DTYPE) - state = self._asarray(state, dtype=self.C_DTYPE) - self.syncD2H(state) - return state +def adjoint_measurements(mp: qml.measurements.MeasurementProcess) -> bool: + """Specifies whether or not an observable is compatible with adjoint differentiation on DefaultQubit.""" + return isinstance(mp, qml.measurements.ExpectationMP) - @property - def create_ops_list(self): - """Returns create_ops_list function of the matching precision.""" - if self._mpi: - return create_ops_listMPIC64 if self.use_csingle else create_ops_listMPIC128 - return create_ops_listC64 if self.use_csingle else create_ops_listC128 - @property - def measurements(self): - """Returns Measurements constructor of the matching precision.""" - if self._mpi: - return ( - MeasurementsMPIC64(self._gpu_state) - if self.use_csingle - else MeasurementsMPIC128(self._gpu_state) - ) - return ( - MeasurementsC64(self._gpu_state) - if self.use_csingle - else MeasurementsC128(self._gpu_state) - ) +def _supports_adjoint(circuit): + if circuit is None: + return True - def syncD2H(self, state_vector, use_async=False): - """Copy the state vector data on device to a state vector on the host provided by the user - Args: - state_vector(array[complex]): the state vector array on host - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy. 
- - **Example** - >>> dev = qml.device('lightning.gpu', wires=1) - >>> dev.apply([qml.PauliX(wires=[0])]) - >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.C_DTYPE) - >>> dev.syncD2H(state_vector) - >>> print(state_vector) - [0.+0.j 1.+0.j] - """ - self._gpu_state.DeviceToHost(state_vector.ravel(order="C"), use_async) + prog = TransformProgram() + _add_adjoint_transforms(prog) - def syncH2D(self, state_vector, use_async=False): - """Copy the state vector data on host provided by the user to the state vector on the device - Args: - state_vector(array[complex]): the state vector array on host. - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy. - - **Example** - >>> dev = qml.device('lightning.gpu', wires=3) - >>> obs = qml.Identity(0) @ qml.PauliX(1) @ qml.PauliY(2) - >>> obs1 = qml.Identity(1) - >>> H = qml.Hamiltonian([1.0, 1.0], [obs1, obs]) - >>> state_vector = np.array([0.0 + 0.0j, 0.0 + 0.1j, 0.1 + 0.1j, 0.1 + 0.2j, - 0.2 + 0.2j, 0.3 + 0.3j, 0.3 + 0.4j, 0.4 + 0.5j,], dtype=np.complex64,) - >>> dev.syncH2D(state_vector) - >>> res = dev.expval(H) - >>> print(res) - 1.0 - """ - self._gpu_state.HostToDevice(state_vector.ravel(order="C"), use_async) + try: + prog((circuit,)) + except (DecompositionUndefinedError, qml.DeviceError, AttributeError): + return False + return True - def _create_basis_state(self, index, use_async=False): - """Return a computational basis state over all wires. - Args: - index (int): integer representing the computational basis state. - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy. - """ - self._gpu_state.setBasisState(index, use_async) - def _apply_state_vector(self, state, device_wires, use_async=False): - """Initialize the state vector on GPU with a specified state on host. 
- Note that any use of this method will introduce host-overheads. - Args: - state (array[complex]): normalized input state (on host) of length ``2**len(wires)`` - or broadcasted state of shape ``(batch_size, 2**len(wires))`` - device_wires (Wires): wires that get initialized in the state - use_async(bool): indicates whether to use asynchronous memory copy from host to device or not. - Note: This function only supports synchronized memory copy from host to device. - """ - # translate to wire labels used by device - device_wires = self.map_wires(device_wires) - - state = self._asarray(state, dtype=self.C_DTYPE) # this operation on host - output_shape = [2] * self._num_local_wires - - if len(device_wires) == self.num_wires and Wires(sorted(device_wires)) == device_wires: - # Initialize the entire device state with the input state - if self.num_wires == self._num_local_wires: - self.syncH2D(self._reshape(state, output_shape)) - return - local_state = np.zeros(1 << self._num_local_wires, dtype=self.C_DTYPE) - self._mpi_manager.Scatter(state, local_state, 0) - # Initialize the entire device state with the input state - self.syncH2D(self._reshape(local_state, output_shape)) - return - - # generate basis states on subset of qubits via the cartesian product - basis_states = np.array(list(product([0, 1], repeat=len(device_wires)))) - - # get basis states to alter on full set of qubits - unravelled_indices = np.zeros((2 ** len(device_wires), self.num_wires), dtype=int) - unravelled_indices[:, device_wires] = basis_states - - # get indices for which the state is changed to input state vector elements - ravelled_indices = np.ravel_multi_index(unravelled_indices.T, [2] * self.num_wires) - - # set the state vector on GPU with the unravelled_indices and their corresponding values - self._gpu_state.setStateVector( - ravelled_indices, state, use_async - ) # this operation on device - - def _apply_basis_state(self, state, wires): - """Initialize the state vector in a specified 
computational basis state on GPU directly. - Args: - state (array[int]): computational basis state (on host) of shape ``(wires,)`` - consisting of 0s and 1s. - wires (Wires): wires that the provided computational state should be initialized on - Note: This function does not support broadcasted inputs yet. - """ - # translate to wire labels used by device - device_wires = self.map_wires(wires) +def _adjoint_ops(op: qml.operation.Operator) -> bool: + """Specify whether or not an Operator is supported by adjoint differentiation.""" + return not isinstance(op, qml.PauliRot) and adjoint_ops(op) - # length of basis state parameter - n_basis_state = len(state) - state = state.tolist() if hasattr(state, "tolist") else state - if not set(state).issubset({0, 1}): - raise ValueError("BasisState parameter must consist of 0 or 1 integers.") - if n_basis_state != len(device_wires): - raise ValueError("BasisState parameter and wires must be of equal length.") +def _add_adjoint_transforms(program: TransformProgram) -> None: + """Private helper function for ``preprocess`` that adds the transforms specific + for adjoint differentiation. - # get computational basis state number - basis_states = 2 ** (self.num_wires - 1 - np.array(device_wires)) - basis_states = qml.math.convert_like(basis_states, state) - num = int(qml.math.dot(state, basis_states)) + Args: + program (TransformProgram): where we will add the adjoint differentiation transforms - self._create_basis_state(num) + Side Effects: + Adds transforms to the input program. - def apply_lightning(self, operations): - """Apply a list of operations to the state tensor. + """ - Args: - operations (list[~pennylane.operation.Operation]): operations to apply - dtype (type): Type of numpy ``complex`` to be used. Can be important - to specify for large systems for memory allocation purposes. 
+ name = "adjoint + lightning.gpu" + program.add_transform(no_sampling, name=name) + program.add_transform( + decompose, + stopping_condition=_adjoint_ops, + stopping_condition_shots=stopping_condition_shots, + name=name, + skip_initial_state_prep=False, + ) + program.add_transform(validate_observables, accepted_observables, name=name) + program.add_transform( + validate_measurements, analytic_measurements=adjoint_measurements, name=name + ) + program.add_transform(qml.transforms.broadcast_expand) + program.add_transform(validate_adjoint_trainable_params) - Returns: - array[complex]: the output state tensor - """ - # Skip over identity operations instead of performing - # matrix multiplication with the identity. - for ops in operations: - if isinstance(ops, qml.Identity): - continue - if isinstance(ops, Adjoint): - name = ops.base.name - invert_param = True - else: - name = ops.name - invert_param = False - method = getattr(self._gpu_state, name, None) - wires = self.wires.indices(ops.wires) - - if isinstance(ops, qml.ops.op_math.Controlled) and isinstance( - ops.base, qml.GlobalPhase - ): - controls = ops.control_wires - control_values = ops.control_values - param = ops.base.parameters[0] - matrix = global_phase_diagonal(param, self.wires, controls, control_values) - self._gpu_state.apply(name, wires, False, [], matrix) - elif method is None: - # Inverse can be set to False since qml.matrix(ops) is already in inverted form - try: - mat = qml.matrix(ops) - except AttributeError: # pragma: no cover - # To support older versions of PL - mat = ops.matrix - r_dtype = np.float32 if self.use_csingle else np.float64 - param = [[r_dtype(ops.hash)]] if isinstance(ops, gate_cache_needs_hash) else [] - if len(mat) == 0: - raise ValueError("Unsupported operation") - self._gpu_state.apply( - name, - wires, - False, - param, - mat.ravel(order="C"), # inv = False: Matrix already in correct form; - ) # Parameters can be ignored for explicit matrices; F-order for cuQuantum - - 
else: - param = ops.parameters - method(wires, invert_param, param) - # pylint: disable=unused-argument - def apply(self, operations, rotations=None, **kwargs): - """Applies a list of operations to the state tensor.""" - # State preparation is currently done in Python - if operations: # make sure operations[0] exists - if isinstance(operations[0], StatePrep): - self._apply_state_vector(operations[0].parameters[0].copy(), operations[0].wires) - operations = operations[1:] - elif isinstance(operations[0], BasisState): - self._apply_basis_state(operations[0].parameters[0], operations[0].wires) - operations = operations[1:] - - for operation in operations: - if isinstance(operation, (StatePrep, BasisState)): - raise DeviceError( - f"Operation {operation.name} cannot be used after other " - + f"Operations have already been applied on a {self.short_name} device." - ) +# LightningGPU specific methods +def check_gpu_resources() -> None: + """Check the available resources of each Nvidia GPU""" + if find_library("custatevec") is None and not imp_util.find_spec("cuquantum"): - self.apply_lightning(operations) + raise ImportError( + "cuStateVec libraries not found. Please pip install the appropriate cuStateVec library in a virtual environment." + ) - @staticmethod - def _check_adjdiff_supported_operations(operations): - """Check Lightning adjoint differentiation method support for a tape. + if not DevPool.getTotalDevices(): + raise ValueError("No supported CUDA-capable device found") - Raise ``QuantumFunctionError`` if ``tape`` contains not supported measurements, - observables, or operations by the Lightning adjoint differentiation method. + if not is_gpu_supported(): + raise ValueError(f"CUDA device is an unsupported version: {get_gpu_arch()}") - Args: - tape (.QuantumTape): quantum tape to differentiate. 
- """ - for op in operations: - if op.num_params > 1 and not isinstance(op, Rot): - raise QuantumFunctionError( - f"The {op.name} operation is not supported using " - 'the "adjoint" differentiation method' - ) - def _init_process_jacobian_tape(self, tape, starting_state, use_device_state): - """Generate an initial state vector for ``_process_jacobian_tape``.""" - if starting_state is not None: - if starting_state.size != 2 ** len(self.wires): - raise QuantumFunctionError( - "The number of qubits of starting_state must be the same as " - "that of the device." - ) - self._apply_state_vector(starting_state, self.wires) - elif not use_device_state: - self.reset() - self.apply(tape.operations) - return self._gpu_state - - # pylint: disable=too-many-branches - def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False): - """Implements the adjoint method outlined in - `Jones and Gacon `__ to differentiate an input tape. - - After a forward pass, the circuit is reversed by iteratively applying adjoint - gates to scan backwards through the circuit. - """ - if self.shots is not None: - warn( - "Requested adjoint differentiation to be computed with finite shots." - " The derivative is always exact when using the adjoint differentiation method.", - UserWarning, - ) +@simulator_tracking +@single_tape_support +class LightningGPU(LightningBase): + """PennyLane Lightning GPU device. - tape_return_type = self._check_adjdiff_supported_measurements(tape.measurements) + A device that interfaces with C++ to perform fast linear algebra calculations. - if not tape_return_type: # the tape does not have measurements - return np.array([], dtype=self.state.dtype) + Use of this device requires pre-built binaries or compilation from source. Check out the + :doc:`/lightning_gpu/installation` guide for more details. - if tape_return_type is State: # pragma: no cover - raise QuantumFunctionError( - "Adjoint differentiation method does not support measurement StateMP." 
- "Use vjp method instead for this purpose." - ) + Args: + wires (int): the number of wires to initialize the device with + c_dtype: Datatypes for statevector representation. Must be one of + ``np.complex64`` or ``np.complex128``. + shots (int): How many times the circuit should be evaluated (or sampled) to estimate + the expectation values. Defaults to ``None`` if not specified. Setting + to ``None`` results in computing statistics like expectation values and + variances analytically. + batch_obs (bool): Determine whether we process observables in parallel when + computing the jacobian. This value is only relevant when the lightning.gpu + is built with MPI. Default is False. + mpi (bool): declare if the device will use the MPI support. + mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB. + use_async (bool): is host-device data copy asynchronized or not. + """ - # Check adjoint diff support - self._check_adjdiff_supported_operations(tape.operations) + # General device options + _device_options = ("c_dtype", "batch_obs") - if self._mpi: - split_obs = False # with MPI batched means compute Jacobian one observables at a time, no point splitting linear combinations - else: - split_obs = self._dp.getTotalDevices() if self._batch_obs else False - processed_data = self._process_jacobian_tape( - tape, starting_state, use_device_state, self._mpi, split_obs - ) + # Device specific options + _CPP_BINARY_AVAILABLE = LGPU_CPP_BINARY_AVAILABLE + _backend_info = backend_info if LGPU_CPP_BINARY_AVAILABLE else None - if not processed_data: # training_params is empty - return np.array([], dtype=self.state.dtype) + # This `config` is used in Catalyst-Frontend + config = Path(__file__).parent / "lightning_gpu.toml" - trainable_params = processed_data["tp_shift"] - # pylint: disable=pointless-string-statement - """ - This path enables controlled batching over the requested observables, be they explicit, or part of a Hamiltonian. 
- The traditional path will assume there exists enough free memory to preallocate all arrays and run through each observable iteratively. - However, for larger system, this becomes impossible, and we hit memory issues very quickly. the batching support here enables several functionalities: - - Pre-allocate memory for all observables on the primary GPU (`batch_obs=False`, default behaviour): This is the simplest path, and works best for few observables, and moderate qubit sizes. All memory is preallocated for each observable, and run through iteratively on a single GPU. - - Evenly distribute the observables over all available GPUs (`batch_obs=True`): This will evenly split the data into ceil(num_obs/num_gpus) chunks, and allocate enough space on each GPU up-front before running through them concurrently. This relies on C++ threads to handle the orchestration. - - Allocate at most `n` observables per GPU (`batch_obs=n`): Providing an integer value restricts each available GPU to at most `n` copies of the statevector, and hence `n` given observables for a given batch. This will iterate over the data in chnuks of size `n*num_gpus`. - """ - adjoint_jacobian = _adj_dtype(self.use_csingle, self._mpi)() - - if self._batch_obs: # Batching of Measurements - jac = adjoint_jacobian.batched( - self._gpu_state, - processed_data["obs_serialized"], - processed_data["ops_serialized"], - trainable_params, - ) - else: - jac = adjoint_jacobian( - self._gpu_state, - processed_data["obs_serialized"], - processed_data["ops_serialized"], - trainable_params, - ) - jac = np.array(jac) - has_shape0 = bool(len(jac)) + # TODO: Move supported ops/obs to TOML file + operations = _operations + # The names of the supported operations. 
- num_obs = len(np.unique(processed_data["obs_indices"])) - rows = processed_data["obs_indices"] - cols = np.arange(len(rows), dtype=int) - data = np.ones(len(rows)) - red_mat = csr_matrix((data, (rows, cols)), shape=(num_obs, len(rows))) - jac = red_mat @ jac.reshape((len(rows), -1)) - jac = jac.reshape(-1, len(trainable_params)) if has_shape0 else jac - jac_r = np.zeros((jac.shape[0], processed_data["all_params"])) - jac_r[:, processed_data["record_tp_rows"]] = jac - return self._adjoint_jacobian_processing(jac_r) + observables = _observables + # The names of the supported observables. - # pylint: disable=inconsistent-return-statements, line-too-long, missing-function-docstring - def vjp(self, measurements, grad_vec, starting_state=None, use_device_state=False): - """Generate the processing function required to compute the vector-Jacobian products - of a tape. + def __init__( # pylint: disable=too-many-arguments + self, + wires: Union[int, List], + *, + c_dtype: Union[np.complex128, np.complex64] = np.complex128, + shots: Union[int, List] = None, + batch_obs: bool = False, + # GPU and MPI arguments + mpi: bool = False, + mpi_buf_size: int = 0, + use_async: bool = False, + ): + if not self._CPP_BINARY_AVAILABLE: + raise ImportError( + "Pre-compiled binaries for lightning.gpu are not available. " + "To manually compile from source, follow the instructions at " + "https://docs.pennylane.ai/projects/lightning/en/stable/dev/installation.html." + ) - This function can be used with multiple expectation values or a quantum state. - When a quantum state is given, + check_gpu_resources() - .. 
code-block:: python + super().__init__( + wires=wires, + c_dtype=c_dtype, + shots=shots, + batch_obs=batch_obs, + ) - vjp_f = dev.vjp([qml.state()], grad_vec) - vjp = vjp_f(tape) + # Set the attributes to call the LightningGPU classes + self._set_lightning_classes() - computes :math:`w = (w_1,\\cdots,w_m)` where + # GPU specific options + self._dp = DevPool() + self._use_async = use_async - .. math:: + # Creating the state vector + self._mpi_handler = MPIHandler(mpi, mpi_buf_size, len(self.wires), c_dtype) - w_k = \\langle v| \\frac{\\partial}{\\partial \\theta_k} | \\psi_{\\pmb{\\theta}} \\rangle. + self._statevector = self.LightningStateVector( + num_wires=len(self.wires), + dtype=c_dtype, + mpi_handler=self._mpi_handler, + use_async=self._use_async, + ) - Here, :math:`m` is the total number of trainable parameters, - :math:`\\pmb{\\theta}` is the vector of trainable parameters and - :math:`\\psi_{\\pmb{\\theta}}` is the output quantum state. + @property + def name(self): + """The name of the device.""" + return "lightning.gpu" - Args: - measurements (list): List of measurement processes for vector-Jacobian product. - Now it must be expectation values or a quantum state. - grad_vec (tensor_like): Gradient-output vector. Must have shape matching the output - shape of the corresponding tape, i.e. number of measurements if the return - type is expectation or :math:`2^N` if the return type is statevector - starting_state (tensor_like): post-forward pass state to start execution with. - It should be complex-valued. Takes precedence over ``use_device_state``. - use_device_state (bool): use current device state to initialize. - A forward pass of the same circuit should be the last thing the device - has executed. If a ``starting_state`` is provided, that takes precedence. 
+ def _set_lightning_classes(self): + """Load the LightningStateVector, LightningMeasurements, LightningAdjointJacobian as class attribute""" + self.LightningStateVector = LightningGPUStateVector + self.LightningMeasurements = LightningGPUMeasurements + self.LightningAdjointJacobian = LightningGPUAdjointJacobian - Returns: - The processing function required to compute the vector-Jacobian products of a tape. + def _setup_execution_config(self, config): """ - if self.shots is not None: - warn( - "Requested adjoint differentiation to be computed with finite shots." - " The derivative is always exact when using the adjoint differentiation method.", - UserWarning, - ) - - tape_return_type = self._check_adjdiff_supported_measurements(measurements) - - if math.allclose(grad_vec, 0) or tape_return_type is None: - return lambda tape: math.convert_like(np.zeros(len(tape.trainable_params)), grad_vec) + Update the execution config with choices for how the device should be used and the device options. 
+ """ + updated_values = {} + if config.gradient_method == "best": + updated_values["gradient_method"] = "adjoint" + if config.use_device_gradient is None: + updated_values["use_device_gradient"] = config.gradient_method in ("best", "adjoint") + if config.grad_on_execution is None: + updated_values["grad_on_execution"] = True - if tape_return_type is Expectation: - if len(grad_vec) != len(measurements): - raise ValueError( - "Number of observables in the tape must be the same as the length of grad_vec in the vjp method" - ) + new_device_options = dict(config.device_options) + for option in self._device_options: + if option not in new_device_options: + new_device_options[option] = getattr(self, f"_{option}", None) - if np.iscomplexobj(grad_vec): - raise ValueError( - "The vjp method only works with a real-valued grad_vec when the tape is returning an expectation value" - ) + # It is necessary to set the mcmc default configuration to complete the requirements of ExecuteConfig + mcmc_default = {"mcmc": False, "kernel_name": None, "num_burnin": 0, "rng": None} + new_device_options.update(mcmc_default) - ham = qml.Hamiltonian(grad_vec, [m.obs for m in measurements]) + return replace(config, **updated_values, device_options=new_device_options) - # pylint: disable=protected-access - def processing_fn(tape): - nonlocal ham - num_params = len(tape.trainable_params) + def preprocess(self, execution_config: ExecutionConfig = DefaultExecutionConfig): + """This function defines the device transform program to be applied and an updated device configuration. - if num_params == 0: - return np.array([], dtype=self.state.dtype) + Args: + execution_config (Union[ExecutionConfig, Sequence[ExecutionConfig]]): A data structure describing the + parameters needed to fully describe the execution. 
- new_tape = tape.copy() - new_tape._measurements = [qml.expval(ham)] + Returns: + TransformProgram, ExecutionConfig: A transform program that when called returns :class:`~.QuantumTape`'s that the + device can natively execute as well as a postprocessing function to be called after execution, and a configuration + with unset specifications filled in. - return self.adjoint_jacobian(new_tape, starting_state, use_device_state) + This device: - return processing_fn + * Supports any qubit operations that provide a matrix + * Currently does not support finite shots + * Currently does not intrinsically support parameter broadcasting - # pylint: disable=attribute-defined-outside-init - def sample(self, observable, shot_range=None, bin_size=None, counts=False): - """Return samples of an observable.""" - diagonalizing_gates = observable.diagonalizing_gates() - if diagonalizing_gates: - self.apply(diagonalizing_gates) - if not isinstance(observable, qml.PauliZ): - self._samples = self.generate_samples() - results = super().sample( - observable, shot_range=shot_range, bin_size=bin_size, counts=counts + """ + exec_config = self._setup_execution_config(execution_config) + program = TransformProgram() + + program.add_transform(validate_measurements, name=self.name) + program.add_transform(validate_observables, accepted_observables, name=self.name) + program.add_transform(validate_device_wires, self.wires, name=self.name) + program.add_transform( + mid_circuit_measurements, device=self, mcm_config=exec_config.mcm_config ) - if diagonalizing_gates: - self.apply([qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]) - return results - def generate_samples(self): - """Generate samples - - Returns: - array[int]: array of samples in binary representation with shape - ``(dev.shots, dev.num_wires)`` - """ - shots = self.shots if isinstance(self.shots, int) else self.shots.total_shots + program.add_transform( + decompose, + stopping_condition=stopping_condition, + 
stopping_condition_shots=stopping_condition_shots, + skip_initial_state_prep=True, + name=self.name, + ) + program.add_transform(qml.transforms.broadcast_expand) - return self.measurements.generate_samples(len(self.wires), shots).astype(int, copy=False) + if exec_config.gradient_method == "adjoint": + _add_adjoint_transforms(program) + return program, exec_config - # pylint: disable=protected-access - def expval(self, observable, shot_range=None, bin_size=None): - """Expectation value of the supplied observable. + # pylint: disable=unused-argument + def execute( + self, + circuits: QuantumTape_or_Batch, + execution_config: ExecutionConfig = DefaultExecutionConfig, + ) -> Result_or_ResultBatch: + """Execute a circuit or a batch of circuits and turn it into results. Args: - observable: A PennyLane observable. - shot_range (tuple[int]): 2-tuple of integers specifying the range of samples - to use. If not specified, all samples are used. - bin_size (int): Divides the shot range into bins of size ``bin_size``, and - returns the measurement statistic separately over each bin. If not - provided, the entire shot range is treated as a single bin. + circuits (Union[QuantumTape, Sequence[QuantumTape]]): the quantum circuits to be executed + execution_config (ExecutionConfig): a datastructure with additional information required for execution Returns: - Expectation value of the observable + TensorLike, tuple[TensorLike], tuple[tuple[TensorLike]]: A numeric result of the computation. 
""" - if isinstance(observable, qml.Projector): - diagonalizing_gates = observable.diagonalizing_gates() - if self.shots is None and diagonalizing_gates: - self.apply(diagonalizing_gates) - results = super().expval(observable, shot_range=shot_range, bin_size=bin_size) - if self.shots is None and diagonalizing_gates: - self.apply([qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]) - return results - - if self.shots is not None: - # estimate the expectation value - samples = self.sample(observable, shot_range=shot_range, bin_size=bin_size) - return np.squeeze(np.mean(samples, axis=0)) - - if isinstance(observable, qml.SparseHamiltonian): - if self._mpi: - # Identity for CSR_SparseHamiltonian to pass to processes with rank != 0 to reduce - # host(cpu) memory requirements - obs = qml.Identity(0) - Hmat = qml.Hamiltonian([1.0], [obs]).sparse_matrix() - H_sparse = qml.SparseHamiltonian(Hmat, wires=range(1)) - CSR_SparseHamiltonian = H_sparse.sparse_matrix().tocsr() - # CSR_SparseHamiltonian for rank == 0 - if self._mpi_manager.getRank() == 0: - CSR_SparseHamiltonian = observable.sparse_matrix().tocsr() - else: - CSR_SparseHamiltonian = observable.sparse_matrix().tocsr() - - return self.measurements.expval( - CSR_SparseHamiltonian.indptr, - CSR_SparseHamiltonian.indices, - CSR_SparseHamiltonian.data, - ) - - # use specialized functors to compute expval(Hermitian) - if isinstance(observable, qml.Hermitian): - observable_wires = self.map_wires(observable.wires) - if self._mpi and len(observable_wires) > self._num_local_wires: - raise RuntimeError( - "MPI backend does not support Hermitian with number of target wires larger than local wire number." 
+ results = [] + for circuit in circuits: + if self._wire_map is not None: + [circuit], _ = qml.map_wires(circuit, self._wire_map) + results.append( + self.simulate( + circuit, + self._statevector, + postselect_mode=execution_config.mcm_config.postselect_mode, ) - matrix = observable.matrix() - return self.measurements.expval(matrix, observable_wires) - - if ( - isinstance(observable, qml.ops.Hamiltonian) - or (observable.arithmetic_depth > 0) - or isinstance(observable.name, List) - ): - ob_serialized = QuantumScriptSerializer( - self.short_name, self.use_csingle, self._mpi - )._ob(observable, self.wire_map) - return self.measurements.expval(ob_serialized) + ) - # translate to wire labels used by device - observable_wires = self.map_wires(observable.wires) + return tuple(results) - return self.measurements.expval(observable.name, observable_wires) + def supports_derivatives( + self, + execution_config: Optional[ExecutionConfig] = None, + circuit: Optional[qml.tape.QuantumTape] = None, + ) -> bool: + """Check whether or not derivatives are available for a given configuration and circuit. - def probability_lightning(self, wires=None): - """Return the probability of each computational basis state. + ``LightningGPU`` supports adjoint differentiation with analytic results. Args: - wires (Iterable[Number, str], Number, str, Wires): wires to return - marginal probabilities for. Wires not provided are traced out of the system. + execution_config (ExecutionConfig): The configuration of the desired derivative calculation + circuit (QuantumTape): An optional circuit to check derivatives support for. Returns: - array[float]: list of the probabilities + Bool: Whether or not a derivative can be calculated provided the given information + """ - # translate to wire labels used by device - observable_wires = self.map_wires(wires) - # Device returns as col-major orderings, so perform transpose on data for bit-index shuffle for now. 
- local_prob = self.measurements.probs(observable_wires) - if len(local_prob) > 0: - num_local_wires = len(local_prob).bit_length() - 1 if len(local_prob) > 0 else 0 - return local_prob.reshape([2] * num_local_wires).transpose().reshape(-1) - return local_prob - - def var(self, observable, shot_range=None, bin_size=None): - """Variance of the supplied observable. + if execution_config is None and circuit is None: + return True + if execution_config.gradient_method not in {"adjoint", "best"}: + return False + if circuit is None: + return True + return _supports_adjoint(circuit=circuit) + + def simulate( + self, + circuit: QuantumScript, + state: LightningGPUStateVector, + postselect_mode: Optional[str] = None, + ) -> Result: + """Simulate a single quantum script. Args: - observable: A PennyLane observable. - shot_range (tuple[int]): 2-tuple of integers specifying the range of samples - to use. If not specified, all samples are used. - bin_size (int): Divides the shot range into bins of size ``bin_size``, and - returns the measurement statistic separately over each bin. If not - provided, the entire shot range is treated as a single bin. + circuit (QuantumTape): The single circuit to simulate + state (LightningGPUStateVector): handle to Lightning state vector + postselect_mode (str): Configuration for handling shots with mid-circuit measurement + postselection. Use ``"hw-like"`` to discard invalid shots and ``"fill-shots"`` to + keep the same number of shots. Default is ``None``. 
Returns: - Variance of the observable - """ - if isinstance(observable, qml.Projector): - diagonalizing_gates = observable.diagonalizing_gates() - if self.shots is None and diagonalizing_gates: - self.apply(diagonalizing_gates) - results = super().var(observable, shot_range=shot_range, bin_size=bin_size) - if self.shots is None and diagonalizing_gates: - self.apply([qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]) - return results - - if self.shots is not None: - # estimate the var - # Lightning doesn't support sampling yet - samples = self.sample(observable, shot_range=shot_range, bin_size=bin_size) - return np.squeeze(np.var(samples, axis=0)) - - if isinstance(observable, qml.SparseHamiltonian): - csr_hamiltonian = observable.sparse_matrix(wire_order=self.wires).tocsr(copy=False) - return self.measurements.var( - csr_hamiltonian.indptr, - csr_hamiltonian.indices, - csr_hamiltonian.data, - ) + Tuple[TensorLike]: The results of the simulation - if ( - isinstance(observable, (qml.Hermitian, qml.ops.Hamiltonian)) - or (observable.arithmetic_depth > 0) - or isinstance(observable.name, List) - ): - ob_serialized = QuantumScriptSerializer( - self.short_name, self.use_csingle, self._mpi - )._ob(observable, self.wire_map) - return self.measurements.var(ob_serialized) + Note that this function can return measurements for non-commuting observables simultaneously. + """ + if circuit.shots and (any(isinstance(op, MidMeasureMP) for op in circuit.operations)): + if self._mpi_handler.use_mpi: + raise qml.DeviceError( + "Lightning-GPU-MPI does not support Mid-circuit measurements." 
+ ) - # translate to wire labels used by device - observable_wires = self.map_wires(observable.wires) + results = [] + aux_circ = QuantumScript( + circuit.operations, + circuit.measurements, + shots=[1], + trainable_params=circuit.trainable_params, + ) + for _ in range(circuit.shots.total_shots): + state.reset_state() + mid_measurements = {} + final_state = state.get_final_state( + aux_circ, mid_measurements=mid_measurements, postselect_mode=postselect_mode + ) + results.append( + self.LightningMeasurements(final_state).measure_final_state( + aux_circ, mid_measurements=mid_measurements + ) + ) + return tuple(results) - return self.measurements.var(observable.name, observable_wires) + state.reset_state() + final_state = state.get_final_state(circuit) + return self.LightningMeasurements(final_state).measure_final_state(circuit) diff --git a/pennylane_lightning/lightning_gpu/lightning_gpu.toml b/pennylane_lightning/lightning_gpu/lightning_gpu.toml index 518315de09..b18470da6b 100644 --- a/pennylane_lightning/lightning_gpu/lightning_gpu.toml +++ b/pennylane_lightning/lightning_gpu/lightning_gpu.toml @@ -98,7 +98,7 @@ qjit_compatible = false # If the device requires run time generation of the quantum circuit. runtime_code_generation = false # If the device supports mid circuit measurements natively -mid_circuit_measurement = false +mid_circuit_measurement = true # This field is currently unchecked but it is reserved for the purpose of # determining if the device supports dynamic qubit allocation/deallocation. diff --git a/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py b/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py index 4338a5b876..bee481aac4 100644 --- a/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py +++ b/pennylane_lightning/lightning_kokkos/_adjoint_jacobian.py @@ -15,6 +15,10 @@ Internal methods for adjoint Jacobian differentiation method. 
""" +from __future__ import annotations + +from warnings import warn + try: from pennylane_lightning.lightning_kokkos_ops.algorithms import ( AdjointJacobianC64, @@ -22,8 +26,8 @@ create_ops_listC64, create_ops_listC128, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) import numpy as np from pennylane.tape import QuantumTape @@ -31,8 +35,6 @@ # pylint: disable=ungrouped-imports from pennylane_lightning.core._adjoint_jacobian_base import LightningBaseAdjointJacobian -from ._state_vector import LightningKokkosStateVector - class LightningKokkosAdjointJacobian(LightningBaseAdjointJacobian): """Check and execute the adjoint Jacobian differentiation method. @@ -44,7 +46,11 @@ class LightningKokkosAdjointJacobian(LightningBaseAdjointJacobian): # pylint: disable=too-few-public-methods - def __init__(self, qubit_state: LightningKokkosStateVector, batch_obs: bool = False) -> None: + def __init__( + self, + qubit_state: LightningKokkosStateVector, # pylint: disable=undefined-variable + batch_obs: bool = False, + ) -> None: super().__init__(qubit_state, batch_obs) # Initialize the C++ binds diff --git a/pennylane_lightning/lightning_kokkos/_measurements.py b/pennylane_lightning/lightning_kokkos/_measurements.py index b438af350c..ee848739cf 100644 --- a/pennylane_lightning/lightning_kokkos/_measurements.py +++ b/pennylane_lightning/lightning_kokkos/_measurements.py @@ -15,11 +15,14 @@ Class implementation for state vector measurements. 
""" -# pylint: disable=import-error, no-name-in-module, ungrouped-imports +from __future__ import annotations + +from warnings import warn + try: from pennylane_lightning.lightning_kokkos_ops import MeasurementsC64, MeasurementsC128 -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) from typing import List @@ -28,6 +31,7 @@ from pennylane.measurements import CountsMP, SampleMeasurement, Shots from pennylane.typing import TensorLike +# pylint: disable=ungrouped-imports from pennylane_lightning.core._measurements_base import LightningBaseMeasurements @@ -44,7 +48,7 @@ class LightningKokkosMeasurements( def __init__( self, - kokkos_state, + kokkos_state: LightningKokkosStateVector, # pylint: disable=undefined-variable ) -> None: super().__init__(kokkos_state) diff --git a/pennylane_lightning/lightning_kokkos/_state_vector.py b/pennylane_lightning/lightning_kokkos/_state_vector.py index 50518ed078..cd8d23ceef 100644 --- a/pennylane_lightning/lightning_kokkos/_state_vector.py +++ b/pennylane_lightning/lightning_kokkos/_state_vector.py @@ -14,6 +14,7 @@ """ Class implementation for lightning_kokkos state-vector manipulation. 
""" +from warnings import warn try: from pennylane_lightning.lightning_kokkos_ops import ( @@ -23,8 +24,10 @@ allocate_aligned_array, print_configuration, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) + +from typing import Union import numpy as np import pennylane as qml @@ -58,17 +61,16 @@ class LightningKokkosStateVector(LightningBaseStateVector): def __init__( self, - num_wires, - dtype=np.complex128, + num_wires: int, + dtype: Union[np.complex128, np.complex64] = np.complex128, kokkos_args=None, - sync=True, - ): # pylint: disable=too-many-arguments + ): + super().__init__(num_wires, dtype) self._device_name = "lightning.kokkos" self._kokkos_config = {} - self._sync = sync # Initialize the state vector if kokkos_args is None: @@ -142,7 +144,7 @@ def sync_d2h(self, state_vector): >>> dev = qml.device('lightning.kokkos', wires=1) >>> dev.apply([qml.PauliX(wires=[0])]) - >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.C_DTYPE) + >>> state_vector = np.zeros(2**dev.num_wires).astype(dev.c_dtype) >>> dev.sync_d2h(state_vector) >>> print(state_vector) [0.+0.j 1.+0.j] @@ -277,9 +279,12 @@ def _apply_lightning( ) elif isinstance(operation, qml.PauliRot): method = getattr(state, "applyPauliRot") - paulis = operation._hyperparameters["pauli_word"] + # pylint: disable=protected-access + paulis = operation._hyperparameters[ + "pauli_word" + ] # pylint: disable=protected-access wires = [i for i, w in zip(wires, paulis) if w != "I"] - word = "".join(p for p in paulis if p != "I") # pylint: disable=protected-access + word = "".join(p for p in paulis if p != "I") method(wires, invert_param, operation.parameters, word) elif method is not None: # apply specialized gate param = operation.parameters diff --git a/pennylane_lightning/lightning_kokkos/lightning_kokkos.py b/pennylane_lightning/lightning_kokkos/lightning_kokkos.py index 668550ff24..faa7e6d0bf 100644 --- a/pennylane_lightning/lightning_kokkos/lightning_kokkos.py +++ 
b/pennylane_lightning/lightning_kokkos/lightning_kokkos.py @@ -20,7 +20,7 @@ from dataclasses import replace from functools import reduce from pathlib import Path -from typing import Optional +from typing import List, Optional, Union from warnings import warn import numpy as np @@ -50,10 +50,6 @@ Result_or_ResultBatch, ) -from ._adjoint_jacobian import LightningKokkosAdjointJacobian -from ._measurements import LightningKokkosMeasurements -from ._state_vector import LightningKokkosStateVector - try: from pennylane_lightning.lightning_kokkos_ops import backend_info, print_configuration @@ -63,6 +59,10 @@ LK_CPP_BINARY_AVAILABLE = False backend_info = None +from ._adjoint_jacobian import LightningKokkosAdjointJacobian +from ._measurements import LightningKokkosMeasurements +from ._state_vector import LightningKokkosStateVector + # The set of supported operations. _operations = frozenset( { @@ -313,13 +313,12 @@ class LightningKokkos(LightningBase): def __init__( # pylint: disable=too-many-arguments self, - wires, + wires: Union[int, List], *, - c_dtype=np.complex128, - shots=None, - batch_obs=False, + c_dtype: Union[np.complex128, np.complex64] = np.complex128, + shots: Union[int, List] = None, + batch_obs: bool = False, # Kokkos arguments - sync=True, kokkos_args=None, ): if not self._CPP_BINARY_AVAILABLE: @@ -341,11 +340,10 @@ def __init__( # pylint: disable=too-many-arguments # Kokkos specific options self._kokkos_args = kokkos_args - self._sync = sync # Creating the state vector self._statevector = self.LightningStateVector( - num_wires=len(self.wires), dtype=c_dtype, kokkos_args=kokkos_args, sync=sync + num_wires=len(self.wires), dtype=c_dtype, kokkos_args=kokkos_args ) if not LightningKokkos.kokkos_config: @@ -516,7 +514,7 @@ def simulate( aux_circ, mid_measurements=mid_measurements, postselect_mode=postselect_mode ) results.append( - LightningKokkosMeasurements(final_state).measure_final_state( + self.LightningMeasurements(final_state).measure_final_state( 
aux_circ, mid_measurements=mid_measurements ) ) @@ -524,7 +522,7 @@ def simulate( state.reset_state() final_state = state.get_final_state(circuit) - return LightningKokkosMeasurements(final_state).measure_final_state(circuit) + return self.LightningMeasurements(final_state).measure_final_state(circuit) @staticmethod def get_c_interface(): diff --git a/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py b/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py index 0abc7f72f7..390c0cf69b 100644 --- a/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py +++ b/pennylane_lightning/lightning_qubit/_adjoint_jacobian.py @@ -14,6 +14,9 @@ r""" Internal methods for adjoint Jacobian differentiation method. """ +from __future__ import annotations + +from warnings import warn try: from pennylane_lightning.lightning_qubit_ops.algorithms import ( @@ -22,8 +25,8 @@ create_ops_listC64, create_ops_listC128, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) from os import getenv @@ -34,8 +37,6 @@ # pylint: disable=ungrouped-imports from pennylane_lightning.core._adjoint_jacobian_base import LightningBaseAdjointJacobian -from ._state_vector import LightningStateVector - class LightningAdjointJacobian( LightningBaseAdjointJacobian @@ -47,7 +48,12 @@ class LightningAdjointJacobian( batch_obs(bool): If serialized tape is to be batched or not. 
""" - def __init__(self, qubit_state: LightningStateVector, batch_obs: bool = False) -> None: + def __init__( + self, + qubit_state: LightningStateVector, # pylint: disable=undefined-variable + batch_obs: bool = False, + ) -> None: + super().__init__(qubit_state, batch_obs) # Initialize the C++ binds diff --git a/pennylane_lightning/lightning_qubit/_measurements.py b/pennylane_lightning/lightning_qubit/_measurements.py index c1b97a1184..415ce74088 100644 --- a/pennylane_lightning/lightning_qubit/_measurements.py +++ b/pennylane_lightning/lightning_qubit/_measurements.py @@ -16,10 +16,14 @@ """ # pylint: disable=import-error, no-name-in-module, ungrouped-imports +from __future__ import annotations + +from warnings import warn + try: from pennylane_lightning.lightning_qubit_ops import MeasurementsC64, MeasurementsC128 -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) from functools import reduce from typing import List @@ -53,7 +57,7 @@ class LightningMeasurements(LightningBaseMeasurements): # pylint: disable=too-f def __init__( self, - qubit_state, + qubit_state: LightningStateVector, # pylint: disable=undefined-variable mcmc: bool = None, kernel_name: str = None, num_burnin: int = None, diff --git a/pennylane_lightning/lightning_qubit/_state_vector.py b/pennylane_lightning/lightning_qubit/_state_vector.py index b4b6ef5ff1..62068dcbd7 100644 --- a/pennylane_lightning/lightning_qubit/_state_vector.py +++ b/pennylane_lightning/lightning_qubit/_state_vector.py @@ -14,6 +14,7 @@ """ Class implementation for lightning_qubit state-vector manipulation. 
""" +from warnings import warn try: from pennylane_lightning.lightning_qubit_ops import ( @@ -21,8 +22,10 @@ StateVectorC128, allocate_aligned_array, ) -except ImportError: - pass +except ImportError as ex: + warn(str(ex), UserWarning) + +from typing import Union import numpy as np import pennylane as qml @@ -50,7 +53,8 @@ class LightningStateVector(LightningBaseStateVector): # pylint: disable=too-few device_name(string): state vector device name. Options: ["lightning.qubit"] """ - def __init__(self, num_wires, dtype=np.complex128): + def __init__(self, num_wires: int, dtype: Union[np.complex128, np.complex64] = np.complex128): + super().__init__(num_wires, dtype) self._device_name = "lightning.qubit" diff --git a/pennylane_lightning/lightning_qubit/lightning_qubit.py b/pennylane_lightning/lightning_qubit/lightning_qubit.py index c317bbfbad..abf0809787 100644 --- a/pennylane_lightning/lightning_qubit/lightning_qubit.py +++ b/pennylane_lightning/lightning_qubit/lightning_qubit.py @@ -18,7 +18,7 @@ from dataclasses import replace from functools import reduce from pathlib import Path -from typing import Optional, Sequence +from typing import List, Optional, Sequence, Union from warnings import warn import numpy as np @@ -48,10 +48,6 @@ Result_or_ResultBatch, ) -from ._adjoint_jacobian import LightningAdjointJacobian -from ._measurements import LightningMeasurements -from ._state_vector import LightningStateVector - try: from pennylane_lightning.lightning_qubit_ops import backend_info @@ -60,6 +56,10 @@ warn(str(ex), UserWarning) LQ_CPP_BINARY_AVAILABLE = False +from ._adjoint_jacobian import LightningAdjointJacobian +from ._measurements import LightningMeasurements +from ._state_vector import LightningStateVector + # The set of supported operations. 
_operations = frozenset( { @@ -323,16 +323,16 @@ class LightningQubit(LightningBase): def __init__( # pylint: disable=too-many-arguments self, - wires, + wires: Union[int, List], *, - c_dtype=np.complex128, - shots=None, - batch_obs=False, + c_dtype: Union[np.complex128, np.complex64] = np.complex128, + shots: Union[int, List] = None, + batch_obs: bool = False, # Markov Chain Monte Carlo (MCMC) sampling method arguments - seed="global", - mcmc=False, - kernel_name="Local", - num_burnin=100, + seed: Union[str, int] = "global", + mcmc: bool = False, + kernel_name: str = "Local", + num_burnin: int = 100, ): if not self._CPP_BINARY_AVAILABLE: raise ImportError( @@ -559,4 +559,4 @@ def simulate( state.reset_state() final_state = state.get_final_state(circuit) - return LightningMeasurements(final_state, **mcmc).measure_final_state(circuit) + return self.LightningMeasurements(final_state, **mcmc).measure_final_state(circuit) diff --git a/pennylane_lightning/lightning_tensor/_tensornet.py b/pennylane_lightning/lightning_tensor/_tensornet.py index 05849ad4bb..967c0fbb17 100644 --- a/pennylane_lightning/lightning_tensor/_tensornet.py +++ b/pennylane_lightning/lightning_tensor/_tensornet.py @@ -21,8 +21,6 @@ except ImportError: pass -from itertools import product - import numpy as np import pennylane as qml from pennylane import BasisState, DeviceError, StatePrep @@ -223,20 +221,46 @@ def _preprocess_state_vector(self, state, device_wires): if len(device_wires) == self._num_wires and Wires(sorted(device_wires)) == device_wires: return np.reshape(state, output_shape).ravel(order="C") - # generate basis states on subset of qubits via the cartesian product - basis_states = np.array(list(product([0, 1], repeat=len(device_wires)))) + local_dev_wires = device_wires.tolist().copy() + local_dev_wires = local_dev_wires[::-1] + + # generate basis states on subset of qubits via broadcasting as substitute of cartesian product. 
+ + # Allocate a single row as a base to avoid a large array allocation with + # the cartesian product algorithm. + # Initialize the base with the pattern [0 1 0 1 ...]. + base = np.tile([0, 1], 2 ** (len(local_dev_wires) - 1)).astype(dtype=np.int64) + # Allocate the array where it will accumulate the value of the indexes depending on + # the value of the basis. + indexes = np.zeros(2 ** (len(local_dev_wires)), dtype=np.int64) + + max_dev_wire = self._num_wires - 1 + + # Iterate over all device wires. + for i, wire in enumerate(local_dev_wires): + + # Accumulate indexes from the basis. + indexes += base * 2 ** (max_dev_wire - wire) + + if i == len(local_dev_wires) - 1: + continue + + two_n = 2 ** (i + 1) # Compute the value of the base. - # get basis states to alter on full set of qubits - unravelled_indices = np.zeros((2 ** len(device_wires), self._num_wires), dtype=int) - unravelled_indices[:, device_wires] = basis_states + # Update the value of the base without reallocating a new array. + # Reshape the basis to swap the internal columns. 
+ base = base.reshape(-1, two_n * 2) + swapper_A = two_n // 2 + swapper_B = swapper_A + two_n - # get indices for which the state is changed to input state vector elements - ravelled_indices = np.ravel_multi_index(unravelled_indices.T, [2] * self._num_wires) + base[:, swapper_A:swapper_B] = base[:, swapper_A:swapper_B][:, ::-1] + # Flatten the base array + base = base.reshape(-1) # get full state vector to be factorized into MPS full_state = np.zeros(2**self._num_wires, dtype=self.dtype) for i, value in enumerate(state): - full_state[ravelled_indices[i]] = value + full_state[indexes[i]] = value return np.reshape(full_state, output_shape).ravel(order="C") def _apply_state_vector(self, state, device_wires: Wires): @@ -285,7 +309,7 @@ def _apply_MPO(self, gate_matrix, wires): None """ # TODO: Discuss if public interface for max_mpo_bond_dim argument - max_mpo_bond_dim = 2 ** len(wires) # Exact SVD decomposition for MPO + max_mpo_bond_dim = self._max_bond_dim # Get sorted wires and MPO site tensor mpos, sorted_wires = gate_matrix_decompose( diff --git a/setup.py b/setup.py index a326a90a2d..7e6e080e96 100644 --- a/setup.py +++ b/setup.py @@ -156,6 +156,12 @@ def build_extension(self, ext: CMakeExtension): env=os.environ, ) + # Ensure that catalyst shared object is copied to the build directory for pip editable install + if backend in ("lightning_kokkos"): + source = os.path.join(f"{extdir}", f"lib{backend}_catalyst.so") + destination = os.path.join(os.getcwd(), "build") + shutil.copy(source, destination) + with open(os.path.join("pennylane_lightning", "core", "_version.py"), encoding="utf-8") as f: version = f.readlines()[-1].split()[-1].strip("\"'") diff --git a/tests/conftest.py b/tests/conftest.py index a648418465..1c06ae0dc4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -142,10 +142,15 @@ def get_device(): from pennylane_lightning.lightning_kokkos_ops import LightningException elif device_name == "lightning.gpu": from pennylane_lightning.lightning_gpu 
import LightningGPU as LightningDevice - - LightningAdjointJacobian = None - LightningMeasurements = None - LightningStateVector = None + from pennylane_lightning.lightning_gpu._adjoint_jacobian import ( + LightningGPUAdjointJacobian as LightningAdjointJacobian, + ) + from pennylane_lightning.lightning_gpu._measurements import ( + LightningGPUMeasurements as LightningMeasurements, + ) + from pennylane_lightning.lightning_gpu._state_vector import ( + LightningGPUStateVector as LightningStateVector, + ) if hasattr(pennylane_lightning, "lightning_gpu_ops"): import pennylane_lightning.lightning_gpu_ops as lightning_ops diff --git a/tests/lightning_qubit/test_adjoint_jacobian_class.py b/tests/lightning_qubit/test_adjoint_jacobian_class.py index 6a6c5b8e92..74fc8e2427 100644 --- a/tests/lightning_qubit/test_adjoint_jacobian_class.py +++ b/tests/lightning_qubit/test_adjoint_jacobian_class.py @@ -481,6 +481,7 @@ def test_hermitian_expectation(self, tol, lightning_sv): tape.trainable_params = {0} statevector.reset_state() + vjp = self.calculate_vjp(statevector, tape, dy) assert np.allclose(vjp, -0.8 * np.sin(x), atol=tol) @@ -498,6 +499,7 @@ def test_hermitian_tensor_expectation(self, tol, lightning_sv): tape.trainable_params = {0} statevector.reset_state() + vjp = self.calculate_vjp(statevector, tape, dy) assert np.allclose(vjp, -0.8 * np.sin(x), atol=tol) diff --git a/tests/lightning_qubit/test_measurements_class.py b/tests/lightning_qubit/test_measurements_class.py index c1b6fb44d5..5ddb6a5f5b 100644 --- a/tests/lightning_qubit/test_measurements_class.py +++ b/tests/lightning_qubit/test_measurements_class.py @@ -669,8 +669,8 @@ def test_double_return_value(self, shots, measurement, obs0_, obs1_, lightning_s assert np.allclose(r, e, atol=dtol, rtol=dtol) @pytest.mark.skipif( - device_name == "lightning.tensor", - reason="lightning.tensor does not support out of order probs.", + device_name in ("lightning.tensor"), + reason=f"{device_name} does not support out of order 
probs.", ) @pytest.mark.parametrize( "cases", diff --git a/tests/lightning_qubit/test_state_vector_class.py b/tests/lightning_qubit/test_state_vector_class.py index 3918afcd5f..b3baaa3ea6 100644 --- a/tests/lightning_qubit/test_state_vector_class.py +++ b/tests/lightning_qubit/test_state_vector_class.py @@ -30,6 +30,9 @@ except ImportError: pass +if device_name == "lightning.gpu": + from pennylane_lightning.lightning_gpu._mpi_handler import MPIHandler + if device_name == "lightning.tensor": pytest.skip("Skipping tests for the LightningTensor class.", allow_module_level=True) @@ -39,6 +42,7 @@ allow_module_level=True, ) + if not LightningDevice._CPP_BINARY_AVAILABLE: pytest.skip("No binary module found. Skipping.", allow_module_level=True) @@ -86,10 +90,18 @@ def test_apply_state_vector_with_lightning_handle(tol): state_vector_1 = LightningStateVector(2) state_vector_1.apply_operations([qml.BasisState(np.array([0, 1]), wires=[0, 1])]) - state_vector_2 = LightningStateVector(2) - state_vector_2._apply_state_vector(state_vector_1.state_vector, Wires([0, 1])) + if device_name == "lightning.gpu": + with pytest.raises( + qml.DeviceError, match="LightningGPU does not support allocate external state_vector." 
+ ): + state_vector_2 = LightningStateVector(2) + state_vector_2._apply_state_vector(state_vector_1.state_vector, Wires([0, 1])) + + else: + state_vector_2 = LightningStateVector(2) + state_vector_2._apply_state_vector(state_vector_1.state_vector, Wires([0, 1])) - assert np.allclose(state_vector_1.state, state_vector_2.state, atol=tol, rtol=0) + assert np.allclose(state_vector_1.state, state_vector_2.state, atol=tol, rtol=0) @pytest.mark.parametrize( diff --git a/tests/new_api/test_device.py b/tests/new_api/test_device.py index 0485f3a054..111dd3af7d 100644 --- a/tests/new_api/test_device.py +++ b/tests/new_api/test_device.py @@ -43,8 +43,7 @@ validate_measurements, validate_observables, ) - -if device_name == "lightning.kokkos": +elif device_name == "lightning.kokkos": from pennylane_lightning.lightning_kokkos.lightning_kokkos import ( _add_adjoint_transforms, _adjoint_ops, @@ -62,13 +61,31 @@ validate_measurements, validate_observables, ) - - -if device_name == "lightning.tensor": +elif device_name == "lightning.gpu": + from pennylane_lightning.lightning_gpu.lightning_gpu import ( + _add_adjoint_transforms, + _adjoint_ops, + _supports_adjoint, + accepted_observables, + adjoint_measurements, + adjoint_observables, + decompose, + mid_circuit_measurements, + no_sampling, + stopping_condition, + stopping_condition_shots, + validate_adjoint_trainable_params, + validate_device_wires, + validate_measurements, + validate_observables, + ) +elif device_name == "lightning.tensor": from pennylane_lightning.lightning_tensor.lightning_tensor import ( accepted_observables, stopping_condition, ) +else: + raise TypeError(f"The device name: {device_name} is not a valid name") if not LightningDevice._new_API: pytest.skip("Exclusive tests for new device API. 
Skipping.", allow_module_level=True) @@ -448,6 +465,11 @@ def test_execute_single_measurement(self, theta, phi, mp, dev): if isinstance(mp.obs, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): mp.obs = qml.operation.convert_to_legacy_H(mp.obs) + if isinstance(mp.obs, qml.SparseHamiltonian) and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + qs = QuantumScript( [ qml.RX(phi, 0), @@ -641,6 +663,12 @@ def test_supports_derivatives(self, dev, config, tape, expected, batch_obs): qml.Z(1) + qml.X(1), qml.Hamiltonian([-1.0, 1.5], [qml.Z(1), qml.X(1)]), qml.Hermitian(qml.Hadamard.compute_matrix(), 0), + qml.SparseHamiltonian( + qml.Hamiltonian([-1.0, 1.5], [qml.Z(1), qml.X(1)]).sparse_matrix( + wire_order=[0, 1, 2] + ), + wires=[0, 1, 2], + ), qml.Projector([1], 1), ], ) @@ -649,6 +677,11 @@ def test_derivatives_single_expval( self, theta, phi, dev, obs, execute_and_derivatives, batch_obs ): """Test that the jacobian is correct when a tape has a single expectation value""" + if isinstance(obs, qml.SparseHamiltonian) and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + if isinstance(obs, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): obs = qml.operation.convert_to_legacy_H(obs) @@ -705,6 +738,11 @@ def test_derivatives_multi_expval( self, theta, phi, omega, dev, obs1, obs2, execute_and_derivatives, batch_obs ): """Test that the jacobian is correct when a tape has multiple expectation values""" + if isinstance(obs2, qml.SparseHamiltonian) and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + if isinstance(obs1, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): obs1 = qml.operation.convert_to_legacy_H(obs1) if 
isinstance(obs2, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): @@ -1074,6 +1112,11 @@ def test_vjp_multi_expval( self, theta, phi, omega, dev, obs1, obs2, execute_and_derivatives, batch_obs ): """Test that the VJP is correct when a tape has multiple expectation values""" + if isinstance(obs2, qml.SparseHamiltonian) and dev.dtype == np.complex64: + pytest.skip( + reason="The conversion from qml.Hamiltonian to SparseHamiltonian is only possible with np.complex128" + ) + if isinstance(obs1, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): obs1 = qml.operation.convert_to_legacy_H(obs1) if isinstance(obs2, qml.ops.LinearCombination) and not qml.operation.active_new_opmath(): diff --git a/tests/test_measurements.py b/tests/test_measurements.py index 211a8c134b..6cb008f12f 100644 --- a/tests/test_measurements.py +++ b/tests/test_measurements.py @@ -151,8 +151,8 @@ def circuit(): _ = circuit() @pytest.mark.skipif( - device_name in ("lightning.gpu", "lightning.tensor"), - reason="lightning.gpu/lightning.tensor does not support out of order prob.", + device_name in ("lightning.tensor"), + reason="lightning.tensor does not support out of order prob.", ) @pytest.mark.parametrize( "cases", diff --git a/tests/test_native_mcm.py b/tests/test_native_mcm.py index 07281fb48a..050e1d27c6 100644 --- a/tests/test_native_mcm.py +++ b/tests/test_native_mcm.py @@ -21,7 +21,7 @@ from conftest import LightningDevice, device_name, validate_measurements from flaky import flaky -if device_name not in ("lightning.qubit", "lightning.kokkos"): +if device_name not in ("lightning.qubit", "lightning.kokkos", "lightning.gpu"): pytest.skip("Native MCM not supported. 
Skipping.", allow_module_level=True) if not LightningDevice._CPP_BINARY_AVAILABLE: # pylint: disable=protected-access @@ -89,7 +89,7 @@ def func(x, y): match=f"not accepted with finite shots on lightning.qubit", ): func(*params) - if device_name == "lightning.kokkos": + if device_name in ("lightning.kokkos", "lightning.gpu"): with pytest.raises( qml.DeviceError, match=r"Measurement shadow\(wires=\[0\]\) not accepted with finite shots on " diff --git a/tests/test_var.py b/tests/test_var.py index 4b4e8561fa..7bdcec2c20 100644 --- a/tests/test_var.py +++ b/tests/test_var.py @@ -24,7 +24,6 @@ if not ld._CPP_BINARY_AVAILABLE: pytest.skip("No binary module found. Skipping.", allow_module_level=True) - np.random.seed(42)