diff --git a/_static/demo_thumbnails/large_demo_thumbnails/thumbnail_large_machine_learning_quantum_manybody_problems.png b/_static/demo_thumbnails/large_demo_thumbnails/thumbnail_large_machine_learning_quantum_manybody_problems.png
new file mode 100644
index 0000000000..4135fa7113
Binary files /dev/null and b/_static/demo_thumbnails/large_demo_thumbnails/thumbnail_large_machine_learning_quantum_manybody_problems.png differ
diff --git a/_static/demo_thumbnails/opengraph_demo_thumbnails/OGthumbnail_machine_learning_quantum_manybody_problems.png b/_static/demo_thumbnails/opengraph_demo_thumbnails/OGthumbnail_machine_learning_quantum_manybody_problems.png
new file mode 100644
index 0000000000..e20412fe68
Binary files /dev/null and b/_static/demo_thumbnails/opengraph_demo_thumbnails/OGthumbnail_machine_learning_quantum_manybody_problems.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/class_shadow_ml.png b/_static/demonstration_assets/ml_classical_shadows/class_shadow_ml.png
new file mode 100644
index 0000000000..af3e025857
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/class_shadow_ml.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/class_shadow_prep.png b/_static/demonstration_assets/ml_classical_shadows/class_shadow_prep.png
new file mode 100644
index 0000000000..eca776ab0d
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/class_shadow_prep.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/error_correlation_matrix.png b/_static/demonstration_assets/ml_classical_shadows/error_correlation_matrix.png
new file mode 100644
index 0000000000..b98f33e5d6
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/error_correlation_matrix.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/exact_correlation_matrix.png b/_static/demonstration_assets/ml_classical_shadows/exact_correlation_matrix.png
new file mode 100644
index 0000000000..590cf18da2
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/exact_correlation_matrix.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/fidel_snapshot.png b/_static/demonstration_assets/ml_classical_shadows/fidel_snapshot.png
new file mode 100644
index 0000000000..5531dfca4a
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/fidel_snapshot.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/ml_classical_shadow.png b/_static/demonstration_assets/ml_classical_shadows/ml_classical_shadow.png
new file mode 100644
index 0000000000..232e494141
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/ml_classical_shadow.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/model_comparison.png b/_static/demonstration_assets/ml_classical_shadows/model_comparison.png
new file mode 100644
index 0000000000..80ddd7a829
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/model_comparison.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/rmse_shadow.png b/_static/demonstration_assets/ml_classical_shadows/rmse_shadow.png
new file mode 100644
index 0000000000..ca9da55e22
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/rmse_shadow.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/rmse_training.png b/_static/demonstration_assets/ml_classical_shadows/rmse_training.png
new file mode 100644
index 0000000000..a33f6c218a
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/rmse_training.png differ
diff --git a/_static/demonstration_assets/ml_classical_shadows/spin_lattice.png b/_static/demonstration_assets/ml_classical_shadows/spin_lattice.png
new file mode 100644
index 0000000000..1ecf7c8ef1
Binary files /dev/null and b/_static/demonstration_assets/ml_classical_shadows/spin_lattice.png differ
diff --git a/demonstrations/ml_classical_shadows.metadata.json b/demonstrations/ml_classical_shadows.metadata.json
new file mode 100644
index 0000000000..14c812fb20
--- /dev/null
+++ b/demonstrations/ml_classical_shadows.metadata.json
@@ -0,0 +1,66 @@
+{
+    "title": "Machine learning for quantum many-body problems",
+    "authors": [
+        {
+            "username": "whatsis"
+        }
+    ],
+    "dateOfPublication": "2022-05-02T00:00:00+00:00",
+    "dateOfLastModification": "2024-09-05T00:00:00+00:00",
+    "categories": [
+        "Quantum Machine Learning"
+    ],
+    "tags": [],
+    "previewImages": [
+        {
+            "type": "thumbnail",
+            "uri": "_static/demo_thumbnails/regular_demo_thumbnails/thumbnail_machine_learning_quantum_manybody_problems.png"
+        },
+        {
+            "type": "large_thumbnail",
+            "uri": "_static/demo_thumbnails/large_demo_thumbnails/thumbnail_large_machine_learning_quantum_manybody_problems.png"
+        }
+    ],
+    "seoDescription": "Learn how to apply machine learning to quantum many-body problems by using classical shadow formalism and ML to predict the ground-state properties of the 2D antiferromagnetic Heisenberg model.",
+    "doi": "",
+    "canonicalURL": "/qml/demos/ml_classical_shadows",
+    "references": [
+        {
+            "id": "preskill",
+            "type": "article",
+            "title": "Provably efficient machine learning for quantum many-body problems",
+            "authors": "H. Y. Huang, R. Kueng, G. Torlai, V. V. Albert, J. Preskill",
+            "year": "2021",
+            "journal": "",
+            "url": "https://arxiv.org/abs/2106.12627"
+        },
+        {
+            "id": "neurtangkernel",
+            "type": "article",
+            "title": "Neural tangent kernel: Convergence and generalization in neural networks",
+            "authors": "A. Jacot, F. Gabriel, and C. Hongler",
+            "year": "2018",
+            "journal": "",
+            "url": "https://proceedings.neurips.cc/paper/2018/file/5a4be1fa34e62bb8a6ec6b91d2462f5a-Paper.pdf"
+        }
+    ],
+    "basedOnPapers": ["10.48550/arXiv.2106.12627"],
+    "referencedByPapers": [],
+    "relatedContent": [
+        {
+            "type": "demonstration",
+            "id": "tutorial_classical_shadows",
+            "weight": 1.0
+        },
+        {
+            "type": "demonstration",
+            "id": "tutorial_kernel_based_training",
+            "weight": 1.0
+        },
+        {
+            "type": "demonstration",
+            "id": "tutorial_kernels_module",
+            "weight": 1.0
+        }
+    ]
+}
diff --git a/demonstrations/ml_classical_shadows.py b/demonstrations/ml_classical_shadows.py
new file mode 100644
index 0000000000..05050cf555
--- /dev/null
+++ b/demonstrations/ml_classical_shadows.py
@@ -0,0 +1,862 @@
+r"""
+Machine learning for quantum many-body problems
+==============================================================
+
+Storing and processing a complete description of an :math:`n`-qubit quantum mechanical
+system is challenging because the amount of memory required generally scales exponentially
+with the number of qubits. The quantum community has recently addressed this challenge by using
+the :doc:`classical shadow <tutorial_classical_shadows>` formalism, which allows us to build more
+concise classical descriptions of quantum states using randomized single-qubit measurements.
+It was argued in Ref.
+[#preskill]_ that combining classical shadows with classical machine learning
+gives rise to models that can efficiently predict properties of quantum systems, such as
+the expectation value of a Hamiltonian, correlation functions, and entanglement entropies.
+
+.. figure:: /_static/demonstration_assets/ml_classical_shadows/class_shadow_ml.png
+   :align: center
+   :width: 80 %
+   :alt: Combining ML with Classical Shadow
+
+   Combining machine learning and classical shadows
+
+
+In this demo, we describe one of the ideas presented in Ref. [#preskill]_ for using the classical
+shadow formalism and machine learning to predict the ground-state properties of the
+2D antiferromagnetic Heisenberg model. We begin by learning how to build the Heisenberg model,
+calculate its ground-state properties, and compute its classical shadow. Finally, we demonstrate
+how to use :doc:`kernel-based learning models <tutorial_kernel_based_training>` to predict ground-state
+properties from the learned classical shadows. So let's get started!
+
+.. note::
+    This demo is compatible with the latest version of PennyLane and ``neural-tangents==0.6.5``.
+    The latter is required for building the kernel for the infinite network used in training.
+
+
+Building the 2D Heisenberg model
+---------------------------------
+
+We define a two-dimensional antiferromagnetic `Heisenberg model
+<https://en.wikipedia.org/wiki/Quantum_Heisenberg_model>`__ as a square
+lattice, where a spin-1/2 particle occupies each site. The antiferromagnetic
+nature and the overall physics of this model depend on the couplings
+:math:`J_{ij}` present between the spins, as reflected in the Hamiltonian
+associated with the model:
+
+.. math::  H = \sum_{i < j} J_{ij}(X_i X_j + Y_i Y_j + Z_i Z_j) .
+
+Here, we consider the family of Hamiltonians where all the couplings :math:`J_{ij}`
+are sampled uniformly from :math:`[0, 2]`. We build a coupling matrix :math:`J` by providing
+the number of rows :math:`N_r` and columns :math:`N_c` present in the square lattice.
+The dimensions of this matrix are :math:`N_s \times N_s`, where :math:`N_s = N_r \times N_c`
+is the total number of spin particles present in the model.
+
+
+"""
+
+import itertools as it
+import numpy as np
+
+def build_coupling_mats(num_mats, num_rows, num_cols):
+    num_spins = num_rows * num_cols
+    coupling_mats = np.zeros((num_mats, num_spins, num_spins))
+    coup_terms = np.random.RandomState(24).uniform(0, 2,
+                        size=(num_mats, 2 * num_rows * num_cols - num_rows - num_cols))
+    # populate the horizontal and vertical edges of the grid lattice
+    edges = [(si, sj) for (si, sj) in it.combinations(range(num_spins), 2)
+             if (sj % num_cols and sj - si == 1) or sj - si == num_cols]
+    for itr in range(num_mats):
+        for ((i, j), term) in zip(edges, coup_terms[itr]):
+            coupling_mats[itr][i][j] = coupling_mats[itr][j][i] = term
+    return coupling_mats
+
+
+######################################################################
+# For this demo, we study a model with four spins arranged on the nodes of
+# a square lattice. We require four qubits to simulate this model: one
+# qubit for each spin. We start by building a coupling matrix ``J_mat``
+# using our previously defined function.
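+#
+# Before moving on, a quick sanity check of the construction (a small sketch of
+# ours, not part of the original workflow): each sampled coupling matrix should
+# be symmetric, and an :math:`N_r \times N_c` grid should contain exactly
+# :math:`2 N_r N_c - N_r - N_c` lattice edges.
+#
+# .. code-block:: python
+#
+#     J_test = build_coupling_mats(3, 2, 3)  # three random models on a 2x3 lattice
+#     n_edges = 2 * 2 * 3 - 2 - 3  # a 2x3 grid has 7 edges
+#     assert all(np.allclose(J, J.T) for J in J_test)
+#     assert all(np.count_nonzero(J) == 2 * n_edges for J in J_test)  # J_ij and J_ji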
+#
+
+Nr, Nc = 2, 2
+num_qubits = Nr * Nc  # Ns
+J_mat = build_coupling_mats(1, Nr, Nc)[0]
+
+
+######################################################################
+# We can now visualize the model instance by representing the coupling matrix as a
+# ``networkx`` graph:
+
+import matplotlib.pyplot as plt
+import networkx as nx
+
+G = nx.from_numpy_array(J_mat, create_using=nx.DiGraph)
+pos = {i: (i % Nc, -(i // Nc)) for i in G.nodes()}
+edge_labels = {(x, y): np.round(J_mat[x, y], 2) for x, y in G.edges()}
+weights = [x + 1.5 for x in list(nx.get_edge_attributes(G, "weight").values())]
+
+plt.figure(figsize=(4, 4))
+nx.draw(
+    G, pos, node_color="lightblue", with_labels=True,
+    node_size=600, width=weights, edge_color="firebrick",
+)
+nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edge_labels)
+plt.show()
+
+##############################################################################
+# .. figure:: /_static/demonstration_assets/ml_classical_shadows/spin_lattice.png
+#     :width: 40%
+#     :align: center
+#     :alt: Spin model on square lattice
+
+
+######################################################################
+# We then use the same coupling matrix ``J_mat`` to obtain the Hamiltonian
+# :math:`H` for the model we have instantiated above.
+#
+
+import pennylane as qml
+
+def Hamiltonian(J_mat):
+    coeffs, ops = [], []
+    ns = J_mat.shape[0]
+    for i, j in it.combinations(range(ns), r=2):
+        coeff = J_mat[i, j]
+        if coeff:
+            for op in [qml.PauliX, qml.PauliY, qml.PauliZ]:
+                coeffs.append(coeff)
+                ops.append(op(i) @ op(j))
+    H = qml.Hamiltonian(coeffs, ops)
+    return H
+
+print(f"Hamiltonian =\n{Hamiltonian(J_mat)}")
+
+##############################################################################
+# .. rst-class:: sphx-glr-script-out
+#
+#
+#  .. code-block:: none
+#
+#     Hamiltonian =
+#     1.920034606671837 * (X(0) @ X(1)) + 1.920034606671837 * (Y(0) @ Y(1)) + 1.920034606671837 * (Z(0) @ Z(1)) + 1.399024099899152 * (X(0) @ X(2)) + 1.399024099899152 * (Y(0) @ Y(2)) + 1.399024099899152 * (Z(0) @ Z(2)) + 1.9997345852477584 * (X(1) @ X(3)) + 1.9997345852477584 * (Y(1) @ Y(3)) + 1.9997345852477584 * (Z(1) @ Z(3)) + 0.44013459956570355 * (X(2) @ X(3)) + 0.44013459956570355 * (Y(2) @ Y(3)) + 0.44013459956570355 * (Z(2) @ Z(3))
+
+
+######################################################################
+# For the Heisenberg model, a property of interest is usually the two-body
+# correlation function :math:`C_{ij}`, which for a pair of spins :math:`i`
+# and :math:`j` is defined as the following operator:
+#
+# .. math::  \hat{C}_{ij} = \frac{1}{3} (X_i X_j + Y_i Y_j + Z_i Z_j).
+
+def corr_function(i, j):
+    ops = []
+    for op in [qml.PauliX, qml.PauliY, qml.PauliZ]:
+        if i != j:
+            ops.append(op(i) @ op(j))
+        else:
+            ops.append(qml.Identity(i))
+    return ops
+
+######################################################################
+#
+# The expectation value of each such operator :math:`\hat{C}_{ij}` with respect to
+# the ground state :math:`|\psi_{0}\rangle` of the model can be used to build
+# the correlation matrix :math:`C`:
+#
+# .. math::  {C}_{ij} = \langle \hat{C}_{ij} \rangle = \frac{1}{3} \langle \psi_{0} | X_i X_j + Y_i Y_j + Z_i Z_j | \psi_{0} \rangle .
+#
+
+######################################################################
+# Hence, to build :math:`C` for the model, we need to calculate its
+# ground state :math:`|\psi_{0}\rangle`. We do this by diagonalizing
+# the Hamiltonian for the model. Then, we obtain the eigenvector corresponding
+# to the smallest eigenvalue.
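+#
+# Because this system is small (a :math:`16 \times 16` Hamiltonian), dense
+# diagonalization is also feasible and gives a handy cross-check of the sparse
+# solver used below (a sketch of ours; ``np.linalg.eigh`` returns eigenvalues
+# in ascending order, so the first column is the ground state):
+#
+# .. code-block:: python
+#
+#     H_dense = Hamiltonian(J_mat).sparse_matrix().toarray()
+#     evals, evecs = np.linalg.eigh(H_dense)
+#     ground_energy, ground_state = evals[0], evecs[:, 0]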
+#
+
+import scipy as sp
+
+ham = Hamiltonian(J_mat)
+eigvals, eigvecs = sp.sparse.linalg.eigs(ham.sparse_matrix())
+psi0 = eigvecs[:, np.argmin(eigvals)]
+
+
+######################################################################
+# We then build a circuit that initializes the qubits into the ground
+# state and measures the expectation value of the provided set of observables.
+#
+
+dev_exact = qml.device("default.qubit", wires=num_qubits)  # for exact simulation
+
+def circuit(psi, observables):
+    psi = psi / np.linalg.norm(psi)  # normalize the state
+    qml.StatePrep(psi, wires=range(num_qubits))
+    return [qml.expval(o) for o in observables]
+
+circuit_exact = qml.QNode(circuit, dev_exact)
+
+
+######################################################################
+# Finally, we execute this circuit to obtain the exact correlation matrix
+# :math:`C`. We compute the correlation operators :math:`\hat{C}_{ij}` and
+# their expectation values with respect to the ground state :math:`|\psi_0\rangle`.
+#
+
+coups = list(it.product(range(num_qubits), repeat=2))
+corrs = [corr_function(i, j) for i, j in coups]
+
+def build_exact_corrmat(coups, corrs, circuit, psi):
+    corr_mat_exact = np.zeros((num_qubits, num_qubits))
+    for idx, (i, j) in enumerate(coups):
+        corr = corrs[idx]
+        if i == j:
+            corr_mat_exact[i][j] = 1.0
+        else:
+            corr_mat_exact[i][j] = (
+                np.sum(np.array([circuit(psi, observables=[o]) for o in corr]).T) / 3
+            )
+            corr_mat_exact[j][i] = corr_mat_exact[i][j]
+    return corr_mat_exact
+
+expval_exact = build_exact_corrmat(coups, corrs, circuit_exact, psi0)
+
+#########################################################################
+# Once built, we can visualize the correlation matrix:
+#
+
+fig, ax = plt.subplots(1, 1, figsize=(4, 4))
+im = ax.imshow(expval_exact, cmap=plt.get_cmap("RdBu"), vmin=-1, vmax=1)
+ax.xaxis.set_ticks(range(num_qubits))
+ax.yaxis.set_ticks(range(num_qubits))
+ax.xaxis.set_tick_params(labelsize=14)
+ax.yaxis.set_tick_params(labelsize=14)
+ax.set_title("Exact Correlation Matrix", fontsize=14)
+
+bar = fig.colorbar(im, pad=0.05, shrink=0.80)
+bar.set_label(r"$C_{ij}$", fontsize=14, rotation=0)
+bar.ax.tick_params(labelsize=14)
+plt.show()
+
+##############################################################################
+# .. figure:: /_static/demonstration_assets/ml_classical_shadows/exact_correlation_matrix.png
+#     :width: 55%
+#     :align: center
+#     :alt: Exact correlation matrix
+
+
+######################################################################
+# Constructing classical shadows
+# ------------------------------
+#
+
+
+######################################################################
+# Now that we have built the Heisenberg model, the next step is to construct
+# a :doc:`classical shadow <tutorial_classical_shadows>` representation for its ground state. To construct an
+# approximate classical representation of an :math:`n`-qubit quantum state :math:`\rho`,
+# we perform randomized single-qubit measurements on :math:`T` copies of
+# :math:`\rho`. Each measurement is chosen randomly among the Pauli bases
+# :math:`X`, :math:`Y`, or :math:`Z` to yield :math:`n` random pure product
+# states :math:`|s_i\rangle` for each copy:
+#
+# .. math::  |s_{i}^{(t)}\rangle \in \{|0\rangle, |1\rangle, |+\rangle, |-\rangle, |i+\rangle, |i-\rangle\}.
+#
+# Collecting these snapshot states over all :math:`T` copies gives the classical shadow:
+#
+# .. math::  S_T(\rho) = \big\{|s_{i}^{(t)}\rangle: i\in\{1,\ldots, n\},\ t\in\{1,\ldots, T\} \big\}.
+#
+# Each of the :math:`|s_i^{(t)}\rangle` provides us with a snapshot of the
+# state :math:`\rho`, and the :math:`nT` measurements yield the complete
+# set :math:`S_{T}`, which requires just :math:`3nT` bits to be stored
+# in classical memory. This is discussed in further detail in our previous
+# demo about :doc:`classical shadows <tutorial_classical_shadows>`.
+#
+
+######################################################################
+# .. figure:: /_static/demonstration_assets/ml_classical_shadows/class_shadow_prep.png
+#     :align: center
+#     :width: 100 %
+#     :alt: Preparing Classical Shadows
+#
+
+
+######################################################################
+# To prepare a classical shadow for the ground state of the Heisenberg
+# model, we simply reuse the circuit template used above and construct
+# a ``QNode`` that uses a device performing single-shot measurements.
+#
+
+dev_oshot = qml.device("default.qubit", wires=num_qubits, shots=1)
+circuit_oshot = qml.QNode(circuit, dev_oshot)
+
+
+######################################################################
+# Now, we define a function to build the classical shadow for the quantum
+# state prepared by a given :math:`n`-qubit circuit using :math:`T` copies
+# of randomized Pauli basis measurements.
+#
+
+
+def gen_class_shadow(circ_template, circuit_params, num_shadows, num_qubits):
+    # prepare the complete set of available Pauli operators
+    unitary_ops = [qml.PauliX, qml.PauliY, qml.PauliZ]
+    # sample random Pauli measurements uniformly
+    unitary_ensmb = np.random.randint(0, 3, size=(num_shadows, num_qubits), dtype=int)
+
+    outcomes = np.zeros((num_shadows, num_qubits))
+    for ns in range(num_shadows):
+        # for each snapshot, extract the Pauli basis measurement to be performed
+        meas_obs = [unitary_ops[unitary_ensmb[ns, i]](i) for i in range(num_qubits)]
+        # perform a single-shot randomized Pauli measurement for each qubit
+        outcomes[ns, :] = circ_template(circuit_params, observables=meas_obs)
+
+    return outcomes, unitary_ensmb
+
+outcomes, basis = gen_class_shadow(circuit_oshot, psi0, 100, num_qubits)
+print("First five measurement outcomes =\n", outcomes[:5])
+print("First five measurement bases =\n", basis[:5])
+
+##############################################################################
+# .. rst-class:: sphx-glr-script-out
+#
+#
+#  .. code-block:: none
+#
+#     First five measurement outcomes =
+#      [[-1.  1. -1.  1.]
+#      [ 1.  1. -1. -1.]
+#      [-1.  1.  1. -1.]
+#      [-1.  1. -1. -1.]
+#      [ 1.  1. -1.  1.]]
+#     First five measurement bases =
+#      [[2 2 1 0]
+#      [1 2 0 2]
+#      [2 1 1 0]
+#      [0 0 1 0]
+#      [2 0 2 1]]
+
+
+######################################################################
+# Furthermore, :math:`S_{T}` can be used to construct an approximation
+# of the underlying :math:`n`-qubit state :math:`\rho` by averaging over
+# the snapshot states :math:`\sigma_t`:
+#
+# .. math::  \sigma_T(\rho) = \frac{1}{T} \sum_{t=1}^{T} \big(3|s_{1}^{(t)}\rangle\langle s_1^{(t)}| - \mathbb{I}\big)\otimes \ldots \otimes \big(3|s_{n}^{(t)}\rangle\langle s_n^{(t)}| - \mathbb{I}\big).
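+#
+# Each single-qubit factor :math:`3|s_i^{(t)}\rangle\langle s_i^{(t)}| - \mathbb{I}`
+# inverts the depolarizing effect of measuring in a random Pauli basis. As a quick
+# numerical illustration (a self-contained sketch of ours, not part of the demo's
+# pipeline), averaging this expression over many simulated single-qubit
+# measurements of a known state recovers that state:
+#
+# .. code-block:: python
+#
+#     rng = np.random.default_rng(42)
+#     rho = np.array([[0.75, 0.25], [0.25, 0.25]])  # an arbitrary single-qubit state
+#     paulis = [np.array([[0, 1], [1, 0]]), np.array([[0, -1j], [1j, 0]]), np.diag([1.0, -1.0])]
+#     est = np.zeros((2, 2), dtype=complex)
+#     T = 50000
+#     for _ in range(T):
+#         # pick a random Pauli basis and sample an eigenstate with Born probabilities
+#         _, evecs = np.linalg.eigh(paulis[rng.integers(3)])
+#         probs = np.real([v.conj() @ rho @ v for v in evecs.T])
+#         s = evecs[:, rng.choice(2, p=probs / probs.sum())]
+#         est += 3 * np.outer(s, s.conj()) - np.eye(2)
+#     print(np.round(est / T, 2))  # approximately rho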
+#
+
+
+def snapshot_state(meas_list, obs_list):
+    # undo the rotations done for performing Pauli measurements in the specific basis
+    rotations = [
+        qml.matrix(qml.Hadamard(wires=0)),  # X-basis
+        qml.matrix(qml.Hadamard(wires=0)) @ qml.matrix(qml.adjoint(qml.S(wires=0))),  # Y-basis
+        qml.matrix(qml.Identity(wires=0)),  # Z-basis
+    ]
+
+    # reconstruct snapshot from local Pauli measurements
+    rho_snapshot = [1]
+    for meas_out, basis in zip(meas_list, obs_list):
+        # prepare the computational-basis projector: |0><0| for outcome +1 and |1><1| for outcome -1
+        state = np.array([[1, 0], [0, 0]]) if meas_out == 1 else np.array([[0, 0], [0, 1]])
+        local_rho = 3 * (rotations[basis].conj().T @ state @ rotations[basis]) - np.eye(2)
+        rho_snapshot = np.kron(rho_snapshot, local_rho)
+
+    return rho_snapshot
+
+def shadow_state_reconst(shadow):
+    num_snapshots, num_qubits = shadow[0].shape
+    meas_lists, obs_lists = shadow
+
+    # reconstruct the quantum state from its classical shadow
+    shadow_rho = np.zeros((2 ** num_qubits, 2 ** num_qubits), dtype=complex)
+    for i in range(num_snapshots):
+        shadow_rho += snapshot_state(meas_lists[i], obs_lists[i])
+
+    return shadow_rho / num_snapshots
+
+
+######################################################################
+# To see how well the reconstruction works for different values of :math:`T`,
+# we look at the `fidelity <https://en.wikipedia.org/wiki/Fidelity_of_quantum_states>`_
+# of the actual quantum state with respect to the reconstructed quantum state from
+# the classical shadow with :math:`T` copies. On average, as the number of copies
+# :math:`T` is increased, the reconstruction becomes more effective, with higher
+# average fidelity (orange) and lower variance (blue). Eventually, in the
+# limit :math:`T\rightarrow\infty`, the reconstruction will be exact.
+#
+# .. figure:: /_static/demonstration_assets/ml_classical_shadows/fidel_snapshot.png
+#     :align: center
+#     :width: 80 %
+#     :alt: Fidelity of reconstructed ground state with different shadow sizes :math:`T`
+#
+#     Fidelity of the reconstructed ground state with different shadow sizes :math:`T`
+#
+
+
+######################################################################
+# The reconstructed quantum state :math:`\sigma_T` can also be used to
+# evaluate expectation values :math:`\text{Tr}(O\sigma_T)` for some localized
+# observable :math:`O = \bigotimes_{i}^{n} P_i`, where :math:`P_i \in \{I, X, Y, Z\}`.
+# However, as shown above, :math:`\sigma_T` is only an approximation of
+# :math:`\rho` for finite values of :math:`T`. Therefore, to estimate
+# :math:`\langle O \rangle` robustly, we use median-of-means
+# estimation. For this purpose, we split the :math:`T` snapshots into
+# :math:`K` equally sized groups, compute the mean of :math:`\langle O \rangle`
+# within each group, and take the median of these group means.
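+#
+# The robustness of this estimator is easy to see on synthetic data (a
+# self-contained sketch of ours): for heavy-tailed samples, the median of
+# group means is far less sensitive to outliers than the global mean.
+#
+# .. code-block:: python
+#
+#     rng = np.random.default_rng(0)
+#     samples = rng.standard_cauchy(1000)  # heavy-tailed, so the plain mean is unreliable
+#     group_means = samples.reshape(10, -1).mean(axis=1)  # K = 10 groups of 100
+#     print(np.mean(samples), np.median(group_means))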
+#
+
+
+def estimate_shadow_obs(shadow, observable, k=10):
+    shadow_size = shadow[0].shape[0]
+
+    # convert PennyLane observables to indices
+    map_name_to_int = {"PauliX": 0, "PauliY": 1, "PauliZ": 2}
+    if isinstance(observable, (qml.PauliX, qml.PauliY, qml.PauliZ)):
+        target_obs = np.array([map_name_to_int[observable.name]])
+        target_locs = np.array([observable.wires[0]])
+    else:
+        target_obs = np.array([map_name_to_int[o.name] for o in observable.obs])
+        target_locs = np.array([o.wires[0] for o in observable.obs])
+
+    # perform median of means to return the result
+    means = []
+    meas_list, obs_lists = shadow
+    for i in range(0, shadow_size, shadow_size // k):
+        meas_list_k, obs_lists_k = (
+            meas_list[i : i + shadow_size // k],
+            obs_lists[i : i + shadow_size // k],
+        )
+        indices = np.all(obs_lists_k[:, target_locs] == target_obs, axis=1)
+        if sum(indices):
+            means.append(
+                np.sum(np.prod(meas_list_k[indices][:, target_locs], axis=1)) / sum(indices)
+            )
+        else:
+            means.append(0)
+
+    return np.median(means)
+
+
+######################################################################
+# Now we estimate the correlation matrix :math:`C^{\prime}` from the
+# classical shadow approximation of the ground state.
+#
+
+coups = list(it.product(range(num_qubits), repeat=2))
+corrs = [corr_function(i, j) for i, j in coups]
+qbobs = [qob for qobs in corrs for qob in qobs]
+
+def build_estim_corrmat(coups, corrs, num_obs, shadow):
+    k = int(2 * np.log(2 * num_obs))  # number of groups for the median-of-means estimator
+    corr_mat_estim = np.zeros((num_qubits, num_qubits))
+    for idx, (i, j) in enumerate(coups):
+        corr = corrs[idx]
+        if i == j:
+            corr_mat_estim[i][j] = 1.0
+        else:
+            corr_mat_estim[i][j] = (
+                np.sum(np.array([estimate_shadow_obs(shadow, o, k=k + 1) for o in corr])) / 3
+            )
+            corr_mat_estim[j][i] = corr_mat_estim[i][j]
+    return corr_mat_estim
+
+shadow = gen_class_shadow(circuit_oshot, psi0, 1000, num_qubits)
+expval_estmt = build_estim_corrmat(coups, corrs, len(qbobs), shadow)
+
+#########################################################################
+# This time, let us visualize the deviation observed between the exact correlation
+# matrix (:math:`C`) and the estimated correlation matrix (:math:`C^{\prime}`)
+# to assess the effectiveness of the classical shadow formalism.
+#
+
+fig, ax = plt.subplots(1, 1, figsize=(4.2, 4))
+im = ax.imshow(expval_exact - expval_estmt, cmap=plt.get_cmap("RdBu"), vmin=-1, vmax=1)
+ax.xaxis.set_ticks(range(num_qubits))
+ax.yaxis.set_ticks(range(num_qubits))
+ax.xaxis.set_tick_params(labelsize=14)
+ax.yaxis.set_tick_params(labelsize=14)
+ax.set_title("Error in estimating the\ncorrelation matrix", fontsize=14)
+
+bar = fig.colorbar(im, pad=0.05, shrink=0.80)
+bar.set_label(r"$\Delta C_{ij}$", fontsize=14, rotation=0)
+bar.ax.tick_params(labelsize=14)
+plt.show()
+
+######################################################################
+# .. figure:: /_static/demonstration_assets/ml_classical_shadows/error_correlation_matrix.png
+#     :align: center
+#     :width: 55 %
+#     :alt: Error in estimating correlation matrix
+#
+
+
+######################################################################
+# Training classical machine learning models
+# ------------------------------------------
+#
+
+
+######################################################################
+# There are multiple ways in which we can combine classical shadows and
+# machine learning.
+# This could include training a model to learn
+# the classical representation of quantum systems based on some system
+# parameter, estimating a property from such learned classical representations,
+# or a combination of both. In our case, we consider the problem of using
+# :doc:`kernel-based models <tutorial_kernel_based_training>` to learn the ground-state representation of the
+# Heisenberg model Hamiltonian :math:`H(x_l)` from the coupling vector :math:`x_l`,
+# where :math:`x_l = [J_{i,j} \text{ for } i < j]`. The goal is to predict the
+# correlation functions :math:`C_{ij}`:
+#
+# .. math::  \big\{x_l \rightarrow \sigma_T(\rho(x_l)) \rightarrow \text{Tr}(\hat{C}_{ij} \sigma_T(\rho(x_l))) \big\}_{l=1}^{N}.
+#
+# Here, we consider the following kernel-based machine learning model [#neurtangkernel]_:
+#
+# .. math::  \hat{\sigma}_{N} (x) = \sum_{l=1}^{N} \kappa(x, x_l)\sigma_T (x_l) = \sum_{l=1}^{N} \left(\sum_{l^{\prime}=1}^{N} k(x, x_{l^{\prime}})(K+\lambda I)^{-1}_{l, l^{\prime}} \sigma_T(x_l) \right),
+#
+# where :math:`\lambda > 0` is a regularization parameter for cases in which
+# :math:`K` is not invertible, :math:`\sigma_T(x_l)` denotes the classical
+# representation of the ground state :math:`\rho(x_l)` of the Heisenberg
+# model constructed using :math:`T` randomized Pauli measurements, and
+# :math:`K_{ij}=k(x_i, x_j)` is the kernel matrix with
+# :math:`k(x, x^{\prime})` as the kernel function.
+#
+# Similarly, an expectation value on the ground state predicted by the
+# trained model can then be estimated by evaluating:
+#
+# .. math::  \text{Tr}(\hat{O} \hat{\sigma}_{N} (x)) = \sum_{l=1}^{N} \kappa(x, x_l)\text{Tr}(\hat{O}\sigma_T (x_l)).
+#
+# We train the classical kernel-based models using :math:`N = 70`
+# randomly chosen values of the coupling matrices :math:`J`.
+#
+
+# imports for ML methods and techniques
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn import svm
+from sklearn.kernel_ridge import KernelRidge
+
+######################################################################
+# First, to build the dataset, we use the function ``build_dataset`` that
+# takes as input the size of the dataset (``num_points``), the topology of
+# the lattice (``Nr`` and ``Nc``), and the number of randomized
+# Pauli measurements (:math:`T`) for the construction of classical shadows.
+# ``X_data`` is the set of coupling vectors, each a stripped version of the
+# coupling matrix :math:`J` in which only the unique, non-zero :math:`J_{ij}`
+# are kept. ``y_exact`` and ``y_estim`` are the corresponding sets of
+# correlation vectors, i.e., the flattened correlation matrices :math:`C`,
+# computed with respect to the ground state obtained from exact
+# diagonalization and from the classical shadow representation
+# (with :math:`T=500`), respectively.
+#
+
+
+def build_dataset(num_points, Nr, Nc, T=500):
+
+    num_qubits = Nr * Nc
+    X, y_exact, y_estim = [], [], []
+    coupling_mats = build_coupling_mats(num_points, Nr, Nc)
+
+    for coupling_mat in coupling_mats:
+        ham = Hamiltonian(coupling_mat)
+        eigvals, eigvecs = sp.sparse.linalg.eigs(ham.sparse_matrix())
+        psi = eigvecs[:, np.argmin(eigvals)]
+        shadow = gen_class_shadow(circuit_oshot, psi, T, num_qubits)
+
+        coups = list(it.product(range(num_qubits), repeat=2))
+        corrs = [corr_function(i, j) for i, j in coups]
+        qbobs = [x for sublist in corrs for x in sublist]
+
+        expval_exact = build_exact_corrmat(coups, corrs, circuit_exact, psi)
+        expval_estim = build_estim_corrmat(coups, corrs, len(qbobs), shadow)
+
+        coupling_vec = []
+        for coup in coupling_mat.reshape(1, -1)[0]:
+            if coup and coup not in coupling_vec:
+                coupling_vec.append(coup)
+        coupling_vec = np.array(coupling_vec) / np.linalg.norm(coupling_vec)
+
+        X.append(coupling_vec)
+        y_exact.append(expval_exact.reshape(1, -1)[0])
+        y_estim.append(expval_estim.reshape(1, -1)[0])
+
+    return np.array(X), np.array(y_exact), np.array(y_estim)
+
+X, y_exact, y_estim = build_dataset(100, Nr, Nc, 500)
+X_data, y_data = X, y_estim
+X_data.shape, y_data.shape, y_exact.shape
+
+##############################################################################
+# .. rst-class:: sphx-glr-script-out
+#
+#
+#  .. code-block:: none
+#
+#     ((100, 4), (100, 16), (100, 16))
+
+
+######################################################################
+# Now that our dataset is ready, we can shift our focus to the ML models.
+# Here, we use two different kernel functions: (i) the Gaussian kernel and
+# (ii) the neural tangent kernel. For both of them, we consider the
+# regularization parameter :math:`\lambda` from the following set of values:
+#
+# .. math::  \lambda = \left\{ 0.0025, 0.0125, 0.025, 0.05, 0.125, 0.25, 0.5, 1.0, 5.0, 10.0 \right\}.
+#
+# Next, we define the kernel functions :math:`k(x, x^{\prime})` for each
+# of the mentioned kernels:
+
+######################################################################
+# .. math::  k(x, x^{\prime}) = e^{-\gamma||x - x^{\prime}||^{2}_{2}}. \tag{Gaussian Kernel}
+#
+# For the Gaussian kernel, the hyperparameter
+# :math:`\gamma = N^{2}/\sum_{i=1}^{N} \sum_{j=1}^{N} ||x_i-x_j||^{2}_{2} > 0`
+# is chosen as the inverse of the average squared Euclidean distance
+# between the points :math:`x_i` and :math:`x_j`. The kernel is implemented using the
+# radial-basis function (rbf) kernel in the ``sklearn`` library.
+#
+
+######################################################################
+# .. math::  k(x, x^{\prime}) = k^{\text{NTK}}(x, x^{\prime}). \tag{Neural Tangent Kernel}
+#
+# The neural tangent kernel :math:`k^{\text{NTK}}` used here is equivalent
+# to an infinite-width feed-forward neural network with four hidden
+# layers that uses the rectified linear unit (ReLU) as its activation
+# function. This is implemented using the ``neural_tangents`` library.
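+#
+# Before assembling the NTK, note that the Gaussian-kernel hyperparameter
+# :math:`\gamma` defined above can be computed directly from the data and
+# passed to ``sklearn`` through the ``gamma`` argument of the ``rbf`` kernel
+# (a short sketch of ours, following the formula given earlier):
+#
+# .. code-block:: python
+#
+#     from scipy.spatial.distance import pdist, squareform
+#
+#     sq_dists = squareform(pdist(X_data, metric="sqeuclidean"))
+#     gamma = len(X_data) ** 2 / np.sum(sq_dists)  # inverse of the mean squared distance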
+#
+
+from neural_tangents import stax
+init_fn, apply_fn, kernel_fn = stax.serial(
+    stax.Dense(32),
+    stax.Relu(),
+    stax.Dense(32),
+    stax.Relu(),
+    stax.Dense(32),
+    stax.Relu(),
+    stax.Dense(32),
+    stax.Relu(),
+    stax.Dense(1),
+)
+kernel_NN = np.array(kernel_fn(X_data, X_data, "ntk"))
+
+# normalize the kernel matrix: K_ij -> K_ij / sqrt(K_ii * K_jj).
+# JAX arrays are immutable, so we work on a NumPy copy rather than
+# updating the entries in place.
+diag_sqrt = np.sqrt(np.diag(kernel_NN))
+kernel_NN = kernel_NN / np.outer(diag_sqrt, diag_sqrt)
+
+
+######################################################################
+# For the two kernel methods defined above, we obtain the best learning
+# model for each :math:`C_{ij}` prediction task by performing hyperparameter
+# tuning with cross-validation. For this purpose, we implement the function
+# ``fit_predict_data``, which takes as input the correlation function
+# index ``cij``, the kernel matrix ``kernel``, and the internal
+# kernel mapping ``opt`` required by the kernel-based regression models
+# from the ``sklearn`` library.
+#
+
+from sklearn.metrics import mean_squared_error
+
+def fit_predict_data(cij, kernel, opt="linear"):
+
+    # training data (estimated from measurement data)
+    y = np.array([y_estim[i][cij] for i in range(len(X_data))])
+    X_train, X_test, y_train, y_test = train_test_split(
+        kernel, y, test_size=0.3, random_state=24
+    )
+
+    # testing data (exact expectation values)
+    y_clean = np.array([y_exact[i][cij] for i in range(len(X_data))])
+    _, _, _, y_test_clean = train_test_split(kernel, y_clean, test_size=0.3, random_state=24)
+
+    # hyperparameter tuning with cross validation
+    models = [
+        # Epsilon-Support Vector Regression
+        (lambda Cx: svm.SVR(kernel=opt, C=Cx, epsilon=0.1)),
+        # Kernel-Ridge based Regression
+        (lambda Cx: KernelRidge(kernel=opt, alpha=1 / (2 * Cx))),
+    ]
+
+    # Regularization parameter
+    hyperparams = [0.0025, 0.0125, 0.025, 0.05, 0.125, 0.25, 0.5, 1.0, 5.0, 10.0]
+    best_pred, best_cv_score, best_test_score = None, np.inf, np.inf
+    for model in models:
+        for hyperparam in hyperparams:
+            cv_score = -np.mean(
+                cross_val_score(
+                    model(hyperparam), X_train, y_train, cv=5,
+                    scoring="neg_root_mean_squared_error",
+                )
+            )
+            if best_cv_score > cv_score:
+                best_model = model(hyperparam).fit(X_train, y_train)
+                best_pred = best_model.predict(X_test)
+                best_cv_score = cv_score
+                best_test_score = mean_squared_error(
+                    best_model.predict(X_test).ravel(), y_test_clean.ravel(), squared=False
+                )
+
+    return (
+        best_pred, y_test_clean, np.round(best_cv_score, 5), np.round(best_test_score, 5)
+    )
+
+
+######################################################################
+# We perform the fitting and prediction for each :math:`C_{ij}` and print
+# the output in a tabular format.
+#
+
+kernel_list = ["Gaussian kernel", "Neural Tangent kernel"]
+kernel_data = np.zeros((num_qubits ** 2, len(kernel_list), 2))
+y_predclean, y_predicts1, y_predicts2 = [], [], []
+
+for cij in range(num_qubits ** 2):
+    y_predict, y_clean, cv_score, test_score = fit_predict_data(cij, X_data, opt="rbf")
+    y_predclean.append(y_clean)
+    kernel_data[cij][0] = (cv_score, test_score)
+    y_predicts1.append(y_predict)
+    y_predict, y_clean, cv_score, test_score = fit_predict_data(cij, kernel_NN)
+    kernel_data[cij][1] = (cv_score, test_score)
+    y_predicts2.append(y_predict)
+
+# For each C_ij print (best_cv_score, test_score) pair
+row_format = "{:>25}{:>35}{:>35}"
+print(row_format.format("Correlation", *kernel_list))
+for idx, data in enumerate(kernel_data):
+    print(
+        row_format.format(
+            f"\t C_{idx//num_qubits}{idx%num_qubits} \t| ",
+            str(data[0]),
+            str(data[1]),
+        )
+    )
+
+##############################################################################
+# .. rst-class:: sphx-glr-script-out
+#
+#
+#  .. code-block:: none
+#
+#     Correlation                    Gaussian kernel              Neural Tangent kernel
+#          C_00 |                          [-0.  0.]                          [-0.  0.]
+#          C_01 |                  [0.08691 0.0676 ]                  [0.11507 0.07552]
+#          C_02 |                  [0.10751 0.0736 ]                  [0.11646 0.08599]
+#          C_03 |                   [0.0937 0.04272]                  [0.09628 0.06695]
+#          C_10 |                  [0.08691 0.0676 ]                  [0.11507 0.07552]
+#          C_11 |                          [-0.  0.]                          [-0.  0.]
+#          C_12 |                   [0.1159 0.05131]                  [0.12794 0.0696 ]
+#          C_13 |                  [0.11679 0.0802 ]                  [0.11901 0.10352]
+#          C_20 |                  [0.10751 0.0736 ]                  [0.11646 0.08599]
+#          C_21 |                   [0.1159 0.05131]                  [0.12794 0.0696 ]
+#          C_22 |                          [-0.  0.]                          [-0.  0.]
+#          C_23 |                  [0.09494 0.07095]                  [0.11708 0.07366]
+#          C_30 |                   [0.0937 0.04272]                  [0.09628 0.06695]
+#          C_31 |                  [0.11679 0.0802 ]                  [0.11901 0.10352]
+#          C_32 |                  [0.09494 0.07095]                  [0.11708 0.07366]
+#          C_33 |                          [-0.  0.]                          [-0.  0.]
+
+
+######################################################################
+# Overall, we find that the models with the Gaussian kernel performed
+# better than those with the NTK for predicting the expectation value
+# of the correlation function :math:`C_{ij}` for the ground state of
+# the Heisenberg model. However, the best choice of :math:`\lambda`
+# differed substantially across the different :math:`C_{ij}` for both
+# kernels. We present the predicted correlation matrix :math:`C^{\prime}`
+# for randomly selected Heisenberg models from the test set below for
+# comparison against the actual correlation matrix :math:`C`, which is
+# obtained from the ground state found using exact diagonalization.
+#
+
+fig, axes = plt.subplots(3, 3, figsize=(14, 14))
+corr_vals = [y_predclean, y_predicts1, y_predicts2]
+plt_plots = [1, 14, 25]
+
+cols = [
+    "From {}".format(col)
+    for col in ["Exact Diagonalization", "Gaussian Kernel", "Neur. Tang. Kernel"]
Kernel"] +] +rows = ["Model {}".format(row) for row in plt_plots] + +for ax, col in zip(axes[0], cols): + ax.set_title(col, fontsize=18) + +for ax, row in zip(axes[:, 0], rows): + ax.set_ylabel(row, rotation=90, fontsize=24) + +for itr in range(3): + for idx, corr_val in enumerate(corr_vals): + shw = axes[itr][idx].imshow( + np.array(corr_vals[idx]).T[plt_plots[itr]].reshape(Nr * Nc, Nr * Nc), + cmap=plt.get_cmap("RdBu"), vmin=-1, vmax=1, + ) + axes[itr][idx].xaxis.set_ticks(range(Nr * Nc)) + axes[itr][idx].yaxis.set_ticks(range(Nr * Nc)) + axes[itr][idx].xaxis.set_tick_params(labelsize=18) + axes[itr][idx].yaxis.set_tick_params(labelsize=18) + +fig.subplots_adjust(right=0.86) +cbar_ax = fig.add_axes([0.90, 0.15, 0.015, 0.71]) +bar = fig.colorbar(shw, cax=cbar_ax) + +bar.set_label(r"$C_{ij}$", fontsize=18, rotation=0) +bar.ax.tick_params(labelsize=16) +plt.show() + +###################################################################### +# .. figure:: /_static/demonstration_assets/ml_classical_shadows/model_comparison.png +# :align: center +# :width: 95 % +# :alt: Model comparison based on different kernels +# + + +###################################################################### +# Finally, we also attempt to showcase the effect of the size of training data +# :math:`N` and the number of Pauli measurements :math:`T`. For this, we look +# at the average root-mean-square error (RMSE) in prediction for each kernel +# over all two-point correlation functions :math:`C_{ij}`. Here, the first +# plot looks at the different training sizes :math:`N` with a fixed number of +# randomized Pauli measurements :math:`T=100`. In contrast, the second plot +# looks at the different shadow sizes :math:`T` with a fixed training data size +# :math:`N=70`. The performance improvement seems to be saturating after a +# sufficient increase in :math:`N` and :math:`T` values for all two kernels +# in both the cases. +# + +###################################################################### +# .. image:: /_static/demonstration_assets/ml_classical_shadows/rmse_shadow.png +# :width: 47 % +# :align: right +# +# .. image:: /_static/demonstration_assets/ml_classical_shadows/rmse_training.png +# :width: 47 % +# + +###################################################################### +# Conclusion +# ---------- +# +# This demo illustrates how classical machine learning models can benefit from +# the classical shadow formalism for learning characteristics and predicting +# the behavior of quantum systems. As argued in Ref. [#preskill]_, this raises +# the possibility that models trained on experimental or quantum data data can +# effectively address quantum many-body problems that cannot be solved using +# classical methods alone. +# + +###################################################################### +# .. _ml_classical_shadow_references: +# +# References +# ---------- +# +# .. [#preskill] +# +# H. Y. Huang, R. Kueng, G. Torlai, V. V. Albert, J. Preskill, "Provably +# efficient machine learning for quantum many-body problems", +# `arXiv:2106.12627 [quant-ph] `__ (2021) +# +# .. [#neurtangkernel] +# +# A. Jacot, F. Gabriel, and C. Hongler. "Neural tangent kernel: +# Convergence and generalization in neural networks". 
+#    `NeurIPS, 8571–8580
+#    <https://proceedings.neurips.cc/paper/2018/file/5a4be1fa34e62bb8a6ec6b91d2462f5a-Paper.pdf>`__ (2018)
+#
+#
+# About the author
+# ----------------
+
diff --git a/demonstrations/tutorial_diffable_shadows.metadata.json b/demonstrations/tutorial_diffable_shadows.metadata.json
index 2fb2153897..f26318128c 100644
--- a/demonstrations/tutorial_diffable_shadows.metadata.json
+++ b/demonstrations/tutorial_diffable_shadows.metadata.json
@@ -60,7 +60,7 @@
         },
         {
             "type": "demonstration",
-            "id": "tutorial_ml_classical_shadows",
+            "id": "ml_classical_shadows",
             "weight": 1.0
         }
     ]
diff --git a/demonstrations/tutorial_diffable_shadows.py b/demonstrations/tutorial_diffable_shadows.py
index 338fd294f0..c35163bf20 100644
--- a/demonstrations/tutorial_diffable_shadows.py
+++ b/demonstrations/tutorial_diffable_shadows.py
@@ -9,7 +9,7 @@
 
 .. related:: tutorial_classical_shadows Introduction to classic shadows
-    tutorial_ml_classical_shadows Classic shadows in machine learning
+    ml_classical_shadows Classic shadows in machine learning
 
 *Author: Korbinian Kottmann — Posted: 07 October 2022. Last updated: 11 October 2022.*
 
@@ -22,7 +22,7 @@
 -----------------------
 
 A `classical shadow` is a classical description of a quantum state that is capable of reproducing expectation values of local Pauli observables, see [#Huang2020]_.
-We briefly go through their theory here, and note the two additional demos in :doc:`tutorial_classical_shadows` and :doc:`tutorial_ml_classical_shadows`.
+We briefly go through their theory here, and note the two additional demos in :doc:`tutorial_classical_shadows` and :doc:`ml_classical_shadows`.
 We are here focussing on the case where measurements are performed in the Pauli basis.
 The idea of classical shadows is to measure each qubit in a random Pauli basis.