Add ASE tutorial (#43)

--------- Co-authored-by: Philip Loche <[email protected]>
metatensor · Feb 2, 2024 · df96bb0 · df96bb0
1 parent 74ff6a4
commit df96bb0
Show file tree

Hide file tree

Showing 30 changed files with 1,404 additions and 1,120 deletions.
diff --git a/.gitignore b/.gitignore
@@ -165,4 +165,10 @@ cython_debug/
 
 # model output directories
 outputs/
-examples/*.xyz
+examples/basic_usage/*.xyz
+!docs/static/*.pt
+!examples/ase/*.pt
+
+# sphinx gallery
+docs/src/examples
+*execution_times*
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -3,6 +3,7 @@ graft src
 include LICENSE
 include README.md
 
+# TODO: remove once the hack is not needed anymore
 include scripts/hotfix_metatensor.py
 
 prune docs

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,4 +1,6 @@
+ase
 furo
 sphinx >= 7
+sphinx-gallery
 sphinx-toggleprompt
 tomli
diff --git a/docs/src/conf.py b/docs/src/conf.py
@@ -10,10 +10,6 @@
 ROOT = os.path.abspath(os.path.join("..", ".."))
 sys.path.insert(0, ROOT)
 
-# when importing metatensor-torch, this will change the definition of the classes
-# to include the documentation
-os.environ["METATENSOR_IMPORT_FOR_SPHINX"] = "1"
-
 # -- Project information -----------------------------------------------------
 
 # The master toctree document.
@@ -41,8 +37,16 @@
     "sphinx.ext.autodoc",
     "sphinx.ext.intersphinx",
     "sphinx_toggleprompt",
+    "sphinx_gallery.gen_gallery",
 ]
 
+sphinx_gallery_conf = {
+    "filename_pattern": "/*",
+    "examples_dirs": ["../../examples"],
+    "gallery_dirs": ["examples"],
+    "min_reported_time": 5,
+}
+
 python_use_unqualified_type_names = True
 
 autoclass_content = "both"

diff --git a/docs/src/getting-started/override.rst b/docs/src/getting-started/override.rst
@@ -4,7 +4,7 @@ Override Architecture's Default Parameters
 In our initial tutorial, we used default parameters to train a model employing the
 SOAP-BPNN architecture, as shown in the following config:
 
-.. literalinclude:: ../../static/options.yaml
+.. literalinclude:: ../../static/qm9/options.yaml
    :language: yaml
 
 While default parameters often serve as a good starting point, depending on your

diff --git a/docs/src/getting-started/usage.rst b/docs/src/getting-started/usage.rst
@@ -1,3 +1,5 @@
+.. _label_basic_usage:
+
 Basic Usage
 ===========
 
@@ -11,7 +13,7 @@ general help of `metatensor-models` can be accessed using
 We now demonstrate how to `train` and `evaluate` a model from the command line. For this
 example we use the :ref:`architecture-soap-bpnn` architecture and a subset of the `QM9
 dataset <https://paperswithcode.com/dataset/qm9>`_. You can obtain the reduced dataset
-from our :download:`website <../../static/qm9_reduced_100.xyz>`.
+from our :download:`website <../../static/qm9/qm9_reduced_100.xyz>`.
 
 Training
 ########
@@ -39,7 +41,7 @@ The default model and training hyperparameter for each model are listed in their
 corresponding documentation page. We will use these minimal options to run an example
 training using the default hyperparameters of an SOAP BPNN model
 
-.. literalinclude:: ../../static/options.yaml
+.. literalinclude:: ../../static/qm9/options.yaml
    :language: yaml
 
 For each training run a new output directory in the format
@@ -50,7 +52,7 @@ default, this output directory is used to store Hydra's output for the run
 behavior in the options file. To start the training create an ``options.yaml`` file in
 the current directory and type
 
-.. literalinclude:: ../../../examples/usage.sh
+.. literalinclude:: ../../../examples/basic_usage/usage.sh
     :language: bash
     :lines: 3-8
 
@@ -64,22 +66,23 @@ The sub-command to evaluate an already trained model is
 
     metatensor-models eval
 
-.. literalinclude:: ../../../examples/usage.sh
+.. literalinclude:: ../../../examples/basic_usage/usage.sh
     :language: bash
     :lines: 9-25
 
 
 Exporting
 #########
 
-Exporting a model required if you want to use it in other frameworks, especially in
-molecular dynamics simulations. The sub-command to export a model is
+Exporting a model is very useful if you want to use it in other frameworks,
+especially in molecular dynamics simulations.
+The sub-command to export an already trained model is
 
 .. code-block:: bash
 
     metatensor-models export
 
-.. literalinclude:: ../../../examples/usage.sh
+.. literalinclude:: ../../../examples/basic_usage/usage.sh
     :language: bash
     :lines: 25-
 

diff --git a/docs/src/index.rst b/docs/src/index.rst
@@ -12,5 +12,6 @@ This is a collection of atomistic models interfaced with ``metatensor``.
    :hidden:
 
    getting-started/index
+   tutorials/index
    architectures/index
    dev-docs/index
diff --git a/docs/src/tutorials/index.rst b/docs/src/tutorials/index.rst
@@ -0,0 +1,10 @@
+Tutorials
+=========
+
+This sections includes some more advanced tutorials on the usage of the
+``metatensor-models`` package.
+
+.. toctree::
+   :maxdepth: 1
+
+   ../examples/ase/run_ase
diff --git a/docs/static/ethanol/ethanol_reduced_100.xyz b/docs/static/ethanol/ethanol_reduced_100.xyz
diff --git a/docs/static/ethanol/options.yaml b/docs/static/ethanol/options.yaml
@@ -0,0 +1,20 @@
+defaults:
+  - architecture: soap_bpnn  # architecture used to train the model
+  - _self_
+
+architecture:
+  training:
+    batch_size: 16
+    num_epochs: 100
+    learning_rate: 0.01
+
+# Section defining the parameters for structure and target data
+training_set:
+  structures: "ethanol_reduced_100.xyz"
+  targets:
+    energy:
+      key: "energy"
+      unit: "kcal/mol"  # very important to run simulations
+
+validation_set: 0.1
+test_set: 0.0
diff --git a/docs/static/options.yaml → docs/static/qm9/options.yaml b/docs/static/options.yaml → docs/static/qm9/options.yaml
diff --git a/docs/static/qm9_reduced_100.xyz → docs/static/qm9/qm9_reduced_100.xyz b/docs/static/qm9_reduced_100.xyz → docs/static/qm9/qm9_reduced_100.xyz
diff --git a/examples/README.rst b/examples/README.rst
@@ -0,0 +1,4 @@
+metatensor-models Examples
+==========================
+
+This folder consists of introductory and advanced examples.
diff --git a/examples/ase/README.rst b/examples/ase/README.rst
@@ -0,0 +1,2 @@
+Running molecular dynamics with ASE
+===================================
diff --git a/examples/ase/ethanol_reduced_100.xyz b/examples/ase/ethanol_reduced_100.xyz
@@ -0,0 +1 @@
+../../docs/static/ethanol/ethanol_reduced_100.xyz
diff --git a/examples/ase/exported-model.pt b/examples/ase/exported-model.pt
diff --git a/examples/ase/options.yaml b/examples/ase/options.yaml
@@ -0,0 +1 @@
+../../docs/static/ethanol/options.yaml
diff --git a/examples/ase/run_ase.py b/examples/ase/run_ase.py
@@ -0,0 +1,214 @@
+"""
+Running molecular dynamics with ASE
+===================================
+
+This tutorial demonstrates how to use an already trained and exported model to run an
+ASE simulation of a single ethanol molecule in vacuum. We use a model that was trained
+using the :ref:`architecture-soap-bpnn` architecture on 100 ethanol structures
+containing energies and forces. You can obtain the :download:`dataset file
+<../../../static/ethanol/ethanol_reduced_100.xyz>` used in this example from our
+website. The dataset is a subset of the `rMD17 dataset
+<https://iopscience.iop.org/article/10.1088/2632-2153/abba6f/meta>`_.
+
+The model was trained using the following training options.
+
+.. literalinclude:: ../../../static/ethanol/options.yaml
+   :language: yaml
+
+You can train and export the model yourself using the following commands
+
+.. literalinclude:: ../../../../examples/ase/train_export.sh
+   :language: bash
+
+A detailed step-by-step introduction on how to train and export a model is provided in
+the :ref:`label_basic_usage` tutorial.
+"""
+
+# %%
+#
+# First, we start by importing the necessary libraries, including the integration of ASE
+# calculators for metatensor atomistic models
+
+
+import ase.md
+import ase.md.velocitydistribution
+import ase.units
+import ase.visualize.plot
+import matplotlib.pyplot as plt
+import numpy as np
+import rascaline.torch  # noqa
+from ase.geometry.analysis import Analysis
+from metatensor.torch.atomistic.ase_calculator import MetatensorCalculator
+
+
+# %%
+#
+# .. note::
+#    We have to import ``rascaline.torch`` even though it is not used explicitly in this
+#    tutorial. The SOAP-BPNN model contains compiled extensions and therefore the import
+#    is required.
+#
+# Setting up the simulation
+# -------------------------
+#
+# Next, we initialize the simulation by extracting the initial positions from the
+# dataset file which we initially trained the model on.
+
+training_frames = ase.io.read("ethanol_reduced_100.xyz", ":")
+atoms = training_frames[0].copy()
+
+# %%
+#
+# Below we show the initial configuration of a single ethanol molecule in vacuum.
+
+ase.visualize.plot.plot_atoms(atoms)
+
+plt.xlabel("Å")
+plt.ylabel("Å")
+
+plt.show()
+
+
+# %%
+#
+# Our initial coordinates do not include velocities. We initialize the velocities
+# according to a Maxwell-Boltzmann Distribution at 300 K.
+
+ase.md.velocitydistribution.MaxwellBoltzmannDistribution(atoms, temperature_K=300)
+
+# %%
+#
+# We now register our exported model as the energy calculator to obtain energies and
+# forces.
+
+atoms.calc = MetatensorCalculator("exported-model.pt")
+
+# %%
+#
+# Finally, we define the integrator which we use to obtain new positions and velocities
+# based on our energy calculator. We use a common timestep of 0.5 fs.
+
+integrator = ase.md.VelocityVerlet(atoms, timestep=0.5 * ase.units.fs)
+
+
+# %%
+#
+# Run the simulation
+# ------------------
+#
+# We now have everything ready to run the MD simulation at constant energy (NVE). To
+# keep the execution time of this tutorial small we run the simulations only for 100
+# steps. If you want to run a longer simulation you can increase the ``n_steps``
+# variable.
+#
+# During the simulation loop we collect data about the simulation for later analysis.
+
+
+n_steps = 100
+
+potential_energy = np.zeros(n_steps)
+kinetic_energy = np.zeros(n_steps)
+total_energy = np.zeros(n_steps)
+trajectory = []
+
+for step in range(n_steps):
+    # run a single simulation step
+    integrator.run(1)
+
+    trajectory.append(atoms.copy())
+    potential_energy[step] = atoms.get_potential_energy()
+    kinetic_energy[step] = atoms.get_kinetic_energy()
+    total_energy[step] = atoms.get_total_energy()
+
+# %%
+#
+# Analyse the results
+# -------------------
+#
+# Energy conservation
+# ###################
+#
+# For a first analysis, we plot the evolution of the mean of the kinetic, potential, and
+# total energy which is an important measure for the stability of a simulation.
+#
+# As shown below we see that both the kinetic, potential, and total energy
+# fluctuate but the total energy is conserved over the length of the simulation.
+
+
+plt.plot(potential_energy - potential_energy.mean(), label="potential energy")
+plt.plot(kinetic_energy - kinetic_energy.mean(), label="kinetic energy")
+plt.plot(total_energy - total_energy.mean(), label="total energy")
+
+plt.xlabel("step")
+plt.ylabel("energy / kcal/mol")
+plt.legend()
+
+plt.show()
+
+# %%
+#
+# Inspect the final structure
+# ###########################
+#
+# Even though the total energy is conserved, we also have to verify that the ethanol
+# molecule is stable and the bonds did not break.
+
+ase.visualize.plot.plot_atoms(trajectory[-1])
+plt.xlabel("Å")
+plt.ylabel("Å")
+
+plt.show()
+
+# %%
+#
+# Carbon-hydrogen radial distribution function
+# ############################################
+#
+# As a final analysis we also calculate and plot the carbon-hydrogen radial distribution
+# function (RDF) from the trajectory and compare this to the RDF from the training set.
+#
+# To use the RDF code from ase we first have to define a unit cell for our structures.
+# We choose a cubic one with a side length of 10 Å.
+
+for atoms in training_frames:
+    atoms.cell = 10 * np.ones(3)
+    atoms.pbc = True
+
+for atoms in trajectory:
+    atoms.cell = 10 * np.ones(3)
+    atoms.pbc = True
+
+# %%
+#
+# We now can initilize the :py:class:`ase.geometry.analysis.Analysis` objects and
+# compute the the RDF using the :py:meth:`ase.geometry.analysis.Analysis.get_rdf`
+# method.
+
+ana_traj = Analysis(trajectory)
+ana_train = Analysis(training_frames)
+
+rdf_traj = ana_traj.get_rdf(rmax=5, nbins=50, elements=["C", "H"], return_dists=True)
+rdf_train = ana_train.get_rdf(rmax=5, nbins=50, elements=["C", "H"], return_dists=True)
+
+# %%
+#
+# We extract the bin positions from the returned values and and averege the RDF over the
+# whole trajectory and dataset, respectively.
+
+bins = rdf_traj[0][1]
+rdf_traj_mean = np.mean([rdf_traj[i][0] for i in range(n_steps)], axis=0)
+rdf_train_mean = np.mean([rdf_train[i][0] for i in range(n_steps)], axis=0)
+
+# %%
+#
+# Plotting the RDF verifies that the hydrogen bonds are stable, confirming that we
+# performed an energy-conserving and stable simulation.
+
+plt.plot(bins, rdf_traj_mean, label="trajectory")
+plt.plot(bins, rdf_train_mean, label="training set")
+
+plt.legend()
+plt.xlabel("r / Å")
+plt.ylabel("radial distribution function")
+
+plt.show()