Finalize training procedure #31

Merged: 25 commits, Jan 24, 2024
Changes from 22 commits

Commits
3ea3e6a  Add gradient calculator (frostedoyster, Jan 4, 2024)
79d161e  Fix linter (frostedoyster, Jan 4, 2024)
e65679d  Loss draft (frostedoyster, Jan 11, 2024)
8591d7d  Add tests for losses (frostedoyster, Jan 11, 2024)
8df7e41  Wrap forces and stresses (frostedoyster, Jan 11, 2024)
0ffc0a9  Clarify cell convention (frostedoyster, Jan 11, 2024)
9253931  Merge branch 'main' into forces-virials (frostedoyster, Jan 11, 2024)
9b93eaa  Support multiple model outputs, use new loss (frostedoyster, Jan 11, 2024)
83c94e6  Fix composition calculator (frostedoyster, Jan 11, 2024)
4f1c569  Address review (frostedoyster, Jan 12, 2024)
7c32d9e  Partial draft (frostedoyster, Jan 12, 2024)
3081a0f  Finished trainer (frostedoyster, Jan 12, 2024)
ed8697f  Make linter happy (frostedoyster, Jan 12, 2024)
589b586  Merge branch 'main' into finalize-training (frostedoyster, Jan 13, 2024)
01a528b  Fix small merge issue (frostedoyster, Jan 12, 2024)
aa31433  Add new functions to the documentation (frostedoyster, Jan 16, 2024)
9061f1c  Add tutorial how to override arch params (PicoCentauri, Jan 18, 2024)
631d274  Adapt to most recent parser changes (frostedoyster, Jan 19, 2024)
dc523b7  Train with actual train/validation splits (frostedoyster, Jan 19, 2024)
fe0bd01  Fix some small issues (frostedoyster, Jan 19, 2024)
f579652  Merge branch 'arch_override_docs' into finalize-training (frostedoyster, Jan 20, 2024)
2e82d22  Fix docs build? (frostedoyster, Jan 19, 2024)
59cf4f4  Address reviewer comments (frostedoyster, Jan 23, 2024)
bc60b87  Merge branch 'main' into finalize-training (frostedoyster, Jan 23, 2024)
22237a4  Merge branch 'finalize-training' of https://github.com/lab-cosmo/meta… (frostedoyster, Jan 23, 2024)
Files changed
2 changes: 2 additions & 0 deletions docs/src/architectures/index.rst
@@ -1,3 +1,5 @@
.. _available-architectures:

Available Architectures
=======================

7 changes: 7 additions & 0 deletions docs/src/dev-docs/utils/combine_dataloaders.rst
@@ -0,0 +1,7 @@
Combining dataloaders
#####################

.. automodule:: metatensor.models.utils.data.combine_dataloaders
:members:
:undoc-members:
:show-inheritance:
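As background for this new module: the idea behind combining dataloaders can be
sketched in plain PyTorch. This is an illustrative sketch only, not the module's
actual implementation, and the helper name ``combine`` is hypothetical:

import random

import torch
from torch.utils.data import DataLoader, TensorDataset


def combine(dataloaders, shuffle=True):
    # Illustrative only: gather the batches of every dataloader and yield them
    # in (optionally shuffled) order, so a single training loop sees batches
    # from all datasets.
    batches = [batch for dl in dataloaders for batch in dl]
    if shuffle:
        random.shuffle(batches)
    yield from batches


# Two toy datasets of different sizes, each with its own batch size:
a = TensorDataset(torch.arange(6).reshape(6, 1))
b = TensorDataset(torch.arange(100, 108).reshape(8, 1))
for batch in combine([DataLoader(a, batch_size=2), DataLoader(b, batch_size=4)]):
    print(batch)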
1 change: 1 addition & 0 deletions docs/src/dev-docs/utils/index.rst
@@ -11,3 +11,4 @@ This is the API for the ``utils`` module of ``metatensor-models``.
writers
model-io
omegaconf
combine_dataloaders
2 changes: 2 additions & 0 deletions docs/src/getting-started/custom_dataset_conf.rst
@@ -132,3 +132,5 @@ by default.
.. note::

Unknown keys in any section are ignored (not deleted) during dataset parsing.

In the next tutorials we show how to override the default parameters of an architecture.
1 change: 1 addition & 0 deletions docs/src/getting-started/index.rst
@@ -9,3 +9,4 @@ This section describes how to install the package and its most basic commands.
installation
usage
custom_dataset_conf
override
80 changes: 80 additions & 0 deletions docs/src/getting-started/override.rst
@@ -0,0 +1,80 @@
Override Architecture's Default Parameters
==========================================

In our initial tutorial, we used default parameters to train a model employing the
SOAP-BPNN architecture, as shown in the following config:

.. literalinclude:: ../../static/options.yaml
:language: yaml

While the default parameters are often a good starting point, you may need to adjust
them depending on your training target and dataset.

First, familiarize yourself with the specific parameters of the architecture you intend
to use. We provide a list of all architectures and their parameters in the
:ref:`available-architectures` section. For example, the parameters of the SOAP-BPNN
models are detailed at :ref:`architecture-soap-bpnn`.

Modifying Parameters (yaml)
---------------------------

As an example, let's increase the number of epochs (``num_epochs``) and the ``cutoff``
radius of the SOAP descriptor. To do this, create a new section named ``architecture``
in ``options.yaml``. Within this section, you can override the architecture's
hyperparameters. The adjustments for ``num_epochs`` and ``cutoff`` look like this:

.. code-block:: yaml

defaults:
- architecture: soap_bpnn
- _self_

architecture:
model:
soap:
cutoff: 7.0
training:
num_epochs: 200

training_set:
structures: "qm9_reduced_100.xyz"
targets:
energy:
key: "U0"

test_set: 0.1
validation_set: 0.1

Modifying Parameters (Command Line Overrides)
---------------------------------------------

For quick adjustments, command-line overrides are also an option. The changes above can
be achieved by:

.. code-block:: bash

metatensor-models train options.yaml \
-y architecture.model.soap.cutoff=7.0 architecture.training.num_epochs=200

Here, the ``-y`` flag is used to pass the override flags. More details on the override
syntax are available at https://hydra.cc/docs/advanced/override_grammar/basic/.

.. note::

For reference and reproducibility, ``metatensor-models`` always writes the fully
expanded options to the ``.hydra`` subdirectory inside the ``output`` directory of
your current training run.
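
A sketch of the resulting layout (the timestamped directory and the log file name
are illustrative; the ``.hydra`` files are standard Hydra output):

.. code-block:: text

    output/2024-01-24/12-00-00/
    ├── .hydra/
    │   ├── config.yaml     # fully expanded options for this run
    │   ├── hydra.yaml      # Hydra's own settings
    │   └── overrides.yaml  # command-line overrides, if any
    └── train.log           # log file name is an assumption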


Understanding the Defaults Section
----------------------------------

You may have noticed the ``defaults`` section at the beginning of each file. This list
dictates which defaults are loaded and how the final config object is composed; by
convention, it is the first item in the config.

Append ``_self_`` to the end of the list so that your primary config overrides values
from the Defaults List. If you omit the ``_self_`` entry, your primary config still
overrides values from the Defaults List, but Hydra will emit a warning. For more
background, visit https://hydra.cc/docs/tutorials/basic/your_first_app/defaults/.
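
A minimal sketch of this composition, using standard Hydra semantics (later entries
in the Defaults List win):

.. code-block:: yaml

    defaults:
      - architecture: soap_bpnn  # the architecture's defaults are merged first
      - _self_                   # this file is merged last, so its values win

    architecture:
      training:
        num_epochs: 200  # overrides the architecture's default value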
6 changes: 3 additions & 3 deletions docs/src/getting-started/usage.rst
@@ -42,8 +42,9 @@ training using the default hyperparameters of a SOAP-BPNN model
.. literalinclude:: ../../static/options.yaml
:language: yaml

For each training run a new output directory based on the current date and time is
created. By default, this output directory is used to store Hydra's output for the run
For each training run a new output directory in the format
``output/YYYY-MM-DD/HH-MM-SS`` based on the current date and time is created. By
default, this output directory is used to store Hydra's output for the run
(configuration, logs, etc.). You can `override
<https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory/>`_ this
behavior in the options file. To start the training, create an ``options.yaml`` file in
@@ -67,6 +68,5 @@ The sub-command to evaluate an already trained model is
:language: bash
:lines: 9-


In the next tutorials we show how to adjust the dataset section of the ``options.yaml``
file to use it for your own datasets.
39 changes: 26 additions & 13 deletions src/metatensor/models/cli/train_model.py
@@ -6,12 +6,14 @@

import hydra
import torch
from metatensor.torch.atomistic import ModelCapabilities, ModelOutput
from omegaconf import DictConfig, OmegaConf

from metatensor.models.utils.data import Dataset
from metatensor.models.utils.data.readers import read_structures, read_targets

from .. import CONFIG_PATH
from ..utils.data import get_all_species
from ..utils.model_io import save_model
from ..utils.omegaconf import expand_dataset_config
from .formatter import CustomHelpFormatter
@@ -174,19 +176,30 @@ def train_model(options: DictConfig) -> None:
logger.info("Run training")
output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir

# HACK: Avoid passing a Subset which we can not handle yet. For now we pass
# the complete training set even though it was split before...
if isinstance(train_dataset, torch.utils.data.Subset):
model = architecture.train(
train_dataset=train_dataset.dataset,
hypers=OmegaConf.to_container(options["architecture"]),
output_dir=output_dir,
)
else:
model = architecture.train(
train_dataset=train_dataset,
hypers=OmegaConf.to_container(options["architecture"]),
output_dir=output_dir,
all_species = []
for dataset in [train_dataset]: # HACK: only a single train_dataset for now
all_species += get_all_species(dataset)
all_species = list(set(all_species))

outputs = {
key: ModelOutput(
quantity=value["quantity"],
unit=(value["unit"] if value["unit"] is not None else ""), # potential HACK
)
for key, value in options["training_set"]["targets"].items()
}
model_capabilities = ModelCapabilities(
length_unit="Angstrom",
species=all_species,
outputs=outputs,
)

model = architecture.train(
train_datasets=[train_dataset],
validation_datasets=[validation_dataset],
model_capabilities=model_capabilities,
hypers=OmegaConf.to_container(options["architecture"]),
output_dir=output_dir,
)

save_model(model, options["output_path"])
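
For architecture authors, the call above implies a per-architecture ``train`` entry
point along the following lines. This signature is inferred from the call site, not a
documented contract, and the ``output_dir`` default is an assumption:

from typing import Any, Dict, List

from metatensor.torch.atomistic import ModelCapabilities


def train(
    train_datasets: List[Any],
    validation_datasets: List[Any],
    model_capabilities: ModelCapabilities,
    hypers: Dict[str, Any],
    output_dir: str = ".",  # assumed default; train_model.py always passes it
):
    # Inferred behavior: fit a model on the training datasets, monitor the
    # validation datasets, honor the declared capabilities, and return the
    # trained model so that train_model.py can call save_model() on it.
    ...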
36 changes: 27 additions & 9 deletions src/metatensor/models/soap_bpnn/tests/test_regression.py
@@ -1,16 +1,22 @@
import random

import ase.io
import numpy as np
import rascaline.torch
import torch
from metatensor.torch.atomistic import ModelCapabilities, ModelOutput
from omegaconf import OmegaConf

from metatensor.models.soap_bpnn import DEFAULT_HYPERS, Model, train
from metatensor.models.utils.data import Dataset
from metatensor.models.utils.data import Dataset, get_all_species
from metatensor.models.utils.data.readers import read_structures, read_targets

from . import DATASET_PATH


# reproducibility
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)


@@ -21,7 +27,7 @@ def test_regression_init():
length_unit="Angstrom",
species=[1, 6, 7, 8],
outputs={
"energy": ModelOutput(
"U0": ModelOutput(
quantity="energy",
unit="eV",
)
@@ -33,14 +39,15 @@ def test_regression_init():
structures = ase.io.read(DATASET_PATH, ":5")

output = soap_bpnn(
[rascaline.torch.systems_to_torch(structure) for structure in structures]
[rascaline.torch.systems_to_torch(structure) for structure in structures],
["U0"],
)
expected_output = torch.tensor(
[[-0.4615], [-0.4367], [-0.3004], [-0.2606], [-0.2380]],
dtype=torch.float64,
)

assert torch.allclose(output["energy"].block().values, expected_output, rtol=1e-3)
assert torch.allclose(output["U0"].block().values, expected_output, rtol=1e-3)


def test_regression_train():
@@ -50,7 +57,7 @@ def test_regression_train():
structures = read_structures(DATASET_PATH)

conf = {
"energy": {
"U0": {
"quantity": "energy",
"read_from": DATASET_PATH,
"file_format": ".xyz",
@@ -66,14 +73,25 @@

hypers = DEFAULT_HYPERS.copy()
hypers["training"]["num_epochs"] = 2
soap_bpnn = train(dataset, hypers)

capabilities = ModelCapabilities(
length_unit="Angstrom",
species=get_all_species(dataset),
outputs={
"U0": ModelOutput(
quantity="energy",
unit="eV",
)
},
)
soap_bpnn = train([dataset], [dataset], capabilities, hypers)

# Predict on the first five structures
output = soap_bpnn(structures[:5])
output = soap_bpnn(structures[:5], ["U0"])

expected_output = torch.tensor(
[[-39.9658], [-56.0888], [-76.1100], [-76.9461], [-93.0914]],
[[-40.1358], [-56.1721], [-76.1576], [-77.1174], [-93.1679]],
dtype=torch.float64,
)

assert torch.allclose(output["energy"].block().values, expected_output, rtol=1e-3)
assert torch.allclose(output["U0"].block().values, expected_output, rtol=1e-3)