metatensor · PicoCentauri · Feb 2, 2024 · Jan 4, 2024 · Jan 4, 2024 · Jan 11, 2024
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -3,6 +3,8 @@ graft src
 include LICENSE
 include README.md
 
+include scripts/hotfix_metatensor.py
+
 prune docs
 prune examples
 prune tests

diff --git a/docs/src/getting-started/usage.rst b/docs/src/getting-started/usage.rst
@@ -58,15 +58,30 @@ the current directory and type
 Evaluation
 ##########
 
-The sub-command to evaluate a already trained model is
+The sub-command to evaluate an already trained model is
 
 .. code-block:: bash
 
     metatensor-models eval
 
 .. literalinclude:: ../../../examples/usage.sh
     :language: bash
-    :lines: 9-
+    :lines: 9-25
+
+
+Exporting
+#########
+
+Exporting a model is required if you want to use it in other frameworks, especially in
+molecular dynamics simulations. The sub-command to export a model is
+
+.. code-block:: bash
+
+    metatensor-models export
+
+.. literalinclude:: ../../../examples/usage.sh
+    :language: bash
+    :lines: 25-
 
 In the next tutorials we show how adjust the dataset section of ``options.yaml`` file
 to use it for your own datasets.
diff --git a/examples/usage.sh b/examples/usage.sh
@@ -22,3 +22,14 @@ head -n 20 output.xyz
 # All command line flags of the eval sub-command can be listed via
 
 metatensor-models eval --help
+
+# However, before we export the model, we need to run the following command to
+# hotfix errors in metatensor.
+
+python ../scripts/hotfix_metatensor.py
+
+# Finally, the `metatensor-models export` sub-command, i.e.,
+
+metatensor-models export model.pt
+
+# creates an `exported-model.pt` file that contains the exported model.
diff --git a/scripts/hotfix_metatensor.py b/scripts/hotfix_metatensor.py
@@ -0,0 +1,27 @@
+# Since torch.jit.save cannot handle Labels.single(), we need to replace it with
+# Labels(names=["_"], values=_dispatch.zeros_like(block.values, (1, 1)))
+# in metatensor-operations. This is a hacky way to do it.
+
+import os
+import metatensor.operations
+
+file = os.path.join(
+    os.path.dirname(metatensor.operations.__file__),
+    "reduce_over_samples.py"
+)
+
+# Find the first line that contains "samples_label = Labels.single()"
+# and replace "Labels.single()" with
+# "Labels(names=["_"], values=_dispatch.zeros_like(block.values, (1, 1)))"
+with open(file, "r") as f:
+    lines = f.readlines()
+    for i, line in enumerate(lines):
+        if "samples_label = Labels.single()" in line:
+            lines[i] = line.replace(
+                "samples_label = Labels.single()",
+                "samples_label = Labels(names=[\"_\"], values=_dispatch.zeros_like(block.values, (1, 1)))"
+            )
+            break
+
+with open(file, "w") as f:
+    f.writelines(lines)
diff --git a/src/metatensor/models/cli/eval_model.py b/src/metatensor/models/cli/eval_model.py
@@ -42,7 +42,7 @@ def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None:
     )
 
 
-def eval_model(model: str, structures: str, output: str = "output.xyz") -> None:
+def eval_model(model: str, structures: str, output: str) -> None:
     """Evaluate a pretrained model.
 
     ``target_property`` wil be predicted on a provided set of structures. Predicted
@@ -57,8 +57,7 @@ def eval_model(model: str, structures: str, output: str = "output.xyz") -> None:
     loaded_model = load_model(model)
     structure_list = read_structures(structures)
 
-    # since the second argument is missing,
-    # this calculates all the available properties:
-    predictions = loaded_model(structure_list)
+    # this calculates all the properties that the model is capable of predicting:
+    predictions = loaded_model(structure_list, loaded_model.capabilities.outputs)
 
     write_predictions(output, predictions, structure_list)
diff --git a/src/metatensor/models/cli/export_model.py b/src/metatensor/models/cli/export_model.py
@@ -1,5 +1,9 @@
 import argparse
+import warnings
 
+from metatensor.torch.atomistic import MetatensorAtomisticModel
+
+from ..utils.model_io import load_model
 from .formatter import CustomHelpFormatter
 
 
@@ -19,23 +23,35 @@ def _add_export_model_parser(subparser: argparse._SubParsersAction) -> None:
     parser.add_argument(
         "model",
         type=str,
-        help="Saved model which should be exprted",
+        help="Saved model which should be exported",
     )
     parser.add_argument(
         "-o",
         "--output",
         dest="output",
         type=str,
         required=False,
-        default="exported.pt",
+        default="exported-model.pt",
         help="Filename of the exported model (default: %(default)s).",
     )
 
 
 def export_model(model: str, output: str) -> None:
-    """Export a pretrained model to run MD simulations
+    """Export a pre-trained model to run MD simulations
 
     :param model: Path to a saved model
     :param output: Path to save the exported model
     """
-    raise NotImplementedError("model exporting is not implemented yet.")
+
+    loaded_model = load_model(model)
+
+    for model_output_name, model_output in loaded_model.capabilities.outputs.items():
+        if model_output.unit == "":
+            warnings.warn(
+                f"No units were provided for the `{model_output_name}` output. "
+                "As a result, this model output will be passed to MD engines as is.",
+                stacklevel=1,
+            )
+
+    wrapper = MetatensorAtomisticModel(loaded_model.eval(), loaded_model.capabilities)
+    wrapper.export(output)
diff --git a/src/metatensor/models/cli/train_model.py b/src/metatensor/models/cli/train_model.py
@@ -178,7 +178,6 @@ def train_model(options: DictConfig) -> None:
     architetcure_name = options["architecture"]["name"]
     architecture = importlib.import_module(f"metatensor.models.{architetcure_name}")
 
-    logger.info("Run training")
     output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
 
     all_species = []
@@ -201,6 +200,7 @@ def train_model(options: DictConfig) -> None:
         outputs=outputs,
     )
 
+    logger.info("Calling architecture trainer")
     model = architecture.train(
         train_datasets=[train_dataset],
         validation_datasets=[validation_dataset],

@@ -4,7 +4,7 @@
 import rascaline.torch
 import torch
 from metatensor.torch import Labels, TensorBlock, TensorMap
-from metatensor.torch.atomistic import ModelCapabilities, System
+from metatensor.torch.atomistic import ModelCapabilities, ModelOutput, System
 from omegaconf import OmegaConf
 
 from .. import ARCHITECTURE_CONFIG_PATH
@@ -79,7 +79,13 @@ def forward(self, features: TensorMap) -> TensorMap:
                         values=output_values,
                         samples=block.samples,
                         components=block.components,
-                        properties=Labels.range("properties", output_values.shape[-1]),
+                        # cannot use Labels.range() here because of torch.jit.save
+                        properties=Labels(
+                            names=["properties"],
+                            values=torch.arange(
+                                output_values.shape[1], device=output_values.device
+                            ).reshape(-1, 1),
+                        ),
                     )
                 )
         new_keys_labels = Labels(
@@ -175,7 +181,13 @@ def forward(self, features: TensorMap) -> TensorMap:
                         values=output_values,
                         samples=block.samples,
                         components=block.components,
-                        properties=Labels.single(),
+                        # cannot use Labels.single() here because of torch.jit.save
+                        properties=Labels(
+                            names=["_"],
+                            values=torch.zeros(
+                                (1, 1), dtype=torch.int32, device=block.values.device
+                            ),
+                        ),
                     )
                 )
         new_keys_labels = Labels(
@@ -259,12 +271,15 @@ def __init__(
         )
 
     def forward(
-        self, systems: List[System], requested_outputs: Optional[List[str]] = None
+        self,
+        systems: List[System],
+        outputs: Dict[str, ModelOutput],
+        selected_atoms: Optional[Labels] = None,
     ) -> Dict[str, TensorMap]:
-        if requested_outputs is None:  # default to all outputs
-            requested_outputs = list(self.capabilities.outputs.keys())
+        if selected_atoms is not None:
+            raise NotImplementedError("SOAP-BPNN does not support selected atoms.")
 
-        for requested_output in requested_outputs:
+        for requested_output in outputs.keys():
             if requested_output not in self.capabilities.outputs.keys():
                 raise ValueError(
                     f"Requested output {requested_output} is not within "
@@ -287,7 +302,7 @@ def forward(
 
         atomic_energies: Dict[str, TensorMap] = {}
         for output_name, output_layer in self.last_layers.items():
-            if output_name in requested_outputs:
+            if output_name in outputs:
                 atomic_energies[output_name] = apply_composition_contribution(
                     output_layer(hidden_features),
                     self.composition_weights[self.output_to_index[output_name]],

@@ -24,4 +24,7 @@ def test_prediction_subset():
     soap_bpnn = Model(capabilities, DEFAULT_HYPERS["model"]).to(torch.float64)
 
     structure = ase.Atoms("O2", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
-    soap_bpnn([rascaline.torch.systems_to_torch(structure)])
+    soap_bpnn(
+        [rascaline.torch.systems_to_torch(structure)],
+        {"energy": soap_bpnn.capabilities.outputs["energy"]},
+    )
@@ -29,8 +29,14 @@ def test_rotational_invariance():
     original_structure = copy.deepcopy(structure)
     structure.rotate(48, "y")
 
-    original_output = soap_bpnn([rascaline.torch.systems_to_torch(original_structure)])
-    rotated_output = soap_bpnn([rascaline.torch.systems_to_torch(structure)])
+    original_output = soap_bpnn(
+        [rascaline.torch.systems_to_torch(original_structure)],
+        {"energy": soap_bpnn.capabilities.outputs["energy"]},
+    )
+    rotated_output = soap_bpnn(
+        [rascaline.torch.systems_to_torch(structure)],
+        {"energy": soap_bpnn.capabilities.outputs["energy"]},
+    )
 
     assert torch.allclose(
         original_output["energy"].block().values,

@@ -40,7 +40,7 @@ def test_regression_init():
 
     output = soap_bpnn(
         [rascaline.torch.systems_to_torch(structure) for structure in structures],
-        ["U0"],
+        {"U0": soap_bpnn.capabilities.outputs["U0"]},
     )
     expected_output = torch.tensor(
         [[-0.1746], [-0.2209], [-0.2426], [-0.2033], [-0.2973]],
@@ -87,7 +87,7 @@ def test_regression_train():
     soap_bpnn = train([dataset], [dataset], capabilities, hypers)
 
     # Predict on the first five structures
-    output = soap_bpnn(structures[:5], ["U0"])
+    output = soap_bpnn(structures[:5], {"U0": soap_bpnn.capabilities.outputs["U0"]})
 
     expected_output = torch.tensor(
         [[-40.5007], [-56.5529], [-76.4418], [-77.2819], [-93.3743]],

@@ -1,7 +1,17 @@
-import torch
-from metatensor.torch.atomistic import ModelCapabilities, ModelOutput
+import os
 
-from metatensor.models.soap_bpnn import DEFAULT_HYPERS, Model
+
+# Execute the setup script which will make sum_over_samples saveable.
+current_dir = os.path.dirname(__file__)
+setup_path = os.path.join(
+    current_dir, "..", "..", "..", "..", "..", "scripts", "hotfix_metatensor.py"
+)
+exec(open(setup_path).read())
+
+import torch  # noqa: E402
+from metatensor.torch.atomistic import ModelCapabilities, ModelOutput  # noqa: E402
+
+from metatensor.models.soap_bpnn import DEFAULT_HYPERS, Model  # noqa: E402
 
 
 def test_torchscript():
@@ -18,4 +28,26 @@ def test_torchscript():
         },
     )
     soap_bpnn = Model(capabilities, DEFAULT_HYPERS["model"]).to(torch.float64)
-    torch.jit.script(soap_bpnn)
+    torch.jit.script(soap_bpnn, {"energy": soap_bpnn.capabilities.outputs["energy"]})
+
+
+def test_torchscript_save():
+    """Tests that the model can be jitted and saved."""
+
+    capabilities = ModelCapabilities(
+        length_unit="Angstrom",
+        species=[1, 6, 7, 8],
+        outputs={
+            "energy": ModelOutput(
+                quantity="energy",
+                unit="eV",
+            )
+        },
+    )
+    soap_bpnn = Model(capabilities, DEFAULT_HYPERS["model"]).to(torch.float64)
+    torch.jit.save(
+        torch.jit.script(
+            soap_bpnn, {"energy": soap_bpnn.capabilities.outputs["energy"]}
+        ),
+        "soap_bpnn.pt",
+    )
@@ -45,6 +45,7 @@ def train(
     )
 
     # Calculate and set the composition weights for all targets:
+    logger.info("Calculating composition weights")
     for target_name in model_capabilities.outputs.keys():
         # find the dataset that contains the target:
         train_dataset_with_target = None
@@ -64,6 +65,8 @@ def train(
 
     hypers_training = hypers["training"]
 
+    logger.info("Setting up data loaders")
+
     # Create dataloader for the training datasets:
     train_dataloaders = []
     for dataset in train_datasets:
@@ -127,6 +130,7 @@ def train(
     epochs_without_improvement = 0
 
     # Train the model:
+    logger.info("Starting training")
     for epoch in range(hypers_training["num_epochs"]):
         # aggregated information holders:
         aggregated_train_info: Dict[str, Tuple[float, int]] = {}

diff --git a/src/metatensor/models/utils/compute_loss.py b/src/metatensor/models/utils/compute_loss.py
@@ -71,7 +71,9 @@ def compute_model_loss(
                 system.positions.requires_grad_(True)
 
     # Based on the keys of the targets, get the outputs of the model:
-    model_outputs = model(systems, targets.keys())
+    model_outputs = model(
+        systems, {key: model.capabilities.outputs[key] for key in targets.keys()}
+    )
 
     for energy_target in energy_targets:
         # If the energy target requires gradients, compute them: