Change CLI API

metatensor · Dec 19, 2023 · df6aa67 · df6aa67
1 parent dc2299b
commit df6aa67
Show file tree

Hide file tree

Showing 12 changed files with 101 additions and 95 deletions.
diff --git a/.gitignore b/.gitignore
@@ -159,8 +159,9 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# Models
+# Don't save model outputs
 *.pt
+!tests/resources/*.pt
 
 # model output directories
 outputs/

diff --git a/docs/src/getting-started/usage.rst b/docs/src/getting-started/usage.rst
@@ -1,5 +1,5 @@
-Usage - command line
-====================
+Usage
+=====
 
 `metatensor-models` is designed for direct usage from the command line (cli). The
 general help of `metatensor-models` can be accessed using
@@ -31,24 +31,23 @@ The sub-command to start a model training is
 
     metatensor-models train
 
-To train a model you have to define your parameters. This includes the specific
-architecture you want to use, the files for obtaining training structures and target
-values as well as probable changes
+To train a model you have to define your options. This includes the specific
+architecture you want to use and the data including the training structures and target
+values
 
 The default model and training hyperparameter for each model are listed in their
-corresponding documentation page. We will use the following config to run an example
-training
+corresponding documentation page. We will use these minimal options to run an example
+training using the default hyperparameters of a SOAP BPNN model
 
-
-.. literalinclude:: ../../static/parameters.yaml
+.. literalinclude:: ../../static/options.yaml
    :language: yaml
 
-For each training run a new output directory is based on the current date and time is
-created. By default, this output directory is used to store Hydra output for the run
-(Configuration, Logs etc). You can `override
+For each training run a new output directory based on the current date and time is
+created. By default, this output directory is used to store Hydra's output for the run
+(configuration, logs, etc.). You can `override
 <https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory/>`_ this
-behavior in the parameter file. To start the training using the ``parameters.yaml`` in
-the current directory type.
+behavior in the options file. To start the training create an ``options.yaml`` file in
+the current directory and type
 
 .. literalinclude:: ../../../examples/usage.sh
     :language: bash

diff --git a/docs/static/parameters.yaml → docs/static/options.yaml b/docs/static/parameters.yaml → docs/static/options.yaml
@@ -1,5 +1,4 @@
 defaults:
-  - _self_  # mandatory parameter to avoid hydra warnings
   - architecture: soap_bpnn  # architecture used to train the model
 
 # Section defining the parameters for structure and target data

diff --git a/examples/parameters.yaml → examples/options.yaml b/examples/parameters.yaml → examples/options.yaml
diff --git a/examples/usage.sh b/examples/usage.sh
@@ -1,15 +1,16 @@
 #!\bin\bash
 
-metatensor-models train --parameters=parameters.yaml
+metatensor-models train options.yaml
 
 # The function saves the final model `model.pt` to the current output folder for later
 # evaluation. All command line flags of the train sub-command can be listed via
 
 metatensor-models train --help
 
-# We now evaluate the model on the training dataset
+# We now evaluate the model on the training dataset, where the first argument specifies
+# the model and the second the structure file
 
-metatensor-models eval --model=model.pt --structures=qm9_reduced_100.xyz
+metatensor-models eval model.pt qm9_reduced_100.xyz
 
 # The evaluation command predicts the property the model was trained against; here "U0".
 # The predictions together with the structures have been written in a file named

diff --git a/src/metatensor/models/__main__.py b/src/metatensor/models/__main__.py
@@ -43,10 +43,10 @@ def main():
         # override `sys.argv` to be compatible with our CLI architecture.
         argv = sys.argv[:1]
 
-        parameters_path = Path(args.parameters_path)
-        argv.append(f"--config-dir={parameters_path.parent}")
-        argv.append(f"--config-name={parameters_path.name}")
-        argv.append(f"+output_path={args.output_path}")
+        options = Path(args.options)
+        argv.append(f"--config-dir={options.parent}")
+        argv.append(f"--config-name={options.name}")
+        argv.append(f"+output_path={args.output}")
 
         if args.hydra_paramters is not None:
             argv += args.hydra_paramters

diff --git a/src/metatensor/models/cli/eval_model.py b/src/metatensor/models/cli/eval_model.py
@@ -21,47 +21,39 @@ def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None:
     parser.set_defaults(callable="eval_model")
 
     parser.add_argument(
-        "-m",
-        "--model",
-        dest="model_path",
+        "model",
         type=str,
-        required=True,
-        help="Path to a saved model",
+        help="saved model to be evaluated",
     )
     parser.add_argument(
-        "-s",
-        "--structures",
-        dest="structure_path",
+        "structures",
         type=str,
-        required=True,
-        help="Path to a structure file which should be considered for the evaluation.",
+        help="Structure file which should be considered for the evaluation.",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="output.xyz",
-        help="Path to save the predicted values.",
+        help="filename of the predictions",
     )
 
 
-def eval_model(
-    model_path: str, structure_path: str, output_path: str = "output.xyz"
-) -> None:
+def eval_model(model: str, structures: str, output: str = "output.xyz") -> None:
     """Evaluate a pretrained model.
 
     ``target_property`` will be predicted on a provided set of structures. Predicted
-    values will be written ``output_path``.
+    values will be written to ``output``.
 
-    :param model_path: Path to a saved model
-    :param structure_path: Path to a structure file which should be considered for the
+    :param model: Path to a saved model
+    :param structures: Path to a structure file which should be considered for the
         evaluation.
-    :param output_path: Path to save the predicted values
+    :param output: Path to save the predicted values
     """
 
-    model = load_model(model_path)
-    structures = read_structures(structure_path)
-    predictions = model(structures)
-    write_predictions(output_path, predictions, structures)
+    loaded_model = load_model(model)
+    structure_list = read_structures(structures)
+    predictions = loaded_model(structure_list)
+    write_predictions(output, predictions, structure_list)
diff --git a/src/metatensor/models/cli/export_model.py b/src/metatensor/models/cli/export_model.py
@@ -15,28 +15,25 @@ def _add_export_model_parser(subparser: argparse._SubParsersAction) -> None:
     parser.set_defaults(callable="export_model")
 
     parser.add_argument(
-        "-m",
-        "--model",
-        dest="model_path",
+        "model",
         type=str,
-        required=True,
-        help="Path to a saved model",
+        help="Saved model which should be exported",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="exported.pt",
-        help="Export path for the model.",
+        help="Filename of the exported model.",
     )
 
 
-def export_model(model_path: str, output_path: str) -> None:
+def export_model(model: str, output: str) -> None:
     """Export a pretrained model to run MD simulations
 
-    :param model_path: Path to a saved model
-    :param output_path: Path to save the exported model
+    :param model: Path to a saved model
+    :param output: Path to save the exported model
     """
     raise NotImplementedError("model exporting is not implemented yet.")
diff --git a/src/metatensor/models/cli/train_model.py b/src/metatensor/models/cli/train_model.py
@@ -20,9 +20,7 @@ def _has_yaml_suffix(s: str) -> str:
     """Checks if a string has a .yaml suffix."""
 
     if Path(s).suffix != ".yaml":
-        raise argparse.ArgumentTypeError(
-            f"Parameters file '{s}' must be a `.yaml` file."
-        )
+        raise argparse.ArgumentTypeError(f"Options file '{s}' must be a `.yaml` file.")
 
     return s
 
@@ -46,17 +44,14 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:
     parser.set_defaults(callable="train_model")
 
     parser.add_argument(
-        "-p",
-        "--parameters",
-        dest="parameters_path",
+        "options",
         type=_has_yaml_suffix,
-        required=True,
-        help="Path to the parameter file",
+        help="Options file",
     )
     parser.add_argument(
         "-o",
         "--output",
-        dest="output_path",
+        dest="output",
         type=str,
         required=False,
         default="model.pt",
@@ -73,7 +68,7 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:
 
 
 @hydra.main(config_path=str(CONFIG_PATH), config_name="config", version_base=None)
-def train_model(config: DictConfig) -> None:
+def train_model(options: DictConfig) -> None:
     """Train an atomistic machine learning model using configurations provided by Hydra.
 
     This function sets up the dataset and model architecture, then runs the training
@@ -87,34 +82,35 @@ def train_model(config: DictConfig) -> None:
     https://hydra.cc/docs/advanced/hydra-command-line-flags/ and
     https://hydra.cc/docs/advanced/override_grammar/basic/ for details.
 
-    :param config: A dictionary-like object obtained from Hydra, containing all the
-        necessary parameters for dataset preparation, model instantiation, and training.
+    :param options: A dictionary-like object obtained from Hydra, containing all the
+        necessary options for dataset preparation, model hyperparameters, and training.
     """
 
     logger.info("Setting up dataset")
-    structures = read_structures(config["dataset"]["structure_path"])
+    structures = read_structures(options["dataset"]["structure_path"])
     targets = read_targets(
-        config["dataset"]["targets_path"],
-        target_values=config["dataset"]["target_value"],
+        options["dataset"]["targets_path"],
+        target_values=options["dataset"]["target_value"],
     )
     dataset = Dataset(structures, targets)
 
     logger.info("Setting up model")
-    architetcure_name = config["architecture"]["name"]
+    architetcure_name = options["architecture"]["name"]
     architecture = importlib.import_module(f"metatensor.models.{architetcure_name}")
     model = architecture.Model(
         all_species=dataset.all_species,
-        hypers=OmegaConf.to_container(config["architecture"]["model"]),
+        hypers=OmegaConf.to_container(options["architecture"]["model"]),
     )
 
     logger.info("Run training")
     output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
 
+    print(OmegaConf.to_container(options))
     model = architecture.train(
         model=model,
         train_dataset=dataset,
-        hypers=OmegaConf.to_container(config["architecture"]["training"]),
+        hypers=OmegaConf.to_container(options["architecture"]["training"]),
         output_dir=output_dir,
     )
 
-    save_model(model, config["output_path"])
+    save_model(model, options["output_path"])
diff --git a/tests/cli/test_eval_model.py b/tests/cli/test_eval_model.py
@@ -16,14 +16,7 @@ def test_eval(output, monkeypatch, tmp_path):
     shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz")
     shutil.copy(RESOURCES_PATH / "bpnn-model.pt", "bpnn-model.pt")
 
-    command = [
-        "metatensor-models",
-        "eval",
-        "-m",
-        "bpnn-model.pt",
-        "-s",
-        "qm9_reduced_100.xyz",
-    ]
+    command = ["metatensor-models", "eval", "bpnn-model.pt", "qm9_reduced_100.xyz"]
 
     if output is not None:
         command += ["-o", output]

diff --git a/tests/cli/test_train_model.py b/tests/cli/test_train_model.py
@@ -2,19 +2,47 @@
 import subprocess
 from pathlib import Path
 
+import pytest
+
 
 RESOURCES_PATH = Path(__file__).parent.resolve() / ".." / "resources"
 
 
-def test_train(monkeypatch, tmp_path):
+@pytest.mark.parametrize("output", [None, "mymodel.pt"])
+def test_train(monkeypatch, tmp_path, output):
     """Test that training via the training cli runs without an error raise."""
     monkeypatch.chdir(tmp_path)
     shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz")
-    shutil.copy(RESOURCES_PATH / "parameters.yaml", "parameters.yaml")
-    subprocess.check_call(
-        [
-            "metatensor-models",
-            "train",
-            "--parameters=parameters.yaml",
-        ]
+    shutil.copy(RESOURCES_PATH / "options.yaml", "options.yaml")
+
+    command = ["metatensor-models", "train", "options.yaml"]
+
+    if output is not None:
+        command += ["-o", output]
+    else:
+        output = "model.pt"
+
+    subprocess.check_call(command)
+    assert Path(output).is_file()
+
+
+def test_yml_error():
+    """Test that an error is raised if the options file is not a .yaml file."""
+    try:
+        subprocess.check_output(
+            ["metatensor-models", "train", "options.yml"], stderr=subprocess.STDOUT
+        )
+    except subprocess.CalledProcessError as captured:
+        assert "Options file 'options.yml' must be a `.yaml` file." in str(
+            captured.output
+        )
+
+
+def test_hydra_arguments():
+    """Test if hydra arguments work."""
+    option_path = str(RESOURCES_PATH / "options.yaml")
+    out = subprocess.check_output(
+        ["metatensor-models", "train", option_path, "--hydra=--help"]
     )
+    # Check that the num_epochs override is successful
+    assert "num_epochs: 1" in str(out)
diff --git a/tests/resources/parameters.yaml → tests/resources/options.yaml b/tests/resources/parameters.yaml → tests/resources/options.yaml
@@ -1,10 +1,10 @@
 defaults:
-  - _self_  # mandatory parameter to avoid hydra warnings
   - architecture: soap_bpnn  # architecture used to train the model
 
-training:
-    batch_size: 8
-    num_epochs: 1
+architecture:
+  training:
+      batch_size: 2
+      num_epochs: 1
 
 # Section defining the parameters for structure and target data
 dataset: