Skip to content

Commit

Permalink
Change CLI API
Browse files Browse the repository at this point in the history
  • Loading branch information
PicoCentauri committed Dec 19, 2023
1 parent dc2299b commit df6aa67
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 95 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,9 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Models
# Don't save model outputs
*.pt
!tests/resources/*.pt

# model output directories
outputs/
Expand Down
27 changes: 13 additions & 14 deletions docs/src/getting-started/usage.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Usage - command line
====================
Usage
=====

`metatensor-models` is designed for direct usage from the command line (CLI). The
general help of `metatensor-models` can be accessed using
Expand Down Expand Up @@ -31,24 +31,23 @@ The sub-command to start a model training is
metatensor-models train
To train a model you have to define your parameters. This includes the specific
architecture you want to use, the files for obtaining training structures and target
values as well as probable changes
To train a model you have to define your options. This includes the specific
architecture you want to use and the data including the training structures and target
values

The default model and training hyperparameters for each model are listed in their
corresponding documentation page. We will use the following config to run an example
training
corresponding documentation page. We will use these minimal options to run an example
training using the default hyperparameters of a SOAP BPNN model


.. literalinclude:: ../../static/parameters.yaml
.. literalinclude:: ../../static/options.yaml
:language: yaml

For each training run a new output directory is based on the current date and time is
created. By default, this output directory is used to store Hydra output for the run
(Configuration, Logs etc). You can `override
For each training run a new output directory based on the current date and time is
created. By default, this output directory is used to store Hydra's output for the run
(configuration, logs, etc.). You can `override
<https://hydra.cc/docs/tutorials/basic/running_your_app/working_directory/>`_ this
behavior in the parameter file. To start the training using the ``parameters.yaml`` in
the current directory type.
behavior in the options file. To start the training create an ``options.yaml`` file in
the current directory and type

.. literalinclude:: ../../../examples/usage.sh
:language: bash
Expand Down
1 change: 0 additions & 1 deletion docs/static/parameters.yaml → docs/static/options.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
defaults:
- _self_ # mandatory parameter to avoid hydra warnings
- architecture: soap_bpnn # architecture used to train the model

# Section defining the parameters for structure and target data
Expand Down
File renamed without changes.
7 changes: 4 additions & 3 deletions examples/usage.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
#!\bin\bash

metatensor-models train --parameters=parameters.yaml
metatensor-models train options.yaml

# The function saves the final model `model.pt` to the current output folder for later
# evaluation. All command line flags of the train sub-command can be listed via

metatensor-models train --help

# We now evaluate the model on the training dataset
# We now evaluate the model on the training dataset, where the first argument specifies
# the model and the second the structure file

metatensor-models eval --model=model.pt --structures=qm9_reduced_100.xyz
metatensor-models eval model.pt qm9_reduced_100.xyz

# The evaluation command predicts the property the model was trained against; here "U0".
# The predictions together with the structures have been written in a file named
Expand Down
8 changes: 4 additions & 4 deletions src/metatensor/models/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ def main():
# override `sys.argv` to be compatible with our CLI architecture.
argv = sys.argv[:1]

parameters_path = Path(args.parameters_path)
argv.append(f"--config-dir={parameters_path.parent}")
argv.append(f"--config-name={parameters_path.name}")
argv.append(f"+output_path={args.output_path}")
options = Path(args.options)
argv.append(f"--config-dir={options.parent}")
argv.append(f"--config-name={options.name}")
argv.append(f"+output_path={args.output}")

if args.hydra_paramters is not None:
argv += args.hydra_paramters
Expand Down
38 changes: 15 additions & 23 deletions src/metatensor/models/cli/eval_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,47 +21,39 @@ def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None:
parser.set_defaults(callable="eval_model")

parser.add_argument(
"-m",
"--model",
dest="model_path",
"model",
type=str,
required=True,
help="Path to a saved model",
help="saved model to be evaluated",
)
parser.add_argument(
"-s",
"--structures",
dest="structure_path",
"structures",
type=str,
required=True,
help="Path to a structure file which should be considered for the evaluation.",
help="Structure file which should be considered for the evaluation.",
)
parser.add_argument(
"-o",
"--output",
dest="output_path",
dest="output",
type=str,
required=False,
default="output.xyz",
help="Path to save the predicted values.",
help="filenmae of the predictions",
)


def eval_model(
model_path: str, structure_path: str, output_path: str = "output.xyz"
) -> None:
def eval_model(model: str, structures: str, output: str = "output.xyz") -> None:
"""Evaluate a pretrained model.
``target_property`` will be predicted on a provided set of structures. Predicted
values will be written ``output_path``.
values will be written to ``output``.
:param model_path: Path to a saved model
:param structure_path: Path to a structure file which should be considered for the
:param model: Path to a saved model
:param structures: Path to a structure file which should be considered for the
evaluation.
:param output_path: Path to save the predicted values
:param output: Path to save the predicted values
"""

model = load_model(model_path)
structures = read_structures(structure_path)
predictions = model(structures)
write_predictions(output_path, predictions, structures)
loaded_model = load_model(model)
structure_list = read_structures(structures)
predictions = loaded_model(structure_list)
write_predictions(output, predictions, structure_list)
17 changes: 7 additions & 10 deletions src/metatensor/models/cli/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,25 @@ def _add_export_model_parser(subparser: argparse._SubParsersAction) -> None:
parser.set_defaults(callable="export_model")

parser.add_argument(
"-m",
"--model",
dest="model_path",
"model",
type=str,
required=True,
help="Path to a saved model",
help="Saved model which should be exprted",
)
parser.add_argument(
"-o",
"--output",
dest="output_path",
dest="output",
type=str,
required=False,
default="exported.pt",
help="Export path for the model.",
help="Filename of the exported model.",
)


def export_model(model_path: str, output_path: str) -> None:
def export_model(model: str, output: str) -> None:
"""Export a pretrained model to run MD simulations
:param model_path: Path to a saved model
:param output_path: Path to save the exported model
:param model: Path to a saved model
:param output: Path to save the exported model
"""
raise NotImplementedError("model exporting is not implemented yet.")
34 changes: 15 additions & 19 deletions src/metatensor/models/cli/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ def _has_yaml_suffix(s: str) -> str:
"""Checks if a string has a .yaml suffix."""

if Path(s).suffix != ".yaml":
raise argparse.ArgumentTypeError(
f"Parameters file '{s}' must be a `.yaml` file."
)
raise argparse.ArgumentTypeError(f"Options file '{s}' must be a `.yaml` file.")

return s

Expand All @@ -46,17 +44,14 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:
parser.set_defaults(callable="train_model")

parser.add_argument(
"-p",
"--parameters",
dest="parameters_path",
"options",
type=_has_yaml_suffix,
required=True,
help="Path to the parameter file",
help="Options file",
)
parser.add_argument(
"-o",
"--output",
dest="output_path",
dest="output",
type=str,
required=False,
default="model.pt",
Expand All @@ -73,7 +68,7 @@ def _add_train_model_parser(subparser: argparse._SubParsersAction) -> None:


@hydra.main(config_path=str(CONFIG_PATH), config_name="config", version_base=None)
def train_model(config: DictConfig) -> None:
def train_model(options: DictConfig) -> None:
"""Train an atomistic machine learning model using configurations provided by Hydra.
This function sets up the dataset and model architecture, then runs the training
Expand All @@ -87,34 +82,35 @@ def train_model(config: DictConfig) -> None:
https://hydra.cc/docs/advanced/hydra-command-line-flags/ and
https://hydra.cc/docs/advanced/override_grammar/basic/ for details.
:param config: A dictionary-like object obtained from Hydra, containing all the
necessary parameters for dataset preparation, model instantiation, and training.
:param options: A dictionary-like object obtained from Hydra, containing all the
necessary options for dataset preparation, model hyperparameters, and training.
"""

logger.info("Setting up dataset")
structures = read_structures(config["dataset"]["structure_path"])
structures = read_structures(options["dataset"]["structure_path"])
targets = read_targets(
config["dataset"]["targets_path"],
target_values=config["dataset"]["target_value"],
options["dataset"]["targets_path"],
target_values=options["dataset"]["target_value"],
)
dataset = Dataset(structures, targets)

logger.info("Setting up model")
architetcure_name = config["architecture"]["name"]
architetcure_name = options["architecture"]["name"]
architecture = importlib.import_module(f"metatensor.models.{architetcure_name}")
model = architecture.Model(
all_species=dataset.all_species,
hypers=OmegaConf.to_container(config["architecture"]["model"]),
hypers=OmegaConf.to_container(options["architecture"]["model"]),
)

logger.info("Run training")
output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir

print(OmegaConf.to_container(options))
model = architecture.train(
model=model,
train_dataset=dataset,
hypers=OmegaConf.to_container(config["architecture"]["training"]),
hypers=OmegaConf.to_container(options["architecture"]["training"]),
output_dir=output_dir,
)

save_model(model, config["output_path"])
save_model(model, options["output_path"])
9 changes: 1 addition & 8 deletions tests/cli/test_eval_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,7 @@ def test_eval(output, monkeypatch, tmp_path):
shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz")
shutil.copy(RESOURCES_PATH / "bpnn-model.pt", "bpnn-model.pt")

command = [
"metatensor-models",
"eval",
"-m",
"bpnn-model.pt",
"-s",
"qm9_reduced_100.xyz",
]
command = ["metatensor-models", "eval", "bpnn-model.pt", "qm9_reduced_100.xyz"]

if output is not None:
command += ["-o", output]
Expand Down
44 changes: 36 additions & 8 deletions tests/cli/test_train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,47 @@
import subprocess
from pathlib import Path

import pytest


RESOURCES_PATH = Path(__file__).parent.resolve() / ".." / "resources"


def test_train(monkeypatch, tmp_path):
@pytest.mark.parametrize("output", [None, "mymodel.pt"])
def test_train(monkeypatch, tmp_path, output):
"""Test that training via the training cli runs without an error raise."""
monkeypatch.chdir(tmp_path)
shutil.copy(RESOURCES_PATH / "qm9_reduced_100.xyz", "qm9_reduced_100.xyz")
shutil.copy(RESOURCES_PATH / "parameters.yaml", "parameters.yaml")
subprocess.check_call(
[
"metatensor-models",
"train",
"--parameters=parameters.yaml",
]
shutil.copy(RESOURCES_PATH / "options.yaml", "options.yaml")

command = ["metatensor-models", "train", "options.yaml"]

if output is not None:
command += ["-o", output]
else:
output = "model.pt"

subprocess.check_call(command)
assert Path(output).is_file()


def test_yml_error():
    """Test that an error is raised if the options file is not a ``.yaml`` file.

    The ``train`` sub-command is invoked with an ``options.yml`` argument. The
    command must exit with a non-zero status and report the suffix error in its
    combined stdout/stderr output.
    """
    result = subprocess.run(
        ["metatensor-models", "train", "options.yml"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # the error message is expected on stderr
        text=True,
    )

    # Assert the failure explicitly: with a plain try/except around
    # `check_output` the test would pass silently if the command succeeded.
    assert result.returncode != 0
    assert "Options file 'options.yml' must be a `.yaml` file." in result.stdout


def test_hydra_arguments():
    """Test that arguments given via ``--hydra`` are accepted by the train command.

    Runs ``metatensor-models train`` on the ``options.yaml`` resource file with
    ``--hydra=--help`` and inspects the captured standard output.
    """
    options_file = RESOURCES_PATH / "options.yaml"
    command = ["metatensor-models", "train", str(options_file), "--hydra=--help"]

    captured = subprocess.check_output(command)

    # Check that the num_epochs override is successful
    assert "num_epochs: 1" in str(captured)
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
defaults:
- _self_ # mandatory parameter to avoid hydra warnings
- architecture: soap_bpnn # architecture used to train the model

training:
batch_size: 8
num_epochs: 1
architecture:
training:
batch_size: 2
num_epochs: 1

# Section defining the parameters for structure and target data
dataset:
Expand Down

0 comments on commit df6aa67

Please sign in to comment.