Skip to content

Commit

Permalink
Run workflow file directly from cli (#363)
Browse files Browse the repository at this point in the history
* Add CLI option to run workflow locally

* Add test

* Apply suggestions from code review

Co-authored-by: Daniel Huppmann <[email protected]>

* Add run workflow to user guide

---------

Co-authored-by: Daniel Huppmann <[email protected]>
  • Loading branch information
phackstock and danielhuppmann authored Aug 5, 2024
1 parent 44f9af8 commit 981f8a0
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 6 deletions.
29 changes: 25 additions & 4 deletions docs/user_guide/local-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,16 @@ A more elaborate use case is to perform validation against the codelists and use
# Import the necessary libraries
import pyam
from nomenclature import DataStructureDefinition, RegionProcessor, process
# Initialize a DataStructureDefinition from a suitable directory
dsd = DataStructureDefinition("definitions")
# Initialize a RegionProcessor from a suitable directory that has the mappings
rp = RegionProcessor.from_directory("mappings")
# Read the data using pyam
df = pyam.IamDataFrame("/path/to/file")
# Perform the validation and apply the region aggregation
df = process(df, dsd, processor=rp)
Expand All @@ -93,6 +93,27 @@ implemented as a ``main()`` function in ``workflow.py`` of a project repository.
.. attention:: The working-directory of the Python console has to be set to the clone
of the project repository.

A project workflow can be run directly from the console using the ``nomenclature
run-workflow`` command.

.. code-block:: console
nomenclature run-workflow input_data.xlsx
To save the output, use the ``--output-file`` option with a path to an Excel
file.

.. code-block:: console
nomenclature run-workflow input_data.xlsx --output-file output.xlsx
If the workflow file is not located in the current working directory and/or the processing function is not named ``main``, use the ``--workflow-file`` and ``--workflow-function`` options:

.. code-block:: console
nomenclature run-workflow input_data.xlsx --workflow-file path/to/workflow.py --workflow-function not_main --output-file output.xlsx
.. code-block:: python
# Import the pyam library and the project-specific workflow
Expand Down
53 changes: 52 additions & 1 deletion nomenclature/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pathlib import Path
from typing import List, Optional
import importlib.util
import sys

import click

Expand Down Expand Up @@ -198,7 +200,6 @@ def cli_list_missing_variables(
target_file : Path | None
Name of the target variable definition file, optional, defaults to
'variables.yaml'
Example
-------
Expand All @@ -215,3 +216,53 @@ def cli_list_missing_variables(
"variable",
codelist_path,
).list_missing_variables(IamDataFrame(data), target_file)


@cli.command("run-workflow")
@click.argument("input_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--workflow-file",
    default=lambda: Path.cwd() / "workflow.py",
    type=click.Path(exists=True, path_type=Path),
)
@click.option("--workflow-function", default="main")
@click.option("--output-file", type=click.Path(path_type=Path))
def cli_run_workflow(
    input_file: Path,
    workflow_file: Path,
    workflow_function: str,
    output_file: Optional[Path],
):
    """Run a given input file through a workflow function defined in a workflow.py

    Parameters
    ----------
    input_file : Path
        Input data file, must be IAMC format, .xlsx or .csv
    workflow_file : Path
        Path to the workflow file,
        default: current working directory / "workflow.py"
    workflow_function : str
        Name of the workflow function inside the workflow file, default: main
    output_file : Path | None
        Path to the output file where the processing result is saved, nothing
        is saved if None is given, default: None

    Raises
    ------
    ValueError
        If the workflow_file cannot be loaded as a Python module or does not
        have the specified workflow_function

    """

    module_name = workflow_file.stem
    spec = importlib.util.spec_from_file_location(module_name, workflow_file)
    # No spec (or no loader) is returned when the path cannot be handled as
    # Python source, e.g. a directory or a file without a Python suffix.
    if spec is None or spec.loader is None:
        raise ValueError(f"Could not load `{workflow_file}` as a Python module")

    workflow = importlib.util.module_from_spec(spec)
    # Register the module before executing it so that code inside the workflow
    # file can resolve the module by name while it is being executed.
    sys.modules[module_name] = workflow
    spec.loader.exec_module(workflow)

    # Require a callable so that a name bound to a non-function is reported
    # with the same error as a missing name.
    function = getattr(workflow, workflow_function, None)
    if not callable(function):
        raise ValueError(f"{workflow} does not have a function `{workflow_function}`")

    df = function(IamDataFrame(input_file))
    if output_file is not None:
        df.to_excel(output_file)
2 changes: 2 additions & 0 deletions tests/data/workflow/workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def main(df):
    """Minimal workflow used by the CLI tests: hand the input back unchanged."""
    return df
23 changes: 22 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
import pytest

from click.testing import CliRunner
from conftest import TEST_DATA_DIR
from pandas.testing import assert_frame_equal
from pyam import IAMC_IDX, IamDataFrame, assert_iamframe_equal

from nomenclature import cli
from nomenclature.testing import assert_valid_structure, assert_valid_yaml
from nomenclature.codelist import VariableCodeList
from nomenclature.cli import cli_run_workflow

from conftest import TEST_DATA_DIR

runner = CliRunner()

Expand Down Expand Up @@ -360,3 +362,22 @@ def test_cli_add_missing_variables(simple_definition, tmp_path):

assert "Some new variable" in obs
assert obs["Some new variable"].unit == "EJ/yr"


def test_cli_run_workflow(tmp_path, simple_df):
    """Check that `run-workflow` passes data through the workflow and saves it"""
    input_file = tmp_path / "input.xlsx"
    output_file = tmp_path / "output.xlsx"
    simple_df.to_excel(input_file)

    result = runner.invoke(
        cli,
        [
            "run-workflow",
            str(input_file),
            "--workflow-file",
            str(TEST_DATA_DIR / "workflow" / "workflow.py"),
            "--output-file",
            str(output_file),
        ],
    )

    # Check the exit code first so that a failing command is reported directly
    # instead of as an error when reading the output file.
    assert result.exit_code == 0, result.output

    assert_iamframe_equal(simple_df, IamDataFrame(output_file))

0 comments on commit 981f8a0

Please sign in to comment.