From 5fdc23fdb8559bddc504efe1e972ebaa4da0e152 Mon Sep 17 00:00:00 2001 From: SdgJlbl Date: Thu, 8 Feb 2024 16:20:02 +0100 Subject: [PATCH] feat!: rename function to archive (#393) Signed-off-by: SdgJlbl Signed-off-by: ThibaultFy Co-authored-by: ThibaultFy --- CHANGELOG.md | 4 ++ references/sdk.md | 4 +- references/sdk_models.md | 2 +- setup.py | 1 + substra/sdk/backends/local/backend.py | 2 +- .../local/compute/spawner/subprocess.py | 45 +++++++++---------- substra/sdk/backends/local/compute/worker.py | 5 +-- substra/sdk/backends/local/dal.py | 2 +- substra/sdk/client.py | 6 +-- substra/sdk/models.py | 2 +- tests/data_factory.py | 16 ++++--- tests/datastore.py | 20 ++++----- tests/sdk/test_rest_client.py | 4 +- 13 files changed, 61 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d89cbdd..25a45045 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- BREAKING: Renamed `function` field of the Function pydantic model to `archive`([#393](https://github.com/Substra/substra/pull/393)) + ### Added - Paths are now resolved on DatasampleSpec objects. Which means that users can pass relative paths ([#392](https://github.com/Substra/substra/pull/392)) diff --git a/references/sdk.md b/references/sdk.md index 2908fb45..7cc92c7f 100644 --- a/references/sdk.md +++ b/references/sdk.md @@ -211,7 +211,7 @@ describe_function(self, key: str) -> str Get function description. ## download_dataset ```text -download_dataset(self, key: str, destination_folder: str) -> None +download_dataset(self, key: str, destination_folder: str) -> pathlib.Path ``` Download data manager resource. @@ -226,7 +226,7 @@ Download opener script in destination folder. - `pathlib.Path`: Path of the downloaded dataset ## download_function ```text -download_function(self, key: str, destination_folder: str) -> None +download_function(self, key: str, destination_folder: str) -> pathlib.Path ``` Download function resource. diff --git a/references/sdk_models.md b/references/sdk_models.md index 915dacce..1dcbc409 100644 --- a/references/sdk_models.md +++ b/references/sdk_models.md @@ -72,7 +72,7 @@ Asset creation specification base class. - inputs: typing.List[substra.sdk.models.FunctionInput] - outputs: typing.List[substra.sdk.models.FunctionOutput] - description: -- function: +- archive: ``` ## ComputePlan diff --git a/setup.py b/setup.py index 9a9dd2b4..e41ee68d 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ """Packaging settings.""" + import os from codecs import open diff --git a/substra/sdk/backends/local/backend.py b/substra/sdk/backends/local/backend.py index aedaac35..15451455 100644 --- a/substra/sdk/backends/local/backend.py +++ b/substra/sdk/backends/local/backend.py @@ -293,7 +293,7 @@ def _add_function(self, key, spec, spec_options=None): "authorized_ids": permissions.authorized_ids, }, }, - function={"checksum": fs.hash_file(function_file_path), "storage_address": function_file_path}, + archive={"checksum": fs.hash_file(function_file_path), "storage_address": function_file_path}, description={ "checksum": fs.hash_file(function_description_path), "storage_address": function_description_path, diff --git a/substra/sdk/backends/local/compute/spawner/subprocess.py b/substra/sdk/backends/local/compute/spawner/subprocess.py index 8bda7668..a474694d 100644 --- a/substra/sdk/backends/local/compute/spawner/subprocess.py +++ b/substra/sdk/backends/local/compute/spawner/subprocess.py @@ -92,26 +92,25 @@ def spawn( envs, ): """Spawn a python process (blocking).""" - with tempfile.TemporaryDirectory(dir=self._local_worker_dir) as function_dir, tempfile.TemporaryDirectory( - dir=function_dir - ) as args_dir: - function_dir = pathlib.Path(function_dir) - args_dir = pathlib.Path(args_dir) - uncompress(archive_path, function_dir) - script_name, function_name = _get_entrypoint_from_dockerfile(function_dir) - - args_file = args_dir / "arguments.txt" - - py_command = [sys.executable, str(function_dir / script_name), f"@{args_file}"] - py_command_args = _get_command_args(function_name, command_args_tpl, local_volumes) - write_command_args_file(args_file, py_command_args) - - if data_sample_paths is not None and len(data_sample_paths) > 0: - _symlink_data_samples(data_sample_paths, local_volumes[VOLUME_INPUTS]) - - # Catching error and raising to be ISO to the docker local backend - # Don't capture the output to be able to use pdb - try: - subprocess.run(py_command, capture_output=False, check=True, cwd=function_dir, env=envs) - except subprocess.CalledProcessError as e: - raise ExecutionError(e) + with tempfile.TemporaryDirectory(dir=self._local_worker_dir) as function_dir: + with tempfile.TemporaryDirectory(dir=function_dir) as args_dir: + function_dir = pathlib.Path(function_dir) + args_dir = pathlib.Path(args_dir) + uncompress(archive_path, function_dir) + script_name, function_name = _get_entrypoint_from_dockerfile(function_dir) + + args_file = args_dir / "arguments.txt" + + py_command = [sys.executable, str(function_dir / script_name), f"@{args_file}"] + py_command_args = _get_command_args(function_name, command_args_tpl, local_volumes) + write_command_args_file(args_file, py_command_args) + + if data_sample_paths is not None and len(data_sample_paths) > 0: + _symlink_data_samples(data_sample_paths, local_volumes[VOLUME_INPUTS]) + + # Catching error and raising to be ISO to the docker local backend + # Don't capture the output to be able to use pdb + try: + subprocess.run(py_command, capture_output=False, check=True, cwd=function_dir, env=envs) + except subprocess.CalledProcessError as e: + raise ExecutionError(e) diff --git a/substra/sdk/backends/local/compute/worker.py b/substra/sdk/backends/local/compute/worker.py index 1c02427f..37c25cab 100644 --- a/substra/sdk/backends/local/compute/worker.py +++ b/substra/sdk/backends/local/compute/worker.py @@ -257,7 +257,6 @@ def schedule_task(self, task: models.Task): with self._context(task.key) as task_dir: task.status = models.Status.doing task.start_date = datetime.datetime.now() - function = self._db.get_with_files(schemas.Type.Function, task.function.key) input_multiplicity = {i.identifier: i.multiple for i in function.inputs} compute_plan = self._db.get(schemas.Type.ComputePlan, task.compute_plan_key) @@ -356,10 +355,10 @@ def schedule_task(self, task: models.Task): command_template += ["--log-level", "warning"] # Task execution - container_name = f"function-{function.function.checksum}" + container_name = f"function-{function.archive.checksum}" self._spawner.spawn( container_name, - str(function.function.storage_address), + str(function.archive.storage_address), command_args_tpl=[string.Template(str(part)) for part in command_template], local_volumes=volumes, data_sample_paths=data_sample_paths, diff --git a/substra/sdk/backends/local/dal.py b/substra/sdk/backends/local/dal.py index f804a3c4..e422d10f 100644 --- a/substra/sdk/backends/local/dal.py +++ b/substra/sdk/backends/local/dal.py @@ -38,7 +38,7 @@ def is_local(self, key: str, type_: schemas.Type): def _get_asset_content_filename(self, type_): if type_ == schemas.Type.Function: filename = "function.tar.gz" - field_name = "function" + field_name = "archive" elif type_ == schemas.Type.Dataset: filename = "opener.py" diff --git a/substra/sdk/client.py b/substra/sdk/client.py index 77be1967..b0ea924c 100644 --- a/substra/sdk/client.py +++ b/substra/sdk/client.py @@ -879,7 +879,7 @@ def link_dataset_with_data_samples( return self._backend.link_dataset_with_data_samples(dataset_key, data_sample_keys) @logit - def download_dataset(self, key: str, destination_folder: str) -> None: + def download_dataset(self, key: str, destination_folder: str) -> pathlib.Path: """Download data manager resource. Download opener script in destination folder. @@ -900,7 +900,7 @@ def download_dataset(self, key: str, destination_folder: str) -> None: ) @logit - def download_function(self, key: str, destination_folder: str) -> None: + def download_function(self, key: str, destination_folder: str) -> pathlib.Path: """Download function resource. Download function package in destination folder. @@ -915,7 +915,7 @@ def download_function(self, key: str, destination_folder: str) -> None: return pathlib.Path( self._backend.download( schemas.Type.Function, - "function.storage_address", + "archive.storage_address", key, os.path.join(destination_folder, "function.tar.gz"), ) diff --git a/substra/sdk/models.py b/substra/sdk/models.py index 8e045f6d..dfa94e8d 100644 --- a/substra/sdk/models.py +++ b/substra/sdk/models.py @@ -178,7 +178,7 @@ class Function(_Model): outputs: List[FunctionOutput] description: _File - function: _File + archive: _File type_: ClassVar[str] = schemas.Type.Function diff --git a/tests/data_factory.py b/tests/data_factory.py index 32bdd767..9f72594a 100644 --- a/tests/data_factory.py +++ b/tests/data_factory.py @@ -400,11 +400,17 @@ def create_function( ( BAD_ENTRYPOINT_DOCKERFILE if dockerfile_type == "BAD_ENTRYPOINT" - else NO_ENTRYPOINT_DOCKERFILE - if dockerfile_type == "NO_ENTRYPOINT" - else NO_FUNCTION_NAME_DOCKERFILE - if dockerfile_type == "NO_FUNCTION_NAME" - else DEFAULT_FUNCTION_DOCKERFILE.format(function_name=DEFAULT_FUNCTION_FUNCTION_NAME[category]) + else ( + NO_ENTRYPOINT_DOCKERFILE + if dockerfile_type == "NO_ENTRYPOINT" + else ( + NO_FUNCTION_NAME_DOCKERFILE + if dockerfile_type == "NO_FUNCTION_NAME" + else DEFAULT_FUNCTION_DOCKERFILE.format( + function_name=DEFAULT_FUNCTION_FUNCTION_NAME[category] + ) + ) + ) ), ), ) diff --git a/tests/datastore.py b/tests/datastore.py index 8660cd60..a618ad08 100644 --- a/tests/datastore.py +++ b/tests/datastore.py @@ -12,7 +12,7 @@ "checksum": "77c7a32520f7564b03f4abac4271307cc639cd9fb78b90435328278f3f24d796", "storage_address": "http://testserver/metric/7b75b728-9b9b-45fe-a867-4bb1525b7b5d/description/", }, - "function": { + "archive": { "checksum": "52eea19fccfde2b12e30f4d16fd7d48f035e03210ad5804616f71799c4bdc0de", "storage_address": "http://testserver/metric/7b75b728-9b9b-45fe-a867-4bb1525b7b5d/file/", }, @@ -68,7 +68,7 @@ "checksum": "756589d5971c421a388a751d533ab8ce09715c93040e9e8fff1365e831545aa2", "storage_address": "http://testserver/function/17f98afc-2b82-4ce9-b232-1a471633d020/description/", }, - "function": { + "archive": { "checksum": "aa8d43bf6e3341b0034a2e396451ab731ccca95a4c1d4f65a4fcd30f9081ec7d", "storage_address": "http://testserver/function/17f98afc-2b82-4ce9-b232-1a471633d020/file/", }, @@ -95,7 +95,7 @@ "checksum": "756589d5971c421a388a751d533ab8ce09715c93040e9e8fff1365e831545aa2", "storage_address": "http://testserver/function/681eedb9-db00-4480-a66f-63c86cc20280/description/", }, - "function": { + "archive": { "checksum": "aa8d43bf6e3341b0034a2e396451ab731ccca95a4c1d4f65a4fcd30f9081ec7d", "storage_address": "http://testserver/function/681eedb9-db00-4480-a66f-63c86cc20280/file/", }, @@ -123,7 +123,7 @@ "checksum": "756589d5971c421a388a751d533ab8ce09715c93040e9e8fff1365e831545aa2", "storage_address": "http://testserver/function/6a8ada2e-740f-46f4-af0f-11376763ed72/description/", }, - "function": { + "archive": { "checksum": "aa8d43bf6e3341b0034a2e396451ab731ccca95a4c1d4f65a4fcd30f9081ec7d", "storage_address": "http://testserver/function/6a8ada2e-740f-46f4-af0f-11376763ed72/file/", }, @@ -152,7 +152,7 @@ "checksum": "756589d5971c421a388a751d533ab8ce09715c93040e9e8fff1365e831545aa2", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/17f98afc-2b82-4ce9-b232-1a471633d020/description/", # noqa: E501 }, - "function": { + "archive": { "checksum": "aa8d43bf6e3341b0034a2e396451ab731ccca95a4c1d4f65a4fcd30f9081ec7d", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/17f98afc-2b82-4ce9-b232-1a471633d020/file/", # noqa: E501 }, @@ -217,7 +217,7 @@ "function": { "key": "7c9f9799-bf64-c100-6238-1583a9ffc535", "name": "Logistic regression", - "function": { + "archive": { "checksum": "7c9f9799bf64c10002381583a9ffc535bc3f4bf14d6g0c614d3f6f868f72a9d5", "storage_address": "", }, @@ -294,7 +294,7 @@ "checksum": "40483cd8b99ea7fbd3b73020997ea07547771993a6a3fa56fa2a8e9d7860529e", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/c663b6e6-dd62-49fb-afe8-191fa7627a64/description/", # noqa: E501 }, - "function": { + "archive": { "checksum": "51bd5fe2e7f087b203d1b4a73f3b3276b9fde96a0fff9c1f5984de96e4675d59", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/c663b6e6-dd62-49fb-afe8-191fa7627a64/file/", # noqa: E501 }, @@ -406,7 +406,7 @@ "checksum": "40483cd8b99ea7fbd3b73020997ea07547771993a6a3fa56fa2a8e9d7860529e", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/c663b6e6-dd62-49fb-afe8-191fa7627a64/description/", # noqa: E501 }, - "function": { + "archive": { "checksum": "51bd5fe2e7f087b203d1b4a73f3b3276b9fde96a0fff9c1f5984de96e4675d59", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/c663b6e6-dd62-49fb-afe8-191fa7627a64/file/", # noqa: E501 }, @@ -467,7 +467,7 @@ "checksum": "756589d5971c421a388a751d533ab8ce09715c93040e9e8fff1365e831545aa2", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/681eedb9-db00-4480-a66f-63c86cc20280/description/", # noqa: E501 }, - "function": { + "archive": { "checksum": "aa8d43bf6e3341b0034a2e396451ab731ccca95a4c1d4f65a4fcd30f9081ec7d", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/681eedb9-db00-4480-a66f-63c86cc20280/file/", # noqa: E501 }, @@ -526,7 +526,7 @@ "checksum": "756589d5971c421a388a751d533ab8ce09715c93040e9e8fff1365e831545aa2", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/6a8ada2e-740f-46f4-af0f-11376763ed72/description/", # noqa: E501 }, - "function": { + "archive": { "checksum": "aa8d43bf6e3341b0034a2e396451ab731ccca95a4c1d4f65a4fcd30f9081ec7d", "storage_address": "http://backend-org-1-substra-backend-server.org-1:8000/function/6a8ada2e-740f-46f4-af0f-11376763ed72/file/", # noqa: E501 }, diff --git a/tests/sdk/test_rest_client.py b/tests/sdk/test_rest_client.py index 58337753..5b246e73 100644 --- a/tests/sdk/test_rest_client.py +++ b/tests/sdk/test_rest_client.py @@ -57,8 +57,8 @@ def test_verify_login(mocker, config): c.login("foo", "bar") c.logout() if config.get("insecure", None): - assert m_post.call_args.kwargs["verify"] == False - assert m_delete.call_args.kwargs["verify"] == False + assert m_post.call_args.kwargs["verify"] is False + assert m_delete.call_args.kwargs["verify"] is False else: assert "verify" not in m_post.call_args.kwargs or m_post.call_args.kwargs["verify"] assert "verify" not in m_post.call_args.kwargs or m_delete.call_args.kwargs["verify"]