diff --git a/backend/api/models/computeplan.py b/backend/api/models/computeplan.py index c8f6b4756..a9a1a2b10 100644 --- a/backend/api/models/computeplan.py +++ b/backend/api/models/computeplan.py @@ -96,14 +96,14 @@ def get_task_stats(self) -> dict: def update_status(self) -> None: """Compute cp status from tasks counts.""" stats = self.get_task_stats() - if stats["task_count"] == 0 or stats["waiting_builder_slot_count"] == stats["task_count"]: + if self.cancelation_date or stats["canceled_count"] > 0: + compute_plan_status = self.Status.PLAN_STATUS_CANCELED + elif stats["task_count"] == 0 or stats["waiting_builder_slot_count"] == stats["task_count"]: compute_plan_status = self.Status.PLAN_STATUS_CREATED elif stats["done_count"] == stats["task_count"]: compute_plan_status = self.Status.PLAN_STATUS_DONE elif stats["failed_count"] > 0: compute_plan_status = self.Status.PLAN_STATUS_FAILED - elif self.cancelation_date or stats["canceled_count"] > 0: - compute_plan_status = self.Status.PLAN_STATUS_CANCELED else: compute_plan_status = self.Status.PLAN_STATUS_DOING diff --git a/backend/api/models/function.py b/backend/api/models/function.py index 8ac143334..442ba571a 100644 --- a/backend/api/models/function.py +++ b/backend/api/models/function.py @@ -68,3 +68,7 @@ class Function(models.Model, AssetPermissionMixin): class Meta: ordering = ["creation_date", "key"] # default order for relations serializations + + def cancel(self) -> None: + self.status = Function.Status.FUNCTION_STATUS_CANCELED + self.save() diff --git a/backend/builder/exceptions.py b/backend/builder/exceptions.py index a3d2111e3..170fcccbc 100644 --- a/backend/builder/exceptions.py +++ b/backend/builder/exceptions.py @@ -41,3 +41,7 @@ class BuildRetryError(_BuildError, CeleryRetryError): Args: logs (str): the container image build logs """ + + +class BuildCanceledError(CeleryNoRetryError): + """A function built has been cancelled (for instance, all the linked ocmpute plans has been cancelled or failed)""" diff --git a/backend/builder/image_builder/image_builder.py b/backend/builder/image_builder/image_builder.py index d41116e10..421784477 100644 --- a/backend/builder/image_builder/image_builder.py +++ b/backend/builder/image_builder/image_builder.py @@ -9,6 +9,8 @@ from django.conf import settings import orchestrator +from api.models import ComputePlan +from api.models import Function from builder import docker from builder import exceptions from builder.exceptions import BuildError @@ -348,3 +350,22 @@ def _build_container_args(dockerfile_mount_path: str, image_tag: str) -> list[st if REGISTRY_SCHEME == "http": args.append("--insecure-pull") return args + + +def check_function_is_runnable(function_key: str, channel_name: str) -> bool: + compute_plans_statuses = set( + ComputePlan.objects.filter(compute_tasks__function__key=function_key, channel=channel_name) + .values_list("status", flat=True) + .distinct() + ) + + if len(compute_plans_statuses) == 0: + return True + + target_statuses = {ComputePlan.Status.PLAN_STATUS_CANCELED, ComputePlan.Status.PLAN_STATUS_FAILED} + is_runnable = not compute_plans_statuses.issubset(target_statuses) + + if not is_runnable: + Function.objects.get(key=function_key).cancel() + + return is_runnable diff --git a/backend/builder/tasks/task.py b/backend/builder/tasks/task.py index 8203ab7a9..03fce00c7 100644 --- a/backend/builder/tasks/task.py +++ b/backend/builder/tasks/task.py @@ -1,7 +1,11 @@ +from typing import Any + import structlog +from billiard.einfo import ExceptionInfo from django.conf import settings import orchestrator +from builder.exceptions import BuildCanceledError from substrapp.models import FailedAssetKind from substrapp.tasks.task import FailableTask @@ -36,3 +40,11 @@ def get_task_info(self, args: tuple, kwargs: dict) -> tuple[str, str]: function = orchestrator.Function.model_validate_json(kwargs["function_serialized"]) channel_name = kwargs["channel_name"] return function.key, channel_name + + def on_failure( + self, exc: Exception, task_id: str, args: tuple, kwargs: dict[str, Any], einfo: ExceptionInfo + ) -> None: + if isinstance(exc, BuildCanceledError): + return + + super().on_failure(exc, task_id, args, kwargs, einfo) diff --git a/backend/builder/tasks/tasks_build_image.py b/backend/builder/tasks/tasks_build_image.py index d7c9625eb..973d5b4df 100644 --- a/backend/builder/tasks/tasks_build_image.py +++ b/backend/builder/tasks/tasks_build_image.py @@ -2,6 +2,7 @@ import orchestrator from backend.celery import app +from builder.exceptions import BuildCanceledError from builder.exceptions import BuildError from builder.exceptions import BuildRetryError from builder.exceptions import CeleryNoRetryError @@ -24,9 +25,11 @@ def build_image(task: BuildTask, function_serialized: str, channel_name: str) -> timer = Timer() attempt = 0 while attempt <= task.max_retries: + if not image_builder.check_function_is_runnable(function.key, channel_name): + logger.info("build has been canceled", function_id=function.key) + raise BuildCanceledError try: timer.start() - image_builder.build_image_if_missing(channel_name, function) with orchestrator.get_orchestrator_client(channel_name) as client: diff --git a/backend/builder/tests/test_image_builder.py b/backend/builder/tests/test_image_builder.py index dc6830ed4..badda8a3d 100644 --- a/backend/builder/tests/test_image_builder.py +++ b/backend/builder/tests/test_image_builder.py @@ -4,6 +4,7 @@ from pytest_mock import MockerFixture import orchestrator +from api.models import ComputePlan from builder.exceptions import BuildError from builder.exceptions import BuildRetryError from builder.exceptions import PodTimeoutError @@ -85,3 +86,34 @@ def test_get_entrypoint_from_dockerfile_invalid_dockerfile( image_builder._get_entrypoint_from_dockerfile(str(tmp_path)) assert expected_exc_content in bytes.decode(exc.value.logs.read()) + + +@pytest.mark.parametrize( + ["statuses", "is_function_runnable"], + [ + ([], True), + ([ComputePlan.Status.PLAN_STATUS_DONE.value], True), + ([ComputePlan.Status.PLAN_STATUS_FAILED.value, ComputePlan.Status.PLAN_STATUS_CANCELED.value], False), + ( + [ + ComputePlan.Status.PLAN_STATUS_DONE.value, + ComputePlan.Status.PLAN_STATUS_FAILED.value, + ComputePlan.Status.PLAN_STATUS_CANCELED.value, + ], + True, + ), + ], + ids=["no cp", "done cp", "failed + canceled cp", "done + failed + canceled cp"], +) +def test_check_function_is_runnable(mocker: MockerFixture, statuses: str, is_function_runnable: bool) -> None: + function_key = "e7f8aed4-f2c9-442d-a02c-8b7858a2ac4f" + channel_name = "channel_name" + compute_plan_getter = mocker.patch("builder.image_builder.image_builder.ComputePlan.objects.filter") + function_cancel = mocker.patch("builder.image_builder.image_builder.Function.objects.get") + compute_plan_getter.return_value.values_list.return_value.distinct.return_value = statuses + result = image_builder.check_function_is_runnable(function_key=function_key, channel_name=channel_name) + + assert result == is_function_runnable + compute_plan_getter.assert_called_once_with(compute_tasks__function__key=function_key, channel=channel_name) + if not is_function_runnable: + function_cancel.assert_called_once_with(key=function_key) diff --git a/backend/builder/tests/test_task_build_image.py b/backend/builder/tests/test_task_build_image.py index 24e490e4f..7bf3795b4 100644 --- a/backend/builder/tests/test_task_build_image.py +++ b/backend/builder/tests/test_task_build_image.py @@ -30,6 +30,7 @@ def test_store_failure_build_error(): assert failure_report.logs.read() == str.encode(msg) +@pytest.mark.django_db def test_catch_all_exceptions(celery_app, celery_worker, mocker): mocker.patch("builder.tasks.task.orchestrator.get_orchestrator_client") mocker.patch("builder.image_builder.image_builder.build_image_if_missing", side_effect=Exception("random error")) @@ -39,6 +40,7 @@ def test_catch_all_exceptions(celery_app, celery_worker, mocker): r.get() +@pytest.mark.django_db @pytest.mark.parametrize("execution_number", range(10)) def test_order_building_success(celery_app, celery_worker, mocker, execution_number): function_1 = orc_mock.FunctionFactory() @@ -63,6 +65,7 @@ def test_order_building_success(celery_app, celery_worker, mocker, execution_num assert result_2.state == "WAITING" +@pytest.mark.django_db @pytest.mark.parametrize("execution_number", range(10)) def test_order_building_retry(celery_app, celery_worker, mocker, execution_number): function_retry = orc_mock.FunctionFactory() @@ -100,6 +103,7 @@ def side_effect(*args, **kwargs): assert result_other.state == "WAITING" +@pytest.mark.django_db def test_ssl_connection_timeout(celery_app, celery_worker, mocker): """ Test that in case of a SSL connection timeout, the task is retried max_retries times, diff --git a/changes/997.added b/changes/997.added new file mode 100644 index 000000000..e245ba2f3 --- /dev/null +++ b/changes/997.added @@ -0,0 +1,2 @@ +Check if function is linked with compute plans (through the compute tasks) before building. If all compute plans have been cancelled or failed, cancels the function. + \ No newline at end of file