Skip to content

Commit

Permalink
Feat!: Remove SDK installation from our base image (#443)
Browse files Browse the repository at this point in the history
# The problem
We had to regenerate SDK images each release
https://zapatacomputing.atlassian.net/browse/ORQSDK-1032

# This PR's solution
Remove installation of SDK in our worker base images

# Checklist

_Check that this PR satisfies the following items:_

- [x] Tests have been added for new features/changed behavior (if no new
features have been added, check the box).
- [ ] The [changelog file](CHANGELOG.md) has been updated with a
user-readable description of the changes (if the change isn't visible to
the user in any way, check the box).
- [x] The PR's title is prefixed with
`<feat/fix/chore/imp[rovement]/int[ernal]/docs>[!]:`
- [x] The PR is linked to a JIRA ticket (if there's no suitable ticket,
check the box).
  • Loading branch information
SebastianMorawiec authored Sep 25, 2024
1 parent c8929cb commit 26daea6
Show file tree
Hide file tree
Showing 18 changed files with 192 additions and 298 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/orquestra-sdk-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
source ./venv/bin/activate
make github-actions-integration
- name: Run performance test
- name: Run integration test
shell: bash
run: |
source ./venv/bin/activate
Expand Down
10 changes: 1 addition & 9 deletions .github/workflows/publish-cuda-docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,6 @@ name: Publish orquestra-sdk-base Docker Image with CUDA support
on:
workflow_dispatch:
inputs:
sdk_requirement:
type: string
description: |
Orquestra SDK requirement to build the image with.
Example of using a published version of the SDK:
orquestra-sdk[ray]==0.63.0
Example of using an unpublished version:
orquestra-sdk[ray] @ git+https://github.com/zapata-engineering/[email protected]#subdirectory=projects/orquestra-sdk
docker_tag:
type: string
description: |
Expand Down Expand Up @@ -81,6 +73,6 @@ jobs:
# as on the command line `--build-arg`, one to a line:
# docker_build_args: BUILD_ARG_ONE=value1\nBUILD_ARG_TWO=value2
# DUE TO JSON LIMITATIONS, NEWLINES MUST BE EXPLICITLY ADDED AS \n
docker_build_args: SDK_REQUIREMENT=${{ inputs.sdk_requirement }}
docker_build_args:
# leave blank for linux/amd64 (recommended)
target_platforms: ''
10 changes: 1 addition & 9 deletions .github/workflows/publish-docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,6 @@ name: Publish orquestra-sdk-base Docker Image
on:
workflow_dispatch:
inputs:
sdk_requirement:
type: string
description: |
Orquestra SDK requirement to build the image with.
Example of using a published version of the SDK:
orquestra-sdk[ray]==0.63.0
Example of using an unpublished version:
orquestra-sdk[ray] @ git+https://github.com/zapata-engineering/[email protected]#subdirectory=projects/orquestra-sdk
docker_tag:
type: string
description: |
Expand Down Expand Up @@ -79,6 +71,6 @@ jobs:
# as on the command line `--build-arg`, one to a line:
# docker_build_args: BUILD_ARG_ONE=value1\nBUILD_ARG_TWO=value2
# DUE TO JSON LIMITATIONS, NEWLINES MUST BE EXPLICITLY ADDED AS \n
docker_build_args: SDK_REQUIREMENT=${{ inputs.sdk_requirement }}
docker_build_args:
# leave blank for linux/amd64 (recommended)
target_platforms: 'linux/amd64,linux/arm64'
3 changes: 1 addition & 2 deletions projects/orquestra-sdk/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Base image for running Orquestra tasks.
# Published at hub.nexus.orquestra.io/zapatacomputing/orquestra-sdk-base
FROM python:3.11.6-slim-bullseye
ARG SDK_REQUIREMENT

# Set by BuildKit
ARG TARGETARCH
Expand Down Expand Up @@ -38,7 +37,7 @@ RUN <<EOF
set -ex
. "$VIRTUAL_ENV/bin/activate"
python -m pip install --no-cache-dir -U pip wheel
python -m pip install --no-cache-dir "${SDK_REQUIREMENT}"
python -m pip install --no-cache-dir ray[default]==2.30
EOF

# Prefer to use pip, python, and other binaries from the virtual env.
Expand Down
3 changes: 1 addition & 2 deletions projects/orquestra-sdk/docker/cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
ARG CUDA_MINOR_VERSION=11.8
FROM nvidia/cuda:${CUDA_MINOR_VERSION}.0-runtime-ubuntu22.04

ARG SDK_REQUIREMENT
ARG PYTHON_VERSION=3.11.6
ARG TARGETARCH="amd64"

Expand Down Expand Up @@ -56,7 +55,7 @@ RUN <<EOF
set -ex
. "$VIRTUAL_ENV/bin/activate"
python -m pip install --no-cache-dir -U pip wheel
python -m pip install --no-cache-dir "${SDK_REQUIREMENT}"
python -m pip install --no-cache-dir ray[default]==2.30
EOF

# Prefer to use pip, python, and other binaries from the virtual env.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ def test_good_practice_defaults():
# Then
assert src_import.type == "INLINE_IMPORT"
assert len(deps_imports) == 1
assert deps_imports[0].type == "PYTHON_IMPORT"
# it depends if you have installed SDK with version from PyPi, or
# its local-dev version, it changes what import is used for SDK
assert deps_imports[0].type in {"PYTHON_IMPORT", "GIT_IMPORT"}

@staticmethod
def test_good_practice_python_imports():
Expand All @@ -212,7 +214,9 @@ def test_good_practice_python_imports():
assert deps_imports is not None
assert len(deps_imports) == 2
assert deps_imports[0].type == "PYTHON_IMPORT"
assert deps_imports[1].type == "PYTHON_IMPORT"
# it depends if you have installed SDK with version from PyPi, or
# its local-dev version, it changes what import is used for SDK
assert deps_imports[1].type in {"PYTHON_IMPORT", "GIT_IMPORT"}

@staticmethod
def test_good_practice_git_import_with_auth():
Expand All @@ -225,7 +229,9 @@ def test_good_practice_git_import_with_auth():
# Then
assert src_import.type == "GIT_IMPORT"
assert len(deps_imports) == 1
assert deps_imports[0].type == "PYTHON_IMPORT"
# it depends if you have installed SDK with version from PyPi, or
# its local-dev version, it changes what import is used for SDK
assert deps_imports[0].type in {"PYTHON_IMPORT", "GIT_IMPORT"}

@staticmethod
def test_simple_task_explicit():
Expand All @@ -239,7 +245,9 @@ def test_simple_task_explicit():
# Then
assert src_import.type == "INLINE_IMPORT"
assert len(deps_imports) == 1
assert deps_imports[0].type == "PYTHON_IMPORT"
# it depends if you have installed SDK with version from PyPi, or
# its local-dev version, it changes what import is used for SDK
assert deps_imports[0].type in {"PYTHON_IMPORT", "GIT_IMPORT"}

@staticmethod
def test_python_imports(monkeypatch, tmp_path):
Expand All @@ -253,13 +261,14 @@ def test_python_imports(monkeypatch, tmp_path):
for task in [task1, task2, task3]:
# When
src_import, deps_imports = _import_models(task)

# Then
assert src_import.type == "INLINE_IMPORT"
assert deps_imports is not None
assert len(deps_imports) == 2
assert deps_imports[0].type == "PYTHON_IMPORT"
assert deps_imports[1].type == "PYTHON_IMPORT"
# it depends if you have installed SDK with version from PyPi, or
# its local-dev version, it changes what import is used for SDK
assert deps_imports[1].type in {"PYTHON_IMPORT", "GIT_IMPORT"}

@staticmethod
def test_github_import_private_repo():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
################################################################################
# © Copyright 2024 Zapata Computing Inc.
################################################################################

HEAD_NODE_IMAGE = (
"hub.stage.nexus.orquestra.io/zapatacomputing/workflow-driver-ray:head-node-1.0.0a3"
)
DEFAULT_WORKER_IMAGE = (
"hub.stage.nexus.orquestra.io/zapatacomputing/orquestra-sdk-base:worker-1.0.0a1"
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import collections.abc
import hashlib
import inspect
import os
import re
import typing as t
import warnings
Expand All @@ -23,13 +24,21 @@
)
from orquestra.workflow_shared.schema import ir, responses
from orquestra.workflow_shared.secrets import Secret
from packaging import version
from pip_api._parse_requirements import Requirement

from . import _dsl, _workflow
from . import _docker_images, _dsl, _workflow

N_BYTES_IN_HASH = 8


def _get_default_image(num_gpus: t.Optional[str]):
image = _docker_images.DEFAULT_WORKER_IMAGE
if num_gpus:
image = f"{image}-cuda"
return image


def _make_key(obj: t.Any):
"""Returns a hashable key for all types.
Expand Down Expand Up @@ -766,6 +775,23 @@ def _make_invocation_model(
arg_name: graph.get_node_id(arg_val) for arg_name, arg_val in invocation.kwargs
}

gpu_used: t.Optional[str]
if invocation.resources.gpu:
gpu_used = invocation.resources.gpu
elif task_models_dict[invocation.task].resources is not None:
task_model = task_models_dict[invocation.task]
# this is just to silence pyright which doesn't believe elif check
# we dont multiprocess that variables, so we dont have race conditions here
assert task_model.resources is not None
gpu_used = task_model.resources.gpu
else:
gpu_used = None

custom_image = (
invocation.custom_image
or task_models_dict[invocation.task].custom_image
or _get_default_image(gpu_used)
)
return ir.TaskInvocation(
id=_make_invocation_id(
task_models_dict[invocation.task].fn_ref.function_name,
Expand All @@ -777,7 +803,7 @@ def _make_invocation_model(
kwargs_ids=kwargs_ids,
output_ids=graph.output_ids_for_invocation(invocation),
resources=_make_resources_model(invocation.resources),
custom_image=invocation.custom_image,
custom_image=custom_image,
env_vars=invocation.env_vars,
)

Expand Down Expand Up @@ -844,7 +870,30 @@ def flatten_graph(
else:
import_models_dict[imp] = _make_import_model(imp)

sdk_python_import = _dsl.InstalledImport(package_name="orquestra-sdk")
sdk_version = get_current_sdk_version()
sdk_version_parsed = version.parse(sdk_version.original)

if not (sdk_version_parsed.is_devrelease or sdk_version_parsed.is_prerelease):
sdk_python_import = _dsl.PythonImports(
f"orquestra-sdk[all]=={sdk_version.original}"
)
else:
# this only happens on dev environment, should not happen on users' machines
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="You're working on detached HEAD")
warnings.filterwarnings("ignore", message="You have uncommitted changes")
# this is a workaround where .model is called outside of SDK repo
# its save to do as this should only happen if SDK is installed as editable
# install (or from git repository)
path_to_sdk = os.path.realpath(__file__)
git_ref = _dsl.infer_git_ref(path_to_sdk).resolve()
sdk_python_import = _dsl.GithubImport(
git_ref=git_ref,
repo="zapata-engineering/orquestra-sdk",
package_name="orquestra-sdk",
extras="all",
)

ir_sdk_import = _make_import_model(sdk_python_import)
import_models_dict[sdk_python_import] = ir_sdk_import

Expand All @@ -871,12 +920,14 @@ def flatten_graph(
}

sdk_version = get_current_sdk_version()

python_version = get_current_python_version()

return ir.WorkflowDef(
metadata=ir.WorkflowMetadata(
sdk_version=sdk_version,
python_version=python_version,
head_node_image=_docker_images.HEAD_NODE_IMAGE,
),
resources=_make_resources_model(workflow_def._resources, is_task=False),
# At the moment 'orq submit workflow-def <name>' assumes that the <name> is
Expand Down
20 changes: 12 additions & 8 deletions projects/orquestra-sdk/tests/integration/ray/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,8 +947,9 @@ def test_setting_resources(
# Then
calls = client.add_options.call_args_list

# We should only have two calls: our invocation and the aggregation step
assert len(calls) == 2
# We should only have two calls: invocation aggregation step and aggregation
# error handling step
assert len(calls) == 3
# Checking our call did not have any resources included
assert calls[0] == call(
ANY,
Expand All @@ -975,15 +976,15 @@ def test_setting_resources(
None,
None,
{
"image:hub.nexus.orquestra.io/zapatacomputing/orquestra-sdk-base:mocked": 1 # noqa: E501
"image:hub.stage.nexus.orquestra.io/zapatacomputing/orquestra-sdk-base:worker-1.0.0a1": 1 # noqa: E501
},
{},
),
(
None,
1,
{
"image:hub.nexus.orquestra.io/zapatacomputing/orquestra-sdk-base:mocked-cuda": 1 # noqa: E501
"image:hub.stage.nexus.orquestra.io/zapatacomputing/orquestra-sdk-base:worker-1.0.0a1-cuda": 1 # noqa: E501
},
{
"num_gpus": 1,
Expand Down Expand Up @@ -1021,8 +1022,9 @@ def test_with_env_set(
# Then
calls = client.add_options.call_args_list

# We should only have two calls: our invocation and the aggregation step
assert len(calls) == 2
# We should only have two calls: invocation and the aggregation step
# and error-handling aggregation step
assert len(calls) == 3
# Checking our call did not have any resources included
assert calls[0] == call(
ANY,
Expand Down Expand Up @@ -1055,8 +1057,10 @@ def test_with_env_not_set(
# Then
calls = client.add_options.call_args_list

# We should only have two calls: our invocation and the aggregation step
assert len(calls) == 2
# We should only have three calls:
# We should only have two calls: invocation and the aggregation step
# and error-handling aggregation step
assert len(calls) == 3
# Checking our call did not have any resources included
assert calls[0] == call(
ANY,
Expand Down
23 changes: 20 additions & 3 deletions projects/orquestra-sdk/tests/sdk/driver/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
Tests for orquestra.sdk._base._driver._client.
"""
import warnings
from datetime import timedelta
from typing import Any, Dict, List, Optional
from unittest.mock import Mock, create_autospec
Expand All @@ -12,11 +13,12 @@
import pytest
import responses
from orquestra.workflow_shared._spaces._structs import ProjectRef
from orquestra.workflow_shared.schema.ir import WorkflowDef
from orquestra.workflow_shared.schema.ir import Version, WorkflowDef
from orquestra.workflow_shared.schema.responses import JSONResult, PickleResult
from orquestra.workflow_shared.schema.workflow_run import RunStatus, State, TaskRun

import orquestra.sdk as sdk
from orquestra.sdk._client._base import _traversal
from orquestra.sdk._client._base._driver import _exceptions
from orquestra.sdk._client._base._driver._client import (
DriverClient,
Expand Down Expand Up @@ -132,21 +134,35 @@ def task_run_id(self):
def task_inv_id(self):
return "00000000-0000-0000-0000-000000000000"

@pytest.fixture
def mask_sdk_version(self, monkeypatch):
monkeypatch.setattr(
_traversal,
"get_current_sdk_version",
lambda *_: Version(
original="0.66.0", major=0, minor=66, patch=0, is_prerelease=False
),
)

@pytest.fixture
def status(self):
return RunStatus(state=State.SUCCEEDED, start_time=None, end_time=None)

@pytest.fixture
def workflow_def(self):
@sdk.task
@sdk.task(custom_image="")
def task():
return 1

@sdk.workflow
def workflow():
return task()

return workflow().model
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", "Attempting to read a workflow definition"
)
return workflow().model

class TestWorkflowDefinitions:
# ------ queries ------
Expand Down Expand Up @@ -454,6 +470,7 @@ def endpoint_mocker(endpoint_mocker_base, base_uri: str):
],
)
def test_params_encoding(
mask_sdk_version,
endpoint_mocker,
client: DriverClient,
workflow_def_id,
Expand Down
Loading

0 comments on commit 26daea6

Please sign in to comment.