
Commit

Merge remote-tracking branch 'origin/main' into debug-logging

VukW committed Feb 1, 2024
2 parents e82a1bc + 71582b7 commit 29e7955
Showing 19 changed files with 174 additions and 115 deletions.
1 change: 1 addition & 0 deletions .github/workflows/docker-ci.yml
@@ -18,6 +18,7 @@ jobs:
python-version: '3.9'

- name: Install Singularity
working-directory: ..
run: |
sudo apt-get update
sudo apt-get install -y build-essential libssl-dev uuid-dev libgpgme11-dev \
3 changes: 3 additions & 0 deletions .github/workflows/local-ci.yml
@@ -16,6 +16,9 @@ jobs:
python-version: '3.9'

- name: Install Singularity
# Singularity installation shouldn't happen inside another git repo; otherwise
# the singularity installer will set that repo's tag/commit as the singularity version.
working-directory: ..
run: |
sudo apt-get update
sudo apt-get install -y build-essential libssl-dev uuid-dev libgpgme11-dev \
16 changes: 9 additions & 7 deletions cli/cli_tests.sh
@@ -107,6 +107,7 @@ medperf mlcube submit --name model2 -m $MODEL_MLCUBE -p $MODEL2_PARAMS -a $MODEL
checkFailed "Model2 submission failed"
MODEL2_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)

# MLCube with singularity section
medperf mlcube submit --name model3 -m $MODEL_WITH_SINGULARITY -p $MODEL3_PARAMS -a $MODEL_ADD -i $MODEL_SING_IMAGE
checkFailed "Model3 submission failed"
MODEL3_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
@@ -236,11 +237,12 @@ echo "\n"

##########################################################
echo "====================================="
echo "Running model3 association"
echo "Running model3 association (with singularity)"
echo "====================================="
# medperf --platform singularity mlcube associate -m $MODEL3_UID -b $BMK_UID -y
# TMP: revert to singularity when MLCube issue is fixed
medperf mlcube associate -m $MODEL3_UID -b $BMK_UID -y
# this will run two types of singularity mlcubes:
# 1) an already built singularity image (model 3)
# 2) a docker image to be converted (metrics)
medperf --platform singularity mlcube associate -m $MODEL3_UID -b $BMK_UID -y
checkFailed "Model3 association failed"
##########################################################

@@ -312,10 +314,10 @@ echo "\n"

##########################################################
echo "====================================="
echo "Running model2"
echo "Running model3 (with singularity)"
echo "====================================="
medperf run -b $BMK_UID -d $DSET_A_UID -m $MODEL2_UID -y
checkFailed "Model2 run failed"
medperf --platform=singularity run -b $BMK_UID -d $DSET_A_UID -m $MODEL3_UID -y
checkFailed "Model3 run failed"
##########################################################

echo "\n"
1 change: 1 addition & 0 deletions cli/medperf/commands/compatibility_test/utils.py
@@ -95,5 +95,6 @@ def prepare_cube(cube_uid: str):
def get_cube(uid: int, name: str, local_only: bool = False) -> Cube:
config.ui.text = f"Retrieving {name} cube"
cube = Cube.get(uid, local_only=local_only)
cube.download_run_files()
config.ui.print(f"> {name} cube download complete")
return cube
1 change: 1 addition & 0 deletions cli/medperf/commands/dataset/create.py
@@ -96,6 +96,7 @@ def get_prep_cube(self):
self.ui.print(f"Benchmark Data Preparation: {benchmark.name}")
self.ui.text = "Retrieving data preparation cube"
self.cube = Cube.get(cube_uid)
self.cube.download_run_files()
self.ui.print("> Preparation cube download complete")

def run_cube_tasks(self):
3 changes: 2 additions & 1 deletion cli/medperf/commands/mlcube/submit.py
@@ -31,7 +31,8 @@ def __init__(self, submit_info: dict):
config.tmp_paths.append(self.cube.path)

def download(self):
self.cube.download()
self.cube.download_config_files()
self.cube.download_run_files()

def upload(self):
updated_body = self.cube.upload()
1 change: 1 addition & 0 deletions cli/medperf/commands/result/create.py
@@ -153,6 +153,7 @@ def load_cached_results(self):
def __get_cube(self, uid: int, name: str) -> Cube:
self.ui.text = f"Retrieving {name} cube"
cube = Cube.get(uid)
cube.download_run_files()
self.ui.print(f"> {name} cube download complete")
return cube

10 changes: 7 additions & 3 deletions cli/medperf/decorators.py
@@ -208,9 +208,13 @@ def wrapper(
"--gpus",
help="""
What GPUs to expose to MLCube.
Accepted Values are comma separated GPU IDs (e.g "1,2"), or \"all\".
MLCubes that aren't configured to use GPUs won't be affected by this.
Defaults to all available GPUs""",
Accepted Values are:\n
- "" or 0: to expose no GPUs (e.g.: --gpus="")\n
- "all": to expose all GPUs. (e.g.: --gpus=all)\n
- an integer: to expose a certain number of GPUs. ONLY AVAILABLE FOR DOCKER
(e.g., --gpus=2 to expose 2 GPUs)\n
- Form "device=<id1>,<id2>": to expose specific GPUs.
(e.g., --gpus="device=0,2")\n""",
),
cleanup: bool = typer.Option(
config.cleanup,
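The accepted --gpus values above amount to a small value grammar. A minimal validator for that grammar is sketched below, purely as an illustration of the documented forms; the function name and error handling are assumptions, not MedPerf client code.

import re

# Illustration only: validates the --gpus forms documented in the help text above.
def validate_gpus(value: str) -> str:
    if value in ("", "0"):                            # "" or 0: expose no GPUs
        return value
    if value == "all":                                # all: expose every available GPU
        return value
    if value.isdigit():                               # integer: expose N GPUs (Docker only)
        return value
    if re.fullmatch(r"device=\d+(,\d+)*", value):     # device=<id1>,<id2>: expose specific GPUs
        return value
    raise ValueError(f"Unsupported --gpus value: {value!r}")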
126 changes: 86 additions & 40 deletions cli/medperf/entities/cube.py
@@ -167,7 +167,7 @@ def get(cls, cube_uid: Union[str, int], local_only: bool = False) -> "Cube":

if not cube.is_valid:
raise InvalidEntityError("The requested MLCube is marked as INVALID.")
cube.download()
cube.download_config_files()
return cube

@classmethod
@@ -211,45 +211,66 @@ def download_additional(self):
def download_image(self):
url = self.image_tarball_url
tarball_hash = self.image_tarball_hash
img_hash = self.image_hash

if url:
_, local_hash = resources.get_cube_image(url, self.path, tarball_hash)
self.image_tarball_hash = local_hash
else:
# Retrieve image from image registry
logging.debug(f"Retrieving {self.id} image")
cmd = f"mlcube configure --mlcube={self.cube_path}"
with pexpect.spawn(cmd, timeout=config.mlcube_configure_timeout) as proc:
proc_out = proc.read()
if proc.exitstatus != 0:
raise ExecutionError(
"There was an error while retrieving the MLCube image"
)
logging.debug(proc_out)

# Retrieve image hash from MLCube
logging.debug(f"Retrieving {self.id} image hash")
tmp_out_yaml = generate_tmp_path()
cmd = f"mlcube inspect --mlcube={self.cube_path} --format=yaml"
cmd += f" --output-file {tmp_out_yaml}"
with pexpect.spawn(cmd, timeout=config.mlcube_inspect_timeout) as proc:
proc_stdout = proc.read()
logging.debug(proc_stdout)
if proc.exitstatus != 0:
raise ExecutionError(
"There was an error while inspecting the image hash"
)
with open(tmp_out_yaml) as f:
mlcube_details = yaml.safe_load(f)
remove_path(tmp_out_yaml)
local_hash = mlcube_details["hash"]
verify_hash(local_hash, img_hash)
self.image_hash = local_hash

def download(self):
"""Downloads the required elements for an mlcube to run locally."""
if config.platform == "docker":
# For docker, image should be pulled before calculating its hash
self._get_image_from_registry()
self._set_image_hash_from_registry()
elif config.platform == "singularity":
# For singularity, we need the hash first before trying to convert
self._set_image_hash_from_registry()

image_folder = os.path.join(config.cubes_folder, config.image_path)
if os.path.exists(image_folder):
for file in os.listdir(image_folder):
if file == self._converted_singularity_image_name:
return
remove_path(os.path.join(image_folder, file))

self._get_image_from_registry()
else:
# TODO: such a check should happen on commands entrypoints, not here
raise InvalidArgumentError("Unsupported platform")

@property
def _converted_singularity_image_name(self):
return f"{self.image_hash}.sif"

def _set_image_hash_from_registry(self):
# Retrieve image hash from MLCube
logging.debug(f"Retrieving {self.id} image hash")
tmp_out_yaml = generate_tmp_path()
cmd = f"mlcube inspect --mlcube={self.cube_path} --format=yaml"
cmd += f" --platform={config.platform} --output-file {tmp_out_yaml}"
with pexpect.spawn(cmd, timeout=config.mlcube_inspect_timeout) as proc:
proc_stdout = proc.read()
logging.debug(proc_stdout)
if proc.exitstatus != 0:
raise ExecutionError("There was an error while inspecting the image hash")
with open(tmp_out_yaml) as f:
mlcube_details = yaml.safe_load(f)
remove_path(tmp_out_yaml)
local_hash = mlcube_details["hash"]
verify_hash(local_hash, self.image_hash)
self.image_hash = local_hash

def _get_image_from_registry(self):
# Retrieve image from image registry
logging.debug(f"Retrieving {self.id} image")
cmd = f"mlcube configure --mlcube={self.cube_path} --platform={config.platform}"
if config.platform == "singularity":
cmd += f" -Psingularity.image={self._converted_singularity_image_name}"
with pexpect.spawn(cmd, timeout=config.mlcube_configure_timeout) as proc:
proc_out = proc.read()
if proc.exitstatus != 0:
raise ExecutionError("There was an error while retrieving the MLCube image")
logging.debug(proc_out)

def download_config_files(self):
try:
self.download_mlcube()
except InvalidEntityError as e:
@@ -260,6 +281,7 @@ def download(self):
except InvalidEntityError as e:
raise InvalidEntityError(f"MLCube {self.name} parameters file: {e}")

def download_run_files(self):
try:
self.download_additional()
except InvalidEntityError as e:
@@ -300,12 +322,36 @@ def run(
cmd_arg = f'{k}="{v}"'
cmd = " ".join([cmd, cmd_arg])

cpu_args = self.get_config("docker.cpu_args") or ""
gpu_args = self.get_config("docker.gpu_args") or ""
cpu_args = " ".join([cpu_args, "-u $(id -u):$(id -g)"]).strip()
gpu_args = " ".join([gpu_args, "-u $(id -u):$(id -g)"]).strip()
cmd += f' -Pdocker.cpu_args="{cpu_args}"'
cmd += f' -Pdocker.gpu_args="{gpu_args}"'
# TODO: we should override run args instead of what we are doing below
# we shouldn't allow arbitrary run args unless our client allows it
if config.platform == "docker":
# use current user
cpu_args = self.get_config("docker.cpu_args") or ""
gpu_args = self.get_config("docker.gpu_args") or ""
cpu_args = " ".join([cpu_args, "-u $(id -u):$(id -g)"]).strip()
gpu_args = " ".join([gpu_args, "-u $(id -u):$(id -g)"]).strip()
cmd += f' -Pdocker.cpu_args="{cpu_args}"'
cmd += f' -Pdocker.gpu_args="{gpu_args}"'
elif config.platform == "singularity":
# use -e to discard host env vars, -C to isolate the container (see singularity run --help)
run_args = self.get_config("singularity.run_args") or ""
run_args = " ".join([run_args, "-eC"]).strip()
cmd += f' -Psingularity.run_args="{run_args}"'

# set image name in case of running docker image with singularity
# Assuming we only accept mlcube.yamls with either singularity or docker sections
# TODO: make checks on submitted mlcubes
singularity_config = self.get_config("singularity")
if singularity_config is None:
cmd += (
f' -Psingularity.image="{self._converted_singularity_image_name}"'
)
else:
raise InvalidArgumentError("Unsupported platform")

# set accelerator count to zero to avoid unexpected behaviours and
# force mlcube to only use --gpus to figure out GPU config
cmd += " -Pplatform.accelerator_count=0"

container_loglevel = config.container_loglevel
if container_loglevel:
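Together with the command-level changes above (compatibility_test utils, dataset create, mlcube submit, result create), this refactor splits cube downloads into a lightweight config phase and a heavier run phase. A minimal caller-side sketch of the intended flow follows; it is a sketch only — the helper name is hypothetical, the import path is inferred from the file location, and whether download_run_files() also fetches the container image is inferred from the surrounding changes.

from medperf.entities.cube import Cube  # import path inferred from cli/medperf/entities/cube.py

def prepare_cube_for_execution(cube_uid: int) -> Cube:
    # Phase 1: Cube.get() now calls download_config_files(), fetching and
    # validating only the mlcube.yaml and parameters file.
    cube = Cube.get(cube_uid)

    # Phase 2: callers that actually execute the cube fetch the remaining
    # assets explicitly, mirroring dataset/create.py and result/create.py above.
    cube.download_run_files()
    return cube

SubmitCube.download() is the one caller that still needs both phases up front, which is why it now calls download_config_files() and download_run_files() back to back.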
17 changes: 9 additions & 8 deletions cli/medperf/tests/commands/dataset/test_create.py
@@ -4,7 +4,7 @@
import pytest
from unittest.mock import call

from medperf.tests.mocks import MockCube
from medperf.tests.mocks import TestCube
from medperf.tests.mocks.benchmark import TestBenchmark
from medperf.tests.mocks.dataset import TestDataset
from medperf.commands.dataset.create import DataPreparation
@@ -39,7 +39,8 @@ def preparation(mocker, comms, ui):
DESCRIPTION,
LOCATION,
)
mocker.patch(PATCH_DATAPREP.format("Cube.get"), return_value=MockCube(True))
mocker.patch(PATCH_DATAPREP.format("Cube.get"), return_value=TestCube())
mocker.patch(PATCH_DATAPREP.format("Cube.download_run_files"))
preparation.get_prep_cube()
preparation.data_path = DATA_PATH
preparation.labels_path = LABELS_PATH
@@ -75,16 +76,16 @@ def test_get_prep_cube_gets_prep_cube_if_provided(
self, mocker, cube_uid, comms, ui
):
# Arrange
spy = mocker.patch(
PATCH_DATAPREP.format("Cube.get"), return_value=MockCube(True)
)
spy = mocker.patch(PATCH_DATAPREP.format("Cube.get"), return_value=TestCube())
down_spy = mocker.patch(PATCH_DATAPREP.format("Cube.download_run_files"))

# Act
preparation = DataPreparation(None, cube_uid, *[""] * 5)
preparation.get_prep_cube()

# Assert
spy.assert_called_once_with(cube_uid)
down_spy.assert_called_once()

@pytest.mark.parametrize("cube_uid", [998, 68, 109])
def test_get_prep_cube_gets_benchmark_cube_if_provided(
@@ -93,16 +94,16 @@ def test_get_prep_cube_gets_benchmark_cube_if_provided(
# Arrange
benchmark = TestBenchmark(data_preparation_mlcube=cube_uid)
mocker.patch(PATCH_DATAPREP.format("Benchmark.get"), return_value=benchmark)
spy = mocker.patch(
PATCH_DATAPREP.format("Cube.get"), return_value=MockCube(True)
)
spy = mocker.patch(PATCH_DATAPREP.format("Cube.get"), return_value=TestCube())
down_spy = mocker.patch(PATCH_DATAPREP.format("Cube.download_run_files"))

# Act
preparation = DataPreparation(cube_uid, None, *[""] * 5)
preparation.get_prep_cube()

# Assert
spy.assert_called_once_with(cube_uid)
down_spy.assert_called_once()

def test_run_cube_tasks_runs_required_tasks(self, mocker, preparation):
# Arrange
10 changes: 6 additions & 4 deletions cli/medperf/tests/commands/mlcube/test_submit.py
@@ -10,7 +10,8 @@

@pytest.fixture
def cube(mocker):
mocker.patch(PATCH_MLCUBE.format("Cube.download"))
mocker.patch(PATCH_MLCUBE.format("Cube.download_config_files"))
mocker.patch(PATCH_MLCUBE.format("Cube.download_run_files"))
mocker.patch(PATCH_MLCUBE.format("Cube.upload"))
mocker.patch(PATCH_MLCUBE.format("Cube.write"))
return TestCube()
@@ -90,10 +91,11 @@ def test_upload_uploads_using_entity(mocker, comms, ui, cube):

def test_download_executes_expected_commands(mocker, comms, ui, cube):
submission = SubmitCube(cube.todict())
down_spy = mocker.patch(PATCH_MLCUBE.format("Cube.download"))

config_down_spy = mocker.patch(PATCH_MLCUBE.format("Cube.download_config_files"))
run_down_spy = mocker.patch(PATCH_MLCUBE.format("Cube.download_run_files"))
# Act
submission.download()

# Assert
down_spy.assert_called_once_with()
config_down_spy.assert_called_once_with()
run_down_spy.assert_called_once_with()
1 change: 1 addition & 0 deletions cli/medperf/tests/commands/result/test_create.py
@@ -74,6 +74,7 @@ def __get_side_effect(id):
return cube

mocker.patch(PATCH_EXECUTION.format("Cube.get"), side_effect=__get_side_effect)
mocker.patch(PATCH_EXECUTION.format("Cube.download_run_files"))


def mock_execution(mocker, state_variables):