Debug logging (#527)
* Added a container-loglevel param that defines the LOGLEVEL env var for running mlcubes

* Reset default medperf loglevel to debug

* Added tests of the debug-logging env for docker containers

* Bugfix in tests

* Test bugfix

* Test bugfix

* Tests bugfix

* Tests fix

* Tests fix

* Test fix

* Tests fix

* Tests fix

* Tests debug

* Tests debug

* Tests debug

* Tests debug

* Tests fix

* Tests debug

* Tests fix

* Tests fix

* Tests fix

* Removed test debug prints

* Moved the singularity image for CLI tests from `mockcube` to Google storage

* Moved to the cleaned mockcube repo

* Pushed mock cubes to hasan7n's repo

* Added filtering on MLCube framework logs (see the sketch after this list)

- MLCube framework debug+info logs are printed to the debug file only
- MLCube framework warning+error logs are printed to stdout and the debug file
- Container logs are printed to stdout and the debug file

* Also save everything printed to the UI to the debug file

* Removed unused code

* Fixed tests

* Fixed tests

* Made container loglevel env uppercase

* Tests fix

* Update mlcube version

* Log filter refactoring

* Display mlcube info logs to the user

* Dummy commit to retrigger CI

* Removed logging that could expose sensitive data

* Fixed linter warning
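
A minimal sketch of the log routing described in the filtering bullet, assuming Python's standard `logging` machinery; the filter class, the `mlcube` logger-name prefix, and the file name are illustrative assumptions, not the PR's actual identifiers:

```python
import logging
import sys


class MLCubeFrameworkFilter(logging.Filter):
    """Keep MLCube framework DEBUG/INFO records out of stdout (assumed name)."""

    def filter(self, record: logging.LogRecord) -> bool:
        # MLCube framework records reach stdout only at WARNING and above;
        # all other records (medperf's own logs, container output) pass through.
        if record.name.startswith("mlcube"):
            return record.levelno >= logging.WARNING
        return True


stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.addFilter(MLCubeFrameworkFilter())

# The debug file receives every record, including MLCube framework DEBUG/INFO.
file_handler = logging.FileHandler("medperf_debug.log")

logging.basicConfig(level=logging.DEBUG, handlers=[stdout_handler, file_handler])
```

With this routing, MLCube framework chatter stays in the debug file while its warnings and errors still reach the terminal.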

---------

Co-authored-by: hasan7n <[email protected]>
Co-authored-by: hasan7n <[email protected]>
3 people authored Feb 18, 2024
1 parent d1581dc commit b6b9672
Showing 12 changed files with 178 additions and 40 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -129,3 +129,4 @@ If you wish to contribute to our documentation, here are the steps for successfu
mkdocs serve
```
- **Access local documentation:** Once mkdocs is done setting up the server, you should be able to access your local documentation website by heading to `http://localhost:8000` in your browser.

91 changes: 82 additions & 9 deletions cli/cli_tests.sh
@@ -90,35 +90,52 @@ checkFailed "auth status command failed"

echo "\n"

##########################################################
echo "====================================="
echo "Existing cubes":
echo "====================================="
medperf mlcube ls
##########################################################

echo "\n"

##########################################################
echo "====================================="
echo "Submit cubes"
echo "====================================="

medperf mlcube submit --name prep -m $PREP_MLCUBE -p $PREP_PARAMS
medperf mlcube submit --name mock-prep -m $PREP_MLCUBE -p $PREP_PARAMS
checkFailed "Prep submission failed"
PREP_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
PREP_UID=$(medperf mlcube ls | grep mock-prep | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)

medperf mlcube submit --name model1 -m $MODEL_MLCUBE -p $MODEL1_PARAMS -a $MODEL_ADD
checkFailed "Model1 submission failed"
MODEL1_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
MODEL1_UID=$(medperf mlcube ls | grep model1 | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)

medperf mlcube submit --name model2 -m $MODEL_MLCUBE -p $MODEL2_PARAMS -a $MODEL_ADD
checkFailed "Model2 submission failed"
MODEL2_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
MODEL2_UID=$(medperf mlcube ls | grep model2 | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)

# MLCube with singularity section
medperf mlcube submit --name model3 -m $MODEL_WITH_SINGULARITY -p $MODEL3_PARAMS -a $MODEL_ADD -i $MODEL_SING_IMAGE
checkFailed "Model3 submission failed"
MODEL3_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
MODEL3_UID=$(medperf mlcube ls | grep model3 | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)

medperf mlcube submit --name model-fail -m $FAILING_MODEL_MLCUBE -p $MODEL4_PARAMS -a $MODEL_ADD
checkFailed "failing model submission failed"
FAILING_MODEL_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
FAILING_MODEL_UID=$(medperf mlcube ls | grep model-fail | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)

medperf mlcube submit --name model-log-none -m $MODEL_LOG_MLCUBE -p $MODEL_LOG_NONE_PARAMS
checkFailed "Model with logging None submission failed"
MODEL_LOG_NONE_UID=$(medperf mlcube ls | grep model-log-none | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)

medperf mlcube submit --name model-log-debug -m $MODEL_LOG_MLCUBE -p $MODEL_LOG_DEBUG_PARAMS
checkFailed "Model with logging debug submission failed"
MODEL_LOG_DEBUG_UID=$(medperf mlcube ls | grep model-log-debug | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)

medperf mlcube submit --name metrics -m $METRIC_MLCUBE -p $METRIC_PARAMS
medperf mlcube submit --name mock-metrics -m $METRIC_MLCUBE -p $METRIC_PARAMS
checkFailed "Metrics submission failed"
METRICS_UID=$(medperf mlcube ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
METRICS_UID=$(medperf mlcube ls | grep mock-metrics | head -n 1 | tr -s ' ' | cut -d ' ' -f 2)
##########################################################

echo "\n"
Expand All @@ -139,7 +156,7 @@ echo "Submit benchmark"
echo "====================================="
medperf benchmark submit --name bmk --description bmk --demo-url $DEMO_URL --data-preparation-mlcube $PREP_UID --reference-model-mlcube $MODEL1_UID --evaluator-mlcube $METRICS_UID
checkFailed "Benchmark submission failed"
BMK_UID=$(medperf benchmark ls | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)
BMK_UID=$(medperf benchmark ls | grep bmk | tail -n 1 | tr -s ' ' | cut -d ' ' -f 2)

# Approve benchmark
ADMIN_TOKEN=$(jq -r --arg ADMIN $ADMIN '.[$ADMIN]' $MOCK_TOKENS_FILE)
@@ -258,6 +275,36 @@ checkFailed "Failing model association failed"

echo "\n"

##########################################################
echo "======================================================"
echo "Running logging-model-without-env association"
echo "======================================================"
medperf mlcube associate -m $MODEL_LOG_NONE_UID -b $BMK_UID -y
checkFailed "Logging-model-without-env association association failed"
##########################################################

echo "\n"

##########################################################
echo "======================================================"
echo "Running logging-model-with-debug association"
echo "======================================================"
medperf --container-loglevel debug mlcube associate -m $MODEL_LOG_DEBUG_UID -b $BMK_UID -y
checkFailed "Logging-model-with-debug association failed"
##########################################################

echo "\n"

##########################################################
echo "======================================================"
echo "Submitted associations:"
echo "======================================================"
medperf association ls
checkFailed "Listing associations failed"
##########################################################

echo "\n"

##########################################################
echo "====================================="
echo "Activate modelowner profile"
@@ -278,6 +325,10 @@ medperf association approve -b $BMK_UID -m $MODEL3_UID
checkFailed "Model3 association approval failed"
medperf association approve -b $BMK_UID -m $FAILING_MODEL_UID
checkFailed "failing model association approval failed"
medperf association approve -b $BMK_UID -m $MODEL_LOG_NONE_UID
checkFailed "Logging-model-without-env association approval failed"
medperf association approve -b $BMK_UID -m $MODEL_LOG_DEBUG_UID
checkFailed "Logging-model-with-debug association approval failed"
##########################################################

echo "\n"
Expand Down Expand Up @@ -356,6 +407,28 @@ checkFailed "Failing mlcube run with ignore errors failed"

echo "\n"

##########################################################
echo "====================================="
echo "Running logging model without logging env"
echo "====================================="
rm -rf $MEDPERF_STORAGE/predictions/$SERVER_STORAGE_ID/model-log-none/$DSET_A_GENUID
medperf run -b $BMK_UID -d $DSET_A_UID -m $MODEL_LOG_NONE_UID -y
checkFailed "run logging model without logging env failed"
##########################################################

echo "\n"

##########################################################
echo "====================================="
echo "Running logging model with debug logging env"
echo "====================================="
rm -rf $MEDPERF_STORAGE/predictions/$SERVER_STORAGE_ID/model-log-debug/$DSET_A_GENUID
medperf --container-loglevel debug run -b $BMK_UID -d $DSET_A_UID -m $MODEL_LOG_DEBUG_UID -y
checkFailed "run logging model with debug logging env failed"
##########################################################

echo "\n"

##########################################################
echo "====================================="
echo "Logout users"
6 changes: 4 additions & 2 deletions cli/medperf/commands/compatibility_test/run.py
@@ -1,5 +1,4 @@
import logging
from typing import List

from medperf.commands.execution import Execution
from medperf.entities.dataset import Dataset
@@ -26,7 +25,7 @@ def run(
data_uid: str = None,
no_cache: bool = False,
offline: bool = False,
) -> List:
) -> (str, dict):
"""Execute a test workflow. Components of a complete workflow should be passed.
When only the benchmark is provided, it implies the following workflow will be used:
- the benchmark's demo dataset is used as the raw data
@@ -92,6 +91,9 @@
if results is None:
results = test_exec.execute()
test_exec.write(results)
else:
logging.info('Existing results found. Test will not be re-executed.')
logging.debug(f'Existing results: {results}')
return test_exec.data_uid, results

def __init__(
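
For context, a hedged sketch of the reuse-cached-results pattern this hunk adds logging around; the three callables are stand-ins for illustration, not medperf APIs:

```python
import logging


def execute_or_reuse(load_cached_results, execute_test, write_results):
    """Run a compatibility test only when no cached results exist (sketch)."""
    results = load_cached_results()
    if results is None:
        results = execute_test()
        write_results(results)
    else:
        logging.info("Existing results found. Test will not be re-executed.")
        logging.debug(f"Existing results: {results}")
    return results


# Usage with stand-in callables: nothing is cached, so the test "executes".
print(execute_or_reuse(lambda: None, lambda: {"accuracy": 0.9}, lambda r: None))
```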
4 changes: 3 additions & 1 deletion cli/medperf/config.py
@@ -161,11 +161,12 @@
statistics_timeout = None
infer_timeout = None
evaluate_timeout = None
container_loglevel = None
mlcube_configure_timeout = None
mlcube_inspect_timeout = None

# Other
loglevel = "info"
loglevel = "debug"
cleanup = True
ui = "CLI"

@@ -184,6 +185,7 @@
"platform",
"gpus",
"cleanup",
"container_loglevel"
]
configurable_parameters = inline_parameters + [
"server",
12 changes: 11 additions & 1 deletion cli/medperf/decorators.py
@@ -117,6 +117,11 @@ def wrapper(
"--evaluate_timeout",
help="Maximum time in seconds before interrupting evaluate task",
),
container_loglevel: str = typer.Option(
config.container_loglevel,
"--container-loglevel",
help="Logging level for containers to be run [debug | info | warning | error]",
),
platform: str = typer.Option(
config.platform,
"--platform",
@@ -188,6 +193,11 @@ def wrapper(
"--evaluate_timeout",
help="Maximum time in seconds before interrupting evaluate task",
),
container_loglevel: str = typer.Option(
config.container_loglevel,
"--container-loglevel",
help="Logging level for containers to be run [debug | info | warning | error]",
),
platform: str = typer.Option(
config.platform,
"--platform",
@@ -209,7 +219,7 @@ def wrapper(
cleanup: bool = typer.Option(
config.cleanup,
"--cleanup/--no-cleanup",
help="Wether to clean up temporary medperf storage after execution",
help="Whether to clean up temporary medperf storage after execution",
),
**kwargs,
):
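
For reference, a self-contained sketch of how the new `--container-loglevel` option is declared with Typer, mirroring the hunks above; the `app` object and `run` command are illustrative stand-ins, not medperf's real entry points:

```python
import typer

app = typer.Typer()


@app.command()
def run(
    container_loglevel: str = typer.Option(
        None,
        "--container-loglevel",
        help="Logging level for containers to be run [debug | info | warning | error]",
    ),
):
    # In the PR, this value lands on the global config object and is later
    # forwarded to docker containers as the MEDPERF_LOGLEVEL env variable.
    if container_loglevel:
        typer.echo(f"Container loglevel: {container_loglevel}")


if __name__ == "__main__":
    app()
```

Invoking `python sketch.py --container-loglevel debug` would echo the chosen level.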
22 changes: 15 additions & 7 deletions cli/medperf/entities/cube.py
@@ -131,6 +131,7 @@ def __local_all(cls) -> List["Cube"]:
cubes_folder = config.cubes_folder
try:
uids = next(os.walk(cubes_folder))[1]
logging.debug(f'Local cubes found: {uids}')
except StopIteration:
msg = "Couldn't iterate over cubes directory"
logging.warning(msg)
@@ -244,8 +245,9 @@ def _set_image_hash_from_registry(self):
# Retrieve image hash from MLCube
logging.debug(f"Retrieving {self.id} image hash")
tmp_out_yaml = generate_tmp_path()
cmd = f"mlcube inspect --mlcube={self.cube_path} --format=yaml"
cmd = f"mlcube --log-level {config.loglevel} inspect --mlcube={self.cube_path} --format=yaml"
cmd += f" --platform={config.platform} --output-file {tmp_out_yaml}"
logging.info(f"Running MLCube command: {cmd}")
with pexpect.spawn(cmd, timeout=config.mlcube_inspect_timeout) as proc:
proc_stdout = proc.read()
logging.debug(proc_stdout)
@@ -261,9 +263,10 @@ def _set_image_hash_from_registry(self):
def _get_image_from_registry(self):
# Retrieve image from image registry
logging.debug(f"Retrieving {self.id} image")
cmd = f"mlcube configure --mlcube={self.cube_path} --platform={config.platform}"
cmd = f"mlcube --log-level {config.loglevel} configure --mlcube={self.cube_path} --platform={config.platform}"
if config.platform == "singularity":
cmd += f" -Psingularity.image={self._converted_singularity_image_name}"
logging.info(f"Running MLCube command: {cmd}")
with pexpect.spawn(cmd, timeout=config.mlcube_configure_timeout) as proc:
proc_out = proc.read()
if proc.exitstatus != 0:
@@ -312,8 +315,7 @@ def run(
kwargs (dict): additional arguments that are passed directly to the mlcube command
"""
kwargs.update(string_params)
# TODO: re-use `loglevel=critical` or figure out a clean MLCube logging
cmd = "mlcube run"
cmd = f"mlcube --log-level {config.loglevel} run"
cmd += f" --mlcube={self.cube_path} --task={task} --platform={config.platform} --network=none"
if config.gpus is not None:
cmd += f" --gpus={config.gpus}"
@@ -323,6 +325,8 @@ def run(
cmd_arg = f'{k}="{v}"'
cmd = " ".join([cmd, cmd_arg])

container_loglevel = config.container_loglevel

# TODO: we should override run args instead of what we are doing below
# we shouldn't allow arbitrary run args unless our client allows it
if config.platform == "docker":
Expand All @@ -333,6 +337,9 @@ def run(
gpu_args = " ".join([gpu_args, "-u $(id -u):$(id -g)"]).strip()
cmd += f' -Pdocker.cpu_args="{cpu_args}"'
cmd += f' -Pdocker.gpu_args="{gpu_args}"'

if container_loglevel:
cmd += f' -Pdocker.env_args="-e MEDPERF_LOGLEVEL={container_loglevel.upper()}"'
elif config.platform == "singularity":
# use -e to discard host env vars, -C to isolate the container (see singularity run --help)
run_args = self.get_config("singularity.run_args") or ""
@@ -347,6 +354,7 @@
cmd += (
f' -Psingularity.image="{self._converted_singularity_image_name}"'
)
# TODO: also pass the logging env var for singularity
else:
raise InvalidArgumentError("Unsupported platform")

Expand All @@ -355,9 +363,9 @@ def run(
cmd += " -Pplatform.accelerator_count=0"

logging.info(f"Running MLCube command: {cmd}")
proc = pexpect.spawn(cmd, timeout=timeout)
proc_out = combine_proc_sp_text(proc)
proc.close()
with pexpect.spawn(cmd, timeout=timeout) as proc:
proc_out = combine_proc_sp_text(proc)

if output_logs is None:
logging.debug(proc_out)
else:
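
On the container side, a hedged sketch of how a model cube's entrypoint might honor the injected variable; the env var name matches the `-Pdocker.env_args` line above, while the rest is an assumption about cube internals:

```python
import logging
import os

# medperf injects MEDPERF_LOGLEVEL via docker env_args when the user passes
# --container-loglevel; fall back to INFO when the variable is absent.
level_name = os.environ.get("MEDPERF_LOGLEVEL", "INFO").upper()
logging.basicConfig(level=getattr(logging, level_name, logging.INFO))

logging.debug("visible only when medperf ran with --container-loglevel debug")
logging.info("visible at the default level as well")
```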
2 changes: 1 addition & 1 deletion cli/medperf/entities/report.py
@@ -14,7 +14,7 @@ class TestReport(Entity, MedperfBaseSchema):
"""
Class representing a compatibility test report entry
A test report is comprised of the components of a test execution:
A test report consists of the components of a test execution:
- data used, which can be:
- a demo dataset url and its hash, or
- a raw data path and its labels path, or
2 changes: 1 addition & 1 deletion cli/medperf/logging/__init__.py
@@ -5,7 +5,7 @@


def setup_logging(log_file: str, loglevel: str):
log_fmt = "%(asctime)s | %(levelname)s: %(message)s"
log_fmt = "%(asctime)s | %(module)s.%(funcName)s | %(levelname)s: %(message)s"
handler = handlers.RotatingFileHandler(log_file, backupCount=20)
handler.setFormatter(logging.Formatter(log_fmt))
logging.basicConfig(
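
A standalone demo of the new format string, assuming nothing beyond the standard library; the file and function names are illustrative:

```python
import logging
from logging import handlers

log_fmt = "%(asctime)s | %(module)s.%(funcName)s | %(levelname)s: %(message)s"
handler = handlers.RotatingFileHandler("demo.log", backupCount=20)
handler.setFormatter(logging.Formatter(log_fmt))
logging.basicConfig(level=logging.DEBUG, handlers=[handler])


def submit():
    # The record names the calling module and function, e.g. "demo.submit"
    # when this file is saved as demo.py.
    logging.getLogger(__name__).info("submitting cube")


submit()
# demo.log now contains a line like:
# 2024-02-18 10:15:00,123 | demo.submit | INFO: submitting cube
```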
12 changes: 6 additions & 6 deletions cli/medperf/tests/entities/test_cube.py
@@ -97,8 +97,8 @@ def test_download_run_files_without_image_configures_mlcube(
)
spy = mocker.spy(medperf.entities.cube.pexpect, "spawn")
expected_cmds = [
f"mlcube configure --mlcube={self.manifest_path} --platform={config.platform}",
f"mlcube inspect --mlcube={self.manifest_path}"
f"mlcube --log-level debug configure --mlcube={self.manifest_path} --platform={config.platform}",
f"mlcube --log-level debug inspect --mlcube={self.manifest_path}"
f" --format=yaml --platform={config.platform} --output-file {tmp_path}",
]
expected_cmds = [call(cmd, timeout=None) for cmd in expected_cmds]
@@ -179,7 +179,7 @@ def test_cube_runs_command(self, mocker, timeout, setup, task):
)
mocker.patch(PATCH_CUBE.format("Cube.get_config"), side_effect=["", ""])
expected_cmd = (
f"mlcube run --mlcube={self.manifest_path} --task={task} "
f"mlcube --log-level debug run --mlcube={self.manifest_path} --task={task} "
+ f"--platform={self.platform} --network=none --mount=ro"
+ ' -Pdocker.cpu_args="-u $(id -u):$(id -g)"'
+ ' -Pdocker.gpu_args="-u $(id -u):$(id -g)"'
@@ -202,7 +202,7 @@ def test_cube_runs_command_with_rw_access(self, mocker, setup, task):
side_effect=["", ""],
)
expected_cmd = (
f"mlcube run --mlcube={self.manifest_path} --task={task} "
f"mlcube --log-level debug run --mlcube={self.manifest_path} --task={task} "
+ f"--platform={self.platform} --network=none"
+ ' -Pdocker.cpu_args="-u $(id -u):$(id -g)"'
+ ' -Pdocker.gpu_args="-u $(id -u):$(id -g)"'
@@ -222,7 +222,7 @@ def test_cube_runs_command_with_extra_args(self, mocker, setup, task):
spy = mocker.patch("pexpect.spawn", side_effect=mpexpect.spawn)
mocker.patch(PATCH_CUBE.format("Cube.get_config"), side_effect=["", ""])
expected_cmd = (
f"mlcube run --mlcube={self.manifest_path} --task={task} "
f"mlcube --log-level debug run --mlcube={self.manifest_path} --task={task} "
+ f'--platform={self.platform} --network=none --mount=ro test="test"'
+ ' -Pdocker.cpu_args="-u $(id -u):$(id -g)"'
+ ' -Pdocker.gpu_args="-u $(id -u):$(id -g)"'
@@ -245,7 +245,7 @@ def test_cube_runs_command_and_preserves_runtime_args(self, mocker, setup, task)
side_effect=["cpuarg cpuval", "gpuarg gpuval"],
)
expected_cmd = (
f"mlcube run --mlcube={self.manifest_path} --task={task} "
f"mlcube --log-level debug run --mlcube={self.manifest_path} --task={task} "
+ f"--platform={self.platform} --network=none --mount=ro"
+ ' -Pdocker.cpu_args="cpuarg cpuval -u $(id -u):$(id -g)"'
+ ' -Pdocker.gpu_args="gpuarg gpuval -u $(id -u):$(id -g)"'