Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into fl-tmp
Browse files Browse the repository at this point in the history
  • Loading branch information
hasan7n committed Apr 27, 2024
2 parents c479c3e + 48f1fb1 commit 7e45c55
Show file tree
Hide file tree
Showing 19 changed files with 324 additions and 188 deletions.
17 changes: 15 additions & 2 deletions cli/cli_chestxray_tutorial_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ checkFailed "testdata profile activation failed"
medperf run -b 1 -d $DSET_UID -m 4 -y
checkFailed "Benchmark execution step failed"

# Test offline compatibility test
wget -P $MODEL_LOCAL/workspace/additional_files "https://storage.googleapis.com/medperf-storage/chestxray_tutorial/cnn_weights.tar.gz"
tar -xzvf $MODEL_LOCAL/workspace/additional_files/cnn_weights.tar.gz -C $MODEL_LOCAL/workspace/additional_files
medperf test run --offline --no-cache \
--demo_dataset_url https://storage.googleapis.com/medperf-storage/chestxray_tutorial/demo_data.tar.gz \
--demo_dataset_hash "71faabd59139bee698010a0ae3a69e16d97bc4f2dde799d9e187b94ff9157c00" \
-p $PREP_LOCAL \
-m $MODEL_LOCAL \
-e $METRIC_LOCAL

checkFailed "offline compatibility test execution step failed"
rm $MODEL_LOCAL/workspace/additional_files/cnn_weights.tar.gz
rm $MODEL_LOCAL/workspace/additional_files/cnn_weights.pth

echo "====================================="
echo "Logout users"
echo "====================================="
Expand All @@ -113,7 +127,6 @@ checkFailed "testdata profile activation failed"
medperf auth logout
checkFailed "logout failed"


echo "====================================="
echo "Delete test profiles"
echo "====================================="
Expand All @@ -128,4 +141,4 @@ checkFailed "Profile deletion failed"

if ${CLEANUP}; then
clean
fi
fi
2 changes: 1 addition & 1 deletion cli/medperf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.0"
__version__ = "0.1.2"
16 changes: 10 additions & 6 deletions cli/medperf/commands/compatibility_test/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from medperf.commands.dataset.prepare import DataPreparation
from medperf.commands.dataset.submit import DataCreation
from medperf.utils import get_folders_hash, remove_path
from medperf.utils import get_file_hash, get_folders_hash, remove_path
from medperf.exceptions import InvalidArgumentError, InvalidEntityError

from medperf.comms.entity_resources import resources
Expand Down Expand Up @@ -48,11 +48,13 @@ def prepare_local_cube(path):
config.tmp_paths.append(dst)
cube_metadata_file = os.path.join(path, config.cube_metadata_filename)
if not os.path.exists(cube_metadata_file):
mlcube_yaml_path = os.path.join(path, config.cube_filename)
mlcube_yaml_hash = get_file_hash(mlcube_yaml_path)
temp_metadata = {
"id": None,
"name": temp_uid,
"git_mlcube_url": "mock_url",
"mlcube_hash": "",
"mlcube_hash": mlcube_yaml_hash,
"parameters_hash": "",
"image_tarball_hash": "",
"additional_files_tarball_hash": "",
Expand Down Expand Up @@ -88,9 +90,11 @@ def prepare_cube(cube_uid: str):
path = path.resolve()

if os.path.exists(path):
logging.info("local path provided. Creating symbolic link")
temp_uid = prepare_local_cube(path)
return temp_uid
mlcube_yaml_path = os.path.join(path, config.cube_filename)
if os.path.exists(mlcube_yaml_path):
logging.info("local path provided. Creating symbolic link")
temp_uid = prepare_local_cube(path)
return temp_uid

logging.error(f"mlcube {cube_uid} was not found as an existing mlcube")
raise InvalidArgumentError(
Expand Down Expand Up @@ -127,12 +131,12 @@ def create_test_dataset(
location="local",
approved=False,
submit_as_prepared=skip_data_preparation_step,
for_test=True,
)
data_creation.validate()
data_creation.create_dataset_object()
# TODO: existing dataset could make problems
# make some changes since this is a test dataset
data_creation.dataset.for_test = True
config.tmp_paths.remove(data_creation.dataset.path)
data_creation.dataset.write()
if skip_data_preparation_step:
Expand Down
5 changes: 5 additions & 0 deletions cli/medperf/commands/dataset/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def run(
location: str = None,
approved: bool = False,
submit_as_prepared: bool = False,
for_test: bool = False,
):
preparation = cls(
benchmark_uid,
Expand All @@ -40,6 +41,7 @@ def run(
location,
approved,
submit_as_prepared,
for_test,
)
preparation.validate()
preparation.validate_prep_cube()
Expand All @@ -64,6 +66,7 @@ def __init__(
location: str,
approved: bool,
submit_as_prepared: bool,
for_test: bool,
):
self.ui = config.ui
self.data_path = str(Path(data_path).resolve())
Expand All @@ -76,6 +79,7 @@ def __init__(
self.prep_cube_uid = prep_cube_uid
self.approved = approved
self.submit_as_prepared = submit_as_prepared
self.for_test = for_test

def validate(self):
if not os.path.exists(self.data_path):
Expand Down Expand Up @@ -122,6 +126,7 @@ def create_dataset_object(self):
generated_metadata={},
state="DEVELOPMENT",
submitted_as_prepared=self.submit_as_prepared,
for_test=self.for_test,
)
dataset.write()
config.tmp_paths.append(dataset.path)
Expand Down
7 changes: 4 additions & 3 deletions cli/medperf/tests/commands/dataset/test_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def creation(mocker, comms, ui):
LOCATION,
False,
IS_PREPARED,
for_test=False,
)
mocker.patch(
PATCH_DATAPREP.format("Cube.get"), return_value=TestCube(is_valid=True)
Expand Down Expand Up @@ -76,7 +77,7 @@ def test_validate_prep_cube_gets_prep_cube_if_provided(
)

# Act
creation = DataCreation(None, cube_uid, *[""] * 7, False)
creation = DataCreation(None, cube_uid, *[""] * 7, False, False)
creation.validate_prep_cube()

# Assert
Expand All @@ -94,7 +95,7 @@ def test_validate_prep_cube_gets_benchmark_cube_if_provided(
)

# Act
creation = DataCreation(cube_uid, None, *[""] * 7, False)
creation = DataCreation(cube_uid, None, *[""] * 7, False, False)
creation.validate_prep_cube()

# Assert
Expand All @@ -107,7 +108,7 @@ def test_fails_if_invalid_params(self, mocker, benchmark_uid, cube_uid, comms, u
num_arguments = int(benchmark_uid is None) + int(cube_uid is None)

# Act
creation = DataCreation(benchmark_uid, cube_uid, *[""] * 7, False)
creation = DataCreation(benchmark_uid, cube_uid, *[""] * 7, False, False)
# Assert

if num_arguments != 1:
Expand Down
8 changes: 7 additions & 1 deletion cli/tests_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ DIRECTORY="${DIRECTORY:-/tmp/medperf_test_files}"
CLEANUP="${CLEANUP:-false}"
FRESH="${FRESH:-false}"
MEDPERF_STORAGE=~/.medperf
MEDPERF_LOG_STORAGE=~/.medperf_logs
SERVER_STORAGE_ID="$(echo $SERVER_URL | cut -d '/' -f 3 | sed -e 's/[.:]/_/g')"
TIMEOUT="${TIMEOUT:-30}"
VERSION_PREFIX="/api/v0"
Expand Down Expand Up @@ -52,7 +53,7 @@ checkFailed() {
fi
echo $1
echo "medperf log:"
tail "$MEDPERF_STORAGE/logs/medperf.log"
tail "$MEDPERF_LOG_STORAGE/medperf.log"
if ${CLEANUP}; then
clean
fi
Expand Down Expand Up @@ -118,3 +119,8 @@ BENCHMARKOWNER="[email protected]"
ADMIN="[email protected]"
DATAOWNER2="[email protected]"
AGGOWNER="[email protected]"

# local MLCubes for local compatibility tests
PREP_LOCAL="$(dirname $(dirname $(realpath "$0")))/examples/chestxray_tutorial/data_preparator/mlcube"
MODEL_LOCAL="$(dirname $(dirname $(realpath "$0")))/examples/chestxray_tutorial/model_custom_cnn/mlcube"
METRIC_LOCAL="$(dirname $(dirname $(realpath "$0")))/examples/chestxray_tutorial/metrics/mlcube"
14 changes: 8 additions & 6 deletions docs/mlcubes/mlcube_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ docker_image_name [docker/image:latest]: # (6)!
2. Determines how the MLCube root folder will be named.
3. Gives a Human-readable description to the MLCube Project.
4. Documents the MLCube implementation by specifying the author.
5. Indicates how many GPUs should be visible by the MLCube.
5. Set it to 0. This is now ignored and will be removed in the next release. Please check the last section to learn how to use MLCube with GPUs.
6. MLCubes use Docker containers under the hood. Here, you can provide an image tag to the image that will be created by this MLCube. **You should use a valid name that allows you to upload it to a Docker registry.**

After filling the configuration options, the following directory structure will be generated:
Expand Down Expand Up @@ -232,9 +232,6 @@ accelerator_count [0]: 0
docker_image_name [docker/image:latest]: repository/model-tutorial:0.0.0
```

!!! note
This example is built to be used with a CPU. See the [last section](#using-the-example-with-gpus) to know how to configure this example with a GPU.

Note that `docker_image_name` is arbitrarily chosen. Use a valid docker image.

### Move your Codebase
Expand Down Expand Up @@ -355,6 +352,11 @@ The provided example codebase runs only on CPU. You can modify it to have `pytor

The general instructions for building an MLCube to work with a GPU are the same as the provided instructions, but with the following slight modifications:

- Use a number different than `0` for the `accelerator_count` that you will be prompted with when creating the MLCube template.
- Inside the `docker` section of the `mlcube.yaml`, add a key value pair: `gpu_args: --gpus=all`. These `gpu_args` will be passed to `docker run` under the hood by MLCube. You may add more than just `--gpus=all`.
- Make sure you install the required GPU dependencies in the docker image. For instance, this may be done by simply modifying the `pip` dependencies in the `requirements.txt` file to download `pytorch` with CUDA support, or by changing the base image of the dockerfile.

For testing your MLCube with GPUs using the MLCube tool as in the previous section, make sure you run the `mlcube run` command with a `--gpus` argument. Example: `mlcube run --gpus=all ...`

For testing your MLCube with GPUs using MedPerf, make sure you also pass the `--gpus` argument to the MedPerf command. Example: `medperf --gpus=all <subcommand> ...`.

!!!tip
Run `medperf --help` to see the possible options you can use for the `--gpus` argument.
50 changes: 50 additions & 0 deletions scripts/get_dataset_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import hashlib
import os
import yaml

from medperf import config
from medperf.init import initialize
from typer import Option


def sha256sum(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    Reads the file in 128 KiB chunks through a reusable buffer so large
    files are hashed without loading them fully into memory.
    """
    digest = hashlib.sha256()
    buffer = bytearray(128 * 1024)
    view = memoryview(buffer)
    with open(filename, "rb", buffering=0) as stream:
        while True:
            count = stream.readinto(view)
            if not count:
                break
            digest.update(view[:count])
    return digest.hexdigest()


def generate_hash_dict(path):
    """Recursively build a nested dict mirroring the tree at *path*.

    Directories map to nested dicts; files map to their SHA-256 hex digest.
    """
    tree = {}
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            tree[entry] = generate_hash_dict(entry_path)
        else:
            tree[entry] = sha256sum(entry_path)
    return tree


def main(
    # Required: a missing -d previously fell through as None and crashed in
    # os.path.join with an opaque TypeError; `...` makes typer enforce it.
    dataset_uid: str = Option(..., "-d", "--dataset"),
    output_file: str = Option("dataset_hashes.yaml", "-f", "--file"),
):
    """Write a YAML tree of SHA-256 hashes for every file in a local dataset.

    Args:
        dataset_uid: UID of a locally registered dataset (required).
        output_file: Path of the YAML file to write the hash tree to.
    """
    initialize()
    dset_path = os.path.join(config.datasets_folder, dataset_uid)

    # Get hashes of tree
    hash_dict = generate_hash_dict(dset_path)

    # Write results to a file
    with open(output_file, "w") as f:
        yaml.dump(hash_dict, f)


if __name__ == "__main__":
    # Only `Option` is imported from typer at module top, so `run` would be
    # a NameError here; import it locally before invoking the CLI.
    from typer import run

    run(main)
56 changes: 56 additions & 0 deletions scripts/get_reviewed_cases_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import tarfile
import hashlib
import shutil
import os
import yaml


def sha256sum(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    Streams the file in 128 KiB chunks through a reusable buffer so large
    files are hashed without reading them fully into memory.
    """
    digest = hashlib.sha256()
    buffer = bytearray(128 * 1024)
    view = memoryview(buffer)
    with open(filename, "rb", buffering=0) as stream:
        while True:
            count = stream.readinto(view)
            if not count:
                break
            digest.update(view[:count])
    return digest.hexdigest()


def generate_hash_dict(path):
    """Recursively build a nested dict mirroring the tree at *path*.

    Directories map to nested dicts; files map to their SHA-256 hex digest.
    """
    tree = {}
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            tree[entry] = generate_hash_dict(entry_path)
        else:
            tree[entry] = sha256sum(entry_path)
    return tree


def main():
    """Hash the contents of reviewed_cases.tar.gz and write them to YAML.

    Extracts the archive into a scratch folder, records a SHA-256 digest for
    every file (mirroring the directory structure), writes the result to
    reviewed_cases_hashes.yaml, and removes the scratch folder afterwards.
    """
    workdir = ".reviewed_cases_contents"
    output = "reviewed_cases_hashes.yaml"

    # Start from a clean extraction folder
    shutil.rmtree(workdir, ignore_errors=True)
    os.makedirs(workdir, exist_ok=True)

    # Unpack the reviewed cases archive
    with tarfile.open("reviewed_cases.tar.gz") as archive:
        archive.extractall(workdir)

    # Hash the extracted tree
    hashes = generate_hash_dict(workdir)

    # Persist the hashes
    with open(output, "w") as f:
        yaml.dump(hashes, f)

    # Remove the scratch extraction folder
    shutil.rmtree(workdir, ignore_errors=True)


if __name__ == "__main__":
    # Plain script entry point; no CLI arguments are taken.
    main()
15 changes: 3 additions & 12 deletions scripts/monitor/rano_monitor/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
DEFAULT_STAGES_PATH,
STAGES_HELP,
DSET_LOC_HELP,
OUT_HELP
OUT_HELP,
)
from rano_monitor.dataset_browser import DatasetBrowser
from rano_monitor.handlers import InvalidHandler
from rano_monitor.handlers import PromptHandler
from rano_monitor.handlers import ReportHandler, ReportState
from rano_monitor.handlers import ReviewedHandler
from rano_monitor.handlers import TarballReviewedHandler
from rano_monitor.tarball_browser import TarballBrowser
from typer import Option
Expand All @@ -40,13 +39,11 @@ def run_dset_app(dset_path, stages_path, output_path):
report_state = ReportState(report_path, t_app)
report_watchdog = ReportHandler(report_state)
prompt_watchdog = PromptHandler(dset_data_path, t_app)
reviewed_watchdog = ReviewedHandler(dset_data_path, t_app)
invalid_watchdog = InvalidHandler(invalid_path, t_app)

t_app.set_vars(
dset_data_path,
stages_path,
reviewed_watchdog,
output_path,
invalid_path,
invalid_watchdog,
Expand All @@ -56,7 +53,6 @@ def run_dset_app(dset_path, stages_path, output_path):
observer = Observer()
observer.schedule(report_watchdog, dset_path)
observer.schedule(prompt_watchdog, os.path.join(dset_path, "data"))
observer.schedule(reviewed_watchdog, ".")
observer.schedule(invalid_watchdog, os.path.dirname(invalid_path))
observer.start()
t_app.run()
Expand Down Expand Up @@ -89,13 +85,8 @@ def run_tarball_app(tarball_path):

@app.command()
def main(
dataset_uid: str = Option(None, "-d", "--dataset", help=DSET_HELP),
stages_path: str = Option(
DEFAULT_STAGES_PATH,
"-s",
"--stages",
help=STAGES_HELP
),
dataset_uid: str = Option(..., "-d", "--dataset", help=DSET_HELP),
stages_path: str = Option(DEFAULT_STAGES_PATH, "-s", "--stages", help=STAGES_HELP),
dset_path: str = Option(
None,
"-p",
Expand Down
Loading

0 comments on commit 7e45c55

Please sign in to comment.