Server tests (#502)
* write all tests

* change: remove unused endpoints
These endpoints are not used and don't seem to help
in our workflow, so there is no need to keep maintaining them

* change: fix PUT user permission
PUT is now permitted only for admin users.
We need to define what can be edited and how to sync with
Auth0 before exposing this to end users
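
For reference, a minimal sketch of an admin-only PUT using DRF permissions; the class and endpoint names here are illustrative assumptions, not the actual server code:

```python
# Hypothetical sketch of an admin-only PUT, using DRF permissions.
# Class and endpoint names are illustrative, not the actual MedPerf server code.
from rest_framework.generics import GenericAPIView
from rest_framework.permissions import BasePermission, IsAuthenticated


class IsAdmin(BasePermission):
    def has_permission(self, request, view):
        return bool(request.user and request.user.is_staff)


class UserDetail(GenericAPIView):
    def get_permissions(self):
        # Only admins may modify a user; reads stay open to authenticated users
        if self.request.method == "PUT":
            return [IsAdmin()]
        return [IsAuthenticated()]
```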

* change: fix PUT result behaviour
Users were able to edit the results dict and associated entities

* change: remove URLs from cube uniqueness criteria

* change: apply mlcube rules on both PUT and POST
Users have to include hashes for URLs on mlcube creation,
but they were able to set them to blank later
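
Roughly, such a rule could be enforced in a serializer's validate() so it applies to both POST and PUT. A hedged sketch; the field names below stand in for the URL/hash pairs and are not the exact server fields:

```python
# Hypothetical validation: any provided URL must come with its hash.
# Putting this in validate() makes it run on both POST (create) and PUT (update).
from rest_framework import serializers

URL_HASH_PAIRS = [
    ("git_mlcube_url", "mlcube_hash"),          # illustrative field names
    ("git_parameters_url", "parameters_hash"),
]


class MlCubeSerializer(serializers.Serializer):
    def validate(self, data):
        for url_field, hash_field in URL_HASH_PAIRS:
            if data.get(url_field) and not data.get(hash_field):
                raise serializers.ValidationError(
                    f"{hash_field} is required when {url_field} is set"
                )
        return data
```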

* improve readability

* change: modify how associations list works
Make dataset associations private and return the fields
the client needs to know the latest approval status

* change: fix default approval status

* change: fix approved_at update
This field was being updated by any request containing approval_status
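
A sketch of the intended behaviour, stamping approved_at only when the status actually transitions; the function and field names are illustrative assumptions:

```python
# Hypothetical fix: touch approved_at only on a real approval transition,
# not whenever a request merely contains approval_status.
from django.utils import timezone


def apply_approval_status(instance, new_status):
    if new_status != instance.approval_status:
        instance.approval_status = new_status
        if new_status == "APPROVED":
            instance.approved_at = timezone.now()
    instance.save()
```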

* add notes and TODOs

* update workflow file

* modify client behaviour for model association list

* add some documentation for writing tests

* update workflow file

* update server readme

* empty

* change: fix mlcube associations list permissions

* change: modify result PUT and DELETE
- added is_valid and user_metadata fields
- PUT and DELETE are now admin-only

* change: fix permissions of approving the benchmark

* change: prevent updating a benchmark to PENDING

* Revert "change: remove unused endpoints"

This reverts commit 152862a and deactivates the endpoints instead.

* change: restrict marking benchmark as operation
Doing this now requires its three mlcubes to be in OPERATION state
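
Roughly the server-side check being described; a sketch under the assumption that the related MLCube objects expose a state field, not the actual server serializer code:

```python
# Hypothetical check: a benchmark may only be marked OPERATION once its
# data preparation, reference model, and evaluator MLCubes are all OPERATION.
from rest_framework import serializers


def validate_operation_transition(benchmark):
    cubes = [
        benchmark.data_preparation_mlcube,
        benchmark.reference_model_mlcube,
        benchmark.data_evaluator_mlcube,
    ]
    if any(cube.state != "OPERATION" for cube in cubes):
        raise serializers.ValidationError(
            "All three mlcubes must be in OPERATION state "
            "before the benchmark can be marked as OPERATION"
        )
```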

* change: demo dataset url is required

* change: fix delete permissions for associations

* change: server check for dataset assoc prep mlcube
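
Presumably this verifies that the dataset was prepared with the benchmark's data preparation mlcube before the association is accepted. A hedged sketch of that idea, not the actual server code:

```python
# Hypothetical association check (not the actual server code): the dataset
# must have been prepared with the benchmark's own data preparation MLCube.
from rest_framework import serializers


def validate_dataset_association(dataset, benchmark):
    if dataset.data_preparation_mlcube != benchmark.data_preparation_mlcube:
        raise serializers.ValidationError(
            "Dataset was not prepared with the benchmark's data preparation mlcube"
        )
```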

* change: client side new association logic

* change: make client side demo url required

* cleanup TODOs and related minor changes

* fix linter issues

* use a better datetime parser
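
The commit doesn't show which parser was adopted; the common Python options are strict ISO-8601 parsing via datetime.fromisoformat or the more tolerant dateutil parser, e.g.:

```python
# Two common ways to parse server timestamps in Python; which one this commit
# actually adopted is not shown here.
from datetime import datetime
from dateutil import parser as dateutil_parser

ts = "2023-12-01T10:30:00.123456+00:00"
dt_strict = datetime.fromisoformat(ts)        # strict ISO-8601 parsing
dt_lenient = dateutil_parser.isoparse(ts)     # tolerant ISO-8601 parsing
```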

* change the order of model priority in integration tests

* stop using benchmark.models for permission reasons

* add Result's new fields to the client

* refactor to fix linter issue
hasan7n authored Dec 1, 2023
1 parent f69feeb commit 8c26674
Showing 67 changed files with 6,779 additions and 523 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/unittests.yml
@@ -22,7 +22,8 @@ jobs:
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f cli/requirements.txt ]; then pip install -e cli; fi
if [ -f server/requirements.txt ]; then pip install -r server/requirements.txt; fi
pip install -r server/requirements.txt
pip install -r server/test-requirements.txt
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
@@ -35,6 +36,7 @@
# Ignore E231, as it is raising warnings with auto-generated code.
flake8 . --count --max-complexity=10 --max-line-length=127 --ignore F821,W503,E231 --statistics --exclude=examples/,"*/migrations/*",cli/medperf/templates/
- name: Test with pytest
working-directory: ./cli
run: |
pytest
- name: Set server environment vars
@@ -45,4 +47,4 @@
run: python manage.py migrate
- name: Run server unit tests
working-directory: ./server
run: python manage.py test
run: python manage.py test --parallel
30 changes: 20 additions & 10 deletions cli/cli_tests.sh
@@ -244,16 +244,6 @@ checkFailed "Failing model association failed"

echo "\n"

##########################################################
echo "====================================="
echo "Changing priority of model2"
echo "====================================="
medperf association set_priority -b $BMK_UID -m $MODEL2_UID -p 77
checkFailed "Priority set of model2 failed"
##########################################################

echo "\n"

##########################################################
echo "====================================="
echo "Activate modelowner profile"
@@ -278,6 +268,26 @@ checkFailed "failing model association approval failed"

echo "\n"

##########################################################
echo "====================================="
echo "Activate benchmarkowner profile"
echo "====================================="
medperf profile activate testbenchmark
checkFailed "testbenchmark profile activation failed"
##########################################################

echo "\n"

##########################################################
echo "====================================="
echo "Changing priority of model2"
echo "====================================="
medperf association set_priority -b $BMK_UID -m $MODEL2_UID -p 77
checkFailed "Priority set of model2 failed"
##########################################################

echo "\n"

##########################################################
echo "====================================="
echo "Activate dataowner profile"
7 changes: 3 additions & 4 deletions cli/medperf/commands/association/priority.py
@@ -1,12 +1,11 @@
from medperf import config
from medperf.exceptions import InvalidArgumentError
from medperf.entities.benchmark import Benchmark


class AssociationPriority:
@staticmethod
def run(
benchmark_uid: int, mlcube_uid: int, priority: int,
):
def run(benchmark_uid: int, mlcube_uid: int, priority: int):
"""Sets priority for an association between a benchmark and an mlcube
Args:
@@ -15,7 +14,7 @@ def run(
priority (int): priority value
"""
associated_cubes = config.comms.get_benchmark_models(benchmark_uid)
associated_cubes = Benchmark.get_models_uids(benchmark_uid)
if mlcube_uid not in associated_cubes:
raise InvalidArgumentError(
"The given mlcube doesn't exist or is not associated with the benchmark"
2 changes: 1 addition & 1 deletion cli/medperf/commands/benchmark/benchmark.py
@@ -37,7 +37,7 @@ def submit(
),
docs_url: str = typer.Option("", "--docs-url", "-u", help="URL to documentation"),
demo_url: str = typer.Option(
"",
...,
"--demo-url",
help="""Identifier to download the demonstration dataset tarball file.\n
See `medperf mlcube submit --help` for more information""",
20 changes: 15 additions & 5 deletions cli/medperf/commands/result/create.py
@@ -50,7 +50,6 @@ def run(
execution.prepare()
execution.validate()
execution.prepare_models()
execution.validate_models()
if not no_cache:
execution.load_cached_results()
with execution.ui.interactive():
@@ -101,8 +100,19 @@ def validate(self):
def prepare_models(self):
if self.models_input_file:
self.models_uids = self.__get_models_from_file()
elif self.models_uids is None:
self.models_uids = self.benchmark.models

if self.models_uids == [self.benchmark.reference_model_mlcube]:
# avoid the need of sending a request to the server for
# finding the benchmark's associated models
return

benchmark_models = Benchmark.get_models_uids(self.benchmark_uid)
benchmark_models.append(self.benchmark.reference_model_mlcube)

if self.models_uids is None:
self.models_uids = benchmark_models
else:
self.__validate_models(benchmark_models)

def __get_models_from_file(self):
if not os.path.exists(self.models_input_file):
@@ -117,9 +127,9 @@ def __get_models_from_file(self):
msg += "The file should contain a list of comma-separated integers"
raise InvalidArgumentError(msg)

def validate_models(self):
def __validate_models(self, benchmark_models):
models_set = set(self.models_uids)
benchmark_models_set = set(self.benchmark.models)
benchmark_models_set = set(benchmark_models)
non_assoc_cubes = models_set.difference(benchmark_models_set)
if non_assoc_cubes:
if len(non_assoc_cubes) > 1:
6 changes: 3 additions & 3 deletions cli/medperf/comms/interface.py
@@ -50,14 +50,14 @@ def get_benchmark(self, benchmark_uid: int) -> dict:
"""

@abstractmethod
def get_benchmark_models(self, benchmark_uid: int) -> List[int]:
"""Retrieves all the models associated with a benchmark. reference model not included
def get_benchmark_model_associations(self, benchmark_uid: int) -> List[int]:
"""Retrieves all the model associations of a benchmark.
Args:
benchmark_uid (int): UID of the desired benchmark
Returns:
list[int]: List of model UIDS
list[int]: List of benchmark model associations
"""

@abstractmethod
22 changes: 13 additions & 9 deletions cli/medperf/comms/rest.py
@@ -5,7 +5,12 @@
from medperf.enums import Status
import medperf.config as config
from medperf.comms.interface import Comms
from medperf.utils import sanitize_json, log_response_error, format_errors_dict
from medperf.utils import (
sanitize_json,
log_response_error,
format_errors_dict,
filter_latest_associations,
)
from medperf.exceptions import (
CommunicationError,
CommunicationRetrievalError,
@@ -174,18 +179,17 @@ def get_benchmark(self, benchmark_uid: int) -> dict:
)
return res.json()

def get_benchmark_models(self, benchmark_uid: int) -> List[int]:
"""Retrieves all the models associated with a benchmark. reference model not included
def get_benchmark_model_associations(self, benchmark_uid: int) -> List[int]:
"""Retrieves all the model associations of a benchmark.
Args:
benchmark_uid (int): UID of the desired benchmark
Returns:
list[int]: List of model UIDS
list[int]: List of benchmark model associations
"""
models = self.__get_list(f"{self.server_url}/benchmarks/{benchmark_uid}/models")
model_uids = [model["id"] for model in models]
return model_uids
assocs = self.__get_list(f"{self.server_url}/benchmarks/{benchmark_uid}/models")
return filter_latest_associations(assocs, "model_mlcube")

def get_user_benchmarks(self) -> List[dict]:
"""Retrieves all benchmarks created by the user
@@ -475,7 +479,7 @@ def get_datasets_associations(self) -> List[dict]:
List[dict]: List containing all associations information
"""
assocs = self.__get_list(f"{self.server_url}/me/datasets/associations/")
return assocs
return filter_latest_associations(assocs, "dataset")

def get_cubes_associations(self) -> List[dict]:
"""Get all cube associations related to the current user
@@ -484,7 +488,7 @@ def get_cubes_associations(self) -> List[dict]:
List[dict]: List containing all associations information
"""
assocs = self.__get_list(f"{self.server_url}/me/mlcubes/associations/")
return assocs
return filter_latest_associations(assocs, "model_mlcube")

def set_mlcube_association_priority(
self, benchmark_uid: int, mlcube_uid: int, priority: int
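
The rest.py changes above import filter_latest_associations from medperf.utils; that helper is not part of this excerpt, but conceptually it keeps only the most recent association per entity. A minimal sketch, assuming each association dict carries a created_at timestamp:

```python
# Sketch of what filter_latest_associations conceptually does: keep only the
# newest association per entity (keyed by e.g. "model_mlcube" or "dataset").
# The actual helper lives in cli/medperf/utils.py and may differ.
from dateutil import parser


def filter_latest_associations(associations, entity_key):
    latest = {}
    for assoc in associations:
        entity_id = assoc[entity_key]
        created = parser.isoparse(assoc["created_at"])
        if entity_id not in latest or created > latest[entity_id][0]:
            latest[entity_id] = (created, assoc)
    return [assoc for _, assoc in latest.values()]
```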
30 changes: 9 additions & 21 deletions cli/medperf/entities/benchmark.py
@@ -3,7 +3,7 @@
import yaml
import logging
from typing import List, Optional, Union
from pydantic import HttpUrl, Field, validator
from pydantic import HttpUrl, Field

import medperf.config as config
from medperf.entities.interface import Entity, Uploadable
@@ -26,24 +26,16 @@ class Benchmark(Entity, Uploadable, MedperfSchema, ApprovableSchema, DeployableS

description: Optional[str] = Field(None, max_length=20)
docs_url: Optional[HttpUrl]
demo_dataset_tarball_url: Optional[str]
demo_dataset_tarball_url: str
demo_dataset_tarball_hash: Optional[str]
demo_dataset_generated_uid: Optional[str]
data_preparation_mlcube: int
reference_model_mlcube: int
data_evaluator_mlcube: int
models: List[int] = None
metadata: dict = {}
user_metadata: dict = {}
is_active: bool = True

@validator("models", pre=True, always=True)
def set_default_models_value(cls, value, values, **kwargs):
if not value:
# Empty or None value assigned
return [values["reference_model_mlcube"]]
return value

def __init__(self, *args, **kwargs):
"""Creates a new benchmark instance
@@ -91,11 +83,6 @@ def __remote_all(cls, filters: dict) -> List["Benchmark"]:
try:
comms_fn = cls.__remote_prefilter(filters)
bmks_meta = comms_fn()
for bmk_meta in bmks_meta:
# Loading all related models for all benchmarks could be expensive.
# Most probably not necessary when getting all benchmarks.
# If associated models for a benchmark are needed then use Benchmark.get()
bmk_meta["models"] = [bmk_meta["reference_model_mlcube"]]
benchmarks = [cls(**meta) for meta in bmks_meta]
except CommunicationRetrievalError:
msg = "Couldn't retrieve all benchmarks from the server"
@@ -175,9 +162,6 @@ def __remote_get(cls, benchmark_uid: int) -> "Benchmark":
"""
logging.debug(f"Retrieving benchmark {benchmark_uid} remotely")
benchmark_dict = config.comms.get_benchmark(benchmark_uid)
ref_model = benchmark_dict["reference_model_mlcube"]
add_models = cls.get_models_uids(benchmark_uid)
benchmark_dict["models"] = [ref_model] + add_models
benchmark = cls(**benchmark_dict)
benchmark.write()
return benchmark
@@ -230,7 +214,13 @@ def get_models_uids(cls, benchmark_uid: int) -> List[int]:
Returns:
List[int]: List of mlcube uids
"""
return config.comms.get_benchmark_models(benchmark_uid)
associations = config.comms.get_benchmark_model_associations(benchmark_uid)
models_uids = [
assoc["model_mlcube"]
for assoc in associations
if assoc["approval_status"] == "APPROVED"
]
return models_uids

def todict(self) -> dict:
"""Dictionary representation of the benchmark instance
Expand Down Expand Up @@ -267,7 +257,6 @@ def upload(self):
raise InvalidArgumentError("Cannot upload test benchmarks.")
body = self.todict()
updated_body = config.comms.upload_benchmark(body)
updated_body["models"] = body["models"]
return updated_body

def display_dict(self):
@@ -279,7 +268,6 @@ def display_dict(self):
"Created At": self.created_at,
"Data Preparation MLCube": int(self.data_preparation_mlcube),
"Reference Model MLCube": int(self.reference_model_mlcube),
"Associated Models": ",".join(map(str, self.models)),
"Data Evaluator MLCube": int(self.data_evaluator_mlcube),
"State": self.state,
"Approval Status": self.approval_status,
1 change: 1 addition & 0 deletions cli/medperf/entities/result.py
@@ -27,6 +27,7 @@ class Result(Entity, Uploadable, MedperfSchema, ApprovableSchema):
dataset: int
results: dict
metadata: dict = {}
user_metadata: dict = {}

def __init__(self, *args, **kwargs):
"""Creates a new result instance"""
2 changes: 1 addition & 1 deletion cli/medperf/entities/schemas.py
@@ -79,6 +79,7 @@ class MedperfSchema(MedperfBaseSchema):
id: Optional[int]
name: str = Field(..., max_length=64)
owner: Optional[int]
is_valid: bool = True
created_at: Optional[datetime]
modified_at: Optional[datetime]

@@ -92,7 +93,6 @@ def name_max_length(cls, v, *, values, **kwargs):
class DeployableSchema(BaseModel):
# TODO: This must change after allowing edits
state: str = "OPERATION"
is_valid: bool = True


class ApprovableSchema(BaseModel):
12 changes: 7 additions & 5 deletions cli/medperf/tests/commands/association/test_priority.py
@@ -21,18 +21,18 @@ def func(benchmark_uid, mlcube_uid, priority):
return func


def get_benchmark_models_behavior(associations):
def get_benchmark_model_associations_behavior(associations):
def func(benchmark_uid):
return [assoc["model_mlcube"] for assoc in associations]
return associations

return func


def setup_comms(mocker, comms, associations):
mocker.patch.object(
comms,
"get_benchmark_models",
side_effect=get_benchmark_models_behavior(associations),
"get_benchmark_model_associations",
side_effect=get_benchmark_model_associations_behavior(associations),
)
mocker.patch.object(
comms,
Expand All @@ -49,7 +49,9 @@ def setup(request, mocker, comms):


@pytest.mark.parametrize(
"setup", [{"associations": TEST_ASSOCIATIONS}], indirect=True,
"setup",
[{"associations": TEST_ASSOCIATIONS}],
indirect=True,
)
class TestRun:
@pytest.fixture(autouse=True)