Server tests #502

Merged (36 commits) on Dec 1, 2023

Changes from 18 commits

Commits:
849b52f
write all tests
hasan7n Nov 20, 2023
152862a
change: remove unused endpoints
hasan7n Nov 20, 2023
a9abb6d
change: fix PUT user permission
hasan7n Nov 20, 2023
21c8cef
change: fix PUT result behaviour
hasan7n Nov 20, 2023
85887ba
change: remove URLs from cube uniqueness criteria
hasan7n Nov 20, 2023
fae91c0
change: apply mlcube rules on both PUT and POST
hasan7n Nov 20, 2023
76b05ab
improve readability
hasan7n Nov 20, 2023
14ecc83
change: modify how associations list works
hasan7n Nov 20, 2023
0195b36
change: fix default approval status
hasan7n Nov 20, 2023
f4ff5df
change: fix approved_at update
hasan7n Nov 20, 2023
7695dab
add notes and TODOs
hasan7n Nov 20, 2023
0710c44
update workflow file
hasan7n Nov 20, 2023
c375225
modify client behaviour for model association list
hasan7n Nov 20, 2023
b3d7f7f
add some documentation for writing tests
hasan7n Nov 20, 2023
2210f69
update workflow file
hasan7n Nov 20, 2023
d87a3b1
update server readme
hasan7n Nov 20, 2023
f89e706
Merge remote-tracking branch 'upstream/main' into server-tests
hasan7n Nov 20, 2023
43430f5
empty
hasan7n Nov 20, 2023
a6e3b51
change: fix mlcube associations list permissions
hasan7n Nov 24, 2023
f043efa
change: modify result PUT and DELETE
hasan7n Nov 24, 2023
239771c
change: fix permissions of approving the benchmark
hasan7n Nov 24, 2023
d06ad11
change: prevent updating a benchmark to PENDING
hasan7n Nov 24, 2023
58e505e
Revert "change: remove unused endpoints"
hasan7n Nov 24, 2023
d480dc8
change: restrict marking benchmark as operation
hasan7n Nov 24, 2023
da4042d
change: demo dataset url is required
hasan7n Nov 24, 2023
e460be7
change: fix delete permissions for associations
hasan7n Nov 24, 2023
74cedbf
change: server check for dataset assoc prep mlcube
hasan7n Nov 24, 2023
63f6361
change: client side new association logic
hasan7n Nov 24, 2023
6aa5ebd
change: make client side demo url required
hasan7n Nov 24, 2023
a528299
cleanup TODOs and related minor changes
hasan7n Nov 24, 2023
6846dd4
fix linter issues
hasan7n Nov 25, 2023
eb36d9e
use a better datetime parser
hasan7n Nov 25, 2023
c7b6578
change integration tests order of model priority
hasan7n Nov 25, 2023
a5ecee2
stop using benchmark.models for permission reasons
hasan7n Nov 25, 2023
1687b34
add Result's new fields to the client
hasan7n Nov 25, 2023
8957464
refactor to fix linter issue
hasan7n Nov 26, 2023
6 changes: 4 additions & 2 deletions .github/workflows/unittests.yml
@@ -22,7 +22,8 @@ jobs:
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f cli/requirements.txt ]; then pip install -e cli; fi
-if [ -f server/requirements.txt ]; then pip install -r server/requirements.txt; fi
+pip install -r server/requirements.txt
+pip install -r server/test-requirements.txt
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
@@ -35,6 +36,7 @@ jobs:
# Ignore E231, as it is raising warnings with auto-generated code.
flake8 . --count --max-complexity=10 --max-line-length=127 --ignore F821,W503,E231 --statistics --exclude=examples/,"*/migrations/*",cli/medperf/templates/
- name: Test with pytest
+working-directory: ./cli
run: |
pytest
- name: Set server environment vars
@@ -45,4 +47,4 @@
run: python manage.py migrate
- name: Run server unit tests
working-directory: ./server
-run: python manage.py test
+run: python manage.py test --parallel
6 changes: 4 additions & 2 deletions cli/medperf/comms/rest.py
@@ -183,8 +183,10 @@ def get_benchmark_models(self, benchmark_uid: int) -> List[int]:
Returns:
list[int]: List of model UIDS
"""
-models = self.__get_list(f"{self.server_url}/benchmarks/{benchmark_uid}/models")
-model_uids = [model["id"] for model in models]
+# TODO: filter to find approved ones only if we decide to have
+# this logic client side
+assocs = self.__get_list(f"{self.server_url}/benchmarks/{benchmark_uid}/models")
+model_uids = [assoc["model_mlcube"] for assoc in assocs]
return model_uids

def get_user_benchmarks(self) -> List[dict]:

Review thread on the new TODO comment:

Contributor: Should we handle filtering within the comms layer, or within the entity layer? I think we've done filtering on the entity layer so far, because we also need to apply filtering to local entities.

Contributor Author: It's tricky for associations, since we don't have an association Entity on the client yet.

Filtering associations requires two steps:

  1. First, find the latest association of each mlcube. Ideally this should live in an association Entity, but for now I think it should live in the comms layer, since the client doesn't actually care about the history of associations. Also, this logic is required in all places in the client code where this comms function is called.
  2. Then, apply ordinary filtering by approval status. Currently this filtering is required in two places:
    a. The `entity.list` command, which already applies filtering in the commands layer.
    b. `benchmark.get_models_uids`, which by definition should return approved associations' model IDs. We can apply filtering inside this function.

I think this will become cleaner if we decide to create an association Entity in our code base.
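(Illustrative sketch, not part of this PR.) If the two-step filtering described in the thread above were ever moved client side, it might look roughly like the helper below. The helper name is made up, and the field names (`model_mlcube`, `approval_status`, `created_at`) are assumptions inferred from the diff and the discussion.

```python
from typing import Dict, List


def filter_latest_approved_models(assocs: List[dict]) -> List[int]:
    """Reduce raw association records to approved model UIDs (sketch only).

    Step 1: keep the latest association per model, ordered by ``created_at``.
    Step 2: keep only models whose latest association is APPROVED.
    """
    latest_per_model: Dict[int, dict] = {}
    for assoc in sorted(assocs, key=lambda a: a["created_at"]):
        # Later records overwrite earlier ones, so after the loop the dict
        # holds the most recent association for each model.
        latest_per_model[assoc["model_mlcube"]] = assoc

    return [
        model_uid
        for model_uid, assoc in latest_per_model.items()
        if assoc["approval_status"] == "APPROVED"
    ]
```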
2 changes: 1 addition & 1 deletion cli/medperf/tests/commands/result/test_create.py
@@ -323,6 +323,6 @@ def test_execution_of_one_model_writes_result(self, mocker, setup):

# Assert
assert (
-yaml.load(open(expected_file))["results"]
+yaml.safe_load(open(expected_file))["results"]
== self.state_variables["models_props"][model_uid]["results"]
)
2 changes: 1 addition & 1 deletion cli/medperf/tests/comms/test_rest.py
@@ -291,7 +291,7 @@ def test_get_benchmarks_calls_benchmarks_path(mocker, server, body):
@pytest.mark.parametrize("exp_uids", [[142, 437, 196], [303, 27, 24], [40, 19, 399]])
def test_get_benchmark_models_return_uids(mocker, server, exp_uids):
# Arrange
body = [{"id": uid} for uid in exp_uids]
body = [{"model_mlcube": uid} for uid in exp_uids]
mocker.patch(patch_server.format("REST._REST__get_list"), return_value=body)

# Act
61 changes: 61 additions & 0 deletions server/README.md
@@ -1,3 +1,64 @@
# Server

Documentation TBD

## Writing Tests
Review comment (Contributor): I love that you added this! Thanks for the documentation Hasan!

Each endpoint must have a test file. The exception is the endpoints defined in the utils folder: a single file contains tests for all of them.

### Naming conventions

A test file in a module is named according to the relative endpoint it tests. For example, the test files for the `/datasets/` and `/benchmarks/` endpoints (POST and GET list) are both named `test_.py`. The test file for the `/results/<pk>/` endpoint is named `test_pk.py`.

### What to keep in mind when testing

Testing an endpoint means testing, for each HTTP method it supports, the following:

- Serializer validation rules (`serializers.py`)
- Database constraints (`models.py`)
- Permissions (referred to in `views.py`)

Testing focuses on actions that are not expected to happen, and less on actions that are allowed. For example, the tests should ensure that an unauthenticated user cannot access an endpoint, but they may not ensure that a certain type of user can edit a certain field.
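(Illustrative sketch, not part of the README added by this PR.) A minimal test of a "not expected to happen" action, such as the unauthenticated access mentioned above, might look roughly like this. The endpoint path and expected status codes are assumptions, and the real tests inherit from `MedPerfTest` (described below) rather than from `APITestCase` directly.

```python
from rest_framework.test import APITestCase


class DatasetListPermissionTest(APITestCase):
    """Sketch: an anonymous request to a protected endpoint must be rejected."""

    def test_unauthenticated_user_cannot_list_datasets(self):
        # No credentials are attached to self.client, so this request is anonymous.
        response = self.client.get("/datasets/")
        # Depending on the authentication class, DRF returns 401 or 403 here.
        self.assertIn(response.status_code, (401, 403))
```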

### How tests should work

Each test class should inherit from `MedPerfTest`, which sets up the local authentication and provides utils to create assets (users, datasets, mlcubes, ...).

Each test class contains at least one test function. Both test classes and test class functions can be parameterized. **Each instance of a parameterized test runs independently**: a fresh database is used and the class's `setUp` method is called before each test execution.
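(Illustrative sketch, not part of the README added by this PR.) Parameterization might look roughly like the following. It assumes the `parameterized` package, that `MedPerfTest` is importable from the project's shared test module, and that `MedPerfTest` authenticates `self.client` (a DRF `APIClient`) during `setUp`; none of these import paths or helpers are confirmed by the diff shown here.

```python
from parameterized import parameterized, parameterized_class

from medperf.tests import MedPerfTest  # assumed import path


@parameterized_class([{"endpoint": "/benchmarks/"}, {"endpoint": "/mlcubes/"}])
class InvalidPayloadTest(MedPerfTest):
    """Each parameterized instance runs with a fresh database and its own setUp call."""

    @parameterized.expand([({},), ({"name": ""},)])
    def test_invalid_body_is_rejected(self, body):
        # Required fields are missing or empty, so the serializer should reject
        # the request with 400 (assuming MedPerfTest authenticated the client).
        response = self.client.post(self.endpoint, body, format="json")
        self.assertEqual(response.status_code, 400)
```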

### Running tests

#### Run the whole test suite

To run the whole test suite, run:

```bash
python manage.py test
```

Use the `--parallel` option to run the tests in parallel:

```bash
python manage.py test --parallel
```

#### Run individual files

You can run individual test files. For example:

```bash
python manage.py test dataset.tests.test_pk
```

#### Run individual tests

Individual test classes or test functions can be run as follows. For example:

```bash
python manage.py test benchmark.tests.test_ -k BenchmarkPostTest
python manage.py test benchmark.tests.test_ -k test_creation_of_duplicate_name_gets_rejected
```

### Debugging tests

The tests are not strict unit tests. For example, the `dataset` test suite relies on `mlcube` functionality, because the `dataset` tests use utils to create a preparation MLCube for the datasets. When debugging, it can be useful to run the test suites in a specific order and to use the `--failfast` option to exit on the first failure. A script is provided for this: `debug_tests.sh`.
28 changes: 28 additions & 0 deletions server/benchmark/permissions.py
@@ -1,5 +1,7 @@
from rest_framework.permissions import BasePermission
from .models import Benchmark
from benchmarkdataset.models import BenchmarkDataset
from django.db.models import OuterRef, Subquery


class IsAdmin(BasePermission):
@@ -25,3 +27,29 @@ def has_permission(self, request, view):
return True
else:
return False


# TODO: check if we need to use such permission
class IsAssociatedDatasetOwner(BasePermission):
def has_permission(self, request, view):
pk = view.kwargs.get("pk", None)
if not pk:
return False

latest_datasets_assocs_status = (
BenchmarkDataset.objects.all()
.filter(benchmark__id=pk, dataset__id=OuterRef("id"))
.order_by("-created_at")[:1]
.values("approval_status")
)

user_associated_datasets = (
request.user.dataset_set.all()
.annotate(assoc_status=Subquery(latest_datasets_assocs_status))
.filter(assoc_status="APPROVED")
)

if user_associated_datasets.exists():
return True
else:
return False
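(Hypothetical usage sketch, not part of this PR, which in fact leaves a TODO about whether this permission is needed at all.) DRF permissions like the one above are normally attached to a view through `permission_classes`. The view name and the particular combination below are made up for illustration; only the `|`/`&` composition itself is standard DRF.

```python
from rest_framework.generics import GenericAPIView
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response

from .permissions import IsAdmin, IsAssociatedDatasetOwner


class BenchmarkModelList(GenericAPIView):
    # The request passes if the user is authenticated AND is either an admin
    # or owns a dataset whose latest association with this benchmark is APPROVED.
    permission_classes = [IsAuthenticated & (IsAdmin | IsAssociatedDatasetOwner)]

    def get(self, request, pk, format=None):
        # Placeholder body; a real view would serialize the benchmark's models.
        return Response([])
```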
7 changes: 7 additions & 0 deletions server/benchmark/serializers.py
@@ -30,12 +30,19 @@ class Meta:
def update(self, instance, validated_data):
for k, v in validated_data.items():
setattr(instance, k, v)
# TODO: the condition below will run even
# if a user edits the benchmark after it gets approved
if instance.approval_status != "PENDING":
instance.approved_at = timezone.now()
instance.save()
return instance

def validate(self, data):
# TODO: fix permissions of approving the benchmark
# TODO: remove the ability to update to PENDING (it just adds complexity?)
# TODO: define what should happen to existing assets when a benchmark
# is rejected after being approved (associations? results? note also
# that results submission doesn't check benchmark's approval status)
owner = self.instance.owner
if "approval_status" in data:
if (
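(Illustrative sketch, not part of this PR.) One way to address the first TODO above would be to stamp `approved_at` only on the actual transition out of `PENDING`, instead of on every save of a non-pending benchmark. The status strings are taken from the surrounding code; everything else is a guess at one possible fix:

```python
from django.utils import timezone


# Sketch of an alternative update() for the serializer above. The status
# values "PENDING" and "APPROVED" appear in the PR's code; the transition
# check itself is illustrative, not part of the PR.
def update(self, instance, validated_data):
    previous_status = instance.approval_status
    for k, v in validated_data.items():
        setattr(instance, k, v)
    # Only stamp approved_at when the status actually leaves PENDING,
    # so later edits of an approved benchmark keep the original timestamp.
    if previous_status == "PENDING" and instance.approval_status == "APPROVED":
        instance.approved_at = timezone.now()
    instance.save()
    return instance
```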
File renamed without changes.