diff --git a/.github/workflows/unit.yaml b/.github/workflows/unit.yaml index c16e3c5..beccc9d 100644 --- a/.github/workflows/unit.yaml +++ b/.github/workflows/unit.yaml @@ -88,3 +88,4 @@ jobs: file: ./coverage.xml token: ${{ secrets.CODECOV_TOKEN }} verbose: true + files: ./coverage.xml diff --git a/docs/usage.rst b/docs/usage.rst index 4293aed..801f00c 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -85,23 +85,115 @@ Finally you need to configure the ``pytest`` execution environment itself. Add t You are now ready to make API calls within your tests! +Generate a test file tree in GEE +-------------------------------- + +Using the ``pytest_gee`` plugin, you can easily generate a test file tree in GEE that will be used to run your tests. +This tree will start in a folder named with the ``gee_hash`` fixture and will be deleted at the end of the test session. + +By using this method you will ensure that the folder you are using for your test is unique and that it will not interfere with other tests (e.g. parallel tests). + +.. code-block:: python + + # test_something.py + + def test_something(gee_hash, gee_folder_root, gee_test_folder): + # this folder exists within your GEE account and will be deleted at the end of the test session + print(gee_test_folder) + +Customize the test folder tree +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default the test folder tree is empty and will be deleted at the end of the test session. +You can decide to populate it with some assets that will be used in your tests. + +To do so customize the ``gee_folder_structure`` fixture in your ``conftest.py`` file. +This fixture is a ``dict`` that will be used to create the folder tree in GEE. As shown in the following example you can add subfolders and assets to this tree. +Assets need to be ``ee.Image`` or ``ee.FeatureCollection`` objects and remain small as the creation operation is taken care of by the plugin. 
+Specifically for ``ee.Image`` objects, please use the ``clipToBoundsAndScale`` method to make sure the asset has a geometry and a scale. + +.. code-block:: python + + # conftest.py + + import pytest + + @pytest.fixture(scope="session") + def gee_folder_structure(): + """Override the default test folder structure.""" + point = ee.Geometry.Point([0, 0]) + return { + "folder": { + "image": ee.Image(1).clipToBoundsAndScale(point.buffer(100), scale=30), + "fc": ee.FeatureCollection(point), + } + } + +Which will render in your GEE account as: + +.. code-block:: + + 8d98a5be574041a6a54d6def9d915c67/ + └── folder/ + ├── fc (FeatureCollection) + └── image (Image) + +Customize the root folder +^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default the test folder will be created at the root of the user account. There are situations where one could prefer to store it in a specific folder. + +To do so customize the ``gee_folder_root`` fixture in your ``conftest.py`` file, simply return the asset id of the folder you want to use as root. + +.. code-block:: python + + # conftest.py + + import pytest + + @pytest.fixture(scope="session") + def gee_folder_root(): + """Override the default test folder root.""" + return "users/username/my_root_folder" + +.. note:: + + This is compulsory if you use a service account to connect to GEE as the service account has no associated root folder. + Create assets ------------- Most of tests pipelines are checking different python versions in parallel which can create multiple issues from a GEE perspective: - The assets names need to be unique -- the tasks names need also to be unique +- The tasks names need also to be unique -To avoid this issue, the plugin is shipped with a session wise unique hex fixture that can be used to suffix or prefix your assets and tasks names. +To avoid this issue, the plugin is shipped with a session-wise unique hex fixture ``gee_hash`` that can be used to suffix or prefix your assets and tasks names. 
+To make sure the asset exists when you run your tests, you can use the ``pytest_gee.wait`` method to wait until the asset is effectively generated. .. code-block:: python # test.py import pytest + import pytest_gee def test_create_asset(gee_hash): + # create an asset name asset_name = f"asset_{gee_hash}" + + # export an object to this asset + task = ee.batch.Export.image.toAsset( + image=ee.Image(1), + description=asset_name, + assetId=asset_name, + scale=1, + maxPixels=1e9, + ) + task.start() + + # wait for the asset to be created + pytest_gee.wait(task) + # Do something with the asset name diff --git a/pytest_gee/__init__.py b/pytest_gee/__init__.py index cad63f5..af18d2a 100644 --- a/pytest_gee/__init__.py +++ b/pytest_gee/__init__.py @@ -1,10 +1,15 @@ """The init file of the package.""" +from __future__ import annotations + import os from pathlib import Path +from typing import Union import ee import httplib2 +from pytest_gee import utils + __version__ = "0.2.0" __author__ = "Pierrick Rambaud" __email__ = "pierrick.rambaud49@gmail.com" @@ -35,3 +40,18 @@ def init_ee_from_token(): # if the user is in local development the authentication should # already be available ee.Initialize(http_transport=httplib2.Http()) + + +def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 60) -> str: + """Wait until the selected process is finished or we reached timeout value. + + Args: + task: name of the running task or the Task object itself. + timeout: timeout in seconds. if set to 0 the parameter is ignored. default to 5 minutes. 
def wait(task: Union[ee.batch.Task, str], timeout: int = 10 * 60) -> str:
    """Wait until the selected task is finished or the timeout is reached.

    Args:
        task: description of the running task or the Task object itself.
        timeout: timeout in seconds. If set to 0 the timeout is ignored and the
            function blocks until the task reaches a final state. Defaults to 10 minutes.

    Returns:
        the last observed state of the task

    Raises:
        AssertionError: if ``task`` is a description and no matching task is found.
    """
    # give 5 seconds of delay to GEE to make sure the task is created
    time.sleep(5)

    # resolve the task object when only its description was provided
    task = task if isinstance(task, ee.batch.Task) else get_task(task)
    if task is None:
        # raised explicitly (same exception type as the former ``assert``) so the
        # check is not stripped when Python runs with ``-O``
        raise AssertionError("The task is not found")
    state = "UNSUBMITTED"

    # poll every 5s to check the task state. This is blocking the Python interpreter.
    # ``timeout == 0`` disables the time limit, as documented.
    start_time = time.time()
    while timeout == 0 or time.time() - start_time < timeout:
        time.sleep(5)
        state = task.status()["state"]
        # stop on any final state; a cancelled task will never complete, so
        # polling it would only burn the whole timeout (or never return)
        if state in ("COMPLETED", "FAILED", "CANCELLED"):
            break

    return state


def get_task(task_descripsion: str) -> Optional[ee.batch.Task]:
    """Search the user task list for a task matching the description.

    Args:
        task_descripsion: the description of the task to look for

    Returns:
        the first task of ``ee.batch.Task.list()`` whose description matches,
        None if nothing is found
    """
    for task in ee.batch.Task.list():
        if task.config["description"] == task_descripsion:
            return task

    return None


def get_assets(folder: Union[str, Path]) -> List[dict]:
    """Get all the assets from the parameter folder. Every nested asset is included.

    Args:
        folder: the initial GEE folder

    Returns:
        the asset list. Each asset is the dict returned by ``ee.data.listAssets``;
        this module reads its 'type', 'name' and 'id' keys.
    """
    # GEE only understands unix style separators
    folder = folder if isinstance(folder, str) else folder.as_posix()
    asset_list: List[dict] = []

    # depth-first walk: a folder is listed right before its own content
    def _recursive_get(parent: str) -> None:
        # NOTE(review): an empty folder may be answered without an "assets" key
        # depending on the earthengine-api version - confirm; ``.get`` keeps the
        # walk safe either way
        for asset in ee.data.listAssets({"parent": parent}).get("assets", []):
            asset_list.append(asset)
            if asset["type"] == "FOLDER":
                _recursive_get(asset["name"])

    _recursive_get(folder)

    return asset_list


def export_asset(
    object: ee.ComputedObject, asset_id: Union[str, Path], description: str
) -> PurePosixPath:
    """Export assets to the GEE platform, only working for very simple objects.

    Args:
        object: the object to export (``ee.Image`` or ``ee.FeatureCollection``)
        asset_id: the name of the asset to create
        description: the description of the task

    Returns:
        the path of the created asset

    Raises:
        ValueError: if ``object`` is neither an ``ee.Image`` nor an ``ee.FeatureCollection``.
    """
    # convert the asset_id to a string, note that GEE only supports unix style separator
    asset_id = asset_id if isinstance(asset_id, str) else asset_id.as_posix()

    if isinstance(object, ee.FeatureCollection):
        task = ee.batch.Export.table.toAsset(
            collection=object,
            description=description,
            assetId=asset_id,
        )
    elif isinstance(object, ee.Image):
        task = ee.batch.Export.image.toAsset(
            region=object.geometry(),
            image=object,
            description=description,
            assetId=asset_id,
        )
    else:
        raise ValueError("Only ee.Image and ee.FeatureCollection are supported")

    # launch the task and wait for the end of exportation.
    # The task object itself is handed over: looking it up again by description
    # could pick another task sharing the same description.
    task.start()
    wait(task)

    return PurePosixPath(asset_id)


def init_tree(
    structure: dict, prefix: str, root: Union[str, Path, PurePosixPath]
) -> PurePosixPath:
    """Create an EarthEngine folder tree from a dictionary.

    The input dictionary should describe the structure of the folder you want to create.
    The keys are the folder names and the values are the subfolders.
    Once you reach an ``ee.FeatureCollection`` and/or an ``ee.Image`` set it in the
    dictionary and the function will export the object.

    Args:
        structure: the structure of the folder to create
        prefix: the name of the created test folder, also used to prefix the task descriptions
        root: the folder where to create the test folder. If empty or None, the first
            asset root of the connected account is used.

    Returns:
        the path of the created folder

    Examples:
        >>> structure = {
        ...    "folder_1": {
        ...        "image": ee.Image(1),
        ...        "fc": ee.FeatureCollection(ee.Geometry.Point([0, 0])),
        ...    },
        ... }
        ... init_tree(structure, "toto", "users/username/my_root_folder")
    """
    # recursive function to create the folder tree
    def _recursive_create(structure, prefix, folder):
        for name, content in structure.items():
            asset_id = PurePosixPath(folder) / name
            description = f"{prefix}_{name}"
            if isinstance(content, dict):
                ee.data.createAsset({"type": "FOLDER"}, str(asset_id))
                _recursive_create(content, prefix, asset_id)
            else:
                export_asset(content, asset_id, description)

    # honour the requested root and only fall back to the account root when none is given
    if not root:
        root = ee.data.getAssetRoots()[0]["id"]
    # GEE only supports unix style separators, whatever the local path flavour
    root = root if isinstance(root, str) else root.as_posix()

    # create the root folder
    root_folder = PurePosixPath(root) / prefix
    ee.data.createAsset({"type": "FOLDER"}, str(root_folder))

    # start the recursive function
    _recursive_create(structure, prefix, root_folder)

    return root_folder


def delete_assets(asset_id: Union[str, Path], dry_run: bool = True) -> list:
    """Delete the selected asset and all its content.

    This method will delete all the files and folders existing in an asset folder.
    By default a dry run will be launched and if you are satisfied with the returned
    names, change the ``dry_run`` variable to ``False``.
    No other warning will be displayed.

    .. warning::

        If this method is used on the root directory you will lose all your data, it's
        highly recommended to use a dry run first and carefully review the destroyed files.

    Args:
        asset_id: the Id of the asset or a folder
        dry_run: whether or not a dry run should be launched. A dry run only collects
            the asset names without deleting them. Any truthy value is a dry run.

    Returns:
        a list of all the files deleted or to be deleted
    """
    # convert the asset_id to a string
    asset_id = asset_id if isinstance(asset_id, str) else asset_id.as_posix()

    # every visited asset is recorded; it is only deleted when not in dry run
    output = []

    def delete(name: str) -> None:
        output.append(name)
        if not dry_run:
            ee.data.deleteAsset(name)

    # identify the type of asset
    asset_info = ee.data.getAsset(asset_id)

    if asset_info["type"] == "FOLDER":
        # the more nested assets must be deleted first; the sort is stable so
        # assets of the same depth keep their listing order
        nested_first = sorted(
            get_assets(folder=asset_id),
            key=lambda asset: len(asset["id"].split("/")),
            reverse=True,
        )
        for asset in nested_first:
            delete(asset["name"])

    # delete the initial folder/asset
    delete(asset_id)

    return output
def test_structure(gee_folder_structure):
    """Test the structure fixture."""
    # the fixture must expose a "folder" entry holding an "image" and a "fc" item
    assert isinstance(gee_folder_structure, dict)
    assert "folder" in gee_folder_structure
    assert "image" in gee_folder_structure["folder"]
    assert "fc" in gee_folder_structure["folder"]


def test_init_tree(gee_folder_root, gee_test_folder):
    """Test the init_tree function."""
    # list the name of every asset nested under the root folder
    asset_list = pytest_gee.utils.get_assets(gee_folder_root)
    asset_list = [i["name"] for i in asset_list]

    # identify specific files and folders
    folder = gee_test_folder / "folder"
    image = folder / "image"
    feature_collection = folder / "fc"

    # check that they exist
    assert str(gee_test_folder) in asset_list
    assert str(folder) in asset_list
    assert str(image) in asset_list
    assert str(feature_collection) in asset_list