From 21664c7d9b0ed38cc9247f3cd00702a3b6e6a07c Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Fri, 15 Dec 2023 15:51:25 +0000 Subject: [PATCH 01/11] feat: create utility functions for GEE object manipulation --- pytest_gee/__init__.py | 89 ++++++++++++++++++++++++++++++++++++++++++ pytest_gee/plugin.py | 2 + pytest_gee/utils.py | 54 +++++++++++++++++++++++++ 3 files changed, 145 insertions(+) create mode 100644 pytest_gee/utils.py diff --git a/pytest_gee/__init__.py b/pytest_gee/__init__.py index cad63f5..abb508a 100644 --- a/pytest_gee/__init__.py +++ b/pytest_gee/__init__.py @@ -1,10 +1,16 @@ """The init file of the package.""" +from __future__ import annotations + import os +from datetime import time from pathlib import Path +from typing import Union import ee import httplib2 +from pytest_gee.utils import get_assets, get_task + __version__ = "0.2.0" __author__ = "Pierrick Rambaud" __email__ = "pierrick.rambaud49@gmail.com" @@ -35,3 +41,86 @@ def init_ee_from_token(): # if the user is in local development the authentication should # already be available ee.Initialize(http_transport=httplib2.Http()) + + +def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 60) -> str: + """Wait until the selected process is finished or we reached timeout value. + + Args: + task: name of the running task or the Task object itself. + timeout: timeout in seconds. if set to 0 the parameter is ignored. default to 5 minutes. + + Returns: + the final state of the task + """ + # give 5 seconds of delay to GEE to make sure the task is created + time.sleep(5) + + # init both the task object and the state + task = task if isinstance(task, ee.batch.Task) else get_task(task) + state = "UNSUBMITTED" + + # loop every 5s to check the task state. 
This is blocking the Python interpreter + start_time = time.time() + while state != "COMPLETED" and time.time() - start_time < timeout: + time.sleep(5) + state = task.state + if state == "FAILED": + break + + return state + + +def delete_assets(asset_id: str, dry_run: bool = True) -> list: + """Delete the selected asset and all its content. + + This method will delete all the files and folders existing in an asset folder. + By default a dry run will be launched and if you are satisfyed with the displayed names, change the ``dry_run`` variable to ``False``. + No other warnng will be displayed. + + .. warning:: + + If this method is used on the root directory you will loose all your data, it's highly recommended to use a dry run first and carefully review the destroyed files. + + Args: + asset_id: the Id of the asset or a folder + dry_run: whether or not a dry run should be launched. dry run will only display the files name without deleting them. + + Returns: + a list of all the files deleted or to be deleted + """ + # define a delete function to change the behaviour of the method depending of the mode + # in dry mode, the function only store the assets to be destroyed as a dictionary. + # in non dry mode, the function store the asset names in a dictionary AND delete them. 
+ output = [] + + def delete(id: str): + output.append(id) + dry_run is True or ee.data.deleteAsset(id) + + # identify the type of asset + asset_info = ee.data.getAsset(asset_id) + + if asset_info["type"] == "FOLDER": + + # get all the assets + asset_list = get_assets(folder=asset_id) + + # split the files by nesting levels + # we will need to delete the more nested files first + assets_ordered = {} + for asset in asset_list: + lvl = len(asset["id"].split("/")) + assets_ordered.setdefault(lvl, []) + assets_ordered[lvl].append(asset) + + # delete all items starting from the more nested one but not folders + assets_ordered = dict(sorted(assets_ordered.items(), reverse=True)) + for lvl in assets_ordered: + for i in assets_ordered[lvl]: + delete(i["name"]) + + # delete the initial folder/asset + delete(asset_id) + + return output diff --git a/pytest_gee/plugin.py b/pytest_gee/plugin.py index 558ece8..96f0985 100644 --- a/pytest_gee/plugin.py +++ b/pytest_gee/plugin.py @@ -1,4 +1,6 @@ """A pytest plugin to build a GEE environment for a test session.""" +from __future__ import annotations + import uuid import pytest diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py new file mode 100644 index 0000000..4084008 --- /dev/null +++ b/pytest_gee/utils.py @@ -0,0 +1,54 @@ +"""functions used to build the API that we don't want to expose to end users. + +.. danger:: + + This module is for internal use only and should not be used directly. +""" +from __future__ import annotations + +from pathlib import Path +from typing import List, Optional, Union + +import ee + + +def get_task(task_descripsion: str) -> Optional[ee.batch.Task]: + """Search for the described task in the user Task list return None if nothing is found. 
+ + Args: + task_descripsion: the task description + + Returns: + return the found task else None + """ + task = None + for t in ee.batch.Task.list(): + if t.config["description"] == task_descripsion: + task = t + break + + return task + + +def get_assets(folder: Union[str, Path]) -> List[dict]: + """Get all the assets from the parameter folder. every nested asset will be displayed. + + Args: + folder: the initial GEE folder + + Returns: + the asset list. each asset is a dict with 3 keys: 'type', 'name' and 'id' + """ + # set the folder and init the list + asset_list = [] + folder = str(folder) + + # recursive function to get all the assets + def _recursive_get(folder, asset_list): + for asset in ee.data.listAssets({"parent": folder})["assets"]: + asset_list.append(asset) + if asset["type"] == "FOLDER": + asset_list = _recursive_get(asset["name"], asset_list) + return asset_list + + return _recursive_get(folder, asset_list) From 0c50e49b0645e59fc9bbd9616397c27a368ab5bd Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Fri, 15 Dec 2023 16:58:24 +0000 Subject: [PATCH 02/11] feat: build a tree within GEE --- pytest_gee/__init__.py | 69 +++++++++++++++++++++++++++++----------- pytest_gee/plugin.py | 7 ++++ pytest_gee/utils.py | 63 ++++++++++++++++++++++++++++++++++++ tests/conftest.py | 16 ++++++++++ tests/test_pytest_gee.py | 19 +++++++++++ 5 files changed, 155 insertions(+), 19 deletions(-) diff --git a/pytest_gee/__init__.py b/pytest_gee/__init__.py index abb508a..a494d5d 100644 --- a/pytest_gee/__init__.py +++ b/pytest_gee/__init__.py @@ -2,14 +2,13 @@ from __future__ import annotations import os -from datetime import time from pathlib import Path from typing import Union import ee import httplib2 -from pytest_gee.utils import get_assets, get_task +from pytest_gee import utils __version__ = "0.2.0" __author__ = "Pierrick Rambaud" @@ -53,22 +52,9 @@ def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 60) -> str: Returns: the final state of the task """ 
- # give 5 seconds of delay to GEE to make sure the task is created - time.sleep(5) - - # init both the task object and the state - task = task if isinstance(task, ee.batch.Task) else get_task(task) - state = "UNSUBMITTED" - - # loop every 5s to check the task state. This is blocking the Python interpreter - start_time = time.time() - while state != "COMPLETED" and time.time() - start_time < timeout: - time.sleep(5) - state = task.state - if state == "FAILED": - break - - return state + # just expose the utils function + # this is compulsory as wait is also needed in the utils module + return utils.wait(task, timeout) def delete_assets(asset_id: str, dry_run: bool = True) -> list: @@ -104,7 +90,7 @@ def delete(id: str): if asset_info["type"] == "FOLDER": # get all the assets - asset_list = get_assets(folder=asset_id) + asset_list = utils.get_assets(folder=asset_id) # split the files by nesting levels # we will need to delete the more nested files first @@ -124,3 +110,48 @@ def delete(id: str): delete(asset_id) return output + + +def init_tree(structure: dict, prefix: str, account_root: str) -> Path: + """Create an EarthEngine folder tree from a dictionary. + + The input ditionary should described the structure of the folder you want to create. + The keys are the folder names and the values are the subfolders. + Once you reach an ``ee.FeatureCollection`` and/or an ``ee.Image`` set it in the dictionary and the function will export the object. + + Args: + structure: the structure of the folder to create + prefix: the prefix to use on every item (folder, tasks, asset_id, etc.) + account_root: the root folder of the test where to create the test folder. + + Returns: + the path of the created folder + + Examples: + >>> structure = { + ... "folder_1": { + ... "image": ee.image(1), + ... "fc": ee.FeatureCollection(ee.Geometry.Point([0, 0])), + ... }, + ... } + ... 
init_tree(structure, "toto") + """ + # recursive function to create the folder tree + def _recursive_create(structure, prefix, folder): + for name, content in structure.items(): + if isinstance(content, dict): + loc_folder = f"{folder}/{prefix}_{name}" + ee.data.createAsset({"type": "FOLDER"}, loc_folder) + _recursive_create(content, prefix, loc_folder) + else: + utils.export_asset(content, Path(folder) / f"{prefix}_{name}") + + # create the root folder + account_root = ee.data.getAssetRoots()[0]["id"] + root_folder = f"{account_root}/{prefix}" + root_folder = ee.data.createAsset({"type": "FOLDER"}, root_folder) + + # start the recursive function + _recursive_create(structure, prefix) + + return Path(root_folder) diff --git a/pytest_gee/plugin.py b/pytest_gee/plugin.py index 96f0985..5c35eda 100644 --- a/pytest_gee/plugin.py +++ b/pytest_gee/plugin.py @@ -3,6 +3,7 @@ import uuid +import ee import pytest @@ -10,3 +11,9 @@ def gee_hash(): """Generate a unique hash for the test session.""" return uuid.uuid4().hex + + +@pytest.fixture(scope="session") +def account_root(): + """Link to the root folder of the connected account.""" + return ee.data.getAssetRoots()[0]["id"] diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index 4084008..5e290fa 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -6,12 +6,41 @@ """ from __future__ import annotations +from datetime import time from pathlib import Path from typing import List, Optional, Union import ee +def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 60) -> str: + """Wait until the selected process is finished or we reached timeout value. + + Args: + task: name of the running task or the Task object itself. + timeout: timeout in seconds. if set to 0 the parameter is ignored. default to 5 minutes. 
+ + Returns: + the final state of the task + """ + # give 5 seconds of delay to GEE to make sure the task is created + time.sleep(5) + + # init both the task object and the state + task = task if isinstance(task, ee.batch.Task) else get_task(task) + state = "UNSUBMITTED" + + # loop every 5s to check the task state. This is blocking the Python interpreter + start_time = time.time() + while state != "COMPLETED" and time.time() - start_time < timeout: + time.sleep(5) + state = task.state + if state == "FAILED": + break + + return state + + def get_task(task_descripsion: str) -> Optional[ee.batch.Task]: """Search for the described task in the user Task list return None if nothing is found. @@ -52,3 +81,37 @@ def _recursive_get(folder, asset_list): return asset_list return _recursive_get(folder, asset_list) + + +def export_asset(object: ee.ComputedObject, asset_id: Union[str, Path]) -> Path: + """Export assets to the GEE platform, only working for very simple objects. + + ARgs: + object: the object to export + asset_id: the name of the asset to create + + Returns: + the path of the created asset + """ + asset_id = Path(asset_id) + if isinstance(object, ee.FeatureCollection): + task = ee.batch.Export.table.toAsset( + collection=object, + description=asset_id.stem, + assetId=str(asset_id), + ) + elif isinstance(object, ee.Image): + task = ee.batch.Export.image.toAsset( + image=object, + description=asset_id.stem, + assetId=str(asset_id), + bestEffort=True, + ) + else: + raise ValueError("Only ee.Image and ee.FeatureCollection are supported") + + # launch the task and wait for the end of exportation + task.start() + wait(task) + + return asset_id diff --git a/tests/conftest.py b/tests/conftest.py index ff02bd5..661f607 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,6 @@ """Pytest session configuration.""" +import ee +import pytest import pytest_gee @@ -6,3 +8,17 @@ def pytest_configure(): """Init GEE in the test environment.""" 
pytest_gee.init_ee_from_token() + + +@pytest.fixture(scope="session") +def test_folder(gee_hash): + """Create a test folder for the test session.""" + structure = { + "folder": { + "image": ee.Image(1), + "fc": ee.FeatureCollection(ee.Geometry.Point([0, 0])), + } + } + folder = pytest_gee.init_tree(structure, gee_hash) + + return folder diff --git a/tests/test_pytest_gee.py b/tests/test_pytest_gee.py index 02b4777..1d503d7 100644 --- a/tests/test_pytest_gee.py +++ b/tests/test_pytest_gee.py @@ -1,6 +1,8 @@ """Test the pytest_gee package.""" import ee +import pytest_gee + def test_hash_fixture(gee_hash): """Test the hash fixture.""" @@ -11,3 +13,20 @@ def test_hash_fixture(gee_hash): def test_gee_init(): """Test the init_ee_from_token function.""" assert ee.Number(1).getInfo() == 1 + + +def test_init_tree(gee_hash, account_root, test_folder): + """Test the init_tree function.""" + # search all the assets contained in the test_folder + asset_list = pytest_gee.utils.get_assets(account_root) + + # identify specific files and folders + folder = test_folder / f"{gee_hash}_folder" + image = folder / f"{gee_hash}_image" + feature_collection = folder / f"{gee_hash}_fc" + + # check that they exist + assert str(test_folder) in asset_list + assert str(folder) in asset_list + assert str(image) in asset_list + assert str(feature_collection) in asset_list From 2a73bdce9a4cf4c8b98b3440137b300bfa8b1cbc Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Sun, 17 Dec 2023 16:39:57 +0000 Subject: [PATCH 03/11] feat: first working implementation of the test folder --- pytest_gee/__init__.py | 9 +++++---- pytest_gee/utils.py | 13 +++++++------ tests/conftest.py | 13 ++++++++----- tests/test_pytest_gee.py | 1 + 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pytest_gee/__init__.py b/pytest_gee/__init__.py index a494d5d..8d0c146 100644 --- a/pytest_gee/__init__.py +++ b/pytest_gee/__init__.py @@ -57,7 +57,7 @@ def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 
60) -> str: return utils.wait(task, timeout) -def delete_assets(asset_id: str, dry_run: bool = True) -> list: +def delete_assets(asset_id: Union[str, Path], dry_run: bool = True) -> list: """Delete the selected asset and all its content. This method will delete all the files and folders existing in an asset folder. @@ -75,6 +75,7 @@ def delete_assets(asset_id: str, dry_run: bool = True) -> list: Returns: a list of all the files deleted or to be deleted """ + asset_id = str(asset_id) # define a delete function to change the behaviour of the method depending of the mode # in dry mode, the function only store the assets to be destroyed as a dictionary. # in non dry mode, the function store the asset names in a dictionary AND delete them. @@ -94,7 +95,7 @@ def delete(id: str): # split the files by nesting levels # we will need to delete the more nested files first - assets_ordered = {} + assets_ordered: dict = {} for asset in asset_list: lvl = len(asset["id"].split("/")) assets_ordered.setdefault(lvl, []) @@ -149,9 +150,9 @@ def _recursive_create(structure, prefix, folder): # create the root folder account_root = ee.data.getAssetRoots()[0]["id"] root_folder = f"{account_root}/{prefix}" - root_folder = ee.data.createAsset({"type": "FOLDER"}, root_folder) + ee.data.createAsset({"type": "FOLDER"}, root_folder) # start the recursive function - _recursive_create(structure, prefix) + _recursive_create(structure, prefix, root_folder) return Path(root_folder) diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index 5e290fa..608b785 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -6,19 +6,19 @@ """ from __future__ import annotations -from datetime import time +import time from pathlib import Path from typing import List, Optional, Union import ee -def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 60) -> str: +def wait(task: Union[ee.batch.Task, str], timeout: int = 60) -> str: """Wait until the selected process is finished or we reached timeout 
value. Args: task: name of the running task or the Task object itself. - timeout: timeout in seconds. if set to 0 the parameter is ignored. default to 5 minutes. + timeout: timeout in seconds. if set to 0 the parameter is ignored. default to 1 minutes. Returns: the final state of the task @@ -28,6 +28,7 @@ def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 60) -> str: # init both the task object and the state task = task if isinstance(task, ee.batch.Task) else get_task(task) + assert task is not None, "The task is not found" state = "UNSUBMITTED" # loop every 5s to check the task state. This is blocking the Python interpreter @@ -69,7 +70,7 @@ def get_assets(folder: Union[str, Path]) -> List[dict]: the asset list. each asset is a dict with 3 keys: 'type', 'name' and 'id' """ # set the folder and init the list - asset_list = [] + asset_list: list = [] folder = str(folder) # recursive function to get all the assets @@ -102,16 +103,16 @@ def export_asset(object: ee.ComputedObject, asset_id: Union[str, Path]) -> Path: ) elif isinstance(object, ee.Image): task = ee.batch.Export.image.toAsset( + region=object.geometry(), image=object, description=asset_id.stem, assetId=str(asset_id), - bestEffort=True, ) else: raise ValueError("Only ee.Image and ee.FeatureCollection are supported") # launch the task and wait for the end of exportation task.start() - wait(task) + wait(asset_id.stem) return asset_id diff --git a/tests/conftest.py b/tests/conftest.py index 661f607..a587713 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,14 +11,17 @@ def pytest_configure(): @pytest.fixture(scope="session") -def test_folder(gee_hash): +def test_folder(gee_hash, account_root): """Create a test folder for the test session.""" + point = ee.Geometry.Point([0, 0]) structure = { "folder": { - "image": ee.Image(1), - "fc": ee.FeatureCollection(ee.Geometry.Point([0, 0])), + "image": ee.Image(1).clipToBoundsAndScale(point.buffer(100), scale=30), + "fc": 
ee.FeatureCollection(point), } } - folder = pytest_gee.init_tree(structure, gee_hash) + folder = pytest_gee.init_tree(structure, gee_hash, account_root) - return folder + yield folder + + pytest_gee.delete_assets(folder, False) diff --git a/tests/test_pytest_gee.py b/tests/test_pytest_gee.py index 1d503d7..f895311 100644 --- a/tests/test_pytest_gee.py +++ b/tests/test_pytest_gee.py @@ -19,6 +19,7 @@ def test_init_tree(gee_hash, account_root, test_folder): """Test the init_tree function.""" # search all the assets contained in the test_folder asset_list = pytest_gee.utils.get_assets(account_root) + asset_list = [i["name"] for i in asset_list] # identify specific files and folders folder = test_folder / f"{gee_hash}_folder" From cd73e8dda083a98de8eeee0d73c6d15e95d71edf Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Sun, 17 Dec 2023 17:56:19 +0000 Subject: [PATCH 04/11] fix: simplify asset names --- pytest_gee/__init__.py | 9 +++++---- pytest_gee/utils.py | 11 +++++++---- tests/test_pytest_gee.py | 8 ++++---- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/pytest_gee/__init__.py b/pytest_gee/__init__.py index 8d0c146..c19b65f 100644 --- a/pytest_gee/__init__.py +++ b/pytest_gee/__init__.py @@ -140,12 +140,13 @@ def init_tree(structure: dict, prefix: str, account_root: str) -> Path: # recursive function to create the folder tree def _recursive_create(structure, prefix, folder): for name, content in structure.items(): + asset_id = Path(folder) / name + description = f"{prefix}_{name}" if isinstance(content, dict): - loc_folder = f"{folder}/{prefix}_{name}" - ee.data.createAsset({"type": "FOLDER"}, loc_folder) - _recursive_create(content, prefix, loc_folder) + ee.data.createAsset({"type": "FOLDER"}, str(asset_id)) + _recursive_create(content, prefix, asset_id) else: - utils.export_asset(content, Path(folder) / f"{prefix}_{name}") + utils.export_asset(content, asset_id, description) # create the root folder account_root = 
ee.data.getAssetRoots()[0]["id"] diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index 608b785..a8bcc08 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -84,12 +84,15 @@ def _recursive_get(folder, asset_list): return _recursive_get(folder, asset_list) -def export_asset(object: ee.ComputedObject, asset_id: Union[str, Path]) -> Path: +def export_asset( + object: ee.ComputedObject, asset_id: Union[str, Path], description: str +) -> Path: """Export assets to the GEE platform, only working for very simple objects. ARgs: object: the object to export asset_id: the name of the asset to create + description: the description of the task Returns: the path of the created asset @@ -98,14 +101,14 @@ def export_asset(object: ee.ComputedObject, asset_id: Union[str, Path]) -> Path: if isinstance(object, ee.FeatureCollection): task = ee.batch.Export.table.toAsset( collection=object, - description=asset_id.stem, + description=description, assetId=str(asset_id), ) elif isinstance(object, ee.Image): task = ee.batch.Export.image.toAsset( region=object.geometry(), image=object, - description=asset_id.stem, + description=description, assetId=str(asset_id), ) else: @@ -113,6 +116,6 @@ def export_asset(object: ee.ComputedObject, asset_id: Union[str, Path]) -> Path: # launch the task and wait for the end of exportation task.start() - wait(asset_id.stem) + wait(description) return asset_id diff --git a/tests/test_pytest_gee.py b/tests/test_pytest_gee.py index f895311..95de36a 100644 --- a/tests/test_pytest_gee.py +++ b/tests/test_pytest_gee.py @@ -15,16 +15,16 @@ def test_gee_init(): assert ee.Number(1).getInfo() == 1 -def test_init_tree(gee_hash, account_root, test_folder): +def test_init_tree(account_root, test_folder): """Test the init_tree function.""" # search all the assets contained in the test_folder asset_list = pytest_gee.utils.get_assets(account_root) asset_list = [i["name"] for i in asset_list] # identify specific files and folders - folder = test_folder / 
f"{gee_hash}_folder" - image = folder / f"{gee_hash}_image" - feature_collection = folder / f"{gee_hash}_fc" + folder = test_folder / "folder" + image = folder / "image" + feature_collection = folder / "fc" # check that they exist assert str(test_folder) in asset_list From 980e228361c792dd58e28a14e869d5a01317b6b5 Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Sun, 17 Dec 2023 19:29:35 +0000 Subject: [PATCH 05/11] refactor: make the fixture overridable --- pytest_gee/__init__.py | 102 --------------------------------------- pytest_gee/plugin.py | 20 +++++++- pytest_gee/utils.py | 102 +++++++++++++++++++++++++++++++++++++++ tests/conftest.py | 11 ++--- tests/test_pytest_gee.py | 16 ++++-- 5 files changed, 136 insertions(+), 115 deletions(-) diff --git a/pytest_gee/__init__.py b/pytest_gee/__init__.py index c19b65f..af18d2a 100644 --- a/pytest_gee/__init__.py +++ b/pytest_gee/__init__.py @@ -55,105 +55,3 @@ def wait(task: Union[ee.batch.Task, str], timeout: int = 5 * 60) -> str: # just expose the utils function # this is compulsory as wait is also needed in the utils module return utils.wait(task, timeout) - - -def delete_assets(asset_id: Union[str, Path], dry_run: bool = True) -> list: - """Delete the selected asset and all its content. - - This method will delete all the files and folders existing in an asset folder. - By default a dry run will be launched and if you are satisfyed with the displayed names, change the ``dry_run`` variable to ``False``. - No other warnng will be displayed. - - .. warning:: - - If this method is used on the root directory you will loose all your data, it's highly recommended to use a dry run first and carefully review the destroyed files. - - Args: - asset_id: the Id of the asset or a folder - dry_run: whether or not a dry run should be launched. dry run will only display the files name without deleting them. 
- - Returns: - a list of all the files deleted or to be deleted - """ - asset_id = str(asset_id) - # define a delete function to change the behaviour of the method depending of the mode - # in dry mode, the function only store the assets to be destroyed as a dictionary. - # in non dry mode, the function store the asset names in a dictionary AND delete them. - output = [] - - def delete(id: str): - output.append(id) - dry_run is True or ee.data.deleteAsset(id) - - # identify the type of asset - asset_info = ee.data.getAsset(asset_id) - - if asset_info["type"] == "FOLDER": - - # get all the assets - asset_list = utils.get_assets(folder=asset_id) - - # split the files by nesting levels - # we will need to delete the more nested files first - assets_ordered: dict = {} - for asset in asset_list: - lvl = len(asset["id"].split("/")) - assets_ordered.setdefault(lvl, []) - assets_ordered[lvl].append(asset) - - # delete all items starting from the more nested one but not folders - assets_ordered = dict(sorted(assets_ordered.items(), reverse=True)) - for lvl in assets_ordered: - for i in assets_ordered[lvl]: - delete(i["name"]) - - # delete the initial folder/asset - delete(asset_id) - - return output - - -def init_tree(structure: dict, prefix: str, account_root: str) -> Path: - """Create an EarthEngine folder tree from a dictionary. - - The input ditionary should described the structure of the folder you want to create. - The keys are the folder names and the values are the subfolders. - Once you reach an ``ee.FeatureCollection`` and/or an ``ee.Image`` set it in the dictionary and the function will export the object. - - Args: - structure: the structure of the folder to create - prefix: the prefix to use on every item (folder, tasks, asset_id, etc.) - account_root: the root folder of the test where to create the test folder. - - Returns: - the path of the created folder - - Examples: - >>> structure = { - ... "folder_1": { - ... "image": ee.image(1), - ... 
"fc": ee.FeatureCollection(ee.Geometry.Point([0, 0])), - ... }, - ... } - ... init_tree(structure, "toto") - """ - # recursive function to create the folder tree - def _recursive_create(structure, prefix, folder): - for name, content in structure.items(): - asset_id = Path(folder) / name - description = f"{prefix}_{name}" - if isinstance(content, dict): - ee.data.createAsset({"type": "FOLDER"}, str(asset_id)) - _recursive_create(content, prefix, asset_id) - else: - utils.export_asset(content, asset_id, description) - - # create the root folder - account_root = ee.data.getAssetRoots()[0]["id"] - root_folder = f"{account_root}/{prefix}" - ee.data.createAsset({"type": "FOLDER"}, root_folder) - - # start the recursive function - _recursive_create(structure, prefix, root_folder) - - return Path(root_folder) diff --git a/pytest_gee/plugin.py b/pytest_gee/plugin.py index 5c35eda..cf0c5f8 100644 --- a/pytest_gee/plugin.py +++ b/pytest_gee/plugin.py @@ -6,6 +6,8 @@ import ee import pytest +from . 
import utils + @pytest.fixture(scope="session") def gee_hash(): @@ -14,6 +16,22 @@ def gee_hash(): @pytest.fixture(scope="session") -def account_root(): +def gee_folder_root(): """Link to the root folder of the connected account.""" return ee.data.getAssetRoots()[0]["id"] + + +@pytest.fixture(scope="session") +def gee_folder_structure(): + """The structure of the generated test folder.""" + return {} + + +@pytest.fixture(scope="session") +def gee_test_folder(gee_hash, gee_folder_root, gee_folder_structure): + """Create a test folder for the duration of the test session.""" + folder = utils.init_tree(gee_folder_structure, gee_hash, gee_folder_root) + + yield folder + + utils.delete_assets(folder, False) diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index a8bcc08..6872ca5 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -119,3 +119,105 @@ def export_asset( wait(description) return asset_id + + +def init_tree(structure: dict, prefix: str, root: str) -> Path: + """Create an EarthEngine folder tree from a dictionary. + + The input ditionary should described the structure of the folder you want to create. + The keys are the folder names and the values are the subfolders. + Once you reach an ``ee.FeatureCollection`` and/or an ``ee.Image`` set it in the dictionary and the function will export the object. + + Args: + structure: the structure of the folder to create + prefix: the prefix to use on every item (folder, tasks, asset_id, etc.) + root: the root folder of the test where to create the test folder. + + Returns: + the path of the created folder + + Examples: + >>> structure = { + ... "folder_1": { + ... "image": ee.image(1), + ... "fc": ee.FeatureCollection(ee.Geometry.Point([0, 0])), + ... }, + ... } + ... 
init_tree(structure, "toto") + """ + # recursive function to create the folder tree + def _recursive_create(structure, prefix, folder): + for name, content in structure.items(): + asset_id = Path(folder) / name + description = f"{prefix}_{name}" + if isinstance(content, dict): + ee.data.createAsset({"type": "FOLDER"}, str(asset_id)) + _recursive_create(content, prefix, asset_id) + else: + export_asset(content, asset_id, description) + + # create the root folder + root = ee.data.getAssetRoots()[0]["id"] + root_folder = f"{root}/{prefix}" + ee.data.createAsset({"type": "FOLDER"}, root_folder) + + # start the recursive function + _recursive_create(structure, prefix, root_folder) + + return Path(root_folder) + + +def delete_assets(asset_id: Union[str, Path], dry_run: bool = True) -> list: + """Delete the selected asset and all its content. + + This method will delete all the files and folders existing in an asset folder. + By default a dry run will be launched and if you are satisfyed with the displayed names, change the ``dry_run`` variable to ``False``. + No other warnng will be displayed. + + .. warning:: + + If this method is used on the root directory you will loose all your data, it's highly recommended to use a dry run first and carefully review the destroyed files. + + Args: + asset_id: the Id of the asset or a folder + dry_run: whether or not a dry run should be launched. dry run will only display the files name without deleting them. + + Returns: + a list of all the files deleted or to be deleted + """ + asset_id = str(asset_id) + # define a delete function to change the behaviour of the method depending of the mode + # in dry mode, the function only store the assets to be destroyed as a dictionary. + # in non dry mode, the function store the asset names in a dictionary AND delete them. 
+ output = [] + + def delete(id: str): + output.append(id) + dry_run is True or ee.data.deleteAsset(id) + + # identify the type of asset + asset_info = ee.data.getAsset(asset_id) + + if asset_info["type"] == "FOLDER": + + # get all the assets + asset_list = get_assets(folder=asset_id) + + # split the files by nesting levels + # we will need to delete the more nested files first + assets_ordered: dict = {} + for asset in asset_list: + lvl = len(asset["id"].split("/")) + assets_ordered.setdefault(lvl, []) + assets_ordered[lvl].append(asset) + + # delete all items starting from the more nested ones + assets_ordered = dict(sorted(assets_ordered.items(), reverse=True)) + for lvl in assets_ordered: + for i in assets_ordered[lvl]: + delete(i["name"]) + + # delete the initial folder/asset + delete(asset_id) + + return output diff --git a/tests/conftest.py b/tests/conftest.py index a587713..f72300e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,17 +11,12 @@ def pytest_configure(): @pytest.fixture(scope="session") -def test_folder(gee_hash, account_root): - """Create a test folder for the test session.""" +def gee_folder_structure(): + """Override the default test folder structure.""" point = ee.Geometry.Point([0, 0]) - structure = { + return { "folder": { "image": ee.Image(1).clipToBoundsAndScale(point.buffer(100), scale=30), "fc": ee.FeatureCollection(point), } } - folder = pytest_gee.init_tree(structure, gee_hash, account_root) - - yield folder - - pytest_gee.delete_assets(folder, False) diff --git a/tests/test_pytest_gee.py b/tests/test_pytest_gee.py index 95de36a..af18aa8 100644 --- a/tests/test_pytest_gee.py +++ b/tests/test_pytest_gee.py @@ -15,19 +15,27 @@ def test_gee_init(): assert ee.Number(1).getInfo() == 1 -def test_init_tree(account_root, test_folder): +def test_structure(gee_folder_structure): + """Test the structure fixture.""" + assert isinstance(gee_folder_structure, dict) + assert "folder" in gee_folder_structure + assert "image" in 
gee_folder_structure["folder"] + assert "fc" in gee_folder_structure["folder"] + + +def test_init_tree(gee_folder_root, gee_test_folder): """Test the init_tree function.""" # search all the assets contained in the test_folder - asset_list = pytest_gee.utils.get_assets(account_root) + asset_list = pytest_gee.utils.get_assets(gee_folder_root) asset_list = [i["name"] for i in asset_list] # identify specific files and folders - folder = test_folder / "folder" + folder = gee_test_folder / "folder" image = folder / "image" feature_collection = folder / "fc" # check that they exist - assert str(test_folder) in asset_list + assert str(gee_test_folder) in asset_list assert str(folder) in asset_list assert str(image) in asset_list assert str(feature_collection) in asset_list From 53c746f26f3976dd1e2413c07dd3c6cfb59b1b43 Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Mon, 18 Dec 2023 12:46:52 +0000 Subject: [PATCH 06/11] fix: avoid hitting the timeout in the wait function --- pytest_gee/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index 6872ca5..598eab9 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -35,9 +35,11 @@ def wait(task: Union[ee.batch.Task, str], timeout: int = 60) -> str: start_time = time.time() while state != "COMPLETED" and time.time() - start_time < timeout: time.sleep(5) - state = task.state + state = task.status()["state"] if state == "FAILED": break + if state == "COMPLETED": + print("I found the finished operation") return state @@ -97,7 +99,6 @@ def export_asset( Returns: the path of the created asset """ - asset_id = Path(asset_id) if isinstance(object, ee.FeatureCollection): task = ee.batch.Export.table.toAsset( collection=object, @@ -118,7 +119,7 @@ def export_asset( task.start() wait(description) - return asset_id + return Path(asset_id) def init_tree(structure: dict, prefix: str, root: str) -> Path: From 09b7cf81d4f86c0cdf087709a6f0b024a268d605 Mon Sep 
17 00:00:00 2001 From: Pierrick Rambaud Date: Mon, 18 Dec 2023 14:16:08 +0000 Subject: [PATCH 07/11] docs: add the documentation of file creation --- docs/usage.rst | 87 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 4293aed..9ad2fb5 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -85,23 +85,106 @@ Finally you need to configure the ``pytest`` execution environment itself. Add t You are now ready to make API calls within your tests! +Generate a test file tree in GEE +-------------------------------- + +Using the ``pytest_gee`` plugin, you can easily generate a test file tree in GEE that will be used to run your tests. +This tree will start in a folder named with the ``gee_hash`` fixture and will be deleted at the end of the test session. + +By using this method you will ensure that the folder you are using for your test is unique and that it will not interfere with other tests (e.g. parallel tests). + +.. code-block:: python + + # test_something.py + + def test_something(gee_hash, gee_folder_root, gee_test_folder): + # this folder exists within your GEE account and will be deleted at the end of the test session + print(gee_test_folder) + +Customize the test folder tree +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default the test folder tree is empty and will be deleted at the end of the test session. +You can decide to populate it with some assets that will be used in your tests. + +To do so customize the ``gee_folder_structure`` fixture in your ``conftest.py`` file. +This fixture is a ``dict`` that will be used to create the folder tree in GEE. As shown in the following example you can add subfolders and assets to this tree. +Assets need to be ``ee.Image`` or ``ee.FeatureCollection`` objects and should remain small as the creation operation is taken care of by the plugin. 
+Specifically for ``ee.Image`` objects, please use the ``clipToBoundsAndScale`` method to make sure the asset has a geometry and a scale. + +.. code-block:: python + + # conftest.py + + import pytest + + @pytest.fixture(scope="session") + def gee_folder_structure(): + """Override the default test folder structure.""" + point = ee.Geometry.Point([0, 0]) + return { + "folder": { + "image": ee.Image(1).clipToBoundsAndScale(point.buffer(100), scale=30), + "fc": ee.FeatureCollection(point), + } + } + +Customize the root folder +^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default the test folder will be created at the root of the user account. There are situations where one could prefer to store it in a specific folder. + +To do so customize the ``gee_folder_root`` fixture in your ``conftest.py`` file; simply return the asset id of the folder you want to use as root. + +.. code-block:: python + + # conftest.py + + import pytest + + @pytest.fixture(scope="session") + def gee_folder_root(): + """Override the default test folder root.""" + return "users/username/my_root_folder" + +.. note:: + + This is compulsory if you use a service account to connect to GEE as the service account has no associated root folder. + Create assets ------------- Most of tests pipelines are checking different python versions in parallel which can create multiple issues from a GEE perspective: - The assets names need to be unique -- the tasks names need also to be unique +- The tasks names need also to be unique -To avoid this issue, the plugin is shipped with a session wise unique hex fixture that can be used to suffix or prefix your assets and tasks names. +To avoid this issue, the plugin is shipped with a session wise unique hex fixture ``gee_hash`` that can be used to suffix or prefix your assets and tasks names. +To make sure the asset exists when you run your tests, you can use the ``pytest_gee.wait`` method to wait until the asset is effectively generated. .. 
code-block:: python # test.py import pytest + import pytest_gee def test_create_asset(gee_hash): + # create an asset name asset_name = f"asset_{gee_hash}" + + # export the object to this asset + task = ee.batch.Export.image.toAsset( + image=ee.Image(1), + description=asset_name, + assetId=asset_name, + scale=1, + maxPixels=1e9, + ) + task.start() + + # wait for the asset to be created + pytest_gee.wait(task) + # Do something with the asset name From 863b5e7525fcf2868b15dd0fb3dd6d96ef9f6bea Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Mon, 18 Dec 2023 14:58:06 +0000 Subject: [PATCH 08/11] fix: use pureposixPath for GEE --- pytest_gee/plugin.py | 3 ++- pytest_gee/utils.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pytest_gee/plugin.py b/pytest_gee/plugin.py index cf0c5f8..567098a 100644 --- a/pytest_gee/plugin.py +++ b/pytest_gee/plugin.py @@ -2,6 +2,7 @@ from __future__ import annotations import uuid +from pathlib import PurePosixPath import ee import pytest @@ -18,7 +19,7 @@ def gee_hash(): @pytest.fixture(scope="session") def gee_folder_root(): """Link to the root folder of the connected account.""" - return ee.data.getAssetRoots()[0]["id"] + return PurePosixPath(ee.data.getAssetRoots()[0]["id"]) @pytest.fixture(scope="session") diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index 598eab9..4518b90 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -7,7 +7,7 @@ from __future__ import annotations import time -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import List, Optional, Union import ee @@ -38,8 +38,6 @@ def wait(task: Union[ee.batch.Task, str], timeout: int = 60) -> str: state = task.status()["state"] if state == "FAILED": break - if state == "COMPLETED": - print("I found the finished operation") return state @@ -62,7 +60,7 @@ def get_task(task_descripsion: str) -> Optional[ee.batch.Task]: return task -def get_assets(folder: Union[str, Path]) -> 
List[dict]: +def get_assets(folder: Union[str, PurePosixPath]) -> List[dict]: """Get all the assets from the parameter folder. every nested asset will be displayed. Args: @@ -87,8 +85,8 @@ def _recursive_get(folder, asset_list): def export_asset( - object: ee.ComputedObject, asset_id: Union[str, Path], description: str -) -> Path: + object: ee.ComputedObject, asset_id: Union[str, PurePosixPath], description: str +) -> PurePosixPath: """Export assets to the GEE platform, only working for very simple objects. ARgs: @@ -119,7 +117,7 @@ def export_asset( task.start() wait(description) - return Path(asset_id) + return PurePosixPath(asset_id) def init_tree(structure: dict, prefix: str, root: str) -> Path: @@ -149,7 +147,7 @@ def init_tree(structure: dict, prefix: str, root: str) -> Path: # recursive function to create the folder tree def _recursive_create(structure, prefix, folder): for name, content in structure.items(): - asset_id = Path(folder) / name + asset_id = PurePosixPath(folder) / name description = f"{prefix}_{name}" if isinstance(content, dict): ee.data.createAsset({"type": "FOLDER"}, str(asset_id)) @@ -165,10 +163,10 @@ def _recursive_create(structure, prefix, folder): # start the recursive function _recursive_create(structure, prefix, root_folder) - return Path(root_folder) + return PurePosixPath(root_folder) -def delete_assets(asset_id: Union[str, Path], dry_run: bool = True) -> list: +def delete_assets(asset_id: Union[str, PurePosixPath], dry_run: bool = True) -> list: """Delete the selected asset and all its content. This method will delete all the files and folders existing in an asset folder. 
@@ -186,7 +184,9 @@ def delete_assets(asset_id: Union[str, Path], dry_run: bool = True) -> list: Returns: a list of all the files deleted or to be deleted """ + # convert the asset_id to a string asset_id = str(asset_id) + # define a delete function to change the behaviour of the method depending of the mode # in dry mode, the function only store the assets to be destroyed as a dictionary. # in non dry mode, the function store the asset names in a dictionary AND delete them. From 7e44f1f789f9bc7fd0639cb90b605760a07915af Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Mon, 18 Dec 2023 15:37:10 +0000 Subject: [PATCH 09/11] fix: change return hints --- pytest_gee/plugin.py | 4 ++-- pytest_gee/utils.py | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pytest_gee/plugin.py b/pytest_gee/plugin.py index 567098a..9a60864 100644 --- a/pytest_gee/plugin.py +++ b/pytest_gee/plugin.py @@ -2,7 +2,7 @@ from __future__ import annotations import uuid -from pathlib import PurePosixPath +from pathlib import Path import ee import pytest @@ -19,7 +19,7 @@ def gee_hash(): @pytest.fixture(scope="session") def gee_folder_root(): """Link to the root folder of the connected account.""" - return PurePosixPath(ee.data.getAssetRoots()[0]["id"]) + return Path(ee.data.getAssetRoots()[0]["id"]) @pytest.fixture(scope="session") diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index 4518b90..933bda7 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -60,7 +60,7 @@ def get_task(task_descripsion: str) -> Optional[ee.batch.Task]: return task -def get_assets(folder: Union[str, PurePosixPath]) -> List[dict]: +def get_assets(folder: Union[str, Path]) -> List[dict]: """Get all the assets from the parameter folder. every nested asset will be displayed. 
Args: @@ -71,7 +71,7 @@ def get_assets(folder: Union[str, PurePosixPath]) -> List[dict]: """ # set the folder and init the list asset_list: list = [] - folder = str(folder) + folder = folder if isinstance(folder, str) else folder.as_posix() # recursive function to get all the assets def _recursive_get(folder, asset_list): @@ -85,7 +85,7 @@ def _recursive_get(folder, asset_list): def export_asset( - object: ee.ComputedObject, asset_id: Union[str, PurePosixPath], description: str + object: ee.ComputedObject, asset_id: Union[str, Path], description: str ) -> PurePosixPath: """Export assets to the GEE platform, only working for very simple objects. @@ -97,18 +97,21 @@ def export_asset( Returns: the path of the created asset """ + # convert the asset_id to a string note that GEE only supports unix style separator + asset_id = asset_id if isinstance(asset_id, str) else asset_id.as_posix() + if isinstance(object, ee.FeatureCollection): task = ee.batch.Export.table.toAsset( collection=object, description=description, - assetId=str(asset_id), + assetId=asset_id, ) elif isinstance(object, ee.Image): task = ee.batch.Export.image.toAsset( region=object.geometry(), image=object, description=description, - assetId=str(asset_id), + assetId=asset_id, ) else: raise ValueError("Only ee.Image and ee.FeatureCollection are supported") @@ -120,7 +123,7 @@ def export_asset( return PurePosixPath(asset_id) -def init_tree(structure: dict, prefix: str, root: str) -> Path: +def init_tree(structure: dict, prefix: str, root: str) -> PurePosixPath: """Create an EarthEngine folder tree from a dictionary. The input ditionary should described the structure of the folder you want to create. 
@@ -166,7 +169,7 @@ def _recursive_create(structure, prefix, folder): return PurePosixPath(root_folder) -def delete_assets(asset_id: Union[str, PurePosixPath], dry_run: bool = True) -> list: +def delete_assets(asset_id: Union[str, Path], dry_run: bool = True) -> list: """Delete the selected asset and all its content. This method will delete all the files and folders existing in an asset folder. @@ -185,7 +188,7 @@ def delete_assets(asset_id: Union[str, PurePosixPath], dry_run: bool = True) -> a list of all the files deleted or to be deleted """ # convert the asset_id to a string - asset_id = str(asset_id) + asset_id = asset_id if isinstance(asset_id, str) else asset_id.as_posix() # define a delete function to change the behaviour of the method depending of the mode # in dry mode, the function only store the assets to be destroyed as a dictionary. From 2ad10b78586f2b5f328c05c9b3061885d4e17762 Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Mon, 18 Dec 2023 16:16:03 +0000 Subject: [PATCH 10/11] fix: increase the timeout value --- pytest_gee/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest_gee/utils.py b/pytest_gee/utils.py index 933bda7..62775c9 100644 --- a/pytest_gee/utils.py +++ b/pytest_gee/utils.py @@ -13,7 +13,7 @@ import ee -def wait(task: Union[ee.batch.Task, str], timeout: int = 60) -> str: +def wait(task: Union[ee.batch.Task, str], timeout: int = 10 * 60) -> str: """Wait until the selected process is finished or we reached timeout value. 
Args: From fa9e0d9bc8ebc802239fbaa519c98a4d4bf9c9be Mon Sep 17 00:00:00 2001 From: Pierrick Rambaud Date: Mon, 18 Dec 2023 16:29:16 +0000 Subject: [PATCH 11/11] fix: add a tree example --- docs/usage.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index 9ad2fb5..801f00c 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -129,6 +129,15 @@ Specifically for ``ee.Image`` objects, please use the ``clipToBoundsAndScale`` m } } +Which will render in your GEE account as: + +.. code-block:: + + 8d98a5be574041a6a54d6def9d915c67/ + └── folder/ + ├── fc (FeatureCollection) + └── image (Image) + Customize the root folder ^^^^^^^^^^^^^^^^^^^^^^^^^