diff --git a/README.md b/README.md index 7639b26..fb7f9bd 100644 --- a/README.md +++ b/README.md @@ -67,9 +67,7 @@ The steps above will automatically install the Python library [eodag](https://eo Before running SADASADAM, eodag needs to be configured (see [eodag documentation](https://eodag.readthedocs.io/en/stable/getting_started_guide/configure.html)). The eodag config file needs to be filled with credentials for satellite data providers. SADASADAM calls eodag to download only Sentinel-2 and Landsat-8/9 Level 1C data. Therefore, providing credentials to the `cop_dataspace` and `usgs` sections of the eodag config file -is recommended. In order to make the downloaded data accessible to FORCE, -**the download path `outputs_prefix` of the eodag config file needs to be defined in the SADASADAM config file parameter `download_dir` as well** -(see below). It is recommended to define `extract: False` in the eodag config file as SADASADAM automatically extracts the downloaded data according to the input requirements of FORCE. +is recommended. It is recommended to define `extract: False` in the eodag config file as SADASADAM automatically extracts the downloaded data according to the input requirements of FORCE. A priority of providers can be defined in the eodag config file. We noticed the unexpected behaviour that download of Sentinel-2 from `cop_dataspace` fails (error related to `peps` provider credentials), if both `cop_dataspace` and `usgs` have the same priority. @@ -85,7 +83,8 @@ SADASADAM can be executed with one single command, but internally, the script ca ##### Download of satellite data SADASADAM will try to download all Sentinel-2 and Landsat-8/9 Level 1C scenes that match the filter options passed in the SADASADAM config file. -It makes use of user credentials and download paths defined in the eodag config file (see section above). +It makes use of user credentials and download paths defined in the eodag config file (see section above). 
The download path, however, can also be overridden by +the `download_dir` parameter of the SADASADAM config file. All data are extracted, corrupt archives are removed and tried to download again. ##### FORCE processing @@ -133,7 +132,7 @@ cloud_cover: 75 # maximum percentage of cloud cover in scene ##### FORCE & postprocessing options ``` -download_dir: '/path/to/eodag/download_dir' # Path to the download directory defined in the eodag conf file. FORCE will use all valid satellite +download_dir: '/path/to/download_dir' # Path to the download directory. FORCE will use all valid satellite # scenes (extracted Landsat-8/9 and Sentinel-2 in .SAFE format) in this directory as input. temp_force_dir: '/path/to/temp_force_dir' # Path to a directory that can hold intermediate FORCE results. A new FORCE directory with a timestamp will be created here. wvdb_dir: '/path/to/wvdb_dir' # Path to store the water vapor database. This database is required for Landsat processing in FORCE. diff --git a/config_example.yaml b/config_example.yaml index 512399a..70308ba 100644 --- a/config_example.yaml +++ b/config_example.yaml @@ -42,7 +42,7 @@ cloud_cover: 100 # type=str, help='Path to folder where output is stored' output_dir: '/path/to/output/dir/' -# type=str, help='Path to download products. Needs to be the same as defined in the eodag config file' +# type=str, help='Path to download products.' 
download_dir: '/path/to/download/dir/' # type=str, help='Path to folder where FORCE processing is done' diff --git a/pyproject.toml b/pyproject.toml index 9407b6e..7b0e53e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ sadasadam = "sadasadam.cli:main" [project] name = "sadasadam" -version = "0.1.0" +version = "0.1.1" authors = [ { name="Guido Riembauer", email="riembauer@mundialis.de" }, { name="Momen Mawad", email="mawad@mundialis.de" }, diff --git a/sadasadam/cli.py b/sadasadam/cli.py index 69d5bd2..aa049a1 100644 --- a/sadasadam/cli.py +++ b/sadasadam/cli.py @@ -26,10 +26,7 @@ import yaml from sadasadam.force import ForceProcess -from sadasadam.download import ( - download_with_eodag, - extract_and_delete_tar_gz_files, -) +from sadasadam.download import download_and_extract def check_bool(variable): @@ -213,20 +210,17 @@ def main(): "lonmax": east, "latmax": north, } - # start the download process - for product_name in products: - download_with_eodag( - product_type=product_name, - geom=geom, - start_date=start, - end_date=end, - cloudcover=cloud_cover, - ) - + download_and_extract( + products=products, + geom=geom, + start_date=start, + end_date=end, + cloudcover=cloud_cover, + download_dir=download_dir, + ) # Start FORCE if download_only is False: - extract_and_delete_tar_gz_files(download_dir) print("Setting up FORCE processing...") # start FORCE process force_proc = ForceProcess( diff --git a/sadasadam/download.py b/sadasadam/download.py index 06c6814..44e0cd8 100644 --- a/sadasadam/download.py +++ b/sadasadam/download.py @@ -23,43 +23,52 @@ import os import shutil +import zipfile from eodag import EODataAccessGateway def download_with_eodag( - product_type, geom, start_date, end_date, cloudcover=100 + product_type, geom, start_date, end_date, download_dir, cloudcover=100 ): """Function to download satellite data using eodag library""" # initialize eodag dag = EODataAccessGateway() # search for products - - search_results, total_count = 
dag.search( - productType=product_type, - # accepts WKT polygons, shapely.geometry, ... - geom=geom, - start=start_date, - end=end_date, - # Set cloud cover - cloudCover=cloudcover, - raise_errors=True, - ) + items_per_page = 20 + search_kwargs = { + "items_per_page": items_per_page, + "productType": product_type, + "geom": geom, + "start": start_date, + "end": end_date, + "cloudCover": cloudcover, + } + search_results = dag.search_all(**search_kwargs) + num_results = len(search_results) print( - f"Found {total_count} matching scenes of type {product_type}, " - "starting download..." + f"Found {num_results} matching scenes " + f"of type {product_type}, starting download..." ) - dag.download_all(search_results) + dag.download_all(search_results, outputs_prefix=download_dir) def extract_and_delete_tar_gz_files(directory): """ - Function to extract .tar.gz files recursively from a directory - and delete them + Function to extract .tar.gz and .SAFE.zip files + recursively from a directory and delete them """ + corrupt_files = [] for file in os.listdir(directory): - if file.endswith((".SAFE.zip", ".tar.gz")): + if file.endswith((".SAFE.zip", ".tar.gz", ".SAFE")): file_path = os.path.join(directory, file) + warning_text = ( + "Warning: - " + f"Unable to extract: {file_path}. " + "Retrying Download..." 
+ ) + landsat_extract_dir = None + remove = True try: if file.endswith(".tar.gz"): landsat_extract_dir_name = file.split(".")[0] @@ -75,18 +84,77 @@ def extract_and_delete_tar_gz_files(directory): directory, landsat_extract_dir_name ) - # Extract the .tar.gz file to the created directory - shutil.unpack_archive( - file_path, extract_dir=landsat_extract_dir - ) + target_dir = landsat_extract_dir + unpack = True elif file.endswith(".SAFE.zip"): - shutil.unpack_archive(file_path, extract_dir=directory) - # Delete the .tar.gz file after extraction - os.remove(file_path) + zfile = zipfile.ZipFile(file_path) + zfile_test = zfile.testzip() + if zfile_test is not None: + print(warning_text) + corrupt_files.append(file_path) + unpack = False + else: + target_dir = directory + unpack = True + + elif file.endswith(".SAFE"): + # this should fail if the .SAFE is a corrupt + # downloaded file and not previously extracted + os.listdir(file_path) + unpack = False + remove = False + + if unpack is True: + shutil.unpack_archive(file_path, extract_dir=target_dir) + # Delete file after extraction + if remove is True: + os.remove(file_path) except Exception as exception: + print(f"{warning_text}: {exception}") + corrupt_files.append(file_path) + os.remove(file_path) + if landsat_extract_dir: + shutil.rmtree(landsat_extract_dir) + continue + + return corrupt_files + + +def download_and_extract( + products, + geom, + start_date, + end_date, + download_dir, + cloudcover=100, + max_tries=3, +): + """ + Function to download satellite data using eodag library, extract, + and retry download if files are corrupt + """ + run_download = True + count = 0 + while run_download is True: + for product_name in products: + download_with_eodag( + product_type=product_name, + geom=geom, + start_date=start_date, + end_date=end_date, + cloudcover=cloudcover, + download_dir=download_dir, + ) + corrupt_files = extract_and_delete_tar_gz_files(download_dir) + if len(corrupt_files) == 0: + run_download = False 
+ count += 1 + if count == max_tries: + run_download = False + if len(corrupt_files) > 0: print( - f"Warning: {exception} - " - "Unable to extract or delete: {file_path}" + f"Scene/s {'; '.join(corrupt_files)} seem to be " + f"corrupt even after {max_tries} downloads. " + "Files are removed and processing continues without them" ) - continue