From aafdb32d71e7adc4182943cd9cda87155719e561 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 22 Jul 2020 18:35:25 -0400 Subject: [PATCH 1/7] Started writing GDAL based COG translation --- app/cogeo.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/app/cogeo.py b/app/cogeo.py index 3ec30d011..9cb7ab1d3 100644 --- a/app/cogeo.py +++ b/app/cogeo.py @@ -3,6 +3,8 @@ import tempfile import shutil import rasterio +import re +import subprocess from rio_cogeo.cogeo import cog_validate, cog_translate from rio_tiler.utils import has_alpha_band from webodm import settings @@ -18,12 +20,13 @@ def valid_cogeo(src_path): return cog_validate(src_path, strict=True) -def assure_cogeo(src_path): +def assure_cogeo(src_path, use_legacy=False): """ Guarantee that the .tif passed as an argument is a Cloud Optimized GeoTIFF (cogeo) If the path is not a cogeo, it is destructively converted into a cogeo. If the file cannot be converted, the function does not change the file :param src_path: path to GeoTIFF (cogeo or not) + :param use_legacy: whether to force the use of legacy implementation. By default the best implementation is used. 
:return: None """ @@ -36,6 +39,64 @@ def assure_cogeo(src_path): # Not a cogeo logger.info("Optimizing %s as Cloud Optimized GeoTIFF" % src_path) + + # Check if we have GDAL >= 3.1 + use_legacy = False + gdal_version = get_gdal_version() + if gdal_version: + major, minor, build = gdal_version + + # GDAL 2 and lower + if major <= 2: + use_legacy = True + + # GDAL 3.0 and lower + if major == 3 and minor < 1: + use_legacy = True + else: + # This shouldn't happen + use_legacy = True + + if use_legacy: + logger.info("Using legacy implementation (GDAL >= 3.1 not found)") + make_cogeo_legacy(src_path) + else: + make_cogeo_gdal(src_path) + +def get_gdal_version(): + # Bit of a hack without installing + # python bindings + gdal_translate = shutil.which('gdal_translate') + if not gdal_translate: + return None + + # Get version + version_output = subprocess.check_output("%s --version" % gdal_translate) + + m = re.match(r"GDAL\s+([\d+])\.([\d+])\.([\d+]),\s+released", version_output) + if not m: + return None + + return tuple(map(int, m.groups())) + + +def make_cogeo_gdal(src_path): + """ + Make src_path a Cloud Optimized GeoTIFF. 
+ Requires GDAL >= 3.1 + """ + + tmpfile = tempfile.mktemp('_cogeo.tif', dir=settings.MEDIA_TMP) + swapfile = tempfile.mktemp('_cogeo_swap.tif', dir=settings.MEDIA_TMP) + + # gdal_translate -of COG -co BLOCKSIZE=256 -co COMPRESS=deflate -co NUM_THREADS=4 -co BIGTIFF=IF_SAFER -co QUALITY=100 -co SPARSE_OK=ON --config GDAL_NUM_THREADS ALL_CPUS brighton.tif cog.tif + +def make_cogeo_legacy(src_path): + """ + Make src_path a Cloud Optimized GeoTIFF + This implementation does not require GDAL >= 3.1 + but sometimes (rarely) hangs for unknown reasons + """ tmpfile = tempfile.mktemp('_cogeo.tif', dir=settings.MEDIA_TMP) swapfile = tempfile.mktemp('_cogeo_swap.tif', dir=settings.MEDIA_TMP) @@ -77,4 +138,8 @@ def assure_cogeo(src_path): raise e if os.path.isfile(swapfile): - os.remove(swapfile) \ No newline at end of file + os.remove(swapfile) + + return True + else: + return False \ No newline at end of file From 0e0c558e5e8059a3fa02582d88c6ffcf7eeaf32b Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 22 Jul 2020 22:43:08 -0400 Subject: [PATCH 2/7] Upgrade to Python 3.8, GDAL 3.1, Node 12 --- Dockerfile | 6 +++--- app/cogeo.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index c90874a67..4babc6612 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.6-stretch +FROM python:3.8-stretch MAINTAINER Piero Toffanin ENV PYTHONUNBUFFERED 1 @@ -9,7 +9,7 @@ ENV PROJ_LIB=/usr/share/proj RUN mkdir /webodm WORKDIR /webodm -RUN curl --silent --location https://deb.nodesource.com/setup_10.x | bash - +RUN curl --silent --location https://deb.nodesource.com/setup_12.x | bash - RUN apt-get -qq install -y nodejs # Configure use of testing branch of Debian @@ -19,7 +19,7 @@ RUN printf "deb http://ftp.us.debian.org/debian/ stable main contrib non- RUN printf "deb http://ftp.us.debian.org/debian/ testing main contrib non-free\ndeb-src http://ftp.us.debian.org/debian/ testing main contrib non-free" > 
/etc/apt/sources.list.d/testing.list # Install Node.js GDAL, nginx, letsencrypt, psql -RUN apt-get -qq update && apt-get -qq install -t testing -y binutils libproj-dev gdal-bin nginx certbot grass-core && apt-get -qq install -y gettext-base cron postgresql-client-9.6 +RUN apt-get -qq update && apt-get -qq install -t testing -y binutils libproj-dev gdal-bin python3-gdal nginx certbot grass-core && apt-get -qq install -y gettext-base cron postgresql-client-9.6 # Install pip reqs ADD requirements.txt /webodm/ diff --git a/app/cogeo.py b/app/cogeo.py index 9cb7ab1d3..28be25b89 100644 --- a/app/cogeo.py +++ b/app/cogeo.py @@ -57,7 +57,7 @@ def assure_cogeo(src_path, use_legacy=False): # This shouldn't happen use_legacy = True - if use_legacy: + if True or use_legacy: logger.info("Using legacy implementation (GDAL >= 3.1 not found)") make_cogeo_legacy(src_path) else: From a78ca876d2527cee836c6288a99627a6cd63f657 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Thu, 23 Jul 2020 12:53:48 -0400 Subject: [PATCH 3/7] Updated dockerfile --- Dockerfile | 16 +++++++++++----- requirements.txt | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4babc6612..8d72af3b2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-stretch +FROM debian:stretch MAINTAINER Piero Toffanin ENV PYTHONUNBUFFERED 1 @@ -9,8 +9,9 @@ ENV PROJ_LIB=/usr/share/proj RUN mkdir /webodm WORKDIR /webodm -RUN curl --silent --location https://deb.nodesource.com/setup_12.x | bash - -RUN apt-get -qq install -y nodejs +RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends wget +RUN wget --no-check-certificate https://deb.nodesource.com/setup_12.x -O /tmp/node.sh && bash /tmp/node.sh +RUN apt-get -qq update && apt-get -qq install -y nodejs npm # Configure use of testing branch of Debian RUN printf "Package: *\nPin: release a=stable\nPin-Priority: 900\n" > /etc/apt/preferences.d/stable.pref @@ -18,8 +19,9 @@ RUN printf "Package: 
*\nPin: release a=testing\nPin-Priority: 750\n" > /etc/apt/ RUN printf "deb http://ftp.us.debian.org/debian/ stable main contrib non-free\ndeb-src http://ftp.us.debian.org/debian/ stable main contrib non-free" > /etc/apt/sources.list.d/stable.list RUN printf "deb http://ftp.us.debian.org/debian/ testing main contrib non-free\ndeb-src http://ftp.us.debian.org/debian/ testing main contrib non-free" > /etc/apt/sources.list.d/testing.list -# Install Node.js GDAL, nginx, letsencrypt, psql -RUN apt-get -qq update && apt-get -qq install -t testing -y binutils libproj-dev gdal-bin python3-gdal nginx certbot grass-core && apt-get -qq install -y gettext-base cron postgresql-client-9.6 +# Install Python3, Node.js GDAL, nginx, letsencrypt, psql +RUN apt-get -qq update && apt-get -qq install -t testing -y --no-install-recommends python3 python3-pip git g++ python3-dev libpq-dev binutils libproj-dev gdal-bin python3-gdal nginx certbot grass-core && apt-get -qq install -y --no-install-recommends gettext-base cron postgresql-client-9.6 +RUN update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 2 # Install pip reqs ADD requirements.txt /webodm/ @@ -40,6 +42,10 @@ RUN npm install --quiet -g webpack && npm install --quiet -g webpack-cli && npm RUN python manage.py collectstatic --noinput RUN bash app/scripts/plugin_cleanup.sh && echo "from app.plugins import build_plugins;build_plugins()" | python manage.py shell +# Cleanup +RUN apt-get remove -y g++ python3-dev libpq-dev && apt-get autoremove -y +RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + RUN rm /webodm/webodm/secret_key.py VOLUME /webodm/app/media diff --git a/requirements.txt b/requirements.txt index d19c2ad7d..ee7f4dc69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -56,7 +56,7 @@ webcolors==1.5 -e git://github.com/OpenDroneMap/rio-tiler.git#egg=rio-tiler rio-color==1.0.0 rio-cogeo==1.1.8 
-rasterio==1.1.0 ; sys_platform == 'linux' or sys_platform == 'darwin' +rasterio==1.1.5 ; sys_platform == 'linux' or sys_platform == 'darwin' https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/rasterio-1.1.3-cp37-cp37m-win_amd64.whl ; sys_platform == "win32" https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/GDAL-3.0.4-cp37-cp37m-win_amd64.whl ; sys_platform == "win32" Shapely==1.7.0 ; sys_platform == "win32" From 68ce8284f174fb72591989a7cb1fce96bf1dfb59 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Sun, 26 Jul 2020 12:24:01 -0400 Subject: [PATCH 4/7] Dockerfile changes --- Dockerfile | 16 +++++++++++----- requirements.txt | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4babc6612..c30807074 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-stretch +FROM debian:stretch MAINTAINER Piero Toffanin ENV PYTHONUNBUFFERED 1 @@ -9,8 +9,9 @@ ENV PROJ_LIB=/usr/share/proj RUN mkdir /webodm WORKDIR /webodm -RUN curl --silent --location https://deb.nodesource.com/setup_12.x | bash - -RUN apt-get -qq install -y nodejs +RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends wget +RUN wget --no-check-certificate https://deb.nodesource.com/setup_12.x -O /tmp/node.sh && bash /tmp/node.sh +RUN apt-get -qq update && apt-get -qq install -y nodejs # Configure use of testing branch of Debian RUN printf "Package: *\nPin: release a=stable\nPin-Priority: 900\n" > /etc/apt/preferences.d/stable.pref @@ -18,8 +19,9 @@ RUN printf "Package: *\nPin: release a=testing\nPin-Priority: 750\n" > /etc/apt/ RUN printf "deb http://ftp.us.debian.org/debian/ stable main contrib non-free\ndeb-src http://ftp.us.debian.org/debian/ stable main contrib non-free" > /etc/apt/sources.list.d/stable.list RUN printf "deb http://ftp.us.debian.org/debian/ testing main contrib non-free\ndeb-src http://ftp.us.debian.org/debian/ testing main contrib non-free" > /etc/apt/sources.list.d/testing.list -# Install Node.js GDAL, nginx, 
letsencrypt, psql -RUN apt-get -qq update && apt-get -qq install -t testing -y binutils libproj-dev gdal-bin python3-gdal nginx certbot grass-core && apt-get -qq install -y gettext-base cron postgresql-client-9.6 +# Install Python3, Node.js GDAL, nginx, letsencrypt, psql +RUN apt-get -qq update && apt-get -qq install -t testing -y --no-install-recommends python3 python3-pip git g++ python3-dev libpq-dev binutils libproj-dev gdal-bin python3-gdal nginx certbot grass-core && apt-get -qq install -y --no-install-recommends gettext-base cron postgresql-client-9.6 +RUN update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 2 # Install pip reqs ADD requirements.txt /webodm/ @@ -40,6 +42,10 @@ RUN npm install --quiet -g webpack && npm install --quiet -g webpack-cli && npm RUN python manage.py collectstatic --noinput RUN bash app/scripts/plugin_cleanup.sh && echo "from app.plugins import build_plugins;build_plugins()" | python manage.py shell +# Cleanup +RUN apt-get remove -y g++ python3-dev libpq-dev && apt-get autoremove -y +RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + RUN rm /webodm/webodm/secret_key.py VOLUME /webodm/app/media diff --git a/requirements.txt b/requirements.txt index d19c2ad7d..ee7f4dc69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -56,7 +56,7 @@ webcolors==1.5 -e git://github.com/OpenDroneMap/rio-tiler.git#egg=rio-tiler rio-color==1.0.0 rio-cogeo==1.1.8 -rasterio==1.1.0 ; sys_platform == 'linux' or sys_platform == 'darwin' +rasterio==1.1.5 ; sys_platform == 'linux' or sys_platform == 'darwin' https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/rasterio-1.1.3-cp37-cp37m-win_amd64.whl ; sys_platform == "win32" https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/GDAL-3.0.4-cp37-cp37m-win_amd64.whl ; sys_platform == "win32" Shapely==1.7.0 ; sys_platform == "win32" From 0aa07fb2c0ae762366d59ff654989d676db9b350 Mon Sep 17 00:00:00 2001 
From: Piero Toffanin Date: Mon, 27 Jul 2020 11:45:31 -0400 Subject: [PATCH 5/7] GDAL COGEO validation/creation --- Dockerfile | 3 +- app/cogeo.py | 52 ++- app/vendor/__init__.py | 0 .../validate_cloud_optimized_geotiff.py | 407 ++++++++++++++++++ 4 files changed, 452 insertions(+), 10 deletions(-) create mode 100644 app/vendor/__init__.py create mode 100644 app/vendor/validate_cloud_optimized_geotiff.py diff --git a/Dockerfile b/Dockerfile index c30807074..ef8d61312 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,7 @@ ENV PROJ_LIB=/usr/share/proj RUN mkdir /webodm WORKDIR /webodm +# Install Node.js RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends wget RUN wget --no-check-certificate https://deb.nodesource.com/setup_12.x -O /tmp/node.sh && bash /tmp/node.sh RUN apt-get -qq update && apt-get -qq install -y nodejs @@ -19,7 +20,7 @@ RUN printf "Package: *\nPin: release a=testing\nPin-Priority: 750\n" > /etc/apt/ RUN printf "deb http://ftp.us.debian.org/debian/ stable main contrib non-free\ndeb-src http://ftp.us.debian.org/debian/ stable main contrib non-free" > /etc/apt/sources.list.d/stable.list RUN printf "deb http://ftp.us.debian.org/debian/ testing main contrib non-free\ndeb-src http://ftp.us.debian.org/debian/ testing main contrib non-free" > /etc/apt/sources.list.d/testing.list -# Install Python3, Node.js GDAL, nginx, letsencrypt, psql +# Install Python3, GDAL, nginx, letsencrypt, psql RUN apt-get -qq update && apt-get -qq install -t testing -y --no-install-recommends python3 python3-pip git g++ python3-dev libpq-dev binutils libproj-dev gdal-bin python3-gdal nginx certbot grass-core && apt-get -qq install -y --no-install-recommends gettext-base cron postgresql-client-9.6 RUN update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 2 diff --git a/app/cogeo.py b/app/cogeo.py index 28be25b89..19cacbb99 100644 --- a/app/cogeo.py +++ b/app/cogeo.py 
@@ -5,6 +5,7 @@ import rasterio import re import subprocess +from pipes import quote from rio_cogeo.cogeo import cog_validate, cog_translate from rio_tiler.utils import has_alpha_band from webodm import settings @@ -17,16 +18,23 @@ def valid_cogeo(src_path): :param src_path: path to GeoTIFF :return: true if the GeoTIFF is a cogeo, false otherwise """ - return cog_validate(src_path, strict=True) + try: + from app.vendor.validate_cloud_optimized_geotiff import validate + warnings, errors, details = validate(src_path, full_check=True) + return not errors and not warnings + except ModuleNotFoundError: + logger.warning("Using legacy cog_validate (osgeo.gdal package not found)") + # Legacy + return cog_validate(src_path, strict=True) -def assure_cogeo(src_path, use_legacy=False): +def assure_cogeo(src_path): """ Guarantee that the .tif passed as an argument is a Cloud Optimized GeoTIFF (cogeo) If the path is not a cogeo, it is destructively converted into a cogeo. If the file cannot be converted, the function does not change the file :param src_path: path to GeoTIFF (cogeo or not) - :param use_legacy: whether to force the use of legacy implementation. By default the best implementation is used. + :param force_use_legacy: whether to force the use of legacy implementation. By default the best implementation is used. 
:return: None """ @@ -57,11 +65,11 @@ def assure_cogeo(src_path, use_legacy=False): # This shouldn't happen use_legacy = True - if True or use_legacy: - logger.info("Using legacy implementation (GDAL >= 3.1 not found)") - make_cogeo_legacy(src_path) + if use_legacy: + logger.warning("Using legacy implementation (GDAL >= 3.1 not found)") + return make_cogeo_legacy(src_path) else: - make_cogeo_gdal(src_path) + return make_cogeo_gdal(src_path) def get_gdal_version(): # Bit of a hack without installing @@ -71,7 +79,7 @@ def get_gdal_version(): return None # Get version - version_output = subprocess.check_output("%s --version" % gdal_translate) + version_output = subprocess.check_output([gdal_translate, "--version"]).decode('utf-8') m = re.match(r"GDAL\s+([\d+])\.([\d+])\.([\d+]),\s+released", version_output) if not m: @@ -89,7 +97,33 @@ def make_cogeo_gdal(src_path): tmpfile = tempfile.mktemp('_cogeo.tif', dir=settings.MEDIA_TMP) swapfile = tempfile.mktemp('_cogeo_swap.tif', dir=settings.MEDIA_TMP) - # gdal_translate -of COG -co BLOCKSIZE=256 -co COMPRESS=deflate -co NUM_THREADS=4 -co BIGTIFF=IF_SAFER -co QUALITY=100 -co SPARSE_OK=ON --config GDAL_NUM_THREADS ALL_CPUS brighton.tif cog.tif + try: + subprocess.run(["gdal_translate", "-of", "COG", + "-co", "BLOCKSIZE=256", + "-co", "COMPRESS=deflate", + "-co", "NUM_THREADS=ALL_CPUS", + "-co", "BIGTIFF=IF_SAFER", + "--config", "GDAL_NUM_THREADS", "ALL_CPUS", + quote(src_path), quote(tmpfile)]) + except Exception as e: + logger.warning("Cannot create Cloud Optimized GeoTIFF: %s" % str(e)) + + if os.path.isfile(tmpfile): + shutil.move(src_path, swapfile) # Move to swap location + + try: + shutil.move(tmpfile, src_path) + except IOError as e: + logger.warning("Cannot move %s to %s: %s" % (tmpfile, src_path, str(e))) + shutil.move(swapfile, src_path) # Attempt to restore + raise e + + if os.path.isfile(swapfile): + os.remove(swapfile) + + return True + else: + return False def make_cogeo_legacy(src_path): """ diff --git 
a/app/vendor/__init__.py b/app/vendor/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/app/vendor/validate_cloud_optimized_geotiff.py b/app/vendor/validate_cloud_optimized_geotiff.py new file mode 100644 index 000000000..1b34f26ec --- /dev/null +++ b/app/vendor/validate_cloud_optimized_geotiff.py @@ -0,0 +1,407 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# ***************************************************************************** +# $Id$ +# +# Project: GDAL +# Purpose: Validate Cloud Optimized GeoTIFF file structure +# Author: Even Rouault, +# +# ***************************************************************************** +# Copyright (c) 2017, Even Rouault +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+# ***************************************************************************** + +import os.path +import struct +import sys +from osgeo import gdal + + +def Usage(): + print('Usage: validate_cloud_optimized_geotiff.py [-q] [--full-check=yes/no/auto] test.tif') + print('') + print('Options:') + print('-q: quiet mode') + print('--full-check=yes/no/auto: check tile/strip leader/trailer bytes. auto=yes for local files, and no for remote files') + return 1 + + +class ValidateCloudOptimizedGeoTIFFException(Exception): + pass + + +def full_check_band(f, band_name, band, errors, + block_order_row_major, + block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, + mask_interleaved_with_imagery): + + block_size = band.GetBlockSize() + mask_band = None + if mask_interleaved_with_imagery: + mask_band = band.GetMaskBand() + mask_block_size = mask_band.GetBlockSize() + if block_size != mask_block_size: + errors += [band_name + ': mask block size is different from its imagery band'] + mask_band = None + + yblocks = (band.YSize + block_size[1] - 1) // block_size[1] + xblocks = (band.XSize + block_size[0] - 1) // block_size[0] + last_offset = 0 + for y in range(yblocks): + for x in range(xblocks): + + offset = band.GetMetadataItem('BLOCK_OFFSET_%d_%d' % (x, y), 'TIFF') + offset = int(offset) if offset is not None else 0 + bytecount = band.GetMetadataItem('BLOCK_SIZE_%d_%d' % (x, y), 'TIFF') + bytecount = int(bytecount) if bytecount is not None else 0 + + if offset > 0: + if block_order_row_major and offset < last_offset: + errors += [band_name + + ': offset of block (%d, %d) is smaller than previous block' % (x, y)] + + if block_leader_size_as_uint4: + gdal.VSIFSeekL(f, offset - 4, 0) + leader_size = struct.unpack('<I', gdal.VSIFReadL(4, 1, f))[0] + if leader_size != bytecount: + errors += [band_name + ': for block (%d, %d), size in leader bytes is %d instead of %d' % ( + x, y, leader_size, bytecount)] + + if block_trailer_last_4_bytes_repeated: + if bytecount >= 4: + gdal.VSIFSeekL(f, offset + bytecount - 4, 0) + last_bytes = gdal.VSIFReadL(8, 1, f) + if last_bytes[0:4] != last_bytes[4:8]: + errors += [band_name + + ': for block (%d, %d), trailer bytes are invalid' % (x, y)] + + if mask_band: + offset_mask = 
mask_band.GetMetadataItem('BLOCK_OFFSET_%d_%d' % (x, y), 'TIFF') + offset_mask = int(offset_mask) if offset_mask is not None else 0 + if offset > 0 and offset_mask > 0: + #bytecount_mask = int(mask_band.GetMetadataItem('BLOCK_SIZE_%d_%d' % (x,y), 'TIFF')) + expected_offset_mask = offset + bytecount + \ + (4 if block_leader_size_as_uint4 else 0) + \ + (4 if block_trailer_last_4_bytes_repeated else 0) + if offset_mask != expected_offset_mask: + errors += ['Mask of ' + band_name + ': for block (%d, %d), offset is %d, whereas %d was expected' % ( + x, y, offset_mask, expected_offset_mask)] + elif offset == 0 and offset_mask > 0: + if block_order_row_major and offset_mask < last_offset: + errors += ['Mask of ' + band_name + + ': offset of block (%d, %d) is smaller than previous block' % (x, y)] + + offset = offset_mask + + last_offset = offset + + +def validate(ds, check_tiled=True, full_check=False): + """Check if a file is a (Geo)TIFF with cloud optimized compatible structure. + + Args: + ds: GDAL Dataset (or file name) for the file to inspect. + check_tiled: Set to False to ignore missing tiling. + full_check: Set to True to check tile/strip leader/trailer bytes. Might be slow on remote files + + Returns: + A tuple (warnings, errors, details): an array of warning messages and an + array of error messages (each empty if there is none), and a dictionary + with the structure of the GeoTIFF file. + + Raises: + ValidateCloudOptimizedGeoTIFFException: Unable to open the file or the + file is not a Tiff. 
+ """ + + if int(gdal.VersionInfo('VERSION_NUM')) < 2020000: + raise ValidateCloudOptimizedGeoTIFFException( + 'GDAL 2.2 or above required') + + unicode_type = type(''.encode('utf-8').decode('utf-8')) + if isinstance(ds, (str, unicode_type)): + gdal.PushErrorHandler() + ds = gdal.Open(ds) + gdal.PopErrorHandler() + if ds is None: + raise ValidateCloudOptimizedGeoTIFFException( + 'Invalid file : %s' % gdal.GetLastErrorMsg()) + if ds.GetDriver().ShortName != 'GTiff': + raise ValidateCloudOptimizedGeoTIFFException( + 'The file is not a GeoTIFF') + + details = {} + errors = [] + warnings = [] + filename = ds.GetDescription() + main_band = ds.GetRasterBand(1) + ovr_count = main_band.GetOverviewCount() + filelist = ds.GetFileList() + if filelist is not None and filename + '.ovr' in filelist: + errors += [ + 'Overviews found in external .ovr file. They should be internal'] + + if main_band.XSize > 512 or main_band.YSize > 512: + if check_tiled: + block_size = main_band.GetBlockSize() + if block_size[0] == main_band.XSize and block_size[0] > 1024: + errors += [ + 'The file is greater than 512xH or Wx512, but is not tiled'] + + if ovr_count == 0: + warnings += [ + 'The file is greater than 512xH or Wx512, it is recommended ' + 'to include internal overviews'] + + ifd_offset = int(main_band.GetMetadataItem('IFD_OFFSET', 'TIFF')) + ifd_offsets = [ifd_offset] + + block_order_row_major = False + block_leader_size_as_uint4 = False + block_trailer_last_4_bytes_repeated = False + mask_interleaved_with_imagery = False + + if ifd_offset not in (8, 16): + + # Check if there is GDAL hidden structural metadata + f = gdal.VSIFOpenL(filename, 'rb') + if not f: + raise ValidateCloudOptimizedGeoTIFFException("Cannot open file") + signature = struct.unpack('B' * 4, gdal.VSIFReadL(4, 1, f)) + bigtiff = signature in ((0x49, 0x49, 0x2B, 0x00), (0x4D, 0x4D, 0x00, 0x2B)) + if bigtiff: + expected_ifd_pos = 16 + else: + expected_ifd_pos = 8 + gdal.VSIFSeekL(f, expected_ifd_pos, 0) + pattern = 
"GDAL_STRUCTURAL_METADATA_SIZE=%06d bytes\n" % 0 + got = gdal.VSIFReadL(len(pattern), 1, f).decode('LATIN1') + if len(got) == len(pattern) and got.startswith('GDAL_STRUCTURAL_METADATA_SIZE='): + size = int(got[len('GDAL_STRUCTURAL_METADATA_SIZE='):][0:6]) + extra_md = gdal.VSIFReadL(size, 1, f).decode('LATIN1') + block_order_row_major = 'BLOCK_ORDER=ROW_MAJOR' in extra_md + block_leader_size_as_uint4 = 'BLOCK_LEADER=SIZE_AS_UINT4' in extra_md + block_trailer_last_4_bytes_repeated = 'BLOCK_TRAILER=LAST_4_BYTES_REPEATED' in extra_md + mask_interleaved_with_imagery = 'MASK_INTERLEAVED_WITH_IMAGERY=YES' in extra_md + if 'KNOWN_INCOMPATIBLE_EDITION=YES' in extra_md: + errors += ["KNOWN_INCOMPATIBLE_EDITION=YES is declared in the file"] + expected_ifd_pos += len(pattern) + size + expected_ifd_pos += expected_ifd_pos % 2 # IFD offset starts on a 2-byte boundary + gdal.VSIFCloseL(f) + + if expected_ifd_pos != ifd_offsets[0]: + errors += [ + 'The offset of the main IFD should be %d. It is %d instead' % (expected_ifd_pos, ifd_offsets[0])] + + details['ifd_offsets'] = {} + details['ifd_offsets']['main'] = ifd_offset + + for i in range(ovr_count): + # Check that overviews are by descending sizes + ovr_band = ds.GetRasterBand(1).GetOverview(i) + if i == 0: + if (ovr_band.XSize > main_band.XSize or + ovr_band.YSize > main_band.YSize): + errors += [ + 'First overview has larger dimension than main band'] + else: + prev_ovr_band = ds.GetRasterBand(1).GetOverview(i - 1) + if (ovr_band.XSize > prev_ovr_band.XSize or + ovr_band.YSize > prev_ovr_band.YSize): + errors += [ + 'Overview of index %d has larger dimension than ' + 'overview of index %d' % (i, i - 1)] + + if check_tiled: + block_size = ovr_band.GetBlockSize() + if block_size[0] == ovr_band.XSize and block_size[0] > 1024: + errors += [ + 'Overview of index %d is not tiled' % i] + + # Check that the IFD of descending overviews are sorted by increasing + # offsets + ifd_offset = int(ovr_band.GetMetadataItem('IFD_OFFSET', 
'TIFF')) + ifd_offsets.append(ifd_offset) + details['ifd_offsets']['overview_%d' % i] = ifd_offset + if ifd_offsets[-1] < ifd_offsets[-2]: + if i == 0: + errors += [ + 'The offset of the IFD for overview of index %d is %d, ' + 'whereas it should be greater than the one of the main ' + 'image, which is at byte %d' % + (i, ifd_offsets[-1], ifd_offsets[-2])] + else: + errors += [ + 'The offset of the IFD for overview of index %d is %d, ' + 'whereas it should be greater than the one of index %d, ' + 'which is at byte %d' % + (i, ifd_offsets[-1], i - 1, ifd_offsets[-2])] + + # Check that the imagery starts by the smallest overview and ends with + # the main resolution dataset + + def get_block_offset(band): + blockxsize, blockysize = band.GetBlockSize() + for y in range(int((band.YSize + blockysize - 1) / blockysize)): + for x in range(int((band.XSize + blockxsize - 1) / blockxsize)): + block_offset = band.GetMetadataItem('BLOCK_OFFSET_%d_%d' % (x, y), 'TIFF') + if block_offset: + return int(block_offset) + return 0 + + block_offset = get_block_offset(main_band) + data_offsets = [block_offset] + details['data_offsets'] = {} + details['data_offsets']['main'] = block_offset + for i in range(ovr_count): + ovr_band = ds.GetRasterBand(1).GetOverview(i) + block_offset = get_block_offset(ovr_band) + data_offsets.append(block_offset) + details['data_offsets']['overview_%d' % i] = block_offset + + if data_offsets[-1] != 0 and data_offsets[-1] < ifd_offsets[-1]: + if ovr_count > 0: + errors += [ + 'The offset of the first block of the smallest overview ' + 'should be after its IFD'] + else: + errors += [ + 'The offset of the first block of the image should ' + 'be after its IFD'] + for i in range(len(data_offsets) - 2, 0, -1): + if data_offsets[i] != 0 and data_offsets[i] < data_offsets[i + 1]: + errors += [ + 'The offset of the first block of overview of index %d should ' + 'be after the one of the overview of index %d' % + (i - 1, i)] + if len(data_offsets) >= 2 and 
data_offsets[0] != 0 and data_offsets[0] < data_offsets[1]: + errors += [ + 'The offset of the first block of the main resolution image ' + 'should be after the one of the overview of index %d' % + (ovr_count - 1)] + + if full_check and (block_order_row_major or block_leader_size_as_uint4 or + block_trailer_last_4_bytes_repeated or + mask_interleaved_with_imagery): + f = gdal.VSIFOpenL(filename, 'rb') + if not f: + raise ValidateCloudOptimizedGeoTIFFException("Cannot open file") + + full_check_band(f, 'Main resolution image', main_band, errors, + block_order_row_major, + block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, + mask_interleaved_with_imagery) + if main_band.GetMaskFlags() == gdal.GMF_PER_DATASET and \ + (filename + '.msk') not in ds.GetFileList(): + full_check_band(f, 'Mask band of main resolution image', + main_band.GetMaskBand(), errors, + block_order_row_major, + block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, False) + for i in range(ovr_count): + ovr_band = ds.GetRasterBand(1).GetOverview(i) + full_check_band(f, 'Overview %d' % i, ovr_band, errors, + block_order_row_major, + block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, + mask_interleaved_with_imagery) + if ovr_band.GetMaskFlags() == gdal.GMF_PER_DATASET and \ + (filename + '.msk') not in ds.GetFileList(): + full_check_band(f, 'Mask band of overview %d' % i, + ovr_band.GetMaskBand(), errors, + block_order_row_major, + block_leader_size_as_uint4, + block_trailer_last_4_bytes_repeated, False) + gdal.VSIFCloseL(f) + + return warnings, errors, details + + +def main(): + """Return 0 in case of success, 1 for failure.""" + + i = 1 + filename = None + quiet = False + full_check = None + while i < len(sys.argv): + if sys.argv[i] == '-q': + quiet = True + elif sys.argv[i] == '--full-check=yes': + full_check = True + elif sys.argv[i] == '--full-check=no': + full_check = False + elif sys.argv[i] == '--full-check=auto': + full_check = None + elif 
sys.argv[i][0] == '-': + return Usage() + elif filename is None: + filename = sys.argv[i] + else: + return Usage() + + i += 1 + + if filename is None: + return Usage() + + if full_check is None: + full_check = filename.startswith('/vsimem/') or os.path.exists(filename) + + try: + ret = 0 + warnings, errors, details = validate(filename, full_check=full_check) + if warnings: + if not quiet: + print('The following warnings were found:') + for warning in warnings: + print(' - ' + warning) + print('') + if errors: + if not quiet: + print('%s is NOT a valid cloud optimized GeoTIFF.' % filename) + print('The following errors were found:') + for error in errors: + print(' - ' + error) + print('') + ret = 1 + else: + if not quiet: + print('%s is a valid cloud optimized GeoTIFF' % filename) + + if not quiet and not warnings and not errors: + headers_size = min(details['data_offsets'][k] for k in details['data_offsets']) + if headers_size == 0: + headers_size = gdal.VSIStatL(filename).size + print('\nThe size of all IFD headers is %d bytes' % headers_size) + except ValidateCloudOptimizedGeoTIFFException as e: + if not quiet: + print('%s is NOT a valid cloud optimized GeoTIFF : %s' % + (filename, str(e))) + ret = 1 + + return ret + + +if __name__ == '__main__': + sys.exit(main()) From cfc176ddec164c88036931dd614938cec62c0f6a Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Mon, 27 Jul 2020 11:51:04 -0400 Subject: [PATCH 6/7] Removed non existant parameter description --- app/cogeo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/cogeo.py b/app/cogeo.py index 19cacbb99..a6d2ea769 100644 --- a/app/cogeo.py +++ b/app/cogeo.py @@ -34,7 +34,6 @@ def assure_cogeo(src_path): If the path is not a cogeo, it is destructively converted into a cogeo. If the file cannot be converted, the function does not change the file :param src_path: path to GeoTIFF (cogeo or not) - :param force_use_legacy: whether to force the use of legacy implementation. 
By default the best implementation is used. :return: None """ From 9026ecbbfb0f806858a23411e8decaa963bee00b Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Mon, 27 Jul 2020 12:14:23 -0400 Subject: [PATCH 7/7] Add pseudo restart button --- app/static/app/js/components/TaskListItem.jsx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/app/static/app/js/components/TaskListItem.jsx b/app/static/app/js/components/TaskListItem.jsx index 58764ad9c..f1923860e 100644 --- a/app/static/app/js/components/TaskListItem.jsx +++ b/app/static/app/js/components/TaskListItem.jsx @@ -285,9 +285,21 @@ class TaskListItem extends React.Component { rfMap[rfParam].onClick = this.genRestartAction(rfParam); } - return task.can_rerun_from + let items = task.can_rerun_from .map(rf => rfMap[rf]) .filter(rf => rf !== undefined); + + if (items.length > 0 && [statusCodes.CANCELED, statusCodes.FAILED].indexOf(task.status) !== -1){ + // Add resume "pseudo button" to help users understand + // how to resume a task that failed for memory/disk issues. + items.unshift({ + label: "Resume Processing", + icon: "fa fa-bolt", + onClick: this.genRestartAction(task.can_rerun_from[task.can_rerun_from.length - 1]) + }); + } + + return items; } genRestartAction(rerunFrom = null){