From e37ec3b739530d9319973caf6c08c0270885192f Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Thu, 10 Aug 2023 20:53:55 +0100 Subject: [PATCH] Make remote filesystem listing more robust to errors and add single remote endpoint --- pydatalab/pydatalab/remote_filesystems.py | 4 +- pydatalab/pydatalab/routes/v0_1/__init__.py | 5 +- pydatalab/pydatalab/routes/v0_1/remotes.py | 105 +++++++++++++++++--- pydatalab/tasks.py | 2 +- 4 files changed, 98 insertions(+), 18 deletions(-) diff --git a/pydatalab/pydatalab/remote_filesystems.py b/pydatalab/pydatalab/remote_filesystems.py index 25cd54914..2c9dbcf1a 100644 --- a/pydatalab/pydatalab/remote_filesystems.py +++ b/pydatalab/pydatalab/remote_filesystems.py @@ -127,8 +127,8 @@ def get_directory_structure( last_updated, ) - except RuntimeError as exc: - dir_structure = [{"type": "error", "details": str(exc)}] + except Exception as exc: + dir_structure = [{"type": "error", "name": directory["name"], "details": str(exc)}] last_updated = datetime.datetime.now() return { diff --git a/pydatalab/pydatalab/routes/v0_1/__init__.py b/pydatalab/pydatalab/routes/v0_1/__init__.py index f514370e0..471d691bf 100644 --- a/pydatalab/pydatalab/routes/v0_1/__init__.py +++ b/pydatalab/pydatalab/routes/v0_1/__init__.py @@ -9,19 +9,18 @@ from .healthcheck import ENDPOINTS as healthcheck_endpoints from .info import ENDPOINTS as info_endpoints from .items import ENDPOINTS as items_endpoints -from .remotes import ENDPOINTS as remotes_endpoints +from .remotes import remote ENDPOINTS: Dict[str, Callable] = { **blocks_endpoints, **items_endpoints, **files_endpoints, - **remotes_endpoints, **healthcheck_endpoints, **auth_endpoints, **graphs_endpoints, **info_endpoints, } -BLUEPRINTS = [collection] +BLUEPRINTS = [collection, remote] __all__ = ("ENDPOINTS", "BLUEPRINTS", "__api_version__") diff --git a/pydatalab/pydatalab/routes/v0_1/remotes.py b/pydatalab/pydatalab/routes/v0_1/remotes.py index f01962f35..796e8e190 100644 --- 
a/pydatalab/pydatalab/routes/v0_1/remotes.py +++ b/pydatalab/pydatalab/routes/v0_1/remotes.py @@ -1,12 +1,31 @@ -from typing import Callable, Dict +from typing import Any, Dict, Optional -from flask import jsonify, request +from flask import Blueprint, jsonify, request from flask_login import current_user from pydatalab.config import CONFIG -from pydatalab.remote_filesystems import get_directory_structures +from pydatalab.remote_filesystems import ( + get_directory_structure, + get_directory_structures, +) +def _check_invalidate_cache(args: Dict[str, str]) -> Optional[bool]: + invalidate_cache: Optional[bool] = None + if "invalidate_cache" in args: + invalidate_cache_arg = args.get("invalidate_cache") + if invalidate_cache_arg not in ("1", "0"): + raise RuntimeError("invalidate_cache must be 0 or 1") + invalidate_cache = bool(int(invalidate_cache_arg)) + + return invalidate_cache + + +remote = Blueprint("remotes", __name__) + + +@remote.route("/list-remote-directories/", methods=["GET"]) +@remote.route("/remotes", methods=["GET"]) def list_remote_directories(): """Returns the most recent directory structures from the server. 
@@ -26,12 +45,19 @@ def list_remote_directories(): 401, ) - invalidate_cache = None - if "invalidate_cache" in request.args: - invalidate_cache = request.args["invalidate_cache"] - if invalidate_cache not in ("1", "0"): - return jsonify({"error": "invalidate_cache must be 0 or 1"}), 400 - invalidate_cache = bool(int(invalidate_cache)) + try: + invalidate_cache = _check_invalidate_cache(request.args) + except RuntimeError as e: + return ( + jsonify( + { + "status": "error", + "title": "Invalid Argument", + "detail": str(e), + } + ), + 400, + ) all_directory_structures = get_directory_structures( CONFIG.REMOTE_FILESYSTEMS, invalidate_cache=invalidate_cache @@ -50,6 +76,61 @@ def list_remote_directories(): list_remote_directories.methods = ("GET",) # type: ignore -ENDPOINTS: Dict[str, Callable] = { - "/list-remote-directories/": list_remote_directories, -} +@remote.route("/remotes/<remote_id>", methods=["GET"]) +def get_remote_directory(remote_id: str): + """Returns the most recent directory structure from the server for the + given configured remote name. 
+ + """ + if not current_user.is_authenticated and not CONFIG.TESTING: + return ( + jsonify( + { + "status": "error", + "title": "Not Authorized", + "detail": "Listing remote directories requires authentication.", + } + ), + 401, + ) + + try: + invalidate_cache = _check_invalidate_cache(request.args) + except RuntimeError as e: + return ( + jsonify( + { + "status": "error", + "title": "Invalid Argument", + "detail": str(e), + } + ), + 400, + ) + + for d in CONFIG.REMOTE_FILESYSTEMS: + if remote_id == d["name"]: + remote_obj = d + break + else: + return ( + jsonify( + { + "status": "error", + "title": "Not Found", + "detail": f"No remote found with name {remote_id!r}", + } + ), + 404, + ) + + directory_structure = get_directory_structure(remote_obj, invalidate_cache=invalidate_cache) + + response: Dict[str, Any] = {} + response["meta"] = {} + response["meta"]["remote"] = d + if directory_structure: + oldest_update = min(d["last_updated"] for d in directory_structure) + response["meta"]["oldest_cache_update"] = oldest_update.isoformat() + response["data"] = directory_structure + return jsonify(response), 200 diff --git a/pydatalab/tasks.py b/pydatalab/tasks.py index 506d4bea4..c796a5e74 100644 --- a/pydatalab/tasks.py +++ b/pydatalab/tasks.py @@ -290,7 +290,7 @@ def check_item_validity(_, base_url: str = None, starting_materials: bool = Fals @task -def check_remotes(_, base_url: str = None): +def check_remotes(_, base_url: str | None = None): """This task looks up all configured remotes and checks that they can be synced.