diff --git a/pydatalab/pydatalab/remote_filesystems.py b/pydatalab/pydatalab/remote_filesystems.py index 380817ce7..e7f62078a 100644 --- a/pydatalab/pydatalab/remote_filesystems.py +++ b/pydatalab/pydatalab/remote_filesystems.py @@ -208,12 +208,43 @@ def _call_remote_tree( except Exception as exc: raise RuntimeError(f"Remote tree process {command!r} returned: {exc!r}") if stderr: - raise RuntimeError(f"Remote tree process {command!r} returned: {stderr!r}") + # Do not return the bare stderr, but instead specialise the error message to common errors + if "WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!" in stderr.decode("utf-8"): + msg = f"Remote host identification has changed for {hostname}: please contact the administrator of this datalab deployment." + LOGGER.error( + "Remote host identification for %s has changed, failed to update remote directories", + hostname, + ) + else: + msg = "Remote tree process returned an error: please contact the administrator of this datalab deployment." + LOGGER.error( + "Remote tree process on %s returned an error: %s", + hostname, + stderr.decode("utf-8"), + ) + raise RuntimeError(msg) try: return json.loads(stdout) except Exception: - raise RuntimeError(f"Remote tree process {command!r} returned: {stdout!r}") + if "error opening dir" in stdout.decode("utf-8"): + msg = "Can no longer access the configured directory on the remote system; please contact the administrator of this datalab deployment." + LOGGER.error( + "Remote directory %s on %s no longer accessible. Response: %s", + directory_path, + hostname, + stdout.decode("utf-8"), + ) + else: + msg = "Remote tree process failed with an unhandled error; please contact the administrator of this datalab deployment." + LOGGER.error( + "Remote directory syncing for %s on %s failed. Response: %s", + directory_path, + hostname, + stdout.decode("utf-8"), + ) + + raise RuntimeError(msg) if hostname: LOGGER.debug(f"Calling remote {tree_command} on {directory_path}") diff --git a/pydatalab/tasks.py b/pydatalab/tasks.py index ab7cacadc..7e48121af 100644 --- a/pydatalab/tasks.py +++ b/pydatalab/tasks.py @@ -290,7 +290,7 @@ def check_item_validity(_, base_url: str | None = None, starting_materials: bool @task -def check_remotes(_, base_url: str | None = None): +def check_remotes(_, base_url: str | None = None, invalidate_cache: bool = False): """This task looks up all configured remotes and checks that they can be synced. @@ -299,6 +299,7 @@ def check_remotes(_, base_url: str | None = None): Parameters: base_url: The API URL. + invalidate_cache: Whether to force cache invalidation. """ @@ -320,7 +321,10 @@ def check_remotes(_, base_url: str | None = None): if not user_response.status_code == 200: raise SystemExit(f"Could not get current user: {user_response.content!r}") - directory_response = requests.get(f"{base_url}/list-remote-directories/", headers=headers) + directory_response = requests.get( + f"{base_url}/list-remote-directories?invalidate_cache={'1' if invalidate_cache else '0'}", + headers=headers, + ) if directory_response.status_code != 200: raise SystemExit(f"Could not get remote directories: {directory_response}") @@ -331,7 +335,9 @@ def check_remotes(_, base_url: str | None = None): for d in directory_structures: if d["status"] == "error": log.error(f"ꙮ {d['name']!r}: {d['contents'][0]['details']!r}") - elif d["type"] == "toplevel": + elif d["status"] == "cached": + log.info(f"✩ {d['name']!r}: {d['last_updated']!r}") + elif d["status"] == "updated": log.info(f"✓ {d['name']!r}: {d['last_updated']!r}")