diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample
index 0e026b5a7b0a..a8e798143f8b 100644
--- a/lib/galaxy/config/sample/datatypes_conf.xml.sample
+++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample
@@ -288,13 +288,14 @@
-
-
-
-
+
+
-
+
+
+
+
diff --git a/lib/galaxy/datatypes/converters/archive_to_directory.xml b/lib/galaxy/datatypes/converters/archive_to_directory.xml
new file mode 100644
index 000000000000..d0c56c154c09
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/archive_to_directory.xml
@@ -0,0 +1,37 @@
+
+
+
+        galaxy-util
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/galaxy/datatypes/converters/tar_to_directory.xml b/lib/galaxy/datatypes/converters/tar_to_directory.xml
index 59354b39b5fd..0160746283da 100644
--- a/lib/galaxy/datatypes/converters/tar_to_directory.xml
+++ b/lib/galaxy/datatypes/converters/tar_to_directory.xml
@@ -1,7 +1,7 @@
 
 
-        galaxy-util
+        galaxy-util
 
 
         mkdir '$output1.files_path';
 
diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index 0024adea6155..e5fea2612ace 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -465,6 +465,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
         composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
         composite_extensions.append("html")  # for archiving composite datatypes
         composite_extensions.append("data_manager_json")  # for downloading bundles if bundled.
+        composite_extensions.append("directory")  # for downloading directories.
 
         if data.extension in composite_extensions:
             return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get("do_action", "zip"))
@@ -1212,6 +1213,18 @@ def regex_line_dataprovider(
 class Directory(Data):
     """Class representing a directory of files."""
 
+    file_ext = "directory"
+
+    def _archive_main_file(
+        self, archive: ZipstreamWrapper, display_name: str, data_filename: str
+    ) -> Tuple[bool, str, str]:
+        """Overrides the method to do nothing.
+
+        No main file gets added to a directory archive.
+ """ + error, msg, messagetype = False, "", "" + return error, msg, messagetype + class GenericAsn1(Text): """Class for generic ASN.1 text format""" diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py index ed32bc92caf0..15d0198f9c31 100644 --- a/lib/galaxy_test/api/test_tools_upload.py +++ b/lib/galaxy_test/api/test_tools_upload.py @@ -1,13 +1,17 @@ import json import os +import tempfile import urllib.parse from base64 import b64encode +from typing import cast import pytest from tusclient import client from galaxy.tool_util.verify.test_data import TestDataResolver from galaxy.util import UNKNOWN +from galaxy.util.compression_utils import decompress_bytes_to_directory +from galaxy.util.hash_util import md5_hash_file from galaxy.util.unittest_utils import ( skip_if_github_down, skip_if_site_down, @@ -29,6 +33,14 @@ B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8") URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}" +EXPECTED_TAR_CONTENTS = { + "testdir": "Directory", + "testdir/c": "Directory", + "testdir/a": "File", + "testdir/b": "File", + "testdir/c/d": "File", +} + class TestToolsUpload(ApiTestCase): dataset_populator: DatasetPopulator @@ -604,18 +616,11 @@ def _check_testdir_composite(self, dataset, history_id): assert content.strip() == "Test123" extra_files = self.dataset_populator.get_history_dataset_extra_files(history_id, dataset_id=dataset["id"]) assert len(extra_files) == 5, extra_files - expected_contents = { - "testdir": "Directory", - "testdir/c": "Directory", - "testdir/a": "File", - "testdir/b": "File", - "testdir/c/d": "File", - } found_files = set() for extra_file in extra_files: path = extra_file["path"] - assert path in expected_contents - assert extra_file["class"] == expected_contents[path] + assert path in EXPECTED_TAR_CONTENTS + assert extra_file["class"] == EXPECTED_TAR_CONTENTS[path] found_files.add(path) assert len(found_files) == 5, found_files @@ -639,6 +644,75 @@ def test_upload_composite_from_bad_tar(self, history_id): details = self.dataset_populator.get_history_dataset_details(history_id, dataset=dataset, assert_ok=False) assert details["state"] == "error" + def test_upload_tar_roundtrip(self, history_id): + testdir = TestDataResolver().get_filename("testdir.tar") + expected_size = os.path.getsize(testdir) + with open(testdir, "rb") as fh: + details = self._upload_and_get_details(fh, api="fetch", history_id=history_id, assert_ok=True) + assert details["file_ext"] == "tar" + assert details["file_size"] == expected_size + content = cast( + bytes, self.dataset_populator.get_history_dataset_content(history_id, dataset=details, type="bytes") + ) + # Make sure we got the expected content size. + assert len(content) == expected_size + + # Make sure we get the expected contents. + dir_path = decompress_bytes_to_directory(content) + assert dir_path.endswith("testdir") + for path, entry_class in EXPECTED_TAR_CONTENTS.items(): + path = os.path.join(dir_path, os.path.pardir, path) + if entry_class == "Directory": + assert os.path.isdir(path) + else: + assert os.path.isfile(path) + + # Make sure the hash of the content matches the hash of the original file. 
+        expected_hash = md5_hash_file(testdir)
+        assert expected_hash is not None
+        self._assert_content_matches_hash(content, expected_hash)
+
+    def _assert_content_matches_hash(self, content: bytes, expected_hash: str):
+        with tempfile.NamedTemporaryFile("wb") as temp:
+            temp.write(content)
+            temp.flush()
+            actual_hash = md5_hash_file(temp.name)
+            assert actual_hash == expected_hash
+
+    def test_upload_zip_roundtrip(self, history_id):
+        testdir = TestDataResolver().get_filename("testdir1.zip")
+        expected_size = os.path.getsize(testdir)
+        with open(testdir, "rb") as fh:
+            details = self._upload_and_get_details(fh, api="fetch", history_id=history_id, assert_ok=True)
+        assert details["file_ext"] == "zip"
+        assert details["file_size"] == expected_size
+        content = cast(
+            bytes, self.dataset_populator.get_history_dataset_content(history_id, dataset=details, type="bytes")
+        )
+        # Make sure we got the expected content size.
+        assert len(content) == expected_size
+
+        # Make sure we get the expected contents.
+        dir_path = decompress_bytes_to_directory(content)
+        assert dir_path.endswith("testdir1")
+        EXPECTED_ZIP_CONTENTS = {
+            "file1": "File",
+            "file2": "File",
+            "dir1/": "Directory",
+            "dir1/file3": "File",
+        }
+        for path, entry_class in EXPECTED_ZIP_CONTENTS.items():
+            path = os.path.join(dir_path, path)
+            if entry_class == "Directory":
+                assert os.path.isdir(path)
+            else:
+                assert os.path.isfile(path)
+
+        # Make sure the hash of the content matches the hash of the original file.
+        expected_hash = md5_hash_file(testdir)
+        assert expected_hash is not None
+        self._assert_content_matches_hash(content, expected_hash)
+
     def test_upload_dbkey(self):
         with self.dataset_populator.test_history() as history_id:
             payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")