diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample
index 0e026b5a7b0a..a8e798143f8b 100644
--- a/lib/galaxy/config/sample/datatypes_conf.xml.sample
+++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample
@@ -288,13 +288,14 @@
-
-
-
-
+
+
-
+
+
+
+
diff --git a/lib/galaxy/datatypes/converters/archive_to_directory.xml b/lib/galaxy/datatypes/converters/archive_to_directory.xml
new file mode 100644
index 000000000000..d0c56c154c09
--- /dev/null
+++ b/lib/galaxy/datatypes/converters/archive_to_directory.xml
@@ -0,0 +1,37 @@
+
+
+
+ galaxy-util
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
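The new archive_to_directory converter depends on the galaxy-util package and, like the existing tar_to_directory converter below, is expected to unpack the uploaded archive into the output dataset's extra-files path ($output1.files_path) so the result can be handled as a "directory" dataset. As a rough, standalone Python sketch of that operation only (the function name, paths, and the use of shutil are illustrative assumptions, not the converter's actual command line):

import os
import shutil


def unpack_archive_to_directory(archive_path: str, files_path: str, archive_format: str = "tar") -> None:
    """Extract archive_path into files_path, creating the directory if needed."""
    os.makedirs(files_path, exist_ok=True)
    # Galaxy dataset files typically lack a format-revealing extension,
    # so the archive format is passed explicitly rather than auto-detected.
    shutil.unpack_archive(archive_path, extract_dir=files_path, format=archive_format)


# Example (hypothetical paths):
# unpack_archive_to_directory("/tmp/upload.dat", "/data/dataset_1_files", "zip")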
diff --git a/lib/galaxy/datatypes/converters/tar_to_directory.xml b/lib/galaxy/datatypes/converters/tar_to_directory.xml
index 59354b39b5fd..0160746283da 100644
--- a/lib/galaxy/datatypes/converters/tar_to_directory.xml
+++ b/lib/galaxy/datatypes/converters/tar_to_directory.xml
@@ -1,7 +1,7 @@
- galaxy-util
+ galaxy-util
mkdir '$output1.files_path';
diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index 0024adea6155..e5fea2612ace 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -465,6 +465,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
composite_extensions.append("html") # for archiving composite datatypes
composite_extensions.append("data_manager_json") # for downloading bundles if bundled.
+ composite_extensions.append("directory") # for downloading directories.
if data.extension in composite_extensions:
return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get("do_action", "zip"))
@@ -1212,6 +1213,18 @@ def regex_line_dataprovider(
class Directory(Data):
"""Class representing a directory of files."""
+ file_ext = "directory"
+
+ def _archive_main_file(
+ self, archive: ZipstreamWrapper, display_name: str, data_filename: str
+ ) -> Tuple[bool, str, str]:
+ """Overwrites the method to not do anything.
+
+ No main file gets added to a directory archive.
+ """
+ error, msg, messagetype = False, "", ""
+ return error, msg, messagetype
+
class GenericAsn1(Text):
"""Class for generic ASN.1 text format"""
diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py
index ed32bc92caf0..15d0198f9c31 100644
--- a/lib/galaxy_test/api/test_tools_upload.py
+++ b/lib/galaxy_test/api/test_tools_upload.py
@@ -1,13 +1,17 @@
import json
import os
+import tempfile
import urllib.parse
from base64 import b64encode
+from typing import cast
import pytest
from tusclient import client
from galaxy.tool_util.verify.test_data import TestDataResolver
from galaxy.util import UNKNOWN
+from galaxy.util.compression_utils import decompress_bytes_to_directory
+from galaxy.util.hash_util import md5_hash_file
from galaxy.util.unittest_utils import (
skip_if_github_down,
skip_if_site_down,
@@ -29,6 +33,14 @@
B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8")
URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"
+EXPECTED_TAR_CONTENTS = {
+ "testdir": "Directory",
+ "testdir/c": "Directory",
+ "testdir/a": "File",
+ "testdir/b": "File",
+ "testdir/c/d": "File",
+}
+
class TestToolsUpload(ApiTestCase):
dataset_populator: DatasetPopulator
@@ -604,18 +616,11 @@ def _check_testdir_composite(self, dataset, history_id):
assert content.strip() == "Test123"
extra_files = self.dataset_populator.get_history_dataset_extra_files(history_id, dataset_id=dataset["id"])
assert len(extra_files) == 5, extra_files
- expected_contents = {
- "testdir": "Directory",
- "testdir/c": "Directory",
- "testdir/a": "File",
- "testdir/b": "File",
- "testdir/c/d": "File",
- }
found_files = set()
for extra_file in extra_files:
path = extra_file["path"]
- assert path in expected_contents
- assert extra_file["class"] == expected_contents[path]
+ assert path in EXPECTED_TAR_CONTENTS
+ assert extra_file["class"] == EXPECTED_TAR_CONTENTS[path]
found_files.add(path)
assert len(found_files) == 5, found_files
@@ -639,6 +644,75 @@ def test_upload_composite_from_bad_tar(self, history_id):
details = self.dataset_populator.get_history_dataset_details(history_id, dataset=dataset, assert_ok=False)
assert details["state"] == "error"
+ def test_upload_tar_roundtrip(self, history_id):
+ testdir = TestDataResolver().get_filename("testdir.tar")
+ expected_size = os.path.getsize(testdir)
+ with open(testdir, "rb") as fh:
+ details = self._upload_and_get_details(fh, api="fetch", history_id=history_id, assert_ok=True)
+ assert details["file_ext"] == "tar"
+ assert details["file_size"] == expected_size
+ content = cast(
+ bytes, self.dataset_populator.get_history_dataset_content(history_id, dataset=details, type="bytes")
+ )
+ # Make sure we got the expected content size.
+ assert len(content) == expected_size
+
+ # Make sure we get the expected contents.
+ dir_path = decompress_bytes_to_directory(content)
+ assert dir_path.endswith("testdir")
+ for path, entry_class in EXPECTED_TAR_CONTENTS.items():
+ path = os.path.join(dir_path, os.path.pardir, path)
+ if entry_class == "Directory":
+ assert os.path.isdir(path)
+ else:
+ assert os.path.isfile(path)
+
+ # Make sure the hash of the content matches the hash of the original file.
+ expected_hash = md5_hash_file(testdir)
+ assert expected_hash is not None
+ self._assert_content_matches_hash(content, expected_hash)
+
+ def _assert_content_matches_hash(self, content: bytes, expected_hash: str):
+ with tempfile.NamedTemporaryFile("wb") as temp:
+ temp.write(content)
+ temp.flush()
+ actual_hash = md5_hash_file(temp.name)
+ assert actual_hash == expected_hash
+
+ def test_upload_zip_roundtrip(self, history_id):
+ testdir = TestDataResolver().get_filename("testdir1.zip")
+ expected_size = os.path.getsize(testdir)
+ with open(testdir, "rb") as fh:
+ details = self._upload_and_get_details(fh, api="fetch", history_id=history_id, assert_ok=True)
+ assert details["file_ext"] == "zip"
+ assert details["file_size"] == expected_size
+ content = cast(
+ bytes, self.dataset_populator.get_history_dataset_content(history_id, dataset=details, type="bytes")
+ )
+ # Make sure we got the expected content size.
+ assert len(content) == expected_size
+
+ # Make sure we get the expected contents.
+ dir_path = decompress_bytes_to_directory(content)
+ assert dir_path.endswith("testdir1")
+ EXPECTED_ZIP_CONTENTS = {
+ "file1": "File",
+ "file2": "File",
+ "dir1/": "Directory",
+ "dir1/file3": "File",
+ }
+ for path, entry_class in EXPECTED_ZIP_CONTENTS.items():
+ path = os.path.join(dir_path, path)
+ if entry_class == "Directory":
+ assert os.path.isdir(path)
+ else:
+ assert os.path.isfile(path)
+
+ # Make sure the hash of the content matches the hash of the original file.
+ expected_hash = md5_hash_file(testdir)
+ assert expected_hash is not None
+ self._assert_content_matches_hash(content, expected_hash)
+
def test_upload_dbkey(self):
with self.dataset_populator.test_history() as history_id:
payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")
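A note on the hash check used by the new roundtrip tests: _assert_content_matches_hash writes the downloaded bytes to a temporary file so it can reuse md5_hash_file. An equivalent in-memory check (an illustrative alternative, not what the tests above use) hashes the bytes directly:

import hashlib


def assert_content_matches_hash(content: bytes, expected_hash: str) -> None:
    # Hash the downloaded bytes directly instead of going through a temp file.
    assert hashlib.md5(content).hexdigest() == expected_hash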