Commit c6a10d6

Merge pull request #17614 from wm75/archive-to-dir-converter

Improve usability of Directory datatype

bgruening authored Oct 30, 2024
2 parents 339724e + 14eb0c3
Showing 5 changed files with 140 additions and 15 deletions.
11 changes: 6 additions & 5 deletions lib/galaxy/config/sample/datatypes_conf.xml.sample
@@ -288,13 +288,14 @@
         <display file="qiime/qiime2/q2view.xml"/>
     </datatype>
     <datatype extension='qiime2.tabular' type="galaxy.datatypes.qiime2:QIIME2Metadata" display_in_upload="true"/>
-    <datatype extension="zip" type="galaxy.datatypes.binary:CompressedZipArchive" display_in_upload="true"/>
-    <datatype extension="ncbi_genome_dataset.zip" type="galaxy.datatypes.binary:CompressedZipArchive" subclass="true" display_in_upload="true"/>
-    <datatype extension="tar" type="galaxy.datatypes.binary:CompressedArchive" subclass="true" display_in_upload="true">
-        <converter file="tar_to_directory.xml" target_datatype="directory"/>
+    <datatype extension="zip" type="galaxy.datatypes.binary:CompressedZipArchive" display_in_upload="true">
+        <converter file="archive_to_directory.xml" target_datatype="directory" />
     </datatype>
-    <datatype extension="directory" type="galaxy.datatypes.data:Directory">
+    <datatype extension="ncbi_genome_dataset.zip" type="galaxy.datatypes.binary:CompressedZipArchive" subclass="true" display_in_upload="true"/>
+    <datatype extension="tar" auto_compressed_types="gz,bz2" type="galaxy.datatypes.binary:CompressedArchive" subclass="true" display_in_upload="true">
+        <converter file="archive_to_directory.xml" target_datatype="directory"/>
+    </datatype>
     <datatype extension="directory" type="galaxy.datatypes.data:Directory"/>
     <datatype extension="yaml" type="galaxy.datatypes.text:Yaml" display_in_upload="true" />
     <!-- Proteomics Datatypes -->
     <datatype extension="mrm" type="galaxy.datatypes.tabular:Tabular" display_in_upload="true" subclass="true"/>
37 changes: 37 additions & 0 deletions lib/galaxy/datatypes/converters/archive_to_directory.xml
@@ -0,0 +1,37 @@
<tool id="CONVERTER_archive_to_directory" name="Unpack archive to directory" version="1.0.0" profile="21.09">
<!-- Use compression_utils instead of shell commands (tar/unzip) so we can verify safety of results -->
<requirements>
<requirement type="package" version="23.2.1">galaxy-util</requirement>
</requirements>
<command><![CDATA[
mkdir '$output1.files_path' &&
cd '$output1.files_path' &&
python -c "from galaxy.util.compression_utils import CompressedFile; CompressedFile('$input1').extract('.')"
]]></command>
<configfiles>
<configfile filename="metadata_json"><![CDATA[{
"output1": {
"name": "$input1.name unpacked to $__target_datatype__",
"ext": "$__target_datatype__"
}
}]]></configfile>
</configfiles>
<inputs>
<param format="tar,zip" name="input1" type="data"/>
<param name="__target_datatype__" type="select" label="Target data type">
<option value="directory">directory</option>
</param>
</inputs>
<outputs provided_metadata_file="metadata_json">
<data format="auto" name="output1"/>
</outputs>
<tests>
<test>
<param name="input1" ftype="tar" value="testdir1.tar"/>
<param name="__target_datatype__" value="directory"/>
<output name="output1" ftype="directory" value="testdir1.tar.directory"/>
</test>
</tests>
<help>
</help>
</tool>
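
Both converters deliberately route extraction through galaxy.util.compression_utils instead of shelling out to tar/unzip, so archive members can be vetted before anything is written to disk. As a minimal sketch of the kind of guard that motivates this (illustrative only; safe_extract_tar is a hypothetical helper, not Galaxy's CompressedFile implementation):

import os
import tarfile


def safe_extract_tar(archive_path: str, dest: str) -> None:
    # Resolve the destination once so member paths can be compared against it.
    dest = os.path.realpath(dest)
    with tarfile.open(archive_path) as tar:
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(dest, member.name))
            # Reject members that would escape dest (e.g. "../../etc/passwd").
            if os.path.commonpath([dest, target]) != dest:
                raise ValueError(f"unsafe path in archive: {member.name}")
            # Links can alias paths outside dest even when their own name looks safe.
            if member.issym() or member.islnk():
                raise ValueError(f"link member not allowed: {member.name}")
        tar.extractall(dest)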
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/converters/tar_to_directory.xml
@@ -1,7 +1,7 @@
<tool id="CONVERTER_tar_to_directory" name="Convert tar to directory" version="1.0.1" profile="17.05">
<!-- Don't use tar directly so we can verify safety of results - tar -xzf '$input1'; -->
<requirements>
<requirement type="package" version="23.1">galaxy-util</requirement>
<requirement type="package" version="23.2.1">galaxy-util</requirement>
</requirements>
<command>
mkdir '$output1.files_path';
13 changes: 13 additions & 0 deletions lib/galaxy/datatypes/data.py
@@ -465,6 +465,7 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
         composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
         composite_extensions.append("html")  # for archiving composite datatypes
         composite_extensions.append("data_manager_json")  # for downloading bundles if bundled.
+        composite_extensions.append("directory")  # for downloading directories.

         if data.extension in composite_extensions:
             return self._archive_composite_dataset(trans, data, headers, do_action=kwd.get("do_action", "zip"))
@@ -1212,6 +1213,18 @@ def regex_line_dataprovider(
 class Directory(Data):
     """Class representing a directory of files."""

+    file_ext = "directory"
+
+    def _archive_main_file(
+        self, archive: ZipstreamWrapper, display_name: str, data_filename: str
+    ) -> Tuple[bool, str, str]:
+        """Overrides the base method to do nothing.
+        No main file gets added to a directory archive.
+        """
+        error, msg, messagetype = False, "", ""
+        return error, msg, messagetype
+
+
 class GenericAsn1(Text):
     """Class for generic ASN.1 text format"""
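Taken together, the data.py changes make a directory dataset download as an archive of its extra files: "directory" joins the composite extensions, so _serve_file_download routes it through _archive_composite_dataset, and the _archive_main_file override keeps a redundant primary file out of the result. A hedged sketch of that calling pattern (paraphrased; archive.write and the extra_files_path walk here are illustrative, not Galaxy's exact code):

import os


def archive_composite_sketch(datatype, archive, display_name, data_filename, extra_files_path):
    # Most composite datatypes add their primary file to the archive first.
    error, msg, messagetype = datatype._archive_main_file(archive, display_name, data_filename)
    if error:
        return msg, messagetype
    # Directory._archive_main_file reports success without adding anything,
    # so the archive ends up holding only the directory contents walked below.
    for root, _dirs, files in os.walk(extra_files_path):
        for name in files:
            path = os.path.join(root, name)
            archive.write(path, os.path.relpath(path, extra_files_path))
    return "", ""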
92 changes: 83 additions & 9 deletions lib/galaxy_test/api/test_tools_upload.py
@@ -1,13 +1,17 @@
 import json
+import os
+import tempfile
 import urllib.parse
 from base64 import b64encode
+from typing import cast

 import pytest
 from tusclient import client

 from galaxy.tool_util.verify.test_data import TestDataResolver
 from galaxy.util import UNKNOWN
 from galaxy.util.compression_utils import decompress_bytes_to_directory
+from galaxy.util.hash_util import md5_hash_file
 from galaxy.util.unittest_utils import (
     skip_if_github_down,
     skip_if_site_down,
@@ -29,6 +33,14 @@
 B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8")
 URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"

+EXPECTED_TAR_CONTENTS = {
+    "testdir": "Directory",
+    "testdir/c": "Directory",
+    "testdir/a": "File",
+    "testdir/b": "File",
+    "testdir/c/d": "File",
+}
+

 class TestToolsUpload(ApiTestCase):
     dataset_populator: DatasetPopulator
@@ -604,18 +616,11 @@ def _check_testdir_composite(self, dataset, history_id):
         assert content.strip() == "Test123"
         extra_files = self.dataset_populator.get_history_dataset_extra_files(history_id, dataset_id=dataset["id"])
         assert len(extra_files) == 5, extra_files
-        expected_contents = {
-            "testdir": "Directory",
-            "testdir/c": "Directory",
-            "testdir/a": "File",
-            "testdir/b": "File",
-            "testdir/c/d": "File",
-        }
         found_files = set()
         for extra_file in extra_files:
             path = extra_file["path"]
-            assert path in expected_contents
-            assert extra_file["class"] == expected_contents[path]
+            assert path in EXPECTED_TAR_CONTENTS
+            assert extra_file["class"] == EXPECTED_TAR_CONTENTS[path]
             found_files.add(path)

         assert len(found_files) == 5, found_files
@@ -639,6 +644,75 @@ def test_upload_composite_from_bad_tar(self, history_id):
         details = self.dataset_populator.get_history_dataset_details(history_id, dataset=dataset, assert_ok=False)
         assert details["state"] == "error"

+    def test_upload_tar_roundtrip(self, history_id):
+        testdir = TestDataResolver().get_filename("testdir.tar")
+        expected_size = os.path.getsize(testdir)
+        with open(testdir, "rb") as fh:
+            details = self._upload_and_get_details(fh, api="fetch", history_id=history_id, assert_ok=True)
+        assert details["file_ext"] == "tar"
+        assert details["file_size"] == expected_size
+        content = cast(
+            bytes, self.dataset_populator.get_history_dataset_content(history_id, dataset=details, type="bytes")
+        )
+        # Make sure we got the expected content size.
+        assert len(content) == expected_size
+
+        # Make sure we get the expected contents.
+        dir_path = decompress_bytes_to_directory(content)
+        assert dir_path.endswith("testdir")
+        for path, entry_class in EXPECTED_TAR_CONTENTS.items():
+            path = os.path.join(dir_path, os.path.pardir, path)
+            if entry_class == "Directory":
+                assert os.path.isdir(path)
+            else:
+                assert os.path.isfile(path)
+
+        # Make sure the hash of the content matches the hash of the original file.
+        expected_hash = md5_hash_file(testdir)
+        assert expected_hash is not None
+        self._assert_content_matches_hash(content, expected_hash)
+
+    def _assert_content_matches_hash(self, content: bytes, expected_hash: str):
+        with tempfile.NamedTemporaryFile("wb") as temp:
+            temp.write(content)
+            temp.flush()
+            actual_hash = md5_hash_file(temp.name)
+            assert actual_hash == expected_hash
+
+    def test_upload_zip_roundtrip(self, history_id):
+        testdir = TestDataResolver().get_filename("testdir1.zip")
+        expected_size = os.path.getsize(testdir)
+        with open(testdir, "rb") as fh:
+            details = self._upload_and_get_details(fh, api="fetch", history_id=history_id, assert_ok=True)
+        assert details["file_ext"] == "zip"
+        assert details["file_size"] == expected_size
+        content = cast(
+            bytes, self.dataset_populator.get_history_dataset_content(history_id, dataset=details, type="bytes")
+        )
+        # Make sure we got the expected content size.
+        assert len(content) == expected_size
+
+        # Make sure we get the expected contents.
+        dir_path = decompress_bytes_to_directory(content)
+        assert dir_path.endswith("testdir1")
+        EXPECTED_ZIP_CONTENTS = {
+            "file1": "File",
+            "file2": "File",
+            "dir1/": "Directory",
+            "dir1/file3": "File",
+        }
+        for path, entry_class in EXPECTED_ZIP_CONTENTS.items():
+            path = os.path.join(dir_path, path)
+            if entry_class == "Directory":
+                assert os.path.isdir(path)
+            else:
+                assert os.path.isfile(path)
+
+        # Make sure the hash of the content matches the hash of the original file.
+        expected_hash = md5_hash_file(testdir)
+        assert expected_hash is not None
+        self._assert_content_matches_hash(content, expected_hash)
+
     def test_upload_dbkey(self):
         with self.dataset_populator.test_history() as history_id:
             payload = self.dataset_populator.upload_payload(history_id, "Test123", dbkey="hg19")