Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic options: add data table filter #12941

Open
wants to merge 21 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e66f4bb
add data table filter
bernt-matthias Nov 17, 2021
26043cd
add more linting for filters
bernt-matthias Nov 18, 2021
2470e46
use test/functional/tool-data/ as tool-data dir in tests
bernt-matthias Nov 18, 2021
d7c99a0
forbid from_file to reference files outside of tool-data
bernt-matthias Nov 18, 2021
64831e0
fix linter and syntax errors
bernt-matthias Nov 18, 2021
5b7df0f
add link to tool-data/shared in test tool-data
bernt-matthias Nov 19, 2021
bce9300
remove debug messages
bernt-matthias Nov 19, 2021
95bbd88
fix rebase mistake
bernt-matthias Jun 6, 2023
8633181
one more rebase error
bernt-matthias Jun 6, 2023
d199a27
add context to new linter messages
bernt-matthias Jun 6, 2023
f6d9ca9
Merge branch 'dev' into topic/data-table-filter
bernt-matthias Feb 16, 2024
38e60d6
add missing import
bernt-matthias Feb 16, 2024
c1bb7b5
update black formatting
bernt-matthias Feb 16, 2024
840ebe0
Merge branch 'dev' into topic/data-table-filter
bernt-matthias Feb 17, 2024
c1091c1
update linters and tests to new style
bernt-matthias Feb 17, 2024
967db5b
Merge branch 'dev' into topic/data-table-filter
bernt-matthias Oct 22, 2024
e8a612b
remove unnecessary list comprehensions
bernt-matthias Oct 22, 2024
a664638
fix number of tests
bernt-matthias Oct 22, 2024
536f2cf
Merge branch 'dev' into topic/data-table-filter
bernt-matthias Nov 6, 2024
cde25ed
Update test/unit/tool_util/test_tool_linters.py
bernt-matthias Nov 6, 2024
d206795
Merge branch 'dev' into topic/data-table-filter
bernt-matthias Nov 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 185 additions & 1 deletion lib/galaxy/tool_util/linters/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import ast
import re
import warnings
from copy import deepcopy
from typing import (
Iterator,
Optional,
Expand Down Expand Up @@ -130,7 +131,6 @@
]

# TODO lint for valid param type - attribute combinations
# TODO check if dataset is available for filters referring other datasets
# TODO check if ref input param is present for from_dataset


Expand Down Expand Up @@ -490,6 +490,190 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
)


# Attributes that must be present on a filter, keyed by filter type.
# Every filter requires ``type``, so only the type specific attributes are
# spelled out below and ``type`` is prepended for each entry. The values are
# lists (not tuples) because copies of them are extended further down when
# FILTER_ALLOWED_ATTRIBUTES is built.
FILTER_REQUIRED_ATTRIBUTES = {
    filter_type: ["type", *specific_attributes]
    for filter_type, specific_attributes in {
        "data_meta": ("ref", "key"),  # column needs special treatment
        "param_value": ("ref", "column"),
        "static_value": ("column", "value"),
        "regexp": ("column", "value"),
        "unique_value": ("column",),
        "multiple_splitter": ("column",),
        "attribute_value_splitter": ("column",),
        "add_value": ("value",),
        # this is handled separately in InputsOptionsRemoveValueFilterRequiredAttributes
        "remove_value": (),
        "sort_by": ("column",),
        "data_table": ("column", "table_name", "data_table_column"),
    }.items()
}


class InputsOptionsFiltersRequiredAttributes(Linter):
    """
    Check the required attributes of filters.

    For each ``options/filter`` element of a parameter an error is reported
    for every attribute listed in ``FILTER_REQUIRED_ATTRIBUTES`` for the
    filter's type that is not set on the element. Filters without a ``type``
    or with a type that has no entry in that table are skipped.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return
        for param, param_name in _iter_param(tool_xml):
            if param.find("./options") is None:
                continue
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                # Consult the table that is actually read below, so a filter
                # type missing from it is skipped instead of raising KeyError.
                required_attributes = FILTER_REQUIRED_ATTRIBUTES.get(filter_type)
                if required_attributes is None:
                    continue
                for attrib in required_attributes:
                    if attrib not in filter_elem.attrib:
                        lint_ctx.error(
                            f"Select parameter [{param_name}] '{filter_type}' filter misses required attribute '{attrib}'",
                            node=filter_elem,
                        )


class InputsOptionsRemoveValueFilterRequiredAttributes(Linter):
    """
    Check the attribute combination used by ``remove_value`` filters.

    Exactly one of the following combinations of the attributes ``value``,
    ``ref``, ``meta_ref`` and ``key`` must be set: only ``value``; only
    ``ref``; or ``meta_ref`` together with ``key``. Any other combination is
    reported as an error.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        # attributes that select what is removed and the only valid ways of combining them
        selector_attributes = ("value", "ref", "meta_ref", "key")
        valid_combinations = ({"value"}, {"ref"}, {"meta_ref", "key"})

        for param, param_name in _iter_param(tool_xml):
            if param.find("./options") is None:
                continue
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                # only remove_value filters need this extra logic
                if filter_type != "remove_value":
                    continue
                present = {attrib for attrib in selector_attributes if attrib in filter_elem.attrib}
                if present in valid_combinations:
                    continue
                lint_ctx.error(
                    f"Select parameter [{param_name}] '{filter_type}'' filter needs either the 'value'; 'ref'; or 'meta' and 'key' attribute(s)",
                    node=filter_elem,
                )


# Attributes that may be present on a filter, keyed by filter type: the
# required attributes plus the optional ones listed here. The deep copy makes
# sure that extending the lists leaves FILTER_REQUIRED_ATTRIBUTES untouched.
FILTER_ALLOWED_ATTRIBUTES = deepcopy(FILTER_REQUIRED_ATTRIBUTES)
for _filter_type, _optional_attributes in (
    ("static_value", ["keep"]),
    ("regexp", ["keep"]),
    ("data_meta", ["column", "multiple", "separator"]),
    ("param_value", ["keep", "ref_attribute"]),
    ("multiple_splitter", ["separator"]),
    ("attribute_value_splitter", ["pair_separator", "name_val_separator"]),
    ("add_value", ["name", "index"]),
    ("remove_value", ["value", "ref", "meta_ref", "key"]),
    ("data_table", ["keep"]),
):
    FILTER_ALLOWED_ATTRIBUTES[_filter_type].extend(_optional_attributes)
# do not leave the loop helpers behind in the module namespace
del _filter_type, _optional_attributes


class InputsOptionsFiltersAllowedAttributes(Linter):
    """
    Check that filters only use attributes allowed for their type.

    For each ``options/filter`` element of a parameter a warning is emitted
    for every attribute that is not listed in ``FILTER_ALLOWED_ATTRIBUTES``
    for the filter's type. Filters without a ``type`` or with a type that has
    no entry in that table are skipped.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        for param, param_name in _iter_param(tool_xml):
            if param.find("./options") is None:
                continue
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                allowed_attributes = FILTER_ALLOWED_ATTRIBUTES.get(filter_type)
                if allowed_attributes is None:
                    # no type given or nothing known about this filter type
                    continue
                unexpected = [attrib for attrib in filter_elem.attrib if attrib not in allowed_attributes]
                for attrib in unexpected:
                    lint_ctx.warn(
                        f"Select parameter [{param_name}] '{filter_type}' filter specifies unnecessary attribute '{attrib}'",
                        node=filter_elem,
                    )


class InputsOptionsRegexFilterExpression(Linter):
    """
    Check that the ``value`` of ``regexp`` filters compiles.

    The ``value`` attribute of each ``regexp`` filter is passed to
    ``re.compile``; an error is reported if this raises ``re.error``.
    ``regexp`` filters without a ``value`` attribute are not checked here.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        for param, param_name in _iter_param(tool_xml):
            if param.find("./options") is None:
                continue
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                if filter_type != "regexp" or "value" not in filter_elem.attrib:
                    continue
                expression = filter_elem.attrib["value"]
                try:
                    re.compile(expression)
                except re.error as re_error:
                    lint_ctx.error(
                        f"Select parameter [{param_name}] '{filter_type}'' filter 'value' is not a valid regular expression ({re_error})'",
                        node=filter_elem,
                    )


class InputsOptionsFiltersCheckReferences(Linter):
    """
    Check that parameters referenced by filters exist.

    For every filter that has a ``type`` the values of the ``meta_ref`` and
    ``ref`` attributes (if set) are compared with the names of the parameters
    yielded by ``_iter_param``; an error is reported for unknown names.
    """

    @classmethod
    def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
        tool_xml = getattr(tool_source, "xml_tree", None)
        if not tool_xml:
            return

        # collect the names of all parameters first, references may point to any of them
        param_names = set()
        for _, known_name in _iter_param(tool_xml):
            param_names.add(known_name)

        for param, param_name in _iter_param(tool_xml):
            if param.find("./options") is None:
                continue
            for filter_elem in param.findall("./options/filter"):
                filter_type = filter_elem.get("type", None)
                if filter_type is None:
                    continue
                # check for references to other inputs
                # TODO: currently ref and metaref seem only to work for top level params,
                #       once this is fixed the linter needs to be extended, e.g. `f.attrib[ref_attrib].split('|')[-1]`
                for ref_attrib in ("meta_ref", "ref"):
                    if ref_attrib not in filter_elem.attrib:
                        continue
                    if filter_elem.attrib[ref_attrib] in param_names:
                        continue
                    lint_ctx.error(
                        f"Select parameter [{param_name}] '{filter_type}'' filter attribute '{ref_attrib}' refers to non existing parameter '{filter_elem.attrib[ref_attrib]}'",
                        node=filter_elem,
                    )


class InputsDataOptionsFiltersRef(Linter):
"""
Lint for set ref for filters of data parameters
Expand Down
18 changes: 16 additions & 2 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -5646,8 +5646,9 @@ Currently the following filters are defined:
* ``data_meta`` populate or filter options based on the metadata of another input parameter specified by ``ref``. If a ``column`` is given options are filtered for which the entry in this column ``column`` is equal to metadata of the input parameter specified by ``ref``.
If no ``column`` is given the metadata value of the referenced input is added to the options list (in this case the corresponding ``options`` tag must not have the ``from_data_table`` or ``from_dataset`` attributes).
In both cases the desired metadata is selected by ``key``.
* ``data_table`` filter options according to the entries of a data table. By default (``keep`` is ``true``) only options whose value in ``column`` appears in column ``data_table_column`` of the data table ``table_name`` are kept. Setting ``keep`` to ``false`` removes these options instead.

The ``static_value`` and ``regexp`` filters can be inverted by setting ``keep`` to true.
The ``static_value``, ``regexp``, and ``data_table`` filters can be inverted by setting ``keep`` to true.

* ``add_value``: add an option with a given ``name`` and ``value`` to the options. By default, the new option is appended, with ``index`` the insertion position can be specified.
* ``remove_value``: remove a value from the options. Either specified explicitly with ``value``, the value of another input specified with ``ref``, or the metadata ``key`` of another input ``meta_ref``.
Expand Down Expand Up @@ -5821,7 +5822,7 @@ only used if ``multiple`` is set to ``true``.]]></xs:documentation>
<xs:annotation>
<xs:documentation xml:lang="en">If ``true``, keep columns matching the
value, if ``false`` discard columns matching the value. Used when ``type`` is
either ``static_value``, ``regexp`` or ``param_value``. Default: true</xs:documentation>
either ``static_value``, ``regexp``, ``param_value`` or ``data_table``. Default: true.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="value" type="xs:string">
Expand Down Expand Up @@ -5870,6 +5871,18 @@ from the list.</xs:documentation>
<xs:documentation xml:lang="en">Only used if ``type`` is ``attribute_value_splitter``. This is used to separate attributes and values from each other within an attribute-value pair, i.e. ``=`` if the target content is ``A=V; B=W; C=Y``. Defaults to whitespace.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="table_name" type="xs:string">
<xs:annotation>
<xs:documentation xml:lang="en">Only used when ``type`` is
``data_table``. The name of the data table to use.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="data_table_column" type="xs:string">
<xs:annotation>
<xs:documentation xml:lang="en">Only used when ``type`` is
``data_table``. The column of the data table to use (0 based index or column name).</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:complexType>
<xs:complexType name="Outputs">
<xs:annotation>
Expand Down Expand Up @@ -7922,6 +7935,7 @@ and ``bibtex`` are the only supported options.</xs:documentation>
<xs:enumeration value="add_value"/>
<xs:enumeration value="remove_value"/>
<xs:enumeration value="sort_by"/>
<xs:enumeration value="data_table"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="ActionsConditionalFilterType">
Expand Down
65 changes: 63 additions & 2 deletions lib/galaxy/tools/parameters/dynamic_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
)
from galaxy.util import (
Element,
safe_contains,
string_as_bool,
)
from galaxy.util.template import fill_template
Expand Down Expand Up @@ -537,6 +538,61 @@ def filter_options(self, options, trans, other_values):
return sorted(options, key=lambda x: x[self.column], reverse=self.reverse)


class DataTableFilter(Filter):
    """
    Filters a list of options by entries present in a data table, i.e.
    option[column] needs to be in the specified data table column

    Type: data_table

    Required Attributes:

        - column: column in options to compare with
        - table_name: data table to use
        - data_table_column: data table column to use (column name or 0 based index)

    Optional Attributes:

        - keep: Keep options where option[column] is in the data table column (True)
                Discard options where option[column] is in the data table column (False)

    """

    def __init__(self, d_option, elem):
        Filter.__init__(self, d_option, elem)
        self.table_name = elem.get("table_name", None)
        assert self.table_name is not None, "Required 'table_name' attribute missing from filter"
        column = elem.get("column", None)
        assert column is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index(column)
        self.data_table_column = elem.get("data_table_column", None)
        assert self.data_table_column is not None, "Required 'data_table_column' attribute missing from filter"
        self.keep = string_as_bool(elem.get("keep", "True"))

    def filter_options(self, options, trans, other_values):
        """
        Return the options whose value in ``self.column`` is (``keep`` true)
        or is not (``keep`` false) contained in the data table column.

        If the data table column can not be read an error is logged and the
        options are returned unfiltered.
        """
        entries = self._get_data_table_entries(trans)
        if entries is None:
            log.error(f"could not get data from column {self.data_table_column} from data_table {self.table_name}")
            return options
        return [o for o in options if self.keep == (o[self.column] in entries)]

    def _get_data_table_entries(self, trans):
        """
        Return the set of values found in ``self.data_table_column`` of the
        data table ``self.table_name`` or ``None`` if they can not be determined.
        """
        try:
            data_table = trans.app.tool_data_tables[self.table_name]
        except KeyError:
            # an unknown table is treated like an unknown column
            # (assumes the lookup raises KeyError for unknown table names)
            return None
        # A matching column name takes precedence over an interpretation as index.
        try:
            return {fields[self.data_table_column] for fields in data_table.get_named_fields_list()}
        except KeyError:
            pass
        # Not a column name: interpret the specification as 0 based index.
        # int() raises ValueError for non-numeric strings and indexing raises
        # IndexError if a row has fewer columns than requested.
        try:
            column_index = int(self.data_table_column)
            return {fields[column_index] for fields in data_table.get_fields()}
        except (ValueError, IndexError):
            return None


filter_types = dict(
data_meta=DataMetaFilter,
param_value=ParamValueFilter,
Expand All @@ -548,6 +604,7 @@ def filter_options(self, options, trans, other_values):
add_value=AdditionalValueFilter,
remove_value=RemoveValueFilter,
sort_by=SortByColumnFilter,
data_table=DataTableFilter,
)


Expand Down Expand Up @@ -599,14 +656,18 @@ def load_from_parameter(from_parameter, transform_lines=None):
self.parse_column_definitions(elem)
if data_file is not None:
data_file = data_file.strip()
if not os.path.isabs(data_file):
full_path = os.path.join(self.tool_param.tool.app.config.tool_data_path, data_file)
full_path = os.path.join(self.tool_param.tool.app.config.tool_data_path, data_file)
full_path = os.path.normpath(full_path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the normpath call necessary here ? safe_contains should be smart enough to figure this out, and joining with cwd for non-absolute paths does not seem right (that seems to be what normpath does internally).

if safe_contains(self.tool_param.tool.app.config.tool_data_path, full_path):
if os.path.exists(full_path):
self.index_file = data_file
with open(full_path) as fh:
self.file_fields = self.parse_file_fields(fh)
else:
self.missing_index_file = data_file
else:
log.error(f"'from_file' ({data_file}) references path outside of Galaxy's tool-data dir!")
self.missing_index_file = data_file
elif dataset_file is not None:
self.meta_file_key = elem.get("meta_file_key", None)
self.dataset_ref_name = dataset_file
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy_test/driver/driver_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ def setup_galaxy_config(
data_manager_config_file = _resolve_relative_config_paths(data_manager_config_file)
tool_config_file = _resolve_relative_config_paths(tool_conf)
tool_data_table_config_path = _resolve_relative_config_paths(tool_data_table_config_path)

config = dict(
admin_users="[email protected]",
allow_library_path_paste=True,
Expand Down Expand Up @@ -224,6 +223,7 @@ def setup_galaxy_config(
running_functional_tests=True,
template_cache_path=template_cache_path,
tool_config_file=tool_config_file,
tool_data_path="test/functional/tool-data/",
tool_data_table_config_path=tool_data_table_config_path,
tool_path=tool_path,
update_integrated_tool_panel=update_integrated_tool_panel,
Expand Down
1 change: 1 addition & 0 deletions test/functional/tool-data/shared
2 changes: 2 additions & 0 deletions test/functional/tool-data/test_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
hg19_value hg19 hg19_name hg19_path
absent_value absent absent_name absent_path
Loading
Loading