Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/read bad icephys table #919

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# HDMF Changelog

## HDMF 3.8.2 (Upcoming)

### Bug fixes
- Relaxed error checking in the `DynamicTable.__init__` and `AlginedDynamicTable.__init__` when reading from disk to help users access bad data files. @oruebel [#919](https://github.com/hdmf-dev/hdmf/pull/919)

## HDMF 3.8.1 (July 25, 2023)

### Bug fixes
Expand Down
68 changes: 51 additions & 17 deletions src/hdmf/common/alignedtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import numpy as np
import pandas as pd
import warnings

from . import register_class
from .table import DynamicTable
Expand Down Expand Up @@ -51,12 +52,27 @@
in_categories = [tab.name for tab in in_category_tables]
# check that if categories is given that we also have category_tables
if in_categories is not None and in_category_tables is None:
raise ValueError("Categories provided but no category_tables given")
if self._in_construct_mode: # Relax error checking when reading from file
warnings.warn("Custom categories=%s specified for %s name=%s but no category_tables"

Check warning on line 56 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L56

Added line #L56 was not covered by tests
"were give" % (str(in_categories), str(type(self)), self.name),
RuntimeWarning)
in_categories = None

Check warning on line 59 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L59

Added line #L59 was not covered by tests
else:
raise ValueError("Categories provided but no category_tables given")
# at this point both in_categories and in_category_tables should either both be None or both be a list
if in_categories is not None:
if len(in_categories) != len(in_category_tables):
raise ValueError("%s category_tables given but %s categories specified" %
(len(in_category_tables), len(in_categories)))
if self._in_construct_mode: # Relax error checking when reading from file
default_categories = [t.name for t in in_category_tables]
warnings.warn("%s category_tables given but %s categories=%s specified. "

Check warning on line 67 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L67

Added line #L67 was not covered by tests
"Using default order %s categories instead." %
(len(in_category_tables), len(in_categories),
str(in_categories), str(default_categories)),
RuntimeWarning)
in_categories = default_categories

Check warning on line 72 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L72

Added line #L72 was not covered by tests
else:
raise ValueError("%s category_tables given but %s categories specified" %
(len(in_category_tables), len(in_categories)))
# Initialize the main dynamic table
super().__init__(**kwargs)
# Create and set all sub-categories
Expand All @@ -68,33 +84,51 @@
# We have categories to process
if len(self.id) == 0:
# The user did not initialize our main table id's nor set columns for our main table
for i in range(len(in_category_tables[0])):
self.id.append(i)
try:
for i in range(len(in_category_tables[0])):
self.id.append(i)
except TypeError as e:

Check warning on line 90 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L90

Added line #L90 was not covered by tests
if self._in_construct_mode: # Relax error checking when reading from file
warnings.warn("Can't resize `id` column for %s name=%s due to the "

Check warning on line 92 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L92

Added line #L92 was not covered by tests
"following TypeError: %s" % (str(type(self)), self.name, str(e)),
RuntimeWarning)
else:
raise e

Check warning on line 96 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L96

Added line #L96 was not covered by tests
# Check that in_categories and in_category_tables are consistent
for i, v in enumerate(in_category_tables):
# Error check that the name of the table is in our categories list
if v.name not in in_categories:
if self._in_construct_mode: # Relax error checking when reading from file
default_categories = [t.name for t in in_category_tables]
warnings.warn("DynamicTable %s does not appear in categories %s. "

Check warning on line 103 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L103

Added line #L103 was not covered by tests
"Using default order %s categories instead." %
(v.name, str(in_categories), str(default_categories)),
RuntimeWarning)
in_categories = default_categories
break

Check warning on line 108 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L107-L108

Added lines #L107 - L108 were not covered by tests
else:
raise ValueError("DynamicTable %s does not appear in "
"categories %s" % (v.name, str(in_categories)))

# Add the user-provided categories in the correct order as described by the categories
# This is necessary, because we do not store the categories explicitly but we maintain them
# as the order of our self.category_tables. In this makes sure look-ups are consistent.
lookup_index = OrderedDict([(k, -1) for k in in_categories])
for i, v in enumerate(in_category_tables):
# Error check that the name of the table is in our categories list
if v.name not in lookup_index:
raise ValueError("DynamicTable %s does not appear in categories %s" % (v.name, str(in_categories)))
# Error check to make sure no two tables with the same name are given
if lookup_index[v.name] >= 0:
raise ValueError("Duplicate table name %s found in input dynamic_tables" % v.name)
lookup_index[v.name] = i
for table_name, tabel_index in lookup_index.items():
# This error case should not be able to occur since the length of the in_categories and
# in_category_tables must match and we made sure that each DynamicTable we added had its
# name in the in_categories list. We, therefore, exclude this check from coverage testing
# but we leave it in just as a backup trigger in case something unexpected happens
if tabel_index < 0: # pragma: no cover
raise ValueError("DynamicTable %s listed in categories but does not appear in category_tables" %
table_name) # pragma: no cover
# Test that all category tables have the correct number of rows
category = in_category_tables[tabel_index]
if len(category) != len(self):
raise ValueError('Category DynamicTable %s does not align, it has %i rows expected %i' %
(category.name, len(category), len(self)))
msg = ("Category DynamicTable %s does not align, it has %i rows expected %i"
% (category.name, len(category), len(self)))
if self._in_construct_mode: # Relax error checking when reading from file
warnings.warn(msg, RuntimeWarning)

Check warning on line 129 in src/hdmf/common/alignedtable.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/alignedtable.py#L129

Added line #L129 was not covered by tests
else:
raise ValueError(msg)
# Add the category table to our category_tables.
dts[category.name] = category
# Set the self.category_tables attribute, which will set the parent/child relationships for the category_tables
Expand Down
11 changes: 10 additions & 1 deletion src/hdmf/common/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,16 @@
else:
# Calculate the order of column names
if columns is None:
raise ValueError("Must supply 'columns' if specifying 'colnames'")
# Invalid table. Custom colnames are given but the VectorData for those columns are missing.
if self._in_construct_mode: # Relax error checking when reading from an existing file
# To allow reading of bad files and since we don't have any actual data for the
# custom columns, we can try to just ignore the columns and warn instead of raising a ValueError.
self.colnames = tuple()
self.columns = tuple()
warn("Ignoring custom named columns. Must supply 'columns' if specifying"

Check warning on line 403 in src/hdmf/common/table.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/common/table.py#L401-L403

Added lines #L401 - L403 were not covered by tests
" 'colnames'=%s for table name=%s" % (str(colnames), self.name))
else: # be strict when constructing a new table
raise ValueError("Must supply 'columns' if specifying 'colnames'")
else:
# order the columns according to the column names, which does not include indices
self.colnames = tuple(pystr(c) for c in colnames)
Expand Down
20 changes: 20 additions & 0 deletions tests/unit/common/test_alignedtable.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from pandas.testing import assert_frame_equal
import warnings
import os

from hdmf.backends.hdf5 import HDF5IO
from hdmf.common import DynamicTable, VectorData, get_manager, AlignedDynamicTable, DynamicTableRegion
Expand Down Expand Up @@ -586,3 +587,22 @@ def test_get_colnames(self):
('test3', 'c1'), ('test3', 'c2'), ('test3', 'c3')]
self.assertListEqual(adt.get_colnames(include_category_tables=True, ignore_category_ids=True),
expected_colnames)

def test_bad_nwb_icephys_data(self):
"""
Test reading a file with a bad AlignedDynamicTable which has:
1) Bad id column where the main id column is empty but the category tables have 3 rows
2) The categories listed in the file have a spelling error so the ordering is bad
3) The colnames is bad because it lists a custom column but the VectorData is missing
"""
try:
from pynwb import NWBHDF5IO
except ImportError:
self.skipTest("PyNWB not installed for testing")
test_filename = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"bad_nwb_icephys_aligned_dynamic_table_file.nwb")

with NWBHDF5IO(test_filename, 'r', load_namespaces=True) as nwbio:
with warnings.catch_warnings(record=True) as w:
_ = nwbio.read()
self.assertEqual(len(w), 7)
Comment on lines +598 to +608
Copy link
Contributor Author

@oruebel oruebel Jul 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rly the easiest way to test this is to just try an read the broken NWB file which has all the different errors. However, PyNWB is not installed for our CI, so this test gets skipped. Is there a way that we can read the file without just HDF5IO?