Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add condition_labels as an argument #18

Merged
merged 6 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,16 @@ binned_aligned_spikes = BinnedAlignedSpikes(
data=data, # Shape (number_of_units, number_of_events, number_of_bins)
timestamps=timestamps, # Shape (number_of_events,)
condition_indices=condition_indices, # Shape (number_of_events,)
condition_labels=condition_labels, # Shape (number_of_conditions,) or np.unique(condition_indices).size
)
```

Note that `number_of_events` here represents the total number of repetitions for all the conditions being aggregated. For example, if data is being aggregated from two stimuli where the first stimulus appeared twice and the second appeared three times, the `number_of_events` would be 5.

The `condition_indices` is an indicator vector that should be constructed so that `data[:, condition_indices == condition_index, :]` corresponds to the binned spike counts for the condition with the specified condition_index. You can retrieve the same data using the convenience method `binned_aligned_spikes.get_data_for_condition(condition_index)`.

The `condition_labels` argument is optional and can be used to store the labels of the conditions. It is meant to help users understand the nature of each condition.

It's important to note that the timestamps must be in ascending order and must correspond positionally to the condition indices and the second dimension of the data. If they are not, a ValueError will be raised. To help organize the data correctly, you can use the convenience method `BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)`, which ensures the data is properly sorted. Here’s how it can be used:

```python
Expand All @@ -209,7 +212,8 @@ binned_aligned_spikes = BinnedAlignedSpikes(
milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
data=sorted_data,
event_timestamps=sorted_event_timestamps,
condition_indices=sorted_condition_indices,
condition_indices=sorted_condition_indices,
condition_labels=condition_labels
)
```

Expand Down Expand Up @@ -278,6 +282,7 @@ milliseconds_from_event_to_first_bin = -50.0
data = np.concatenate([data_for_first_stimuli, data_for_second_stimuli], axis=1)
event_timestamps = np.concatenate([timestamps_first_stimuli, timestamps_second_stimuli])
condition_indices = np.concatenate([np.zeros(2), np.ones(3)])
condition_labels = ["a", "b"]

sorted_data, sorted_event_timestamps, sorted_condition_indices = BinnedAlignedSpikes.sort_data_by_event_timestamps(data=data, event_timestamps=event_timestamps, condition_indices=condition_indices)

Expand Down
18 changes: 15 additions & 3 deletions spec/ndx-binned-spikes.extensions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ groups:
- neurodata_type_def: BinnedAlignedSpikes
neurodata_type_inc: NWBDataInterface
default_name: BinnedAlignedSpikes
doc: A data interface for binned spike data aligned to an event (e.g. a stimuli
doc: A data interface for binned spike data aligned to an event (e.g. a stimulus
or the beginning of a trial).
attributes:
- name: name
Expand All @@ -11,7 +11,8 @@ groups:
doc: The name of this container
- name: description
dtype: text
value: Spikes data binned and aligned to the timestamps of one or multiple conditions.
value: Spikes data binned and aligned to the event timestamps of one or multiple
conditions.
doc: A description of what the data represents
- name: bin_width_in_milliseconds
dtype: float64
Expand All @@ -25,7 +26,7 @@ groups:
required: false
datasets:
- name: data
dtype: numeric
dtype: uint64
dims:
- num_units
- number_of_events
Expand Down Expand Up @@ -54,6 +55,17 @@ groups:
type, trial number, category, etc.). This is only used when the data is aligned
to multiple conditions
quantity: '?'
- name: condition_labels
dtype: text
dims:
- number_of_conditions
shape:
- null
doc: The labels of the conditions that the data is aligned to. The size of this
array should match the number of conditions. This is only used when the data
is aligned to multiple conditions. First condition is index 0, second is index
1, etc.
quantity: '?'
- name: units_region
neurodata_type_inc: DynamicTableRegion
doc: A reference to the Units table region that contains the units of the data.
Expand Down
13 changes: 12 additions & 1 deletion src/pynwb/ndx_binned_spikes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class BinnedAlignedSpikes(NWBDataInterface):
)

DEFAULT_NAME = "BinnedAlignedSpikes"
DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the timestamps of one or multiple conditions."
DEFAULT_DESCRIPTION = "Spikes data binned and aligned to the event timestamps of one or multiple conditions."

@docval(
{
Expand Down Expand Up @@ -97,6 +97,17 @@ class BinnedAlignedSpikes(NWBDataInterface):
"shape": (None,),
"default": None,
},
{
"name":"condition_labels",
"type": "array_data",
"doc": (
"The labels of the conditions that the data is aligned to. The size of this array should match "
"the number of conditions. This is only used when the data is aligned to multiple conditions. "
"First condition is index 0, second is index 1, etc."
),
"shape": (None,),
"default": None,
},
{
"name": "units_region",
"type": DynamicTableRegion,
Expand Down
41 changes: 27 additions & 14 deletions src/pynwb/ndx_binned_spikes/testing/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pynwb.misc import Units
from hdmf.common import DynamicTableRegion


# TODO: Remove once pynwb 2.7.0 is released and use the mock class there
def mock_Units(
num_units: int = 10,
Expand Down Expand Up @@ -47,11 +48,12 @@ def mock_BinnedAlignedSpikes(
event_timestamps: Optional[np.ndarray] = None,
data: Optional[np.ndarray] = None,
condition_indices: Optional[np.ndarray] = None,
condition_labels: Optional[np.ndarray] = None,
units_region: Optional[DynamicTableRegion] = None,
sort_data: bool = True,
) -> BinnedAlignedSpikes:
"""
Generate a mock BinnedAlignedSpikes object with specified parameters or from given data.
Generate a mock BinnedAlignedSpikes object with specified parameters or from given data.

Parameters
----------
Expand All @@ -77,11 +79,16 @@ def mock_BinnedAlignedSpikes(
An array of event_timestamps for each event. If not provided, it will be automatically generated.
It should have size `number_of_events`.
condition_indices : np.ndarray, optional
An array of indices characterizing each condition. If not provided, it will be automatically generated.
An array of indices characterizing each condition. If not provided, it will be automatically generated
from the number of conditions and number of events. It should have size `number_of_events`.
If provided, the `number_of_conditions` parameter will be ignored and the number of conditions will be
inferred from the unique values in `condition_indices`.
condition_labels: np.ndarray, optional
An array of labels for each condition. It should have size `number_of_conditions`.
units_region: DynamicTableRegion, optional
A reference to the Units table region that contains the units of the data.
sort_data: bool, optional
If True, the data will be sorted by timestamps.
If True, the data will be sorted by timestamps.
Returns
-------
BinnedAlignedSpikes
Expand All @@ -107,14 +114,13 @@ def mock_BinnedAlignedSpikes(

if event_timestamps.shape[0] != number_of_events:
raise ValueError("The shape of `event_timestamps` does not match `number_of_events`.")

if condition_indices is None and number_of_conditions > 0:


assert number_of_conditions < number_of_events, (
"The number of conditions should be less than the number of events."
)


assert (
number_of_conditions < number_of_events
), "The number of conditions should be less than the number of events."

condition_indices = np.zeros(number_of_events, dtype=int)
all_indices = np.arange(number_of_conditions, dtype=int)

Expand All @@ -126,12 +132,18 @@ def mock_BinnedAlignedSpikes(
size=number_of_events - number_of_conditions,
replace=True,
)


if condition_indices is not None:
assert (
condition_indices.shape[0] == number_of_events
), "The shape of `condition_indices` does not match `number_of_events`."
condition_indices = np.array(condition_indices, dtype=int)
number_of_conditions = np.unique(condition_indices).size

if condition_labels is None:
condition_labels = np.array([f"condition_{i}" for i in range(number_of_conditions)], dtype="U")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why generate if they don't exist? Why not just make them optional?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather not add data if it is redundant and does not include any new information

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming people want a mock with data if they specify num_conditions > 0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'd really rather not. It's auto-magic, which is hard for users to predict, and it is duplication of data.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I don't feel very strongly about this. Removing.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I don't understand your comment about duplication of data. What is being duplicated?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"condition 0" is at index 0
"condition 1" is at index 1
etc.

I guess it's not really duplicating since that data does not already exist, but it's adding data that is not really telling us anything we couldn't already infer. I think it's more meaningful to be able to tell whether conditions are labeled or not.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, I was just wondering if I missed something. I eliminated this behavior for the mock in the last commit.

else:
condition_labels = np.asarray(condition_labels, dtype="U")

if condition_labels.size != number_of_conditions:
raise ValueError("The number of condition labels should match the number of conditions.")

# Sort the data by timestamps
if sort_data:
Expand All @@ -146,6 +158,7 @@ def mock_BinnedAlignedSpikes(
data=data,
event_timestamps=event_timestamps,
condition_indices=condition_indices,
condition_labels=condition_labels,
units_region=units_region,
)
return binned_aligned_spikes
10 changes: 9 additions & 1 deletion src/pynwb/tests/test_binned_aligned_spikes.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ def setUp(self):
self.event_timestamps = np.concatenate([self.timestamps_first_condition, self.timestamps_second_condition])

self.sorted_indices = np.argsort(self.event_timestamps)

self.condition_labels = ["first", "second"]

def test_constructor(self):
"""Test that the constructor for BinnedAlignedSpikes sets values as expected."""
Expand All @@ -193,6 +195,7 @@ def test_constructor(self):
data=data,
event_timestamps=event_timestamps,
condition_indices=condition_indices,
condition_labels=self.condition_labels,
)

np.testing.assert_array_equal(aggregated_binnned_align_spikes.data, self.data[:, self.sorted_indices, :])
Expand All @@ -202,6 +205,11 @@ def test_constructor(self):
np.testing.assert_array_equal(
aggregated_binnned_align_spikes.event_timestamps, self.event_timestamps[self.sorted_indices]
)

np.testing.assert_array_equal(
aggregated_binnned_align_spikes.condition_labels, self.condition_labels
)

self.assertEqual(aggregated_binnned_align_spikes.bin_width_in_milliseconds, self.bin_width_in_milliseconds)
self.assertEqual(
aggregated_binnned_align_spikes.milliseconds_from_event_to_first_bin,
Expand Down Expand Up @@ -259,7 +267,7 @@ def test_roundtrip_acquisition(self):
"""

# Testing here
self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=0)
self.binned_aligned_spikes = mock_BinnedAlignedSpikes(number_of_conditions=3, condition_labels=["a", "b", "c"])

self.nwbfile.add_acquisition(self.binned_aligned_spikes)

Expand Down
17 changes: 15 additions & 2 deletions src/spec/create_extension_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def main():
"The binned data. It should be an array whose first dimension is the number of units, the second dimension "
"is the number of events, and the third dimension is the number of bins."
),
dtype="numeric", # TODO should this be a uint64?
dtype="uint64",
shape=[None, None, None],
dims=["num_units", "number_of_events", "number_of_bins"],
)
Expand Down Expand Up @@ -63,12 +63,25 @@ def main():
quantity="?",
)

condition_labels = NWBDatasetSpec(
name="condition_labels",
doc=(
"The labels of the conditions that the data is aligned to. The size of this array should match "
"the number of conditions. This is only used when the data is aligned to multiple conditions."
"First condition is index 0, second is index 1, etc."
),
dtype="text",
shape=[None],
dims=["number_of_conditions"],
quantity="?",
)

binned_aligned_spikes = NWBGroupSpec(
neurodata_type_def="BinnedAlignedSpikes",
neurodata_type_inc="NWBDataInterface",
default_name="BinnedAlignedSpikes",
doc="A data interface for binned spike data aligned to an event (e.g. a stimulus or the beginning of a trial).",
datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, units_region],
datasets=[binned_aligned_spikes_data, event_timestamps, condition_indices, condition_labels, units_region],
attributes=[
NWBAttributeSpec(
name="name",
Expand Down
Loading