Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multi-run workflow synchronized with BIDS dataset #374

Closed
wants to merge 14 commits into from
97 changes: 97 additions & 0 deletions phys2bids/bids.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""BIDS functions for phys2bids package."""
import logging
import os
import re
from csv import reader
from pathlib import Path

Expand Down Expand Up @@ -290,3 +291,99 @@ def readme_file(outdir):
LGR.warning('phys2bids could not find README,'
'generating it EMPTY, please fill in the necessary info')
utils.write_file(file_path, '', text)


def update_bids_name(basename, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eh, with nipy/heudiconv#544 I feel that we are doomed to start some minibids or bidsnonofficial or bidspy (so it is not pybids), whatever, to start with a basic schema-driven bids filenames support and may be later expanded to something else (validator etc). otherwise -- we keep breeding duplicated code across the ecosystem, not good! WDYT @tsalo ? I can move that heudiconv PR into its own python package, will you join the fun? ;)

"""Add elements to a BIDS filename while retaining BIDS compatibility.

Parameters
----------
basename : str
Name of valid BIDS file.
kwargs : dict
Keyword arguments indicating entities and/or suffix and/or extension
to update/add to the BIDS filename. If a keyword's value is None, then
the entity will be removed from the filename.

Returns
-------
outname : str
Valid BIDS filename with updated entities/suffix/extension.
"""
# This is hardcoded, but would be nice to use BIDS spec's YAML files.
ENTITY_ORDER = [
"sub",
"ses",
"task",
"acq",
"ce",
"rec",
"dir",
"run",
"mod",
"echo",
"fa",
"inv",
"mt",
"part",
"ch",
"recording",
"proc",
"space",
"split",
]

outdir = os.path.dirname(basename)
outname = os.path.basename(basename)

# Determine scan suffix (should always be physio)
suffix = outname.split("_")[-1].split(".")[0]
extension = "." + ".".join(outname.split("_")[-1].split(".")[1:])
filetype = suffix + extension

for key, val in kwargs.items():
if key == "suffix":
if not val.startswith("_"):
val = "_" + val

if not val.endswith("."):
val = val + "."

outname = outname.replace("_" + suffix + ".", val)
elif key == "extension":
# add leading . if not ''
if val and not val.startswith("."):
val = "." + val
outname = outname.replace(extension, val)
else:
# entities
if key not in ENTITY_ORDER:
raise ValueError(f"Key {key} not understood.")

if val is None:
# remove entity from filename if kwarg val is None
regex = f"_{key}-[0-9a-zA-Z]+"
outname = re.sub(regex, "", outname)
elif f"_{key}-{val}" in basename:
LGR.warning(
f"Key-value pair {key}/{val} already found in "
f"basename {basename}. Skipping."
)
elif f"_{key}-" in basename:
LGR.warning(
f"Key {key} already found in basename {basename}. "
"Overwriting."
)
regex = f"_{key}-[0-9a-zA-Z]+"
outname = re.sub(regex, f"_{key}-{val}", outname)
else:
loc = ENTITY_ORDER.index(key)
entities_to_check = ENTITY_ORDER[loc:]
entities_to_check = [f"_{etc}-" for etc in entities_to_check]
entities_to_check.append(f"_{filetype}")
for etc in entities_to_check:
if etc in outname:
outname = outname.replace(etc, f"_{key}-{val}{etc}")
break
outname = os.path.join(outdir, outname)
return outname
136 changes: 136 additions & 0 deletions phys2bids/slice4phys.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import numpy as np

from phys2bids.bids import update_bids_name

LGR = logging.getLogger(__name__)


Expand Down Expand Up @@ -106,6 +108,140 @@ def find_runs(phys_in, ntp_list, tr_list, thr=None, padding=9):
return run_timestamps


def slice_phys(phys, run_timestamps, padding=9, update_trigger=False):
"""Slice a physio object based on run-/file-wise onsets and offsets.

Parameters
----------
phys : BlueprintInput
Multi-run physio data in BlueprintInput object.
run_timestamps : dict
Each key is a run-wise filename and value is a tuple of (onset, offset),
where onset and offset are integers corresponding to index values of
the trigger channel.
padding : float or tuple, optional
Amount of time before and after run to keep in physio time series, in seconds.
May be a single value (in which case the time before and after is the same) or
a two-item tuple (which case the first item is time before and the second is
time after).
These values will be automatically reduced in cases where the pad would extend
before or after the physio acquisition.
update_trigger : bool, optional
Whether to update the trigger channel time series based on estimated scan onsets from
the BIDS dataset (True) or to leave it as-is (False). Default is False.

Returns
-------
phys_in_slices : dict
Each key is a run-wise filename (possibly further split by frequency)
and each value is a BlueprintInput object.

Notes
-----
The goal of this function is to abstract out the general slicing procedure
from slice4phys.
"""
phys = deepcopy(phys)
if update_trigger:
tsalo marked this conversation as resolved.
Show resolved Hide resolved
LGR.warning(
"Overwriting the trigger channel. "
"The original signal will be retained in a 'original trigger' channel."
)
phys.freq.append(phys.freq[phys.trigger_idx])
phys.units.append(phys.units[phys.trigger_idx])
phys.timeseries.append(phys.timeseries[phys.trigger_idx].copy())
phys.ch_name.append("original trigger")

# Fix up the trigger time series
phys.timeseries[phys.trigger_idx][:] = 0
for ons, off in run_timestamps.values():
phys.timeseries[phys.trigger_idx][ons:off] = 1

if not isinstance(padding, tuple):
time_before, time_after = padding, padding
else:
time_before, time_after = padding

phys_in_slices = {}
for i_run, fname in enumerate(run_timestamps.keys()):
run_attributes = run_timestamps[fname] # tmp variable to collect run's info
trigger_onset, trigger_offset = run_attributes
min_onset, max_offset = 0, phys.timeseries[phys.trigger_idx].shape[0]

unique_frequencies = np.unique(phys.freq)
trigger_freq = phys.freq[phys.trigger_idx]

# Limit padding based on beginning and end of physio recording.
# We could also limit padding to prevent overlap between scans, if desired.
run_time_before = np.minimum(
time_before, (trigger_onset - min_onset) / trigger_freq
)
run_time_after = np.minimum(
time_after, (max_offset - trigger_offset) / trigger_freq
)

for freq in unique_frequencies:
to_subtract = int(run_time_before * freq)
to_add = int(run_time_after * freq)

run_onset_idx = int(trigger_onset * freq / trigger_freq) - to_subtract
run_offset_idx = int(trigger_offset * freq / trigger_freq) + to_add

# Split into frequency-specific object limited to onset-offset
temp_phys_in = deepcopy(phys[run_onset_idx:run_offset_idx])

if temp_phys_in.freq[temp_phys_in.trigger_idx] != freq:
example_ts = phys.timeseries[phys.freq.index(freq)]

# Determine time
new_time = (
np.arange(example_ts.shape[0])
/ temp_phys_in.timeseries[phys.freq.index(freq)].freq
)
new_time += np.min(temp_phys_in.timeseries[0])
temp_phys_in.timeseries[0] = new_time
temp_phys_in.freq[0] = freq

# Resample trigger
trigger = temp_phys_in.timeseries[temp_phys_in.trigger_idx]
cur_xvals = np.arange(len(trigger))
new_xvals = np.linspace(0, len(trigger), example_ts.shape[0])
new_trigger = np.interp(
new_xvals,
cur_xvals,
temp_phys_in.timeseries[temp_phys_in.trigger_idx],
)
temp_phys_in.timeseries[temp_phys_in.trigger_idx] = new_trigger
temp_phys_in.freq[temp_phys_in.trigger_idx] = freq

if len(unique_frequencies) > 1:
run_fname = update_bids_name(fname, recording=str(freq) + "Hz")

# Drop other frequency channels
channel_idx = np.arange(temp_phys_in.ch_amount)
nonfreq_channels = [
i for i in channel_idx if temp_phys_in.freq[i] != freq
]
freq_channels = [i for i in channel_idx if temp_phys_in.freq[i] == freq]
temp_phys_in = temp_phys_in.delete_at_index(nonfreq_channels)

# Update trigger channel index around dropped channels
new_trigger_idx = freq_channels.index(temp_phys_in.trigger_idx)
temp_phys_in.trigger_idx = new_trigger_idx
else:
run_fname = fname

# zero out time
temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - np.min(
temp_phys_in.timeseries[0]
)
# Now take out the time before the scan starts
temp_phys_in.timeseries[0] = temp_phys_in.timeseries[0] - time_before

phys_in_slices[run_fname] = temp_phys_in
return phys_in_slices


def slice4phys(phys_in, ntp_list, tr_list, thr, padding=9):
"""
Slice runs for phys2bids.
Expand Down
Loading