diff --git a/docs/src/misc/merge_tables.md b/docs/src/misc/merge_tables.md index 81f13fdc9..dc10e2dfc 100644 --- a/docs/src/misc/merge_tables.md +++ b/docs/src/misc/merge_tables.md @@ -33,7 +33,7 @@ pipeline. By convention... from spyglass.utils.dj_merge_tables import _Merge @schema -class MergeTable(_Merge): +class MergeOutput(_Merge): definition = """ merge_id: uuid --- @@ -57,6 +57,11 @@ class MergeTable(_Merge): ![Merge diagram](../images/merge_diagram.png) +By convention, Merge Tables have been named with the pipeline name plus `Output` +(e.g., `LFPOutput`, `PositionOutput`). Using the underscore alias for this class +allows us to circumvent a DataJoint protection that interprets the class as a +table itself. + ## How ### Merging @@ -106,104 +111,4 @@ is not present in the parent. ## Example -First, we'll import various items related to the LFP Merge Table... - -```python -from spyglass.utils.dj_merge_tables import delete_downstream_merge, Merge -from spyglass.common.common_ephys import LFP as CommonLFP # Upstream 1 -from spyglass.lfp.lfp_merge import LFPOutput # Merge Table -from spyglass.lfp.v1.lfp import LFPV1 # Upstream 2 -``` - -Merge Tables have multiple custom methods that begin with `merge`. `help` can -show us the docstring of each - -```python -merge_methods=[d for d in dir(Merge) if d.startswith('merge')] -help(getattr(Merge,merge_methods[-1])) -``` - -We'll use this example to explore populating both `LFPV1` and the `LFPOutput` -Merge Table. - -```python -nwb_file_dict = { # We'll use this later when fetching from the Merge Table - "nwb_file_name": "tonks20211103_.nwb", -} -lfpv1_key = { - **nwb_file_dict, - "lfp_electrode_group_name": "CA1_test", - "target_interval_list_name": "test interval2", - "filter_name": "LFP 0-400 Hz", - "filter_sampling_rate": 30000, -} -LFPV1.populate(lfpv1_key) # Also populates LFPOutput -``` - -The Merge Table can also be populated with keys from `common_ephys.LFP`. - -```python -common_keys_CH = CommonLFP.fetch(limit=3, as_dict=True) # CH61 -LFPOutput.insert1(common_keys_CH[0], skip_duplicates=True) -LFPOutput.insert(common_keys_CH[1:], skip_duplicates=True) -common_keys_J1 = CommonLFP.fetch(limit=3, offset=80, as_dict=True) # J16 -LFPOutput.insert(common_keys_J1, skip_duplicates=True) -``` - -`merge_view` shows a union of the master and all part tables. - -```python -LFPOutput.merge_view() -LFPOutput.merge_view(restriction=lfpv1_key) -``` - -UUIDs help retain unique entries across all part tables. We can fetch NWB file -by referencing this or other features. - -```python -uuid_key = LFPOutput.fetch(limit=1, as_dict=True)[-1] -restrict = LFPOutput & uuid_key -result1 = restrict.fetch_nwb() - -nwb_key = LFPOutput.merge_restrict(nwb_file_dict).fetch(as_dict=True)[0] -result2 = (LFPOutput & nwb_key).fetch_nwb() -``` - -There are also functions for retrieving part/parent table(s) and fetching data. - -1. These `get` functions will either return the part table of the Merge table or - the parent table with the source information for that part. - -2. This `fetch` will collect all relevant entries and return them as a list in - the format specified by keyword arguments and one's DataJoint config. 
- -```python -result4 = LFPOutput.merge_get_part(restriction=common_keys_CH[0],join_master=True) -result5 = LFPOutput.merge_get_parent(restriction='nwb_file_name LIKE "CH%"') -result6 = result5.fetch('lfp_sampling_rate') # Sample rate for all CH* files -result7 = LFPOutput.merge_fetch("filter_name", "nwb_file_name") -result8 = LFPOutput.merge_fetch(as_dict=True) -``` - -When deleting from Merge Tables, we can either... - -1. delete from the Merge Table itself with `merge_delete`, deleting both - the master and part. - -2. use `merge_delete_parent` to delete from the parent sources, getting rid of - the entries in the source table they came from. - -3. use `delete_downstream_merge` to find Merge Tables downstream and get rid - full entries, avoiding orphaned master table entries. - -The two latter cases can be destructive, so we include an extra layer of -protection with `dry_run`. When true (by default), these functions return -a list of tables with the entries that would otherwise be deleted. - -```python -LFPOutput.merge_delete(common_keys_CH[0]) # Delete from merge table -LFPOutput.merge_delete_parent(restriction=nwb_file_dict, dry_run=True) -delete_downstream_merge( - table=CommonLFP, restriction=common_keys_CH[0], dry_run=True -) -``` +For example usage, see our Merge Table notebook. diff --git a/notebooks/01_Insert_Data.ipynb b/notebooks/01_Insert_Data.ipynb index a485bc507..d4139250d 100644 --- a/notebooks/01_Insert_Data.ipynb +++ b/notebooks/01_Insert_Data.ipynb @@ -2081,17 +2081,15 @@ " group_name=\"test\",\n", " electrode_list=[0],\n", ")\n", - "lfp.v1.LFPSelection.insert1(\n", - " {\n", - " \"nwb_file_name\": nwb_copy_file_name,\n", - " \"lfp_electrode_group_name\": \"test\",\n", - " \"target_interval_list_name\": \"01_s1\",\n", - " \"filter_name\": \"LFP 0-400 Hz\",\n", - " \"filter_sampling_rate\": 30_000,\n", - " },\n", - " skip_duplicates=True,\n", - ")\n", - "lfp.v1.LFPV1().populate()\n", + "lfp_key = {\n", + " \"nwb_file_name\": nwb_copy_file_name,\n", + " \"lfp_electrode_group_name\": \"test\",\n", + " \"target_interval_list_name\": \"01_s1\",\n", + " \"filter_name\": \"LFP 0-400 Hz\",\n", + " \"filter_sampling_rate\": 30_000,\n", + "}\n", + "lfp.v1.LFPSelection.insert1(lfp_key, skip_duplicates=True)\n", + "lfp.v1.LFPV1().populate(lfp_key)\n", "```\n", "\n", "
\n", diff --git a/notebooks/02_Data_Sync.ipynb b/notebooks/02_Data_Sync.ipynb index 7acf25306..defae02b0 100644 --- a/notebooks/02_Data_Sync.ipynb +++ b/notebooks/02_Data_Sync.ipynb @@ -630,8 +630,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the [next notebook](./10_Spike_Sorting.ipynb), we'll start working with \n", - "ephys data with spike sorting." + "In the [next notebook](./03_Merge_Tables.ipynb), we'll explore the details of a\n", + "table tier unique to Spyglass, Merge Tables." ] } ], diff --git a/notebooks/README.md b/notebooks/README.md index 8fc21cc31..694b5ef33 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -10,6 +10,7 @@ described in the categories below. Everyone should complete the Setup and Insert Data notebooks. Data Sync is an optional additional tool for collaborators that want to share analysis files. +Merge Tables features details on a new table tier unique to Spyglass. ## 1. Electrophysiology diff --git a/notebooks/py_scripts/01_Insert_Data.py b/notebooks/py_scripts/01_Insert_Data.py index cd3bb62ab..75eb1fa1b 100644 --- a/notebooks/py_scripts/01_Insert_Data.py +++ b/notebooks/py_scripts/01_Insert_Data.py @@ -327,17 +327,15 @@ # group_name="test", # electrode_list=[0], # ) -# lfp.v1.LFPSelection.insert1( -# { -# "nwb_file_name": nwb_copy_file_name, -# "lfp_electrode_group_name": "test", -# "target_interval_list_name": "01_s1", -# "filter_name": "LFP 0-400 Hz", -# "filter_sampling_rate": 30_000, -# }, -# skip_duplicates=True, -# ) -# lfp.v1.LFPV1().populate() +# lfp_key = { +# "nwb_file_name": nwb_copy_file_name, +# "lfp_electrode_group_name": "test", +# "target_interval_list_name": "01_s1", +# "filter_name": "LFP 0-400 Hz", +# "filter_sampling_rate": 30_000, +# } +# lfp.v1.LFPSelection.insert1(lfp_key, skip_duplicates=True) +# lfp.v1.LFPV1().populate(lfp_key) # ``` #
#
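
The two hunks above make the same change in the notebook and in its paired
script: build the key once, then reuse it for both the selection insert and the
populate call. Passing the key to `populate` restricts computation to that one
entry rather than all pending entries. As a plain-Python sketch of the pattern
(assuming `nwb_copy_file_name` and the `lfp` module are in scope, as earlier in
that notebook):

```python
# Define the key once, then drive both the insert and the populate with it.
lfp_key = {
    "nwb_file_name": nwb_copy_file_name,
    "lfp_electrode_group_name": "test",
    "target_interval_list_name": "01_s1",
    "filter_name": "LFP 0-400 Hz",
    "filter_sampling_rate": 30_000,
}
lfp.v1.LFPSelection.insert1(lfp_key, skip_duplicates=True)  # safe to re-run
lfp.v1.LFPV1().populate(lfp_key)  # compute only this key, not all pending keys
```
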
diff --git a/notebooks/py_scripts/02_Data_Sync.py b/notebooks/py_scripts/02_Data_Sync.py
index 7e7516b31..06473e567 100644
--- a/notebooks/py_scripts/02_Data_Sync.py
+++ b/notebooks/py_scripts/02_Data_Sync.py
@@ -274,5 +274,5 @@
 #
 # Up Next
 
-# In the [next notebook](./10_Spike_Sorting.ipynb), we'll start working with
-# ephys data with spike sorting.
+# In the [next notebook](./03_Merge_Tables.ipynb), we'll explore Merge Tables,
+# a table tier unique to Spyglass.
diff --git a/notebooks/py_scripts/03_Merge_Tables.py b/notebooks/py_scripts/03_Merge_Tables.py
new file mode 100644
index 000000000..26e57a39b
--- /dev/null
+++ b/notebooks/py_scripts/03_Merge_Tables.py
@@ -0,0 +1,213 @@
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#     jupytext_version: 1.15.2
#   kernelspec:
#     display_name: spy
#     language: python
#     name: python3
# ---

# # Merge Tables
#

# ## Intro
#

# _Developer Note:_ if you plan to make a PR in the future, be sure to copy this
# notebook, and use the `gitignore` prefix `temp` to avoid future conflicts.
#
# This is one notebook in a multi-part series on Spyglass.
#
# - To set up your Spyglass environment and database, see
#   [the Setup notebook](./00_Setup.ipynb)
# - To insert data, see [the Insert Data notebook](./01_Insert_Data.ipynb)
# - For additional info on DataJoint syntax, including table definitions and
#   inserts, see
#   [these additional tutorials](https://github.com/datajoint/datajoint-tutorials)
# - For information on why we use Merge Tables, and how to make one, see our
#   [documentation](https://lorenfranklab.github.io/spyglass/0.4/misc/merge_tables/)
#

# ## Imports
#

#
# Let's start by importing the `spyglass` package, along with a few others.
#

# +
import os
import datajoint as dj

# change to the upper level folder to detect dj_local_conf.json
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
dj.config.load("dj_local_conf.json")  # load config for database connection info

# ignore datajoint+jupyter async warnings
import warnings

warnings.simplefilter("ignore", category=DeprecationWarning)
warnings.simplefilter("ignore", category=ResourceWarning)
warnings.simplefilter("ignore", category=UserWarning)

import spyglass.common as sgc
import spyglass.lfp as lfp
from spyglass.utils.nwb_helper_fn import get_nwb_copy_filename
from spyglass.utils.dj_merge_tables import delete_downstream_merge, Merge
from spyglass.common.common_ephys import LFP as CommonLFP  # Upstream 1
from spyglass.lfp.lfp_merge import LFPOutput  # Merge Table
from spyglass.lfp.v1.lfp import LFPV1  # Upstream 2

# -

# ## Example data
#

# Check to make sure the data inserted in the previous notebook is still there.

nwb_file_name = "minirec20230622.nwb"
nwb_copy_file_name = get_nwb_copy_filename(nwb_file_name)
nwb_file_dict = {"nwb_file_name": nwb_copy_file_name}
sgc.Session & nwb_file_dict

# If you haven't already done so, insert data into a Merge Table.
#
# _Note_: Some existing parents of Merge Tables perform the Merge Table insert as
# part of their populate methods. This practice will be revised in the future.
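#
# As a sketch of that pattern (hypothetical names, not the actual `LFPV1`
# source), such a parent's `make` method ends by also inserting into the
# Merge Table:
#
# ```python
# class MyComputed(dj.Computed):
#     def make(self, key):
#         ...  # compute results and insert them into self
#         MyOutput.insert1(key, skip_duplicates=True)  # the Merge Table insert
# ```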
+# +# + +sgc.FirFilterParameters().create_standard_filters() +lfp.lfp_electrode.LFPElectrodeGroup.create_lfp_electrode_group( + nwb_file_name=nwb_copy_file_name, + group_name="test", + electrode_list=[0], +) +lfp_key = { + "nwb_file_name": nwb_copy_file_name, + "lfp_electrode_group_name": "test", + "target_interval_list_name": "01_s1", + "filter_name": "LFP 0-400 Hz", + "filter_sampling_rate": 30_000, +} +lfp.v1.LFPSelection.insert1(lfp_key, skip_duplicates=True) +lfp.v1.LFPV1().populate(lfp_key) +LFPOutput.insert([lfp_key], skip_duplicates=True) + +# ## Helper functions +# + +# +# Merge Tables have multiple custom methods that begin with `merge`. +# +# `help` can show us the docstring of each + +merge_methods = [d for d in dir(Merge) if d.startswith("merge")] +print(merge_methods) + +help(getattr(Merge, merge_methods[-1])) + +# ## Showing data + +# `merge_view` shows a union of the master and all part tables. +# +# _Note_: Restrict Merge Tables with arguments, not the `&` operator. +# +# - Normally: `Table & "field='value'"` +# - Instead: `MergeTable.merge_view(restriction="field='value'"`). +# + +LFPOutput.merge_view() + +# UUIDs help retain unique entries across all part tables. We can fetch NWB file +# by referencing this or other features. +# + +uuid_key = LFPOutput.fetch(limit=1, as_dict=True)[-1] +restrict = LFPOutput & uuid_key +restrict + +result1 = restrict.fetch_nwb() +result1 + +nwb_key = LFPOutput.merge_restrict(nwb_file_dict).fetch(as_dict=True)[0] +nwb_key + +result2 = (LFPOutput & nwb_key).fetch_nwb() +result2 == result1 + +# ## Selecting data + +# There are also functions for retrieving part/parent table(s) and fetching data. +# +# These `get` functions will either return the part table of the Merge table or the parent table with the source information for that part. +# + +result4 = LFPOutput.merge_get_part(restriction=nwb_file_dict, join_master=True) +result4 + +result5 = LFPOutput.merge_get_parent(restriction='nwb_file_name LIKE "mini%"') +result5 + +# `fetch` will collect all relevant entries and return them as a list in +# the format specified by keyword arguments and one's DataJoint config. +# + +result6 = result5.fetch("lfp_sampling_rate") # Sample rate for all mini* files +result6 + +# `merge_fetch` requires a restriction as the first argument. For no restriction, +# use `True`. + +result7 = LFPOutput.merge_fetch(True, "filter_name", "nwb_file_name") +result7 + +result8 = LFPOutput.merge_fetch(as_dict=True) +result8 + +# ## Deletion from Merge Tables + +# When deleting from Merge Tables, we can either... +# +# 1. delete from the Merge Table itself with `merge_delete`, deleting both +# the master and part. +# +# 2. use `merge_delete_parent` to delete from the parent sources, getting rid of +# the entries in the source table they came from. +# +# 3. use `delete_downstream_merge` to find Merge Tables downstream and get rid +# full entries, avoiding orphaned master table entries. +# +# The two latter cases can be destructive, so we include an extra layer of +# protection with `dry_run`. When true (by default), these functions return +# a list of tables with the entries that would otherwise be deleted. + +LFPOutput.merge_delete(nwb_file_dict) # Delete from merge table +LFPOutput.merge_delete_parent(restriction=nwb_file_dict, dry_run=True) +delete_downstream_merge( + table=LFPV1, + restriction=nwb_file_dict, + dry_run=True, +) + +# To delete all merge table entries associated with an NWB file, use +# `delete_downstream_merge` with the `Nwbfile` table. 
+# + +delete_downstream_merge( + table=sgc.Nwbfile, + restriction={"nwb_file_name": nwb_copy_file_name}, + dry_run=True, + recurse_level=3, # for long pipelines with many tables +) + +# ## Up Next + +# In the [next notebook](./10_Spike_Sorting.ipynb), we'll start working with +# ephys data with spike sorting. diff --git a/tests/data_import/test_insert_sessions.py b/tests/data_import/test_insert_sessions.py index d8af63c5d..d7968d164 100644 --- a/tests/data_import/test_insert_sessions.py +++ b/tests/data_import/test_insert_sessions.py @@ -9,7 +9,7 @@ from hdmf.backends.warnings import BrokenLinkWarning from spyglass.data_import.insert_sessions import copy_nwb_link_raw_ephys -from spyglass.settings import load_config +from spyglass.settings import raw_dir @pytest.fixture() @@ -49,8 +49,6 @@ def new_nwbfile_raw_file_name(tmp_path): _ = tmp_path # CBroz: Changed to match testing base directory - raw_dir = load_config()["SPYGLASS_RAW_DIR"] - file_name = "raw.nwb" file_path = raw_dir + "/" + file_name @@ -93,8 +91,10 @@ def test_copy_nwb( new_nwbfile_raw_file_name_abspath ) - # test readability after moving the linking raw file (paths are stored as relative paths in NWB) - # so this should break the link (moving the linked-to file should also break the link) + # test readability after moving the linking raw file (paths are stored as + # relative paths in NWB) so this should break the link (moving the linked-to + # file should also break the link) + shutil.move(out_nwb_file_abspath, moved_nwbfile_no_ephys_file_path) with pynwb.NWBHDF5IO( path=str(moved_nwbfile_no_ephys_file_path), mode="r"