Merge pull request #9 from catalystneuro/Behavior/add-subject-metadata

[Behavior project] Add subject metadata
catalystneuro · Sep 3, 2024 · 58892fa · 58892fa
2 parents 2aabb6e + 84ff38b
commit 58892fa
Show file tree

Hide file tree

Showing 3 changed files with 109 additions and 34 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,12 @@
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.2.0
+    rev: v4.6.0
     hooks:
     -   id: check-yaml
     -   id: end-of-file-fixer
     -   id: trailing-whitespace
 -   repo: https://github.com/psf/black
-    rev: 22.8.0
+    rev: 24.8.0
     hooks:
     -   id: black
         exclude: ^docs/
diff --git a/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py b/src/constantinople_lab_to_nwb/mah_2024/mah_2024_convert_session.py
@@ -1,21 +1,81 @@
 from pathlib import Path
 from typing import Union, Optional
 
+import pandas as pd
 from dateutil import tz
 from neuroconv.utils import load_dict_from_file, dict_deep_update
+from pymatreader import read_mat
 
 from constantinople_lab_to_nwb.mah_2024 import Mah2024NWBConverter
 
 
+def get_subject_metadata_from_rat_info_folder(
+    folder_path: Union[str, Path],
+    subject_id: str,
+    date: str,
+) -> dict:
+    """
+    Load subject metadata from the rat info files.
+
+    The "registry.mat" file contains information about the subject such as date of birth, sex, and vendor.
+    The "Mass_registry.mat" file contains information about the weight of the subject.
+    Both files are optional: a missing file, or a file without a matching row, contributes nothing.
+
+    Parameters
+    ----------
+    folder_path: Union[str, Path]
+        The folder path containing the rat info files.
+    subject_id: str
+        The subject ID. Matched against the "RatName" column of the registry and the "rat" column
+        of the mass registry.
+    date: str
+        The date of the session in the format "yyyy-mm-dd". Matched against the "date" column of the
+        mass registry.
+
+    Returns
+    -------
+    dict
+        The subject metadata that could be loaded. May contain "date_of_birth", "sex", "description"
+        (the vendor) and "weight" (in kilograms, as a string). Empty when nothing was found.
+    """
+
+    folder_path = Path(folder_path)
+    rat_registry_file_path = folder_path / "registry.mat"
+
+    subject_metadata = dict()
+    if rat_registry_file_path.exists():
+        # The "Registry" entry of the loaded file is turned into a table
+        # (presumably one row per rat -- TODO confirm against the .mat file).
+        rat_registry = read_mat(str(rat_registry_file_path))
+        rat_registry = pd.DataFrame(rat_registry["Registry"])
+
+        # Keep only the rows of the requested subject; the first match is used below.
+        filtered_rat_registry = rat_registry[rat_registry["RatName"] == subject_id]
+        if not filtered_rat_registry.empty:
+            date_of_birth = filtered_rat_registry["DOB"].values[0]
+            # convert date of birth to datetime with format "yyyy-mm-dd"
+            date_of_birth = pd.to_datetime(date_of_birth, format="%Y-%m-%d")
+            sex = filtered_rat_registry["sex"].values[0]
+            subject_metadata.update(
+                date_of_birth=date_of_birth,
+                sex=sex,
+            )
+            vendor = filtered_rat_registry["vendor"].values[0]
+            # The vendor is optional: the description is only added for a truthy value.
+            # NOTE(review): a NaN vendor would be truthy and produce "Vendor: nan" --
+            # confirm how a missing vendor is stored in the registry.
+            if vendor:
+                subject_metadata.update(description=f"Vendor: {vendor}")
+
+    mass_registry_file_path = folder_path / "Mass_registry.mat"
+    if mass_registry_file_path.exists():
+        mass_registry = read_mat(str(mass_registry_file_path))
+        mass_registry = pd.DataFrame(mass_registry["Mass_registry"])
+
+        # Select the weighing of this subject on the session date. The date is compared by plain
+        # equality, so `date` must match the stored representation exactly
+        # (presumably "yyyy-mm-dd" strings -- TODO confirm).
+        filtered_mass_registry = mass_registry[(mass_registry["rat"] == subject_id) & (mass_registry["date"] == date)]
+        if not filtered_mass_registry.empty:
+            # Cast to integer before use; the first matching row is taken.
+            weight_g = filtered_mass_registry["mass"].astype(int).values[0]  # in grams
+            # convert mass to kg
+            weight_kg = weight_g / 1000
+            # The weight is stored as a string.
+            subject_metadata.update(weight=str(weight_kg))
+
+    return subject_metadata
+
+
 def session_to_nwb(
-        raw_behavior_file_path: Union[str, Path],
-        processed_behavior_file_path: Union[str, Path],
-        date: str,
-        nwbfile_path: Union[str, Path],
-        column_name_mapping: Optional[dict] = None,
-        column_descriptions: Optional[dict] = None,
-        overwrite: bool = False,
-        verbose: bool = False,
+    raw_behavior_file_path: Union[str, Path],
+    processed_behavior_file_path: Union[str, Path],
+    date: str,
+    nwbfile_path: Union[str, Path],
+    column_name_mapping: Optional[dict] = None,
+    column_descriptions: Optional[dict] = None,
+    subject_metadata: Optional[dict] = None,
+    overwrite: bool = False,
+    verbose: bool = False,
 ):
     """
     Convert a single session to NWB format.
@@ -35,6 +95,8 @@ def session_to_nwb(
         Dictionary to map the column names in the processed behavior data to more descriptive column names.
     column_descriptions: dict, optional
         Dictionary to add descriptions to the columns in the processed behavior data.
+    subject_metadata: dict, optional
+        Metadata about the subject.
     overwrite: bool, optional
         Whether to overwrite the NWB file if it already exists.
     verbose: bool, optional
@@ -61,7 +123,9 @@ def session_to_nwb(
 
     # Add Processed Behavior
     source_data.update(dict(ProcessedBehavior=dict(file_path=processed_behavior_file_path, date=date)))
-    conversion_options.update(dict(ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions)))
+    conversion_options.update(
+        dict(ProcessedBehavior=dict(column_name_mapping=column_name_mapping, column_descriptions=column_descriptions))
+    )
 
     converter = Mah2024NWBConverter(source_data=source_data, verbose=verbose)
 
@@ -88,6 +152,8 @@ def session_to_nwb(
     metadata = dict_deep_update(metadata, behavior_metadata)
 
     metadata["Subject"].update(subject_id=subject_id)
+    if subject_metadata is not None:
+        metadata["Subject"].update(subject_metadata)
 
     # Run conversion
     converter.run_conversion(
@@ -102,9 +168,9 @@ def session_to_nwb(
     # Parameters for conversion
 
     # The raw behavior data is stored in a .mat file (contains data for a single session)
-    bpod_file_path = Path("/Volumes/T9/Constantinople/C005/DataFiles/C005_RWTautowait_20190909_145629.mat")
+    bpod_file_path = Path("/Volumes/T9/Constantinople/raw_Bpod/C005/DataFiles/C005_RWTautowait_20190909_145629.mat")
     # The processed behavior data is stored in a .mat file (contains data for multiple days)
-    processed_behavior_file_path = Path("/Volumes/T9/Constantinople/ratTrial_C005-new.mat")
+    processed_behavior_file_path = Path("/Volumes/T9/Constantinople/A_Structs/ratTrial_C005.mat")
     # The date is used to identify the session to convert from the processed behavior file
     date = "09-Sep-2019"
     # The column name mapping is used to rename the columns in the processed data to more descriptive column names. (optional)
@@ -134,8 +200,8 @@ def session_to_nwb(
         vios="Whether the subject violated the trial by not maintaining center poke for the time required by 'nose_in_center'.",
         optout="Whether the subject opted out for each trial.",
         wait_time="The wait time for the subject for for each trial in seconds, after removing outliers."
-                  " For hit trials (when reward was delivered) the wait time is equal to the reward delay."
-                  " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.",
+        " For hit trials (when reward was delivered) the wait time is equal to the reward delay."
+        " For opt-out trials, the wait time is equal to the time waited from trial start to opting out.",
         wait_for_cpoke="The time between side port poke and center poke in seconds, includes the time when the subject is consuming the reward.",
         zwait_for_cpoke="The z-scored wait_for_cpoke using all trials.",
         side="The rewarded port (Left or Right) for each trial.",
@@ -151,6 +217,14 @@ def session_to_nwb(
         wait_time_unthresholded="The wait time for the subject for each trial in seconds without removing outliers.",
         wait_thresh="The threshold in seconds to remove wait-times (mean + 1*std of all cumulative wait-times).",
     )
+
+    # Add subject metadata
+    subject_metadata = get_subject_metadata_from_rat_info_folder(
+        folder_path="/Volumes/T9/Constantinople/Rat_info",
+        subject_id="C005",
+        date="2019-09-09",
+    )
+
     # Path to the output NWB file
     nwbfile_path = Path("/Volumes/T9/Constantinople/nwbfiles/C005_RWTautowait_20190909_145629.nwb")
 
@@ -166,6 +240,7 @@ def session_to_nwb(
         column_name_mapping=column_name_mapping,
         column_descriptions=column_descriptions,
         nwbfile_path=nwbfile_path,
+        subject_metadata=subject_metadata,
         overwrite=overwrite,
         verbose=verbose,
     )
diff --git a/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml b/src/constantinople_lab_to_nwb/mah_2024/metadata/mah_2024_general_metadata.yaml
@@ -4,37 +4,37 @@ NWBFile:
     - https://doi.org/10.5281/zenodo.10031483
   experiment_description: |
     The value of the environment determines animals’ motivational states and sets expectations for error-based learning.
-    How are values computed? Reinforcement learning systems can store or cache values of states or actions that are 
+    How are values computed? Reinforcement learning systems can store or cache values of states or actions that are
     learned from experience, or they can compute values using a model of the environment to simulate possible futures.
-    These value computations have distinct trade-offs, and a central question is how neural systems decide which 
-    computations to use or whether/how to combine them. Here we show that rats use distinct value computations for 
-    sequential decisions within single trials. We used high-throughput training to collect statistically powerful 
+    These value computations have distinct trade-offs, and a central question is how neural systems decide which
+    computations to use or whether/how to combine them. Here we show that rats use distinct value computations for
+    sequential decisions within single trials. We used high-throughput training to collect statistically powerful
     datasets from 291 rats performing a temporal wagering task with hidden reward states. Rats adjusted how quickly they
-    initiated trials and how long they waited for rewards across states, balancing effort and time costs against 
+    initiated trials and how long they waited for rewards across states, balancing effort and time costs against
     expected rewards. Statistical modeling revealed that animals computed the value of the environment differently when
     initiating trials versus when deciding how long to wait for rewards, even though these decisions were only seconds
-    apart. Moreover, value estimates interacted via a dynamic learning rate. Our results reveal how distinct value 
-    computations interact on rapid timescales, and demonstrate the power of using high-throughput training to 
+    apart. Moreover, value estimates interacted via a dynamic learning rate. Our results reveal how distinct value
+    computations interact on rapid timescales, and demonstrate the power of using high-throughput training to
     understand rich, cognitive behaviors.
   session_description:
-    We developed a temporal wagering task for rats, in which they were offered one of several water rewards on each 
-    trial, the volume of which (5, 10, 20, 40, 80μL) was indicated by a tone. The reward was assigned randomly to one 
-    of two ports, indicated by an LED. The rat could wait for an unpredictable delay to obtain the reward, or at any 
-    time could terminate the trial by poking in the other port (opt-out). Wait times were defined as how long rats 
-    waited before opting out. Trial initiation times were defined as the time from opting out or consuming reward to 
-    initiating a new trial. Reward delays were drawn from an exponential distribution, and on 15–25 percent of trials, 
+    We developed a temporal wagering task for rats, in which they were offered one of several water rewards on each
+    trial, the volume of which (5, 10, 20, 40, 80μL) was indicated by a tone. The reward was assigned randomly to one
+    of two ports, indicated by an LED. The rat could wait for an unpredictable delay to obtain the reward, or at any
+    time could terminate the trial by poking in the other port (opt-out). Wait times were defined as how long rats
+    waited before opting out. Trial initiation times were defined as the time from opting out or consuming reward to
+    initiating a new trial. Reward delays were drawn from an exponential distribution, and on 15–25 percent of trials,
     rewards were withheld to force rats to opt-out, providing a continuous behavioral readout of subjective value.
     We used a high-throughput facility to train 291 rats using computerized, semi-automated procedures.
-    The task contained latent structure; rats experienced blocks of 40 completed trials (hidden states) in which they 
-    were presented with low (5, 10, or 20μL) or high (20, 40, or 80μL) rewards. These were interleaved with “mixed" 
-    blocks which offered all rewards. 20μL was present in all blocks, so comparing behavior on trials 
-    offering this reward revealed contextual effects (i.e., effects of hidden states). The hidden states differed in 
+    The task contained latent structure; rats experienced blocks of 40 completed trials (hidden states) in which they
+    were presented with low (5, 10, or 20μL) or high (20, 40, or 80μL) rewards. These were interleaved with “mixed”
+    blocks which offered all rewards. 20μL was present in all blocks, so comparing behavior on trials
+    offering this reward revealed contextual effects (i.e., effects of hidden states). The hidden states differed in
     their average reward and therefore in their opportunity costs, or what the rat might miss out on by continuing to wait.
   institution: NYU Center for Neural Science
   lab: Constantinople
   experimenter:
     - Mah, Andrew
 Subject:
   species: Rattus norvegicus
-  age: P6M/P24M  # in ISO 8601
-  sex: U  # TODO: update for each subject
+  # age: in ISO 8601 format, updated automatically for each subject
+  # sex: One of M, F, U, or O, updated automatically for each subject