Merge pull request #27 from catalystneuro/subject_ids

Subject ids
catalystneuro · May 28, 2024 · ae83b75 · ae83b75
2 parents a840c96 + 55b8d51
commit ae83b75
Show file tree

Hide file tree

Showing 4 changed files with 135 additions and 42 deletions.
diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py
@@ -41,6 +41,22 @@ def dataset_to_nwb(
     verbose : bool, optional
         Whether to print verbose output, by default True
     """
+    subjects_to_skip = {
+        "289.407",
+        "244.464",
+        "264.477",
+        "102.260",
+        "262.478",
+        "289.408",
+        "264.475",
+        "129.425",
+        "250.427",
+        "95.259",
+        "309.399",
+        "433.421",
+        "416.405",
+        "364.426",
+    }
     start_variable = "Start Date"
     data_dir_path = Path(data_dir_path)
     output_dir_path = Path(output_dir_path)
@@ -66,6 +82,8 @@ def dataset_to_nwb(
             experiment_type = session_to_nwb_kwargs["experiment_type"]
             experimental_group = session_to_nwb_kwargs["experimental_group"]
             subject_id = session_to_nwb_kwargs["subject_id"]
+            if subject_id in subjects_to_skip:
+                continue
             start_datetime = session_to_nwb_kwargs["start_datetime"]
             optogenetic_treatment = session_to_nwb_kwargs.get("optogenetic_treatment", None)
             if experiment_type == "FP":
@@ -190,6 +208,13 @@ def fp_to_nwb(
         "Photo_140_306-190809-121107",
         "Photo_271_396-200707-125117",
     }
+    partial_subject_ids_to_subject_id = {
+        "300": "300.405",
+        "418": "418.404",
+        "299": "299.405",
+        "276": "276.405",
+        "262.259.478": "262.478",
+    }
     raw_file_to_info = get_raw_info(behavior_path)
 
     # Iterate through file system to get necessary information for converting each session
@@ -317,6 +342,8 @@ def fp_to_nwb(
                 if box_number is not None:
                     session_conditions["Box"] = box_number
                 start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S")
+                if photometry_subject_id in partial_subject_ids_to_subject_id:
+                    photometry_subject_id = partial_subject_ids_to_subject_id[photometry_subject_id]
                 session_to_nwb_args = dict(
                     data_dir_path=data_dir_path,
                     output_dir_path=output_dir_path,
@@ -389,6 +416,8 @@ def fp_to_nwb(
                 if box_number is not None:
                     session_conditions["Box"] = box_number
                 start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S")
+                if subject_id in partial_subject_ids_to_subject_id:
+                    subject_id = partial_subject_ids_to_subject_id[subject_id]
                 session_to_nwb_args = dict(
                     data_dir_path=data_dir_path,
                     output_dir_path=output_dir_path,
@@ -436,6 +465,36 @@ def opto_to_nwb(
     list[dict]
         A list of dictionaries containing the arguments for session_to_nwb for each session.
     """
+    partial_subject_ids_to_subject_id = {
+        "268": "268.476",
+        "266": "266.477",
+        "244": "244.465",
+        "343": "343.483",
+        "419": "419.404",
+        "245": "245.464",
+        "342": "342.483",
+        "202": "202.465",
+        "313": "313.403",
+        "418": "418.404",
+        "340": "340.483",
+        "259": "259.478",
+        "264": "264.478",
+        "421": "421.404",
+        "417": "417.404",
+        "233": "233.469",
+        "261": "261.478",
+        "265": "265.476",
+        "311": "311.403",
+        "206": "206.468",
+        "243": "243.468",
+        "263": "263.477",
+        "338": "338.398",
+        "414": "414.405",
+        "300": "300.405",
+        "299": "299.405",
+        "276": "276.405",
+        "262.259.478": "262.478",
+    }
     experiment_type = "Opto"
     experimental_group_to_optogenetic_treatments = {
         "DLS-Excitatory": ["ChR2", "EYFP", "ChR2Scrambled"],
@@ -557,6 +616,8 @@ def opto_to_nwb(
             "Box": box_number,
         }
         start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S")
+        if subject in partial_subject_ids_to_subject_id:
+            subject = partial_subject_ids_to_subject_id[subject]
         session_to_nwb_args = dict(
             data_dir_path=data_dir_path,
             output_dir_path=output_dir_path,
@@ -617,6 +678,10 @@ def get_opto_subject_id(subject_path: Path):
         "263": "263.477",
         "338": "338.398",
         "414": "414.405",
+        "300": "300.405",
+        "299": "299.405",
+        "276": "276.405",
+        "262.259.478": "262.478",
     }
 
     # fmt: off

diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_session.py b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_session.py
@@ -136,7 +136,7 @@ def session_to_nwb(
         conversion_options.update(dict(Optogenetic={}))
 
     # Add Excel-based Metadata
-    metadata_path = data_dir_path / "MouseDemographics.xlsx"
+    metadata_path = data_dir_path / "MouseDemographicsCorrected.xlsx"
     source_data.update(
         dict(
             Metadata={
@@ -263,7 +263,7 @@ def split_western_blot(*, file_path: Union[str, Path]):
     # No-shock example session
     experiment_type = "FP"
     experimental_group = "RR20"
-    subject_id = "95.259"
+    subject_id = "96.259"
     start_datetime = datetime(2019, 4, 9, 10, 34, 30)
     behavior_file_path = (
         data_dir_path
@@ -294,8 +294,8 @@ def split_western_blot(*, file_path: Union[str, Path]):
     # Shock session
     experiment_type = "FP"
     experimental_group = "RR20"
-    subject_id = "95.259"
-    start_datetime = datetime(2019, 4, 18, 10, 41, 42)
+    subject_id = "96.259"
+    start_datetime = datetime(2019, 4, 18, 9, 28, 20)
     session_conditions = {
         "Start Date": start_datetime.strftime("%m/%d/%y"),
         "Start Time": start_datetime.strftime("%H:%M:%S"),
@@ -446,9 +446,9 @@ def split_western_blot(*, file_path: Union[str, Path]):
 
     # Fiber Photometry session
     experiment_type = "FP"
-    experimental_group = "PR"
-    subject_id = "028.392"
-    start_datetime = datetime(2020, 7, 9, 13, 1, 26)
+    experimental_group = "PS"
+    subject_id = "112.283"
+    start_datetime = datetime(2019, 6, 20, 9, 32, 4)
     session_conditions = {
         "Start Date": start_datetime.strftime("%m/%d/%y"),
         "Start Time": start_datetime.strftime("%H:%M:%S"),
@@ -466,15 +466,16 @@ def split_western_blot(*, file_path: Union[str, Path]):
         data_dir_path
         / f"{experiment_type} Experiments"
         / "Photometry"
-        / f"Punishment Resistant"
+        / f"Punishment Sensitive"
         / f"Early RI60"
-        / f"Photo_{subject_id.split('.')[0]}_{subject_id.split('.')[1]}-200709-130922"
+        / f"Photo_{subject_id.split('.')[0]}_{subject_id.split('.')[1]}-190620-093542"
     )
     session_to_nwb(
         data_dir_path=data_dir_path,
         output_dir_path=output_dir_path,
         behavior_file_path=behavior_file_path,
         fiber_photometry_folder_path=fiber_photometry_folder_path,
+        has_demodulated_commanded_voltages=False,
         subject_id=subject_id,
         session_conditions=session_conditions,
         start_variable=start_variable,
@@ -716,8 +717,8 @@ def split_western_blot(*, file_path: Union[str, Path]):
     experiment_type = "Opto"
     experimental_group = "DLS-Excitatory"
     optogenetic_treatment = "ChR2"
-    subject_id = "079.402"
-    start_datetime = datetime(2020, 6, 26, 13, 19, 27)
+    subject_id = "242.388"
+    start_datetime = datetime(2020, 6, 26, 12, 10, 40)
     session_conditions = {
         "Start Date": start_datetime.strftime("%m/%d/%y"),
         "Start Time": start_datetime.strftime("%H:%M:%S"),

diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md
@@ -161,8 +161,42 @@ for that 1 session split across the two folders?
 ### Questions
 - Some of the subject_ids are not present in the metadata excel file -- pls provide
 - Some animals are missing the "Hemisphere with DMS" field -- pls provide
-=======
-    - Solution: Added missing MSNs; skipped RK_C_FR1_BOTH_1hr
+- Some of the mouse IDs have typos (missing leading or trailing zeros), and some others appear to be incorrect (RR20 section).
+    So, I made the following corrections to the metadata Excel sheet:
+    Mouse ID corrections:
+        79.402 --> 079.402
+        344.4 --> 344.400
+        432.42 --> 432.420
+        48.392 --> 048.392
+        98.259 --> 98.257
+        101.259 --> 101.260
+        97.259 --> 97.257
+        99.259 --> 99.257
+        100.259 --> 100.258
+        359.43 --> 359.430
+        28.392 --> 028.392
+        227.43 --> 227.430
+        262.478 --> 262.259
+        354.43 --> 354.430
+        430.42 --> 430.420
+        342.483 --> 342.400
+    After these corrections, the following mouse IDs are still missing from the Excel sheet:
+    subjects_to_skip = {
+        "289.407",
+        "244.464",
+        "264.477",
+        "102.260",
+        "262.478",
+        "289.408",
+        "264.475",
+        "129.425",
+        "250.427",
+        "95.259",
+        "309.399",
+        "433.421",
+        "416.405",
+        "364.426",
+    }
 
 ### Active Questions
 - DMS-Excitatory has some csv files w/ only session-aggregated info (total right rewards but not right reward times) ex. ChR2/121_280.CSV -- do you have individual session info for these animals?

diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024excelmetadatainterface.py b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024excelmetadatainterface.py
@@ -41,36 +41,29 @@ def get_metadata(self) -> DeepDict:
         df["Mouse ID"] = df["Mouse ID"].str.replace("(DNL)", "")
         df["Mouse ID"] = df["Mouse ID"].str.strip()
         df.set_index("Mouse ID", inplace=True)
+        subject_df = df.loc[self.source_data["subject_id"]]
 
-        if self.source_data["subject_id"] in df.index:
-            subject_df = df.loc[self.source_data["subject_id"]]
-
-            # Add metadata to metadata dict
-            excel_sex_to_nwb_sex = {"Male": "M", "Female": "F"}
-            metadata["Subject"]["sex"] = excel_sex_to_nwb_sex[subject_df["Sex"]]
-            metadata["NWBFile"]["surgery"] = subject_df["Surgical Manipulation"]
-            if not pd.isna(subject_df["Treatment"]):
-                metadata["NWBFile"]["stimulus_notes"] = subject_df["Treatment"]
-            if subject_df["Experiment"] == "Fiber Photometry":
-                metadata["NWBFile"]["virus"] = "AAV5-CAG-FLEX-jGCaMP7b-WPRE"
-            elif subject_df["Experiment"] == "DLS-Excitatory" or subject_df["Experiment"] == "DMS-Excitatory":
-                metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-hChR2(H134R)-EYFP"
-            elif subject_df["Experiment"] == "DMS-Inhibitory" or subject_df["Experiment"] == "DMS-Inhibitory Group 2":
-                metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-eNpHR3.0-EYFP"
-            if subject_df["Treatment"] == "Control":
-                metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-EYFP"
-            metadata["NWBFile"]["notes"] = (
-                f'Hemisphere with DMS: {subject_df["Hemisphere with DMS"]}\n'
-                f'Experiment: {subject_df["Experiment"]}\n'
-                f'Behavior: {subject_df["Behavior"]}\n'
-                f'Punishment Group: {str(subject_df["Punishment Group"]).replace("Resitant", "Resistant")}\n'
-                f'Did Not Learn: {subject_df["DNL"]}\n'
-            )
-        else:  # TODO: Ask Lerner lab about missing subjects
-            if self.verbose:
-                print(f"Subject ID {self.source_data['subject_id']} not found in metadata file.")
-            metadata["Subject"]["sex"] = "U"
-
+        # Add metadata to metadata dict
+        excel_sex_to_nwb_sex = {"Male": "M", "Female": "F"}
+        metadata["Subject"]["sex"] = excel_sex_to_nwb_sex[subject_df["Sex"]]
+        metadata["NWBFile"]["surgery"] = subject_df["Surgical Manipulation"]
+        if not pd.isna(subject_df["Treatment"]):
+            metadata["NWBFile"]["stimulus_notes"] = subject_df["Treatment"]
+        if subject_df["Experiment"] == "Fiber Photometry":
+            metadata["NWBFile"]["virus"] = "AAV5-CAG-FLEX-jGCaMP7b-WPRE"
+        elif subject_df["Experiment"] == "DLS-Excitatory" or subject_df["Experiment"] == "DMS-Excitatory":
+            metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-hChR2(H134R)-EYFP"
+        elif subject_df["Experiment"] == "DMS-Inhibitory" or subject_df["Experiment"] == "DMS-Inhibitory Group 2":
+            metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-eNpHR3.0-EYFP"
+        if subject_df["Treatment"] == "Control":
+            metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-EYFP"
+        metadata["NWBFile"]["notes"] = (
+            f'Hemisphere with DMS: {subject_df["Hemisphere with DMS"]}\n'
+            f'Experiment: {subject_df["Experiment"]}\n'
+            f'Behavior: {subject_df["Behavior"]}\n'
+            f'Punishment Group: {str(subject_df["Punishment Group"]).replace("Resitant", "Resistant")}\n'
+            f'Did Not Learn: {subject_df["DNL"]}\n'
+        )
         metadata["Subject"]["subject_id"] = self.source_data["subject_id"]
 
         return metadata