diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py index b9eb09f..d85e5ef 100644 --- a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py +++ b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py @@ -41,6 +41,22 @@ def dataset_to_nwb( verbose : bool, optional Whether to print verbose output, by default True """ + subjects_to_skip = { + "289.407", + "244.464", + "264.477", + "102.260", + "262.478", + "289.408", + "264.475", + "129.425", + "250.427", + "95.259", + "309.399", + "433.421", + "416.405", + "364.426", + } start_variable = "Start Date" data_dir_path = Path(data_dir_path) output_dir_path = Path(output_dir_path) @@ -66,6 +82,8 @@ def dataset_to_nwb( experiment_type = session_to_nwb_kwargs["experiment_type"] experimental_group = session_to_nwb_kwargs["experimental_group"] subject_id = session_to_nwb_kwargs["subject_id"] + if subject_id in subjects_to_skip: + continue start_datetime = session_to_nwb_kwargs["start_datetime"] optogenetic_treatment = session_to_nwb_kwargs.get("optogenetic_treatment", None) if experiment_type == "FP": @@ -190,6 +208,13 @@ def fp_to_nwb( "Photo_140_306-190809-121107", "Photo_271_396-200707-125117", } + partial_subject_ids_to_subject_id = { + "300": "300.405", + "418": "418.404", + "299": "299.405", + "276": "276.405", + "262.259.478": "262.478", + } raw_file_to_info = get_raw_info(behavior_path) # Iterate through file system to get necessary information for converting each session @@ -317,6 +342,8 @@ def fp_to_nwb( if box_number is not None: session_conditions["Box"] = box_number start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S") + if photometry_subject_id in partial_subject_ids_to_subject_id: + photometry_subject_id = partial_subject_ids_to_subject_id[photometry_subject_id] session_to_nwb_args = dict( data_dir_path=data_dir_path, output_dir_path=output_dir_path, @@ -389,6 
+416,8 @@ def fp_to_nwb( if box_number is not None: session_conditions["Box"] = box_number start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S") + if subject_id in partial_subject_ids_to_subject_id: + subject_id = partial_subject_ids_to_subject_id[subject_id] session_to_nwb_args = dict( data_dir_path=data_dir_path, output_dir_path=output_dir_path, @@ -436,6 +465,36 @@ def opto_to_nwb( list[dict] A list of dictionaries containing the arguments for session_to_nwb for each session. """ + partial_subject_ids_to_subject_id = { + "268": "268.476", + "266": "266.477", + "244": "244.465", + "343": "343.483", + "419": "419.404", + "245": "245.464", + "342": "342.483", + "202": "202.465", + "313": "313.403", + "418": "418.404", + "340": "340.483", + "259": "259.478", + "264": "264.478", + "421": "421.404", + "417": "417.404", + "233": "233.469", + "261": "261.478", + "265": "265.476", + "311": "311.403", + "206": "206.468", + "243": "243.468", + "263": "263.477", + "338": "338.398", + "414": "414.405", + "300": "300.405", + "299": "299.405", + "276": "276.405", + "262.259.478": "262.478", + } experiment_type = "Opto" experimental_group_to_optogenetic_treatments = { "DLS-Excitatory": ["ChR2", "EYFP", "ChR2Scrambled"], @@ -557,6 +616,8 @@ def opto_to_nwb( "Box": box_number, } start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S") + if subject in partial_subject_ids_to_subject_id: + subject = partial_subject_ids_to_subject_id[subject] session_to_nwb_args = dict( data_dir_path=data_dir_path, output_dir_path=output_dir_path, @@ -617,6 +678,10 @@ def get_opto_subject_id(subject_path: Path): "263": "263.477", "338": "338.398", "414": "414.405", + "300": "300.405", + "299": "299.405", + "276": "276.405", + "262.259.478": "262.478", } # fmt: off diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_session.py b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_session.py index e748ff8..5aa34e2 100644 --- 
a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_session.py +++ b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_session.py @@ -136,7 +136,7 @@ def session_to_nwb( conversion_options.update(dict(Optogenetic={})) # Add Excel-based Metadata - metadata_path = data_dir_path / "MouseDemographics.xlsx" + metadata_path = data_dir_path / "MouseDemographicsCorrected.xlsx" source_data.update( dict( Metadata={ @@ -263,7 +263,7 @@ def split_western_blot(*, file_path: Union[str, Path]): # No-shock example session experiment_type = "FP" experimental_group = "RR20" - subject_id = "95.259" + subject_id = "96.259" start_datetime = datetime(2019, 4, 9, 10, 34, 30) behavior_file_path = ( data_dir_path @@ -294,8 +294,8 @@ def split_western_blot(*, file_path: Union[str, Path]): # Shock session experiment_type = "FP" experimental_group = "RR20" - subject_id = "95.259" - start_datetime = datetime(2019, 4, 18, 10, 41, 42) + subject_id = "96.259" + start_datetime = datetime(2019, 4, 18, 9, 28, 20) session_conditions = { "Start Date": start_datetime.strftime("%m/%d/%y"), "Start Time": start_datetime.strftime("%H:%M:%S"), @@ -446,9 +446,9 @@ def split_western_blot(*, file_path: Union[str, Path]): # Fiber Photometry session experiment_type = "FP" - experimental_group = "PR" - subject_id = "028.392" - start_datetime = datetime(2020, 7, 9, 13, 1, 26) + experimental_group = "PS" + subject_id = "112.283" + start_datetime = datetime(2019, 6, 20, 9, 32, 4) session_conditions = { "Start Date": start_datetime.strftime("%m/%d/%y"), "Start Time": start_datetime.strftime("%H:%M:%S"), @@ -466,15 +466,16 @@ def split_western_blot(*, file_path: Union[str, Path]): data_dir_path / f"{experiment_type} Experiments" / "Photometry" - / f"Punishment Resistant" + / f"Punishment Sensitive" / f"Early RI60" - / f"Photo_{subject_id.split('.')[0]}_{subject_id.split('.')[1]}-200709-130922" + / f"Photo_{subject_id.split('.')[0]}_{subject_id.split('.')[1]}-190620-093542" ) session_to_nwb( 
data_dir_path=data_dir_path, output_dir_path=output_dir_path, behavior_file_path=behavior_file_path, fiber_photometry_folder_path=fiber_photometry_folder_path, + has_demodulated_commanded_voltages=False, subject_id=subject_id, session_conditions=session_conditions, start_variable=start_variable, @@ -716,8 +717,8 @@ def split_western_blot(*, file_path: Union[str, Path]): experiment_type = "Opto" experimental_group = "DLS-Excitatory" optogenetic_treatment = "ChR2" - subject_id = "079.402" - start_datetime = datetime(2020, 6, 26, 13, 19, 27) + subject_id = "242.388" + start_datetime = datetime(2020, 6, 26, 12, 10, 40) session_conditions = { "Start Date": start_datetime.strftime("%m/%d/%y"), "Start Time": start_datetime.strftime("%H:%M:%S"), diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md index f601c56..a94cb48 100644 --- a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md +++ b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md @@ -161,8 +161,42 @@ for that 1 session split across the two folders? 
### Questions - Some of the subject_ids are not present in the metadata excel file -- pls provide - Some animals are missing the "Hemisphere with DMS" field -- pls provide -======= - - Solution: Added missing MSNs; skipped RK_C_FR1_BOTH_1hr +- Some of the mouse IDs have typos (missing leading or trailing zeros), and some appear to be incorrect (RR20 section). + So, I made the following corrections to the metadata Excel sheet: + Mouse ID corrections: + 79.402 --> 079.402 + 344.4 --> 344.400 + 432.42 --> 432.420 + 48.392 --> 048.392 + 98.259 --> 98.257 + 101.259 --> 101.260 + 97.259 --> 97.257 + 99.259 --> 99.257 + 100.259 --> 100.258 + 359.43 --> 359.430 + 28.392 --> 028.392 + 227.43 --> 227.430 + 262.478 --> 262.259 + 354.43 --> 354.430 + 430.42 --> 430.420 + 342.483 --> 342.400 + After these corrections, the following mouse IDs are still missing from the Excel sheet: + subjects_to_skip = { + "289.407", + "244.464", + "264.477", + "102.260", + "262.478", + "289.408", + "264.475", + "129.425", + "250.427", + "95.259", + "309.399", + "433.421", + "416.405", + "364.426", + } ### Active Questions - DMS-Excitatory has some csv files w/ only session-aggregated info (total right rewards but not right reward times) ex. ChR2/121_280.CSV -- do you have individual session info for these animals? 
diff --git a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024excelmetadatainterface.py b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024excelmetadatainterface.py index b021517..6f40d08 100644 --- a/src/lerner_lab_to_nwb/seiler_2024/seiler_2024excelmetadatainterface.py +++ b/src/lerner_lab_to_nwb/seiler_2024/seiler_2024excelmetadatainterface.py @@ -41,36 +41,29 @@ def get_metadata(self) -> DeepDict: df["Mouse ID"] = df["Mouse ID"].str.replace("(DNL)", "") df["Mouse ID"] = df["Mouse ID"].str.strip() df.set_index("Mouse ID", inplace=True) + subject_df = df.loc[self.source_data["subject_id"]] - if self.source_data["subject_id"] in df.index: - subject_df = df.loc[self.source_data["subject_id"]] - - # Add metadata to metadata dict - excel_sex_to_nwb_sex = {"Male": "M", "Female": "F"} - metadata["Subject"]["sex"] = excel_sex_to_nwb_sex[subject_df["Sex"]] - metadata["NWBFile"]["surgery"] = subject_df["Surgical Manipulation"] - if not pd.isna(subject_df["Treatment"]): - metadata["NWBFile"]["stimulus_notes"] = subject_df["Treatment"] - if subject_df["Experiment"] == "Fiber Photometry": - metadata["NWBFile"]["virus"] = "AAV5-CAG-FLEX-jGCaMP7b-WPRE" - elif subject_df["Experiment"] == "DLS-Excitatory" or subject_df["Experiment"] == "DMS-Excitatory": - metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-hChR2(H134R)-EYFP" - elif subject_df["Experiment"] == "DMS-Inhibitory" or subject_df["Experiment"] == "DMS-Inhibitory Group 2": - metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-eNpHR3.0-EYFP" - if subject_df["Treatment"] == "Control": - metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-EYFP" - metadata["NWBFile"]["notes"] = ( - f'Hemisphere with DMS: {subject_df["Hemisphere with DMS"]}\n' - f'Experiment: {subject_df["Experiment"]}\n' - f'Behavior: {subject_df["Behavior"]}\n' - f'Punishment Group: {str(subject_df["Punishment Group"]).replace("Resitant", "Resistant")}\n' - f'Did Not Learn: {subject_df["DNL"]}\n' - ) - else: # TODO: Ask Lerner lab about missing subjects - if self.verbose: - 
print(f"Subject ID {self.source_data['subject_id']} not found in metadata file.") - metadata["Subject"]["sex"] = "U" - + # Add metadata to metadata dict + excel_sex_to_nwb_sex = {"Male": "M", "Female": "F"} + metadata["Subject"]["sex"] = excel_sex_to_nwb_sex[subject_df["Sex"]] + metadata["NWBFile"]["surgery"] = subject_df["Surgical Manipulation"] + if not pd.isna(subject_df["Treatment"]): + metadata["NWBFile"]["stimulus_notes"] = subject_df["Treatment"] + if subject_df["Experiment"] == "Fiber Photometry": + metadata["NWBFile"]["virus"] = "AAV5-CAG-FLEX-jGCaMP7b-WPRE" + elif subject_df["Experiment"] == "DLS-Excitatory" or subject_df["Experiment"] == "DMS-Excitatory": + metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-hChR2(H134R)-EYFP" + elif subject_df["Experiment"] == "DMS-Inhibitory" or subject_df["Experiment"] == "DMS-Inhibitory Group 2": + metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-eNpHR3.0-EYFP" + if subject_df["Treatment"] == "Control": + metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-EYFP" + metadata["NWBFile"]["notes"] = ( + f'Hemisphere with DMS: {subject_df["Hemisphere with DMS"]}\n' + f'Experiment: {subject_df["Experiment"]}\n' + f'Behavior: {subject_df["Behavior"]}\n' + f'Punishment Group: {str(subject_df["Punishment Group"]).replace("Resitant", "Resistant")}\n' + f'Did Not Learn: {subject_df["DNL"]}\n' + ) metadata["Subject"]["subject_id"] = self.source_data["subject_id"] return metadata