Skip to content

Commit

Permalink
Merge pull request #27 from catalystneuro/subject_ids
Browse files Browse the repository at this point in the history
Subject ids
  • Loading branch information
pauladkisson authored May 28, 2024
2 parents a840c96 + 55b8d51 commit ae83b75
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 42 deletions.
65 changes: 65 additions & 0 deletions src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ def dataset_to_nwb(
verbose : bool, optional
Whether to print verbose output, by default True
"""
subjects_to_skip = {
"289.407",
"244.464",
"264.477",
"102.260",
"262.478",
"289.408",
"264.475",
"129.425",
"250.427",
"95.259",
"309.399",
"433.421",
"416.405",
"364.426",
}
start_variable = "Start Date"
data_dir_path = Path(data_dir_path)
output_dir_path = Path(output_dir_path)
Expand All @@ -66,6 +82,8 @@ def dataset_to_nwb(
experiment_type = session_to_nwb_kwargs["experiment_type"]
experimental_group = session_to_nwb_kwargs["experimental_group"]
subject_id = session_to_nwb_kwargs["subject_id"]
if subject_id in subjects_to_skip:
continue
start_datetime = session_to_nwb_kwargs["start_datetime"]
optogenetic_treatment = session_to_nwb_kwargs.get("optogenetic_treatment", None)
if experiment_type == "FP":
Expand Down Expand Up @@ -190,6 +208,13 @@ def fp_to_nwb(
"Photo_140_306-190809-121107",
"Photo_271_396-200707-125117",
}
partial_subject_ids_to_subject_id = {
"300": "300.405",
"418": "418.404",
"299": "299.405",
"276": "276.405",
"262.259.478": "262.478",
}
raw_file_to_info = get_raw_info(behavior_path)

# Iterate through file system to get necessary information for converting each session
Expand Down Expand Up @@ -317,6 +342,8 @@ def fp_to_nwb(
if box_number is not None:
session_conditions["Box"] = box_number
start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S")
if photometry_subject_id in partial_subject_ids_to_subject_id:
photometry_subject_id = partial_subject_ids_to_subject_id[photometry_subject_id]
session_to_nwb_args = dict(
data_dir_path=data_dir_path,
output_dir_path=output_dir_path,
Expand Down Expand Up @@ -389,6 +416,8 @@ def fp_to_nwb(
if box_number is not None:
session_conditions["Box"] = box_number
start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S")
if subject_id in partial_subject_ids_to_subject_id:
subject_id = partial_subject_ids_to_subject_id[subject_id]
session_to_nwb_args = dict(
data_dir_path=data_dir_path,
output_dir_path=output_dir_path,
Expand Down Expand Up @@ -436,6 +465,36 @@ def opto_to_nwb(
list[dict]
A list of dictionaries containing the arguments for session_to_nwb for each session.
"""
partial_subject_ids_to_subject_id = {
"268": "268.476",
"266": "266.477",
"244": "244.465",
"343": "343.483",
"419": "419.404",
"245": "245.464",
"342": "342.483",
"202": "202.465",
"313": "313.403",
"418": "418.404",
"340": "340.483",
"259": "259.478",
"264": "264.478",
"421": "421.404",
"417": "417.404",
"233": "233.469",
"261": "261.478",
"265": "265.476",
"311": "311.403",
"206": "206.468",
"243": "243.468",
"263": "263.477",
"338": "338.398",
"414": "414.405",
"300": "300.405",
"299": "299.405",
"276": "276.405",
"262.259.478": "262.478",
}
experiment_type = "Opto"
experimental_group_to_optogenetic_treatments = {
"DLS-Excitatory": ["ChR2", "EYFP", "ChR2Scrambled"],
Expand Down Expand Up @@ -557,6 +616,8 @@ def opto_to_nwb(
"Box": box_number,
}
start_datetime = datetime.strptime(f"{start_date} {start_time}", "%m/%d/%y %H:%M:%S")
if subject in partial_subject_ids_to_subject_id:
subject = partial_subject_ids_to_subject_id[subject]
session_to_nwb_args = dict(
data_dir_path=data_dir_path,
output_dir_path=output_dir_path,
Expand Down Expand Up @@ -617,6 +678,10 @@ def get_opto_subject_id(subject_path: Path):
"263": "263.477",
"338": "338.398",
"414": "414.405",
"300": "300.405",
"299": "299.405",
"276": "276.405",
"262.259.478": "262.478",
}

# fmt: off
Expand Down
23 changes: 12 additions & 11 deletions src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def session_to_nwb(
conversion_options.update(dict(Optogenetic={}))

# Add Excel-based Metadata
metadata_path = data_dir_path / "MouseDemographics.xlsx"
metadata_path = data_dir_path / "MouseDemographicsCorrected.xlsx"
source_data.update(
dict(
Metadata={
Expand Down Expand Up @@ -263,7 +263,7 @@ def split_western_blot(*, file_path: Union[str, Path]):
# No-shock example session
experiment_type = "FP"
experimental_group = "RR20"
subject_id = "95.259"
subject_id = "96.259"
start_datetime = datetime(2019, 4, 9, 10, 34, 30)
behavior_file_path = (
data_dir_path
Expand Down Expand Up @@ -294,8 +294,8 @@ def split_western_blot(*, file_path: Union[str, Path]):
# Shock session
experiment_type = "FP"
experimental_group = "RR20"
subject_id = "95.259"
start_datetime = datetime(2019, 4, 18, 10, 41, 42)
subject_id = "96.259"
start_datetime = datetime(2019, 4, 18, 9, 28, 20)
session_conditions = {
"Start Date": start_datetime.strftime("%m/%d/%y"),
"Start Time": start_datetime.strftime("%H:%M:%S"),
Expand Down Expand Up @@ -446,9 +446,9 @@ def split_western_blot(*, file_path: Union[str, Path]):

# Fiber Photometry session
experiment_type = "FP"
experimental_group = "PR"
subject_id = "028.392"
start_datetime = datetime(2020, 7, 9, 13, 1, 26)
experimental_group = "PS"
subject_id = "112.283"
start_datetime = datetime(2019, 6, 20, 9, 32, 4)
session_conditions = {
"Start Date": start_datetime.strftime("%m/%d/%y"),
"Start Time": start_datetime.strftime("%H:%M:%S"),
Expand All @@ -466,15 +466,16 @@ def split_western_blot(*, file_path: Union[str, Path]):
data_dir_path
/ f"{experiment_type} Experiments"
/ "Photometry"
/ f"Punishment Resistant"
/ f"Punishment Sensitive"
/ f"Early RI60"
/ f"Photo_{subject_id.split('.')[0]}_{subject_id.split('.')[1]}-200709-130922"
/ f"Photo_{subject_id.split('.')[0]}_{subject_id.split('.')[1]}-190620-093542"
)
session_to_nwb(
data_dir_path=data_dir_path,
output_dir_path=output_dir_path,
behavior_file_path=behavior_file_path,
fiber_photometry_folder_path=fiber_photometry_folder_path,
has_demodulated_commanded_voltages=False,
subject_id=subject_id,
session_conditions=session_conditions,
start_variable=start_variable,
Expand Down Expand Up @@ -716,8 +717,8 @@ def split_western_blot(*, file_path: Union[str, Path]):
experiment_type = "Opto"
experimental_group = "DLS-Excitatory"
optogenetic_treatment = "ChR2"
subject_id = "079.402"
start_datetime = datetime(2020, 6, 26, 13, 19, 27)
subject_id = "242.388"
start_datetime = datetime(2020, 6, 26, 12, 10, 40)
session_conditions = {
"Start Date": start_datetime.strftime("%m/%d/%y"),
"Start Time": start_datetime.strftime("%H:%M:%S"),
Expand Down
38 changes: 36 additions & 2 deletions src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,42 @@ for that 1 session split across the two folders?
### Questions
- Some of the subject_ids are not present in the metadata Excel file -- please provide them.
- Some animals are missing the "Hemisphere with DMS" field -- please provide it.
=======
- Solution: Added missing MSNs; skipped RK_C_FR1_BOTH_1hr
- Some of the mouse IDs have typos (missing leading or trailing zeros), and some others appear to be incorrect (RR20 section).
So, I made the following corrections to the metadata Excel sheet:
Mouse ID corrections:
79.402 --> 079.402
344.4 --> 344.400
432.42 --> 432.420
48.392 --> 048.392
98.259 --> 98.257
101.259 --> 101.260
97.259 --> 97.257
99.259 --> 99.257
100.259 --> 100.258
359.43 --> 359.430
28.392 --> 028.392
227.43 --> 227.430
262.478 --> 262.259
354.43 --> 354.430
430.42 --> 430.420
342.483 --> 342.400
After these corrections, the following mouse_ids are still missing from the Excel sheet:
subjects_to_skip = {
"289.407",
"244.464",
"264.477",
"102.260",
"262.478",
"289.408",
"264.475",
"129.425",
"250.427",
"95.259",
"309.399",
"433.421",
"416.405",
"364.426",
}

### Active Questions
- DMS-Excitatory has some CSV files with only session-aggregated info (total right rewards but not right reward times), e.g. ChR2/121_280.CSV -- do you have individual session info for these animals?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,36 +41,29 @@ def get_metadata(self) -> DeepDict:
df["Mouse ID"] = df["Mouse ID"].str.replace("(DNL)", "")
df["Mouse ID"] = df["Mouse ID"].str.strip()
df.set_index("Mouse ID", inplace=True)
subject_df = df.loc[self.source_data["subject_id"]]

if self.source_data["subject_id"] in df.index:
subject_df = df.loc[self.source_data["subject_id"]]

# Add metadata to metadata dict
excel_sex_to_nwb_sex = {"Male": "M", "Female": "F"}
metadata["Subject"]["sex"] = excel_sex_to_nwb_sex[subject_df["Sex"]]
metadata["NWBFile"]["surgery"] = subject_df["Surgical Manipulation"]
if not pd.isna(subject_df["Treatment"]):
metadata["NWBFile"]["stimulus_notes"] = subject_df["Treatment"]
if subject_df["Experiment"] == "Fiber Photometry":
metadata["NWBFile"]["virus"] = "AAV5-CAG-FLEX-jGCaMP7b-WPRE"
elif subject_df["Experiment"] == "DLS-Excitatory" or subject_df["Experiment"] == "DMS-Excitatory":
metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-hChR2(H134R)-EYFP"
elif subject_df["Experiment"] == "DMS-Inhibitory" or subject_df["Experiment"] == "DMS-Inhibitory Group 2":
metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-eNpHR3.0-EYFP"
if subject_df["Treatment"] == "Control":
metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-EYFP"
metadata["NWBFile"]["notes"] = (
f'Hemisphere with DMS: {subject_df["Hemisphere with DMS"]}\n'
f'Experiment: {subject_df["Experiment"]}\n'
f'Behavior: {subject_df["Behavior"]}\n'
f'Punishment Group: {str(subject_df["Punishment Group"]).replace("Resitant", "Resistant")}\n'
f'Did Not Learn: {subject_df["DNL"]}\n'
)
else: # TODO: Ask Lerner lab about missing subjects
if self.verbose:
print(f"Subject ID {self.source_data['subject_id']} not found in metadata file.")
metadata["Subject"]["sex"] = "U"

# Add metadata to metadata dict
excel_sex_to_nwb_sex = {"Male": "M", "Female": "F"}
metadata["Subject"]["sex"] = excel_sex_to_nwb_sex[subject_df["Sex"]]
metadata["NWBFile"]["surgery"] = subject_df["Surgical Manipulation"]
if not pd.isna(subject_df["Treatment"]):
metadata["NWBFile"]["stimulus_notes"] = subject_df["Treatment"]
if subject_df["Experiment"] == "Fiber Photometry":
metadata["NWBFile"]["virus"] = "AAV5-CAG-FLEX-jGCaMP7b-WPRE"
elif subject_df["Experiment"] == "DLS-Excitatory" or subject_df["Experiment"] == "DMS-Excitatory":
metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-hChR2(H134R)-EYFP"
elif subject_df["Experiment"] == "DMS-Inhibitory" or subject_df["Experiment"] == "DMS-Inhibitory Group 2":
metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-eNpHR3.0-EYFP"
if subject_df["Treatment"] == "Control":
metadata["NWBFile"]["virus"] = "AAV5-EF1a-DIO-EYFP"
metadata["NWBFile"]["notes"] = (
f'Hemisphere with DMS: {subject_df["Hemisphere with DMS"]}\n'
f'Experiment: {subject_df["Experiment"]}\n'
f'Behavior: {subject_df["Behavior"]}\n'
f'Punishment Group: {str(subject_df["Punishment Group"]).replace("Resitant", "Resistant")}\n'
f'Did Not Learn: {subject_df["DNL"]}\n'
)
metadata["Subject"]["subject_id"] = self.source_data["subject_id"]

return metadata
Expand Down

0 comments on commit ae83b75

Please sign in to comment.