Skip to content

Commit

Permalink
added subjects to skip and metadata sheet corrections
Browse files Browse the repository at this point in the history
  • Loading branch information
pauladkisson committed May 24, 2024
1 parent d50e75b commit 55b8d51
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 49 deletions.
94 changes: 49 additions & 45 deletions src/lerner_lab_to_nwb/seiler_2024/seiler_2024_convert_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,22 @@ def dataset_to_nwb(
verbose : bool, optional
Whether to print verbose output, by default True
"""
subjects_to_skip = {
"289.407",
"244.464",
"264.477",
"102.260",
"262.478",
"289.408",
"264.475",
"129.425",
"250.427",
"95.259",
"309.399",
"433.421",
"416.405",
"364.426",
}
start_variable = "Start Date"
data_dir_path = Path(data_dir_path)
output_dir_path = Path(output_dir_path)
Expand All @@ -55,51 +71,36 @@ def dataset_to_nwb(
verbose=verbose,
)
session_to_nwb_args_per_session = fp_session_to_nwb_args_per_session + opto_session_to_nwb_args_per_session
unique_subject_ids = set()
for session_to_nwb_kwargs in session_to_nwb_args_per_session:
subject_id = session_to_nwb_kwargs["subject_id"]
unique_subject_ids.add(subject_id)
metadata_path = Path(data_dir_path / "MouseDemographics.xlsx")
df = pd.read_excel(
metadata_path,
sheet_name="Mouse Demographics",
dtype={"Mouse ID": str},
)
df["DNL"] = df["Mouse ID"].str.contains("(DNL)", regex=False)
df["Mouse ID"] = df["Mouse ID"].str.replace("(DNL)", "")
df["Mouse ID"] = df["Mouse ID"].str.strip()
mouse_ids = set(df["Mouse ID"])
missing_subject_ids = unique_subject_ids - mouse_ids
for missing_subject_id in missing_subject_ids:
print(f"Missing metadata for {missing_subject_id}")

# futures = []
# with ProcessPoolExecutor(max_workers=max_workers) as executor:
# for session_to_nwb_kwargs in session_to_nwb_args_per_session:
# experiment_type = session_to_nwb_kwargs["experiment_type"]
# experimental_group = session_to_nwb_kwargs["experimental_group"]
# subject_id = session_to_nwb_kwargs["subject_id"]
# start_datetime = session_to_nwb_kwargs["start_datetime"]
# optogenetic_treatment = session_to_nwb_kwargs.get("optogenetic_treatment", None)
# if experiment_type == "FP":
# exception_file_path = (
# output_dir_path
# / f"ERROR_{experiment_type}_{experimental_group}_{subject_id}_{start_datetime.isoformat().replace(':', '-')}.txt"
# )
# elif experiment_type == "Opto":
# exception_file_path = (
# output_dir_path
# / f"ERROR_{experiment_type}_{experimental_group}_{optogenetic_treatment}_{subject_id}_{start_datetime.isoformat().replace(':', '-')}.txt"
# )
# futures.append(
# executor.submit(
# safe_session_to_nwb,
# session_to_nwb_kwargs=session_to_nwb_kwargs,
# exception_file_path=exception_file_path,
# )
# )
# for _ in tqdm(as_completed(futures), total=len(futures)):
# pass

futures = []
with ProcessPoolExecutor(max_workers=max_workers) as executor:
for session_to_nwb_kwargs in session_to_nwb_args_per_session:
experiment_type = session_to_nwb_kwargs["experiment_type"]
experimental_group = session_to_nwb_kwargs["experimental_group"]
subject_id = session_to_nwb_kwargs["subject_id"]
if subject_id in subjects_to_skip:
continue
start_datetime = session_to_nwb_kwargs["start_datetime"]
optogenetic_treatment = session_to_nwb_kwargs.get("optogenetic_treatment", None)
if experiment_type == "FP":
exception_file_path = (
output_dir_path
/ f"ERROR_{experiment_type}_{experimental_group}_{subject_id}_{start_datetime.isoformat().replace(':', '-')}.txt"
)
elif experiment_type == "Opto":
exception_file_path = (
output_dir_path
/ f"ERROR_{experiment_type}_{experimental_group}_{optogenetic_treatment}_{subject_id}_{start_datetime.isoformat().replace(':', '-')}.txt"
)
futures.append(
executor.submit(
safe_session_to_nwb,
session_to_nwb_kwargs=session_to_nwb_kwargs,
exception_file_path=exception_file_path,
)
)
for _ in tqdm(as_completed(futures), total=len(futures)):
pass


def safe_session_to_nwb(*, session_to_nwb_kwargs: dict, exception_file_path: Union[Path, str]):
Expand Down Expand Up @@ -208,6 +209,7 @@ def fp_to_nwb(
"418": "418.404",
"299": "299.405",
"276": "276.405",
"262.259.478": "262.478",
}
raw_file_to_info = get_raw_info(behavior_path)

Expand Down Expand Up @@ -487,6 +489,7 @@ def opto_to_nwb(
"300": "300.405",
"299": "299.405",
"276": "276.405",
"262.259.478": "262.478",
}
experiment_type = "Opto"
experimental_group_to_optogenetic_treatments = {
Expand Down Expand Up @@ -674,6 +677,7 @@ def get_opto_subject_id(subject_path: Path):
"300": "300.405",
"299": "299.405",
"276": "276.405",
"262.259.478": "262.478",
}

# fmt: off
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def session_to_nwb(
conversion_options.update(dict(Optogenetic={}))

# Add Excel-based Metadata
metadata_path = data_dir_path / "MouseDemographics.xlsx"
metadata_path = data_dir_path / "MouseDemographicsCorrected.xlsx"
source_data.update(
dict(
Metadata={
Expand Down
39 changes: 36 additions & 3 deletions src/lerner_lab_to_nwb/seiler_2024/seiler_2024_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,42 @@ for that 1 session split across the two folders?
### Questions
- Some of the subject_ids are not present in the metadata Excel file -- please provide
- Some animals are missing the "Hemisphere with DMS" field -- please provide
- Looks like some of the mouse ids in the rr20 section of the excel file are incorrect (all XX.259 instead of XX.257)
=======
- Solution: Added missing MSNs; skipped RK_C_FR1_BOTH_1hr
Some of the mouse IDs have typos (missing leading or trailing zeros), and some others appear to be incorrect (RR20 section).
So, I made the following corrections to the metadata Excel sheet:
Mouse ID corrections:
79.402 --> 079.402
344.4 --> 344.400
432.42 --> 432.420
48.392 --> 048.392
98.259 --> 98.257
101.259 --> 101.260
97.259 --> 97.257
99.259 --> 99.257
100.259 --> 100.258
359.43 --> 359.430
28.392 --> 028.392
227.43 --> 227.430
262.478 --> 262.259
354.43 --> 354.430
430.42 --> 430.420
342.483 --> 342.400
After these corrections, the following mouse IDs are still missing from the Excel sheet:
subjects_to_skip = {
"289.407",
"244.464",
"264.477",
"102.260",
"262.478",
"289.408",
"264.475",
"129.425",
"250.427",
"95.259",
"309.399",
"433.421",
"416.405",
"364.426",
}

### Active Questions
- DMS-Excitatory has some csv files w/ only session-aggregated info (total right rewards but not right reward times) ex. ChR2/121_280.CSV -- do you have individual session info for these animals?
Expand Down

0 comments on commit 55b8d51

Please sign in to comment.