if a splits_final.json exists in the raw dataset folder it will be copied to the preprocessed folder as part of the experiment planning
FabianIsensee committed Jan 9, 2024
1 parent d4a9817 commit 947eafb
Showing 3 changed files with 28 additions and 8 deletions.
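For context, a minimal sketch (not part of the commit) of the behavior described in the commit message, assuming an installed nnU-Net with configured paths and the hypothetical dataset name Dataset002_Heart; nnUNet_raw and nnUNet_preprocessed come from nnunetv2.paths:

from batchgenerators.utilities.file_and_folder_operations import join, isfile
from nnunetv2.paths import nnUNet_raw, nnUNet_preprocessed

dataset_name = 'Dataset002_Heart'  # hypothetical dataset name, for illustration only

# A user-provided split placed next to the raw data ...
source_splits = join(nnUNet_raw, dataset_name, 'splits_final.json')
# ... is now copied here during experiment planning (or checked for consistency
# if a file with that name already exists), so do_split() during training picks it up.
target_splits = join(nnUNet_preprocessed, dataset_name, 'splits_final.json')

print(isfile(source_splits), target_splits)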
@@ -1,4 +1,3 @@
import os.path
import shutil
from copy import deepcopy
from functools import lru_cache
@@ -79,6 +78,10 @@ def __init__(self, dataset_name_or_id: Union[str, int],

self.plans = None

if isfile(join(self.raw_dataset_folder, 'splits_final.json')):
_maybe_copy_splits_file(join(self.raw_dataset_folder, 'splits_final.json'),
join(preprocessed_folder, 'splits_final.json'))

def determine_reader_writer(self):
example_image = self.dataset[self.dataset.keys().__iter__().__next__()]['images'][0]
return determine_reader_writer_from_dataset_json(self.dataset_json, example_image)
@@ -530,5 +533,23 @@ def load_plans(self, fname: str):
self.plans = load_json(fname)


def _maybe_copy_splits_file(splits_file: str, target_fname: str):
if not isfile(target_fname):
shutil.copy(splits_file, target_fname)
else:
# split already exists, do not copy, but check that the splits match.
# This code allows target_fname to contain more splits than splits_file. This is OK.
splits_source = load_json(splits_file)
splits_target = load_json(target_fname)
# all folds in the source file must match the target file
for i in range(len(splits_source)):
train_source = set(splits_source[i]['train'])
train_target = set(splits_target[i]['train'])
assert train_target == train_source
val_source = set(splits_source[i]['val'])
val_target = set(splits_target[i]['val'])
assert val_source == val_target


if __name__ == '__main__':
ExperimentPlanner(2, 8).plan_experiment()
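As a rough illustration (not part of the commit), the consistency check in _maybe_copy_splits_file assumes the JSON holds a list of fold dictionaries with 'train' and 'val' keys, and it tolerates the preprocessed copy defining more folds than the raw one, as long as the shared folds agree. A self-contained sketch with made-up case identifiers:

# Made-up case identifiers, for illustration only.
splits_source = [
    {'train': ['case_000', 'case_001', 'case_002'], 'val': ['case_003']},
    {'train': ['case_000', 'case_001', 'case_003'], 'val': ['case_002']},
]
# The existing target file may define extra folds; only the folds present in
# the source are compared, and they must match (order within a fold is ignored).
splits_target = splits_source + [
    {'train': ['case_000', 'case_002', 'case_003'], 'val': ['case_001']},
]

for i in range(len(splits_source)):
    assert set(splits_source[i]['train']) == set(splits_target[i]['train'])
    assert set(splits_source[i]['val']) == set(splits_target[i]['val'])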
9 changes: 4 additions & 5 deletions nnunetv2/experiment_planning/plan_and_preprocess_api.py
@@ -1,17 +1,16 @@
import shutil
from typing import List, Type, Optional, Tuple, Union

import nnunetv2
from batchgenerators.utilities.file_and_folder_operations import join, maybe_mkdir_p, subfiles, load_json
from batchgenerators.utilities.file_and_folder_operations import join, maybe_mkdir_p, load_json

import nnunetv2
from nnunetv2.configuration import default_num_processes
from nnunetv2.experiment_planning.dataset_fingerprint.fingerprint_extractor import DatasetFingerprintExtractor
from nnunetv2.experiment_planning.experiment_planners.default_experiment_planner import ExperimentPlanner
from nnunetv2.experiment_planning.verify_dataset_integrity import verify_dataset_integrity
from nnunetv2.paths import nnUNet_raw, nnUNet_preprocessed
from nnunetv2.utilities.dataset_name_id_conversion import convert_id_to_dataset_name, maybe_convert_to_dataset_name
from nnunetv2.utilities.dataset_name_id_conversion import convert_id_to_dataset_name
from nnunetv2.utilities.find_class_by_name import recursive_find_python_class
from nnunetv2.utilities.plans_handling.plans_handler import PlansManager
from nnunetv2.configuration import default_num_processes
from nnunetv2.utilities.utils import get_filenames_of_train_images_and_targets


4 changes: 2 additions & 2 deletions nnunetv2/training/nnUNetTrainer/nnUNetTrainer.py
@@ -520,9 +520,9 @@ def plot_network_architecture(self):
def do_split(self):
"""
The default split is a 5 fold CV on all available training cases. nnU-Net will create a split (it is seeded,
so always the same) and save it as splits_final.pkl file in the preprocessed data directory.
so always the same) and save it as splits_final.json file in the preprocessed data directory.
Sometimes you may want to create your own split for various reasons. For this you will need to create your own
splits_final.pkl file. If this file is present, nnU-Net is going to use it and whatever splits are defined in
splits_final.json file. If this file is present, nnU-Net is going to use it and whatever splits are defined in
it. You can create as many splits in this file as you want. Note that if you define only 4 splits (fold 0-3)
and then set fold=4 when training (that would be the fifth split), nnU-Net will print a warning and proceed to
use a random 80:20 data split.
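A minimal sketch (not part of this diff) of how such a custom splits_final.json could be written, assuming made-up case identifiers and the hypothetical dataset name Dataset002_Heart; save_json comes from batchgenerators:

from batchgenerators.utilities.file_and_folder_operations import join, save_json
from nnunetv2.paths import nnUNet_preprocessed

# Made-up case identifiers; in practice use your dataset's case identifiers
# (image file names without channel suffix and file extension).
all_cases = [f'case_{i:03d}' for i in range(10)]

# Two custom folds instead of the default five-fold CV.
splits = [
    {'train': all_cases[:8], 'val': all_cases[8:]},
    {'train': all_cases[2:], 'val': all_cases[:2]},
]

# With this commit, the file could alternatively be placed in the raw dataset
# folder and will be copied to the preprocessed folder during planning.
save_json(splits, join(nnUNet_preprocessed, 'Dataset002_Heart', 'splits_final.json'))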
