-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Data preparation MLCube for BraTS2023 (#479)
* add data preparation MLCube for BraTS2023 * finalize data preps * add dict for mapping missing modalities in synthesis task * add parameter
- Loading branch information
Showing
12 changed files
with
398 additions
and
16 deletions.
There are no files selected for viewing
13 changes: 0 additions & 13 deletions
13
examples/BraTS/data_prep/mlcube/workspace/labels/labels.csv
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# MLCube configuration for the BraTS2023 data preparation cube.
name: BraTS2023 Data Preparator Cube
description: BraTS2023 Data Preparator Cube
authors:
  - name: "MLCommons Medical Working Group"

platform:
  accelerator_count: 0

docker:
  # Image name.
  image: mlcommons/brats2023-prep:0.0.1
  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
  build_context: "../project"
  # Docker file name within docker build context, default is `Dockerfile`.
  build_file: "Dockerfile"

tasks:
  # Reads the raw inputs and writes the prepared data/ and labels/ folders.
  prepare:
    parameters:
      inputs:
        data_path: input_data/
        labels_path: input_labels/
        parameters_file: parameters.yaml
      outputs:
        output_path: data/
        output_labels_path: labels/
  # Runs on the folders produced by `prepare`; declares no outputs.
  sanity_check:
    parameters:
      inputs:
        data_path: data/
        labels_path: labels/
        parameters_file: parameters.yaml
  # Runs on the folders produced by `prepare` and writes a single YAML file.
  statistics:
    parameters:
      inputs:
        data_path: data/
        labels_path: labels/
        parameters_file: parameters.yaml
      outputs:
        output_path:
          type: file
          default: statistics.yaml
2 changes: 2 additions & 0 deletions
2
examples/BraTS2023/data_prep/mlcube/workspace/parameters.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Preparation pipeline to run; prepare.py accepts "segmentation",
# "inpainting" or "synthesis".
task: segmentation
# File-name endings (the part right before ".nii.gz") of the images that are
# copied from each case folder.
segmentation_modalities: ["t1c", "t1n", "t2f", "t2w"]
1 change: 1 addition & 0 deletions
1
examples/BraTS2023/data_prep/mlcube/workspace/parameters_inpainting.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Selects the inpainting pipeline in prepare.py; the modalities copied for
# this task are hard-coded there, so no further keys are read.
task: inpainting
5 changes: 5 additions & 0 deletions
5
examples/BraTS2023/data_prep/mlcube/workspace/parameters_synthesis.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Selects the synthesis pipeline in prepare.py: the segmentation copy step
# followed by the synthesis post-processing step.
task: synthesis
# File-name endings (the part right before ".nii.gz") of the images that are
# copied; one of them is picked at random per case and removed.
segmentation_modalities: ["t1c", "t1n", "t2f", "t2w"]
# Sub-directory of the output labels folder that receives a full copy of the
# prepared data (taken before any modality is removed).
original_data_in_labels: original_data
# Sub-directory of the output labels folder that the segmentation labels are
# moved into.
segmentation_labels: segmentation_labels
# JSON file, written inside the output labels folder, mapping each case
# folder name to the modality that was removed from it.
missing_modality_json: "missing.json"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
FROM python:3.9.16-slim

# The requirements file is copied on its own before the rest of the project,
# presumably so the dependency layer can be reused when only sources change.
COPY ./requirements.txt /mlcube_project/requirements.txt

RUN pip3 install --no-cache-dir -r /mlcube_project/requirements.txt

# Use the key=value form; the whitespace-separated `ENV key value` form is a
# legacy syntax that Docker discourages.
ENV LANG=C.UTF-8

COPY . /mlcube_project

# The container runs the MLCube handler script.
ENTRYPOINT ["python3", "/mlcube_project/mlcube.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
"""MLCube handler file""" | ||
import typer | ||
import yaml | ||
from prepare import prepare_dataset | ||
from sanity_check import perform_sanity_checks | ||
from stats import generate_statistics | ||
|
||
# CLI application object; the functions below register themselves on it
# through @app.command.
app = typer.Typer()
|
||
|
||
@app.command("prepare")
def prepare(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
    output_path: str = typer.Option(..., "--output_path"),
    output_labels_path: str = typer.Option(..., "--output_labels_path"),
):
    # CLI handler for the "prepare" command: parse the YAML parameters file
    # and hand everything over to prepare_dataset.
    with open(parameters_file) as params_stream:
        task_config = yaml.safe_load(params_stream)

    prepare_dataset(
        data_path,
        labels_path,
        task_config,
        output_path,
        output_labels_path,
    )
|
||
|
||
@app.command("sanity_check")
def sanity_check(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
):
    # CLI handler for the "sanity_check" command: parse the YAML parameters
    # file and delegate to perform_sanity_checks.
    with open(parameters_file) as params_stream:
        task_config = yaml.safe_load(params_stream)

    perform_sanity_checks(data_path, labels_path, task_config)
|
||
|
||
@app.command("statistics")
def statistics(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
    out_path: str = typer.Option(..., "--output_path"),
):
    # CLI handler for the "statistics" command. Note that the value of the
    # --output_path option is bound to the local name out_path.
    with open(parameters_file) as params_stream:
        task_config = yaml.safe_load(params_stream)

    generate_statistics(data_path, labels_path, task_config, out_path)
|
||
|
||
# Run the typer application only when this file is executed as a script
# (the Dockerfile ENTRYPOINT does exactly that).
if __name__ == "__main__":
    app()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import os | ||
import shutil | ||
from glob import iglob | ||
import random | ||
import json | ||
|
||
# Seed the module-level RNG at import time; random.choice is used below to
# pick the modality removed from each case in the synthesis task.
random.seed(7)
|
||
|
||
def __copy_modalities(input_folder, modalities, output_folder):
    """Copy the ``*.nii.gz`` files of the requested modalities.

    A file directly inside ``input_folder`` is copied to ``output_folder``
    (keeping its base name) when its name ends with ``"<modality>.nii.gz"``
    for at least one entry of ``modalities``. Each matching file is copied
    once; ``output_folder`` must already exist.
    """
    wanted_endings = tuple(f"{name}.nii.gz" for name in modalities)
    for source in iglob(os.path.join(input_folder, "*.nii.gz")):
        if not source.endswith(wanted_endings):
            continue
        target = os.path.join(output_folder, os.path.basename(source))
        shutil.copyfile(source, target)
|
||
|
||
def copy_segmentation_data(
    data_path, labels_path, parameters, output_path, output_labels_path
):
    """Copy segmentation images and their labels into the output folders.

    Data: for every sub-folder of ``data_path`` a folder with the same name
    is created under ``output_path`` and the files whose names end with one
    of ``parameters["segmentation_modalities"]`` (plus ``.nii.gz``) are
    copied into it.

    Labels: ``labels_path`` may contain per-case sub-folders, flat files, or
    both. Every ``*seg.nii.gz`` file found either way is copied directly
    into ``output_labels_path`` (flat, no per-case folders), which must
    already exist.

    Raises:
        KeyError: if ``parameters`` has no ``"segmentation_modalities"``.
    """
    # copy data
    modalities = parameters["segmentation_modalities"]
    for folder in iglob(os.path.join(data_path, "*/")):
        outfolder = os.path.join(
            output_path, os.path.basename(os.path.normpath(folder))
        )
        os.makedirs(outfolder, exist_ok=True)
        __copy_modalities(folder, modalities, outfolder)

    # copy labels
    for folder_or_file in iglob(os.path.join(labels_path, "*")):
        if os.path.isdir(folder_or_file):
            __copy_modalities(folder_or_file, ["seg"], output_labels_path)
        elif folder_or_file.endswith("seg.nii.gz"):
            # Do not stop after the first match: with a flat labels folder
            # every case has its own seg file and all of them are needed.
            new_file = os.path.join(
                output_labels_path, os.path.basename(folder_or_file)
            )
            shutil.copyfile(folder_or_file, new_file)
|
||
|
||
def post_process_for_synthesis(parameters, output_path, output_labels_path):
    """Turn prepared segmentation output into a synthesis-task dataset.

    Steps, in order:

    1. Move everything currently in ``output_labels_path`` into its
       ``parameters["segmentation_labels"]`` sub-directory.
    2. Copy the whole ``output_path`` tree to
       ``output_labels_path/parameters["original_data_in_labels"]``.
    3. For every case folder in ``output_path`` pick one entry of
       ``parameters["segmentation_modalities"]`` with ``random.choice`` and
       delete the first file whose name ends with ``"<modality>.nii.gz"``.
    4. Write ``{folder name: picked modality}`` as JSON to
       ``output_labels_path/parameters["missing_modality_json"]``. The
       picked modality is recorded even when no matching file was found.

    Case folders are visited in sorted path order, so for a given state of
    the ``random`` module the assignment depends only on the folder names
    and not on the order in which the filesystem lists them.

    Raises:
        KeyError: if one of the parameters named above is missing.
        FileExistsError: from ``shutil.copytree`` if the data copy
            destination already exists.
    """
    modalities = parameters["segmentation_modalities"]
    original_data_in_labels = parameters["original_data_in_labels"]
    segmentation_labels = parameters["segmentation_labels"]
    missing_modality_json = parameters["missing_modality_json"]

    # move labels to a sub directory
    labels_subdir = os.path.join(output_labels_path, segmentation_labels)
    os.makedirs(labels_subdir, exist_ok=True)
    for obj in sorted(iglob(os.path.join(output_labels_path, "*"))):
        if os.path.normpath(obj) != os.path.normpath(labels_subdir):
            shutil.move(obj, labels_subdir)

    # copy data to labels for metrics calculation
    data_subdir = os.path.join(output_labels_path, original_data_in_labels)
    shutil.copytree(output_path, data_subdir)

    # drop modalities
    missing_modality_dict = {}
    # glob order is arbitrary; sorting keeps the seeded random draws tied to
    # the same case folders on every machine.
    for folder in sorted(iglob(os.path.join(output_path, "*/"))):
        missing_modality = random.choice(modalities)
        for file in iglob(os.path.join(folder, "*.nii.gz")):
            if file.endswith(f"{missing_modality}.nii.gz"):
                os.remove(file)
                break
        foldername = os.path.basename(os.path.normpath(folder))
        missing_modality_dict[foldername] = missing_modality

    out_json = os.path.join(output_labels_path, missing_modality_json)
    with open(out_json, "w") as f:
        json.dump(missing_modality_dict, f)
|
||
|
||
def copy_inpainting_data(
    data_path, labels_path, parameters, output_path, output_labels_path
):
    """Copy the inpainting inputs and ground truth into the output folders.

    Every sub-folder of ``data_path`` is mirrored under ``output_path`` with
    its ``mask`` and ``t1n-voided`` files; afterwards every sub-folder of
    ``labels_path`` is mirrored under ``output_labels_path`` with its
    ``mask-healthy`` and ``t1n`` files. ``parameters`` is accepted for
    signature parity with the other copy functions and is not read.
    """
    copy_plan = (
        # (source root, destination root, file-name endings to keep)
        (data_path, output_path, ["mask", "t1n-voided"]),
        (labels_path, output_labels_path, ["mask-healthy", "t1n"]),
    )
    for source_root, target_root, wanted in copy_plan:
        for case_dir in iglob(os.path.join(source_root, "*/")):
            case_name = os.path.basename(os.path.normpath(case_dir))
            target_dir = os.path.join(target_root, case_name)
            os.makedirs(target_dir, exist_ok=True)
            __copy_modalities(case_dir, wanted, target_dir)
|
||
|
||
def prepare_dataset(
    data_path, labels_path, parameters, output_path, output_labels_path
):
    """Prepare a BraTS2023 dataset for the task named in ``parameters``.

    ``parameters["task"]`` selects the pipeline:

    * ``"segmentation"``: copy the segmentation data and labels.
    * ``"synthesis"``: same copy step, followed by the synthesis
      post-processing.
    * ``"inpainting"``: copy the inpainting data and labels.

    ``output_path`` and ``output_labels_path`` are created if missing.

    Raises:
        KeyError: if ``parameters`` has no ``"task"`` entry.
        ValueError: if the task is not one of the three names above. The
            check is an explicit raise rather than an ``assert`` so it still
            runs under ``python -O``; otherwise an unknown task would
            silently be treated as inpainting.
    """
    task = parameters["task"]
    valid_tasks = ("segmentation", "inpainting", "synthesis")
    if task not in valid_tasks:
        raise ValueError(
            f"Invalid task {task!r}; expected one of {', '.join(valid_tasks)}"
        )
    os.makedirs(output_path, exist_ok=True)
    os.makedirs(output_labels_path, exist_ok=True)

    if task in ("segmentation", "synthesis"):
        copy_segmentation_data(
            data_path, labels_path, parameters, output_path, output_labels_path
        )
        if task == "synthesis":
            post_process_for_synthesis(parameters, output_path, output_labels_path)
    else:
        copy_inpainting_data(
            data_path, labels_path, parameters, output_path, output_labels_path
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
pyYAML | ||
typer | ||
numpy | ||
SimpleITK>=2.1.0 |
Oops, something went wrong.