Skip to content

Commit

Permalink
Data preparation MLCube for BraTS2023 (#479)
Browse files Browse the repository at this point in the history
* add data preparation MLCube for BraTS2023

* finalize data preps

* add dict for mapping missing modalities in synthesis task

* add parameter
  • Loading branch information
hasan7n authored Aug 25, 2023
1 parent 61febad commit bde1f78
Show file tree
Hide file tree
Showing 12 changed files with 398 additions and 16 deletions.
13 changes: 0 additions & 13 deletions examples/BraTS/data_prep/mlcube/workspace/labels/labels.csv

This file was deleted.

3 changes: 0 additions & 3 deletions examples/BraTS/data_prep/mlcube/workspace/names/names.txt

This file was deleted.

45 changes: 45 additions & 0 deletions examples/BraTS2023/data_prep/mlcube/mlcube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# MLCube definition for the BraTS2023 data preparation cube.
# The task names and parameter names declared under `tasks` match the commands
# and `--<name>` options registered in project/mlcube.py.
name: BraTS2023 Data Preparator Cube
description: BraTS2023 Data Preparator Cube
authors:
  - { name: "MLCommons Medical Working Group" }

platform:
  accelerator_count: 0

docker:
  # Image name.
  image: mlcommons/brats2023-prep:0.0.1
  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
  build_context: "../project"
  # Docker file name within docker build context, default is `Dockerfile`.
  build_file: "Dockerfile"

tasks:
  # prepare: takes raw data and labels plus the parameters file, and writes
  # the prepared data and labels to two output locations.
  prepare:
    parameters:
      inputs:
        {
          data_path: input_data/,
          labels_path: input_labels/,
          parameters_file: parameters.yaml,
        }
      outputs: { output_path: data/, output_labels_path: labels/ }
  # sanity_check: takes the prepared data/labels (the `prepare` outputs) and
  # the parameters file; it declares no outputs.
  sanity_check:
    parameters:
      inputs:
        {
          data_path: data/,
          labels_path: labels/,

          parameters_file: parameters.yaml,
        }
  # statistics: takes the prepared data/labels and the parameters file; its
  # single output is declared as a file (default name statistics.yaml).
  statistics:
    parameters:
      inputs:
        {
          data_path: data/,
          labels_path: labels/,

          parameters_file: parameters.yaml,
        }
      outputs: { output_path: { type: file, default: statistics.yaml } }
2 changes: 2 additions & 0 deletions examples/BraTS2023/data_prep/mlcube/workspace/parameters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Selects the preparation flow in prepare.py; accepted values there are
# "segmentation", "inpainting" and "synthesis".
task: segmentation
# File-name endings (the part right before `.nii.gz`) of the files that are
# copied from every case folder of the input data.
segmentation_modalities: ["t1c", "t1n", "t2f", "t2w"]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Selects the inpainting flow in prepare.py. That flow uses file-name endings
# hard-coded in prepare.py and reads no other key from this file.
task: inpainting
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Selects the synthesis flow in prepare.py: the data is first copied as for
# segmentation, then post-processed using the keys below.
task: synthesis
# File-name endings (the part right before `.nii.gz`) of the files copied from
# every case folder; one of them is dropped at random per case afterwards.
segmentation_modalities: ["t1c", "t1n", "t2f", "t2w"]
# Name of the sub-directory of the output labels folder that receives a full
# copy of the prepared data (made before a modality is dropped).
original_data_in_labels: original_data
# Name of the sub-directory of the output labels folder into which the copied
# segmentation label files are moved.
segmentation_labels: segmentation_labels
# Name of the JSON file, written in the output labels folder, that maps each
# case folder name to the modality chosen to be dropped for it.
missing_modality_json: "missing.json"
11 changes: 11 additions & 0 deletions examples/BraTS2023/data_prep/project/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Image for the BraTS2023 data preparation MLCube.
FROM python:3.9.16-slim

# The requirements file is copied and installed before the rest of the project
# is copied (presumably so the dependency layer is reused when only sources
# change -- confirm against the build setup).
COPY ./requirements.txt /mlcube_project/requirements.txt

RUN pip3 install --no-cache-dir -r /mlcube_project/requirements.txt

ENV LANG C.UTF-8

# Copy the whole build context (the project sources) into the image.
COPY . /mlcube_project

# Containers started from this image run the MLCube handler script; the task
# name and its options are passed as arguments to it.
ENTRYPOINT ["python3", "/mlcube_project/mlcube.py"]
51 changes: 51 additions & 0 deletions examples/BraTS2023/data_prep/project/mlcube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""MLCube handler file"""
import typer
import yaml
from prepare import prepare_dataset
from sanity_check import perform_sanity_checks
from stats import generate_statistics

# Typer application on which the commands below ("prepare", "sanity_check",
# "statistics") are registered; it is invoked from the __main__ guard.
app = typer.Typer()


@app.command("prepare")
def prepare(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
    output_path: str = typer.Option(..., "--output_path"),
    output_labels_path: str = typer.Option(..., "--output_labels_path"),
):
    # "prepare" command: parse the YAML parameters file and hand everything
    # over to prepare_dataset (imported from prepare.py). All five options
    # are required.
    with open(parameters_file) as params_stream:
        task_config = yaml.safe_load(params_stream)

    prepare_dataset(
        data_path,
        labels_path,
        task_config,
        output_path,
        output_labels_path,
    )


@app.command("sanity_check")
def sanity_check(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
):
    # "sanity_check" command: parse the YAML parameters file and delegate to
    # perform_sanity_checks (imported from sanity_check.py). All three
    # options are required.
    with open(parameters_file) as params_stream:
        task_config = yaml.safe_load(params_stream)

    perform_sanity_checks(
        data_path,
        labels_path,
        task_config,
    )


@app.command("statistics")
def statistics(
    data_path: str = typer.Option(..., "--data_path"),
    labels_path: str = typer.Option(..., "--labels_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
    out_path: str = typer.Option(..., "--output_path"),
):
    # "statistics" command: parse the YAML parameters file and delegate to
    # generate_statistics (imported from stats.py). All four options are
    # required. Note that the command-line flag is `--output_path` while the
    # Python parameter receiving it is named `out_path`.
    with open(parameters_file) as params_stream:
        task_config = yaml.safe_load(params_stream)

    generate_statistics(
        data_path,
        labels_path,
        task_config,
        out_path,
    )


# Run the Typer application only when this file is executed as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    app()
116 changes: 116 additions & 0 deletions examples/BraTS2023/data_prep/project/prepare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import os
import shutil
from glob import iglob
import random
import json

# Seed the global random generator once, at import time. The synthesis
# post-processing in this module draws from it (random.choice) to pick the
# modality that is dropped for each case.
random.seed(7)


def __copy_modalities(input_folder, modalities, output_folder):
    """Copy selected ``*.nii.gz`` files from one folder into another.

    A file directly inside ``input_folder`` is selected when its path ends
    with ``<modality>.nii.gz`` for at least one entry of ``modalities``.
    Selected files keep their base name. ``output_folder`` is not created
    here and must already exist.

    Args:
        input_folder: Folder that is searched (non-recursively).
        modalities: Iterable of file-name endings (without ``.nii.gz``).
        output_folder: Existing folder that receives the copies.
    """
    pattern = os.path.join(input_folder, "*.nii.gz")
    for src in iglob(pattern):
        # any() stops at the first matching modality, so each file is copied
        # at most once.
        wanted = any(src.endswith(f"{modality}.nii.gz") for modality in modalities)
        if not wanted:
            continue
        dst = os.path.join(output_folder, os.path.basename(src))
        shutil.copyfile(src, dst)


def copy_segmentation_data(
    data_path, labels_path, parameters, output_path, output_labels_path
):
    """Copy the segmentation input data and labels to the output locations.

    Data: every sub-folder of ``data_path`` is mirrored (by name) under
    ``output_path`` and receives the files whose names end with one of
    ``parameters["segmentation_modalities"]`` followed by ``.nii.gz``.

    Labels: ``labels_path`` may hold sub-folders, flat files, or both. All
    files ending with ``seg.nii.gz`` are copied directly into
    ``output_labels_path`` (labels found in sub-folders are flattened).

    Args:
        data_path: Folder containing one sub-folder per case.
        labels_path: Folder containing label files and/or per-case folders.
        parameters: Mapping that provides ``"segmentation_modalities"``.
        output_path: Folder receiving the per-case data folders.
        output_labels_path: Existing folder receiving the label files.

    Raises:
        KeyError: If ``parameters`` has no ``"segmentation_modalities"`` key.
    """
    # copy data
    modalities = parameters["segmentation_modalities"]
    for folder in iglob(os.path.join(data_path, "*/")):
        outfolder = os.path.join(
            output_path, os.path.basename(os.path.normpath(folder))
        )
        os.makedirs(outfolder, exist_ok=True)
        __copy_modalities(folder, modalities, outfolder)

    # copy labels
    for folder_or_file in iglob(os.path.join(labels_path, "*")):
        if os.path.isdir(folder_or_file):
            __copy_modalities(folder_or_file, ["seg"], output_labels_path)
        elif folder_or_file.endswith("seg.nii.gz"):
            # No `break` here: every flat label file must be copied, not just
            # the first one encountered.
            new_file = os.path.join(
                output_labels_path, os.path.basename(folder_or_file)
            )
            shutil.copyfile(folder_or_file, new_file)


def post_process_for_synthesis(parameters, output_path, output_labels_path):
    """Turn prepared segmentation data into a synthesis dataset.

    Steps, in order:

    1. Move everything currently in ``output_labels_path`` into the
       sub-directory named by ``parameters["segmentation_labels"]``.
    2. Copy the whole ``output_path`` tree into the sub-directory of
       ``output_labels_path`` named by ``parameters["original_data_in_labels"]``.
    3. For every case folder in ``output_path``, pick one entry of
       ``parameters["segmentation_modalities"]`` with ``random.choice`` and
       delete the file ending with ``<modality>.nii.gz`` (if there is one).
    4. Write the ``{case folder name: chosen modality}`` mapping as JSON to
       ``parameters["missing_modality_json"]`` inside ``output_labels_path``.

    Case folders are visited in sorted path order. Glob results come back in
    a filesystem-dependent order, so without sorting the choice made for a
    given case would differ between machines even with the same random seed.

    Args:
        parameters: Mapping providing the four keys named above.
        output_path: Folder with one sub-folder per case (modified in place).
        output_labels_path: Folder holding the copied labels (reorganized).

    Raises:
        KeyError: If one of the required keys is missing from ``parameters``.
        FileExistsError: If the original-data sub-directory already exists
            (raised by ``shutil.copytree``).
    """
    modalities = parameters["segmentation_modalities"]
    original_data_in_labels = parameters["original_data_in_labels"]
    segmentation_labels = parameters["segmentation_labels"]
    missing_modality_json = parameters["missing_modality_json"]

    # move labels to a sub directory
    labels_subdir = os.path.join(output_labels_path, segmentation_labels)
    os.makedirs(labels_subdir, exist_ok=True)
    # Take a snapshot of the listing first: entries are moved within the very
    # directory that is being listed.
    for obj in sorted(iglob(os.path.join(output_labels_path, "*"))):
        if os.path.normpath(obj) != os.path.normpath(labels_subdir):
            shutil.move(obj, labels_subdir)

    # copy data to labels for metrics calculation
    data_subdir = os.path.join(output_labels_path, original_data_in_labels)
    shutil.copytree(output_path, data_subdir)

    # drop modalities
    missing_modality_dict = {}
    # Sorted so the sequence of random draws maps to the same cases everywhere.
    for folder in sorted(iglob(os.path.join(output_path, "*/"))):
        missing_modality = random.choice(modalities)
        for file in iglob(os.path.join(folder, "*.nii.gz")):
            if file.endswith(f"{missing_modality}.nii.gz"):
                os.remove(file)
                break
        foldername = os.path.basename(os.path.normpath(folder))
        missing_modality_dict[foldername] = missing_modality

    out_json = os.path.join(output_labels_path, missing_modality_json)
    with open(out_json, "w") as f:
        json.dump(missing_modality_dict, f)


def copy_inpainting_data(
    data_path, labels_path, parameters, output_path, output_labels_path
):
    """Copy the inpainting input data and labels to the output locations.

    Every sub-folder of ``data_path`` is mirrored (by name) under
    ``output_path`` with its ``*mask.nii.gz`` and ``*t1n-voided.nii.gz``
    files; afterwards every sub-folder of ``labels_path`` is mirrored under
    ``output_labels_path`` with its ``*mask-healthy.nii.gz`` and
    ``*t1n.nii.gz`` files.

    Args:
        data_path: Folder containing one sub-folder per case (inputs).
        labels_path: Folder containing one sub-folder per case (labels).
        parameters: Not read by this function.
        output_path: Folder receiving the per-case data folders.
        output_labels_path: Folder receiving the per-case label folders.
    """
    # (source root, destination root, file-name endings to keep);
    # the data entry is processed first, then the labels entry.
    copy_plan = (
        (data_path, output_path, ["mask", "t1n-voided"]),
        (labels_path, output_labels_path, ["mask-healthy", "t1n"]),
    )
    for src_root, dst_root, modalities in copy_plan:
        for case_dir in iglob(os.path.join(src_root, "*/")):
            case_name = os.path.basename(os.path.normpath(case_dir))
            target_dir = os.path.join(dst_root, case_name)
            os.makedirs(target_dir, exist_ok=True)
            __copy_modalities(case_dir, modalities, target_dir)


def prepare_dataset(
    data_path, labels_path, parameters, output_path, output_labels_path
):
    """Prepare a dataset for the task named in ``parameters["task"]``.

    The task must be ``"segmentation"``, ``"inpainting"`` or ``"synthesis"``;
    this is checked before anything is written. Both output folders are then
    created if needed. ``"inpainting"`` uses the inpainting copy routine.
    The other two tasks use the segmentation copy routine, and ``"synthesis"``
    additionally runs the synthesis post-processing on the copied output.

    Args:
        data_path: Folder with the raw input data.
        labels_path: Folder with the raw input labels.
        parameters: Mapping providing ``"task"`` and the task-specific keys.
        output_path: Folder receiving the prepared data.
        output_labels_path: Folder receiving the prepared labels.

    Raises:
        KeyError: If ``parameters`` has no ``"task"`` key.
        AssertionError: If the task is not one of the three supported values.
    """
    task = parameters["task"]
    assert task in ("segmentation", "inpainting", "synthesis"), "Invalid task"

    for directory in (output_path, output_labels_path):
        os.makedirs(directory, exist_ok=True)

    if task == "inpainting":
        copy_inpainting_data(
            data_path, labels_path, parameters, output_path, output_labels_path
        )
        return

    # "segmentation" and "synthesis" share the same copy step.
    copy_segmentation_data(
        data_path, labels_path, parameters, output_path, output_labels_path
    )
    if task == "synthesis":
        post_process_for_synthesis(parameters, output_path, output_labels_path)
4 changes: 4 additions & 0 deletions examples/BraTS2023/data_prep/project/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pyYAML
typer
numpy
SimpleITK>=2.1.0
Loading

0 comments on commit bde1f78

Please sign in to comment.