diff --git a/GANDLF/cli/generate_metrics.py b/GANDLF/cli/generate_metrics.py index 03d83bf6d..24ba6c144 100644 --- a/GANDLF/cli/generate_metrics.py +++ b/GANDLF/cli/generate_metrics.py @@ -8,7 +8,7 @@ import SimpleITK as sitk import numpy as np -from GANDLF.parseConfig import parseConfig +from GANDLF.config_manager import ConfigManager from GANDLF.utils import find_problem_type_from_parameters, one_hot from GANDLF.metrics import ( overall_stats, @@ -58,7 +58,7 @@ def generate_metrics_dict(input_csv: str, config: str, outputfile: str = None) - assert column in headers, f"The input csv should have a column named {column}" overall_stats_dict = {} - parameters = parseConfig(config) + parameters = ConfigManager(config) problem_type = parameters.get("problem_type", None) problem_type = ( find_problem_type_from_parameters(parameters) diff --git a/GANDLF/cli/main_run.py b/GANDLF/cli/main_run.py index 1f48f5f7c..ff9cb27fb 100644 --- a/GANDLF/cli/main_run.py +++ b/GANDLF/cli/main_run.py @@ -3,7 +3,7 @@ from GANDLF.training_manager import TrainingManager, TrainingManager_split from GANDLF.inference_manager import InferenceManager -from GANDLF.parseConfig import parseConfig +from GANDLF.config_manager import ConfigManager from GANDLF.utils import ( populate_header_in_parameters, parseTrainingCSV, @@ -34,7 +34,7 @@ def main_run( file_data_full = data_csv model_parameters = config_file device = device - parameters = parseConfig(model_parameters) + parameters = ConfigManager(model_parameters) parameters["device_id"] = -1 if train_mode: diff --git a/GANDLF/cli/post_training_model_optimization.py b/GANDLF/cli/post_training_model_optimization.py index 46a5ea6ff..0ca261465 100644 --- a/GANDLF/cli/post_training_model_optimization.py +++ b/GANDLF/cli/post_training_model_optimization.py @@ -1,6 +1,6 @@ import os from GANDLF.compute import create_pytorch_objects -from GANDLF.parseConfig import parseConfig +from GANDLF.config_manager import ConfigManager from GANDLF.utils import version_check, load_model, optimize_and_save_model @@ -21,7 +21,7 @@ def post_training_model_optimization(model_path: str, config_path: str) -> bool: # If parameters are not available in the model file, parse them from the config file parameters = ( - parseConfig(config_path, version_check_flag=False) + ConfigManager(config_path, version_check_flag=False) if parameters is None else parameters ) diff --git a/GANDLF/cli/preprocess_and_save.py b/GANDLF/cli/preprocess_and_save.py index 1309c331c..8a7803fd5 100644 --- a/GANDLF/cli/preprocess_and_save.py +++ b/GANDLF/cli/preprocess_and_save.py @@ -10,7 +10,7 @@ get_dataframe, get_correct_padding_size, ) -from GANDLF.parseConfig import parseConfig +from GANDLF.config_manager import ConfigManager from GANDLF.data.ImagesFromDataFrame import ImagesFromDataFrame from torch.utils.data import DataLoader from tqdm import tqdm @@ -45,7 +45,7 @@ def preprocess_and_save( # read the csv # don't care if the dataframe gets shuffled or not dataframe, headers = parseTrainingCSV(data_csv, train=False) - parameters = parseConfig(config_file) + parameters = ConfigManager(config_file) # save the parameters so that the same compute doesn't happen once again parameter_file = os.path.join(output_dir, "parameters.pkl") diff --git a/GANDLF/parseConfig.py b/GANDLF/config_manager.py similarity index 98% rename from GANDLF/parseConfig.py rename to GANDLF/config_manager.py index f839fe636..ce402a173 100644 --- a/GANDLF/parseConfig.py +++ b/GANDLF/config_manager.py @@ -98,7 +98,7 @@ def initialize_key(parameters, key, 
value=None):
     return parameters


-def parseConfig(config_file_path, version_check_flag=True):
+def _parseConfig(config_file_path, version_check_flag=True):
     """
     This function parses the configuration file and returns a dictionary of parameters.
@@ -714,3 +714,17 @@ def parseConfig(config_file_path, version_check_flag=True):
     params["inference_mechanism"] = inference_mechanism

     return params
+
+
+def ConfigManager(config_file_path, version_check_flag=True) -> dict:
+    """
+    This function parses the configuration file and returns a dictionary of parameters.
+
+    Args:
+        config_file_path (Union[str, dict]): The filename of the configuration file (or an already-loaded parameters dict).
+        version_check_flag (bool, optional): Whether to check the version in the configuration file. Defaults to True.
+
+    Returns:
+        dict: The parameter dictionary.
+    """
+    return _parseConfig(config_file_path, version_check_flag)
diff --git a/docs/extending.md b/docs/extending.md
index b6416084e..1a561d6b7 100644
--- a/docs/extending.md
+++ b/docs/extending.md
@@ -16,7 +16,7 @@ Before starting to work on the code-level on GaNDLF, please follow the [instruct
 ## Overall Architecture

 - Command-line parsing: [gandlf_run](https://github.com/mlcommons/GaNDLF/blob/master/gandlf_run)
-- Parameters from [training configuration](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_all_options.yaml) get passed as a `dict` via [parameter parser](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/parseConfig.py)
+- Parameters from [training configuration](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_all_options.yaml) get passed as a `dict` via the [config manager](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/config_manager.py)
 - [Training Manager](https://github.com/mlcommons/GaNDLF/blob/master/GANDLF/training_manager.py):
   - Handles k-fold training
   - Main entry point from CLI
diff --git a/testing/test_full.py b/testing/test_full.py
index 52920a772..40657c376 100644
--- a/testing/test_full.py
+++ b/testing/test_full.py
@@ -19,7 +19,7 @@
     get_patch_size_in_microns,
     convert_to_tiff,
 )
-from GANDLF.parseConfig import parseConfig
+from GANDLF.config_manager import ConfigManager
 from GANDLF.training_manager import TrainingManager
 from GANDLF.inference_manager import InferenceManager
 from GANDLF.cli import (
@@ -109,9 +109,7 @@ def test_generic_download_data():
     print("00: Downloading the sample data")

-    urlToDownload = (
-        "https://drive.google.com/uc?id=1c4Yrv-jnK6Tk7Ne1HmMTChv-4nYk43NT"
-    )
+    urlToDownload = "https://drive.google.com/uc?id=1c4Yrv-jnK6Tk7Ne1HmMTChv-4nYk43NT"

     files_check = [
         os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff"),
@@ -123,7 +121,7 @@ def test_generic_download_data():
         if not os.path.isfile(file):
             print("Downloading and extracting sample data")
             output = os.path.join(testingDir, "gandlf_unit_test_data.tgz")
-            gdown.download(urlToDownload, output, quiet=False, verify = True)
+            gdown.download(urlToDownload, output, quiet=False, verify=True)
             with zipfile.ZipFile(output, "r") as zip_ref:
                 zip_ref.extractall(testingDir)
             os.remove(output)
@@ -156,9 +154,7 @@ def test_generic_constructTrainingCSV():
         # else:
         #     continue
         outputFile = inputDir + "/train_" + application_data + ".csv"
-        outputFile_rel = (
-            inputDir + "/train_" + application_data + "_relative.csv"
-        )
+        outputFile_rel = inputDir + "/train_" + application_data + "_relative.csv"
         # Test with various combinations of relative/absolute paths
         # Absolute input/output
         writeTrainingCSV(
@@ -235,13 +231,13 @@ def write_temp_config_path(parameters_to_write):
     return temp_config_path


-#these are helper functions to be used in other tests
+# these are helper functions to be used in other tests


 def test_train_segmentation_rad_2d(device):
     print("03: Starting 2D Rad segmentation tests")
     # read and parse csv
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -256,9 +252,7 @@ def test_train_segmentation_rad_2d(device):
     parameters["model"]["onnx_export"] = False
     parameters["model"]["print_summary"] = False
     parameters["data_preprocessing"]["resize_image"] = [224, 224]
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     # read and initialize parameters for specific data dimension
     for model in all_models_segmentation:
         if model == "imagenet_unet":
@@ -293,7 +287,7 @@
 def test_train_segmentation_sdnet_rad_2d(device):
     print("04: Starting 2D Rad segmentation tests")
     # read and parse csv
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -308,9 +302,7 @@ def test_train_segmentation_sdnet_rad_2d(device):
     parameters["model"]["architecture"] = "sdnet"
     parameters["model"]["onnx_export"] = False
     parameters["model"]["print_summary"] = False
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     sanitize_outputDir()
     TrainingManager(
         dataframe=training_data,
@@ -331,7 +323,7 @@
 def test_train_segmentation_rad_3d(device):
     print("05: Starting 3D Rad segmentation tests")
     # read and parse csv
     # read and initialize parameters for specific data dimension
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -344,14 +336,10 @@ def test_train_segmentation_rad_3d(device):
     parameters["model"]["final_layer"] = "softmax"
     parameters["model"]["amp"] = True
     parameters["in_memory"] = True
-    parameters["model"]["num_channels"] = len(
-        parameters["headers"]["channelHeaders"]
-    )
+    parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"])
     parameters["model"]["onnx_export"] = False
     parameters["model"]["print_summary"] = False
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     # loop through selected models and train for single epoch
     for model in all_models_segmentation:
         if model == "imagenet_unet":
@@ -390,7 +378,7 @@
 def test_train_regression_rad_2d(device):
     print("06: Starting 2D Rad regression tests")
     # read and initialize parameters for specific data dimension
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_regression.yaml", version_check_flag=False
     )
     parameters["modality"] = "rad"
@@ -402,15 +390,11 @@ def test_train_regression_rad_2d(device):
         inputDir + "/train_2d_rad_regression.csv"
     )
     parameters["model"]["num_channels"] = 3
-    parameters["model"]["class_list"] = parameters["headers"][
-        "predictionHeaders"
-    ]
+    parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"]
parameters["scaling_factor"] = 1 parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch for model in all_models_regression: parameters["model"]["architecture"] = model @@ -435,7 +419,7 @@ def test_train_regression_rad_2d_imagenet(device): print("07: Starting 2D Rad regression tests for imagenet models") # read and initialize parameters for specific data dimension print("Starting 2D Rad regression tests for imagenet models") - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_regression.yaml", version_check_flag=False ) parameters["patch_size"] = patch_size["2D"] @@ -447,13 +431,9 @@ def test_train_regression_rad_2d_imagenet(device): inputDir + "/train_2d_rad_regression.csv" ) parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"][ - "predictionHeaders" - ] + parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] parameters["scaling_factor"] = 1 - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch for model in all_models_classification: parameters["model"]["architecture"] = model @@ -477,7 +457,7 @@ def test_train_regression_rad_2d_imagenet(device): def test_train_regression_brainage_rad_2d(device): print("08: Starting brain age tests") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_regression.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -489,17 +469,13 @@ def test_train_regression_brainage_rad_2d(device): inputDir + "/train_2d_rad_regression.csv" ) parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"][ - "predictionHeaders" - ] + parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] parameters["scaling_factor"] = 1 parameters["model"]["architecture"] = "brain_age" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False parameters_temp = copy.deepcopy(parameters) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -513,9 +489,7 @@ def test_train_regression_brainage_rad_2d(device): # file_config_temp = write_temp_config_path(parameters_temp) model_path = os.path.join(outputDir, "brain_age_best.pth.tar") config_path = os.path.join(outputDir, "parameters.pkl") - optimization_result = post_training_model_optimization( - model_path, config_path - ) + optimization_result = post_training_model_optimization(model_path, config_path) assert optimization_result == False, "Optimization should fail" sanitize_outputDir() @@ -526,7 +500,7 @@ def test_train_regression_brainage_rad_2d(device): def test_train_regression_rad_3d(device): print("09: Starting 3D Rad regression tests") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_regression.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ 
-536,17 +510,11 @@ def test_train_regression_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_regression.csv" ) - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters["model"]["class_list"] = parameters["headers"][ - "predictionHeaders" - ] + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch for model in all_models_regression: if "efficientnet" in model: @@ -582,7 +550,7 @@ def test_train_regression_rad_3d(device): def test_train_classification_rad_2d(device): print("10: Starting 2D Rad classification tests") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -596,9 +564,7 @@ def test_train_classification_rad_2d(device): parameters["model"]["num_channels"] = 3 parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch for model in all_models_regression: if model == "imagenet_unet": @@ -645,7 +611,7 @@ def test_train_classification_rad_2d(device): def test_train_classification_rad_3d(device): print("11: Starting 3D Rad classification tests") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -655,12 +621,8 @@ def test_train_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False # loop through selected models and train for single epoch @@ -697,11 +659,10 @@ def test_train_classification_rad_3d(device): print("passed") - def test_train_resume_inference_classification_rad_3d(device): print("12: Starting 3D Rad classification tests for resume and reset") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -711,12 +672,8 @@ def test_train_resume_inference_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( 
- parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -770,10 +727,11 @@ def test_train_resume_inference_classification_rad_3d(device): print("passed") + def test_train_inference_optimize_classification_rad_3d(device): print("13: Starting 3D Rad segmentation tests for optimization") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -783,12 +741,8 @@ def test_train_inference_optimize_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) parameters["model"]["architecture"] = all_models_regression[0] parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False @@ -804,13 +758,9 @@ def test_train_inference_optimize_classification_rad_3d(device): ) # file_config_temp = write_temp_config_path(parameters_temp) - model_path = os.path.join( - outputDir, all_models_regression[0] + "_best.pth.tar" - ) + model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") config_path = os.path.join(outputDir, "parameters.pkl") - optimization_result = post_training_model_optimization( - model_path, config_path - ) + optimization_result = post_training_model_optimization(model_path, config_path) assert optimization_result == True, "Optimization should pass" ## testing inference @@ -828,10 +778,11 @@ def test_train_inference_optimize_classification_rad_3d(device): print("passed") + def test_train_inference_optimize_segmentation_rad_2d(device): print("14: Starting 2D Rad segmentation tests for optimization") # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) training_data, parameters["headers"] = parseTrainingCSV( @@ -848,9 +799,7 @@ def test_train_inference_optimize_segmentation_rad_2d(device): parameters["model"]["architecture"] = "resunet" parameters["model"]["onnx_export"] = True parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -878,11 +827,9 @@ def test_train_inference_optimize_segmentation_rad_2d(device): def test_train_inference_classification_with_logits_single_fold_rad_3d(device): - print( - "15: Starting 3D Rad classification tests for single fold logits inference" - ) + print("15: Starting 3D Rad classification tests for single fold logits inference") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" 
@@ -894,12 +841,8 @@ def test_train_inference_classification_with_logits_single_fold_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -920,7 +863,7 @@ def test_train_inference_classification_with_logits_single_fold_rad_3d(device): temp_infer_csv = os.path.join(outputDir, "temp_infer_csv.csv") training_data.to_csv(temp_infer_csv, index=False) # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) training_data, parameters["headers"] = parseTrainingCSV(temp_infer_csv) @@ -930,12 +873,8 @@ def test_train_inference_classification_with_logits_single_fold_rad_3d(device): parameters["patch_size"] = patch_size["3D"] parameters["model"]["dimension"] = 3 parameters["model"]["final_layer"] = "logits" - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -955,7 +894,7 @@ def test_train_inference_classification_with_logits_single_fold_rad_3d(device): def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device): print("16: Starting 3D Rad classification tests for multi-fold logits inference") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -970,12 +909,8 @@ def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -1006,7 +941,7 @@ def test_train_scheduler_classification_rad_2d(device): # read and initialize parameters for specific data dimension # loop through selected models and train for single epoch for scheduler in global_schedulers_dict: - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False, ) @@ -1020,9 +955,7 @@ def test_train_scheduler_classification_rad_2d(device): parameters["model"]["num_channels"] = 3 parameters["model"]["architecture"] = "densenet121" 
parameters["model"]["norm_type"] = "instance" - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False parameters["scheduler"] = {} @@ -1033,7 +966,7 @@ def test_train_scheduler_classification_rad_2d(device): ## ensure parameters are parsed every single time file_config_temp = write_temp_config_path(parameters) - parameters = parseConfig(file_config_temp, version_check_flag=False) + parameters = ConfigManager(file_config_temp, version_check_flag=False) TrainingManager( dataframe=training_data, outputDir=outputDir, @@ -1051,7 +984,7 @@ def test_train_scheduler_classification_rad_2d(device): def test_train_optimizer_classification_rad_2d(device): print("18: Starting 2D Rad classification tests for optimizer") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -1066,9 +999,7 @@ def test_train_optimizer_classification_rad_2d(device): parameters["model"]["norm_type"] = "none" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch for optimizer in global_optimizer_dict: parameters["optimizer"] = {} @@ -1095,7 +1026,7 @@ def test_train_optimizer_classification_rad_2d(device): def test_clip_train_classification_rad_3d(device): print("19: Starting 3D Rad classification tests for clipping") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -1105,16 +1036,12 @@ def test_clip_train_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) parameters["model"]["architecture"] = "vgg16" parameters["model"]["norm_type"] = "None" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch for clip_mode in all_clip_modes: parameters["clip_mode"] = clip_mode @@ -1139,7 +1066,7 @@ def test_train_normtype_segmentation_rad_3d(device): # read and initialize parameters for specific data dimension # read and parse csv # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) training_data, parameters["headers"] = parseTrainingCSV( @@ -1154,21 +1081,15 @@ def test_train_normtype_segmentation_rad_3d(device): parameters["in_memory"] = True parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = len( - 
parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # these should raise exceptions for norm_type in ["none", None]: parameters["model"]["norm_type"] = norm_type file_config_temp = write_temp_config_path(parameters) with pytest.raises(Exception) as exc_info: - parameters = parseConfig( - file_config_temp, version_check_flag=False - ) + parameters = ConfigManager(file_config_temp, version_check_flag=False) print("Exception raised:", exc_info.value) @@ -1199,7 +1120,7 @@ def test_train_normtype_segmentation_rad_3d(device): def test_train_metrics_segmentation_rad_2d(device): print("21: Starting 2D Rad segmentation tests for metrics") # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -1227,13 +1148,11 @@ def test_train_metrics_segmentation_rad_2d(device): parameters["model"]["print_summary"] = False file_config_temp = write_temp_config_path(parameters) - parameters = parseConfig(file_config_temp, version_check_flag=False) + parameters = ConfigManager(file_config_temp, version_check_flag=False) training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_2d_rad_segmentation.csv" ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -1252,7 +1171,7 @@ def test_train_metrics_segmentation_rad_2d(device): def test_train_metrics_regression_rad_2d(device): print("22: Starting 2D Rad regression tests for metrics") # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_regression.yaml", version_check_flag=False ) training_data, parameters["headers"] = parseTrainingCSV( @@ -1268,9 +1187,7 @@ def test_train_metrics_regression_rad_2d(device): parameters["model"]["architecture"] = "vgg11" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = True - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -1285,18 +1202,19 @@ def test_train_metrics_regression_rad_2d(device): print("passed") + def test_train_losses_segmentation_rad_2d(device): print("23: Starting 2D Rad segmentation tests for losses") # healper function to read and parse yaml and return parameters def get_parameters_after_alteration(loss_type: str) -> dict: - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) parameters["loss_function"] = loss_type file_config_temp = write_temp_config_path(parameters) # read and parse csv - parameters = parseConfig(file_config_temp, version_check_flag=True) + parameters = ConfigManager(file_config_temp, version_check_flag=True) parameters["nested_training"]["testing"] = -5 parameters["nested_training"]["validation"] = -5 training_data, parameters["headers"] = parseTrainingCSV( @@ -1313,9 +1231,7 @@ def get_parameters_after_alteration(loss_type: str) -> dict: parameters["metrics"] = ["dice"] parameters["model"]["onnx_export"] = False 
parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) return parameters, training_data # loop through selected models and train for single epoch @@ -1348,7 +1264,7 @@ def get_parameters_after_alteration(loss_type: str) -> dict: def test_generic_config_read(): print("24: Starting testing reading configuration") - parameters = parseConfig( + parameters = ConfigManager( os.path.join(baseConfigDir, "config_all_options.yaml"), version_check_flag=False, ) @@ -1357,15 +1273,13 @@ def test_generic_config_read(): file_config_temp = write_temp_config_path(parameters) # read and parse csv - parameters = parseConfig(file_config_temp, version_check_flag=True) + parameters = ConfigManager(file_config_temp, version_check_flag=True) training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame( - training_data, parameters, True, "unit_test" - ) + data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1379,15 +1293,13 @@ def test_generic_config_read(): with open(file_config_temp, "w") as file: yaml.dump(parameters, file) - parameters = parseConfig(file_config_temp, version_check_flag=True) + parameters = ConfigManager(file_config_temp, version_check_flag=True) training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame( - training_data, parameters, True, "unit_test" - ) + data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1399,15 +1311,13 @@ def test_generic_config_read(): with open(file_config_temp, "w") as file: yaml.dump(parameters, file) - parameters = parseConfig(file_config_temp, version_check_flag=True) + parameters = ConfigManager(file_config_temp, version_check_flag=True) training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame( - training_data, parameters, True, "unit_test" - ) + data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1419,15 +1329,13 @@ def test_generic_config_read(): with open(file_config_temp, "w") as file: yaml.dump(parameters, file) - parameters = parseConfig(file_config_temp, version_check_flag=True) + parameters = ConfigManager(file_config_temp, version_check_flag=True) training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame( - training_data, parameters, True, "unit_test" - ) + data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1436,6 +1344,7 @@ def test_generic_config_read(): print("passed") + def test_generic_cli_function_preprocess(): print("25: Starting testing cli function preprocess") file_config = os.path.join(testingDir, "config_segmentation.yaml") 
@@ -1445,16 +1354,14 @@ def test_generic_cli_function_preprocess(): input_data_df, _ = parseTrainingCSV(file_data, train=False) # add random metadata to ensure it gets preserved input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] - input_data_df["metadata_test_float"] = np.random.rand( - input_data_df.shape[0] - ) + input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) input_data_df["metadata_test_int"] = np.random.randint( 0, 100, input_data_df.shape[0] ) temp_csv = os.path.join(outputDir, "temp.csv") input_data_df.to_csv(temp_csv) - parameters = parseConfig(file_config) + parameters = ConfigManager(file_config) parameters["modality"] = "rad" parameters["patch_size"] = patch_size["2D"] parameters["model"]["dimension"] = 2 @@ -1493,7 +1400,7 @@ def test_generic_cli_function_preprocess(): ## regression/classification preprocess file_config = os.path.join(testingDir, "config_regression.yaml") - parameters = parseConfig(file_config) + parameters = ConfigManager(file_config) parameters["modality"] = "rad" parameters["patch_size"] = patch_size["2D"] parameters["model"]["dimension"] = 2 @@ -1509,9 +1416,7 @@ def test_generic_cli_function_preprocess(): input_data_df, _ = parseTrainingCSV(file_data, train=False) # add random metadata to ensure it gets preserved input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] - input_data_df["metadata_test_float"] = np.random.rand( - input_data_df.shape[0] - ) + input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) input_data_df["metadata_test_int"] = np.random.randint( 0, 100, input_data_df.shape[0] ) @@ -1540,7 +1445,7 @@ def test_generic_cli_function_preprocess(): def test_generic_cli_function_mainrun(device): print("26: Starting testing cli function main_run") - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) @@ -1604,14 +1509,15 @@ def test_generic_cli_function_mainrun(device): print("passed") + def test_dataloader_construction_train_segmentation_3d(device): print("27: Starting 3D Rad segmentation tests") # read and parse csv # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) - params_all_preprocessing_and_augs = parseConfig( + params_all_preprocessing_and_augs = ConfigManager( os.path.join(baseConfigDir, "config_all_options.yaml") ) @@ -1638,9 +1544,7 @@ def test_dataloader_construction_train_segmentation_3d(device): parameters["model"]["dimension"] = 3 parameters["model"]["class_list"] = [0, 1] parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) parameters["model"]["architecture"] = "unet" parameters["weighted_loss"] = False parameters["model"]["onnx_export"] = False @@ -1648,9 +1552,7 @@ def test_dataloader_construction_train_segmentation_3d(device): parameters["data_postprocessing"]["mapping"] = {0: 0, 1: 1} parameters["data_postprocessing"]["fill_holes"] = True parameters["data_postprocessing"]["cca"] = True - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch sanitize_outputDir() TrainingManager( @@ -1674,37 +1576,25 @@ def 
test_generic_preprocess_functions(): input_tensor = torch.rand(4, 256, 256, 1) input_transformed = global_preprocessing_dict["rgba2rgb"]()(input_tensor) assert input_transformed.shape[0] == 3, "Number of channels is not 3" - assert ( - input_transformed.shape[1:] == input_tensor.shape[1:] - ), "Shape mismatch" + assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" input_tensor = torch.rand(3, 256, 256, 1) input_transformed = global_preprocessing_dict["rgb2rgba"]()(input_tensor) assert input_transformed.shape[0] == 4, "Number of channels is not 4" - assert ( - input_transformed.shape[1:] == input_tensor.shape[1:] - ), "Shape mismatch" + assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" input_tensor = 2 * torch.rand(3, 256, 256, 1) - 1 - input_transformed = global_preprocessing_dict["normalize_div_by_255"]( - input_tensor - ) + input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) input_tensor = 2 * torch.rand(1, 3, 256, 256) - 1 - input_transformed = global_preprocessing_dict["normalize_imagenet"]( - input_tensor - ) - input_transformed = global_preprocessing_dict["normalize_standardize"]( - input_tensor - ) - input_transformed = global_preprocessing_dict["normalize_div_by_255"]( - input_tensor - ) + input_transformed = global_preprocessing_dict["normalize_imagenet"](input_tensor) + input_transformed = global_preprocessing_dict["normalize_standardize"](input_tensor) + input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) parameters_dict = {} parameters_dict["min"] = 0.25 parameters_dict["max"] = 0.75 - input_transformed = global_preprocessing_dict["threshold"]( - parameters_dict - )(input_tensor) + input_transformed = global_preprocessing_dict["threshold"](parameters_dict)( + input_tensor + ) assert ( torch.count_nonzero( input_transformed[input_transformed < parameters_dict["min"]] @@ -1713,9 +1603,7 @@ def test_generic_preprocess_functions(): == 0 ), "Input should be thresholded" - input_transformed = global_preprocessing_dict["clip"](parameters_dict)( - input_tensor - ) + input_transformed = global_preprocessing_dict["clip"](parameters_dict)(input_tensor) assert ( torch.count_nonzero( input_transformed[input_transformed < parameters_dict["min"]] @@ -1733,9 +1621,7 @@ def test_generic_preprocess_functions(): ## stain_normalization checks input_tensor = 2 * torch.rand(3, 256, 256, 1) + 10 - training_data, _ = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) + training_data, _ = parseTrainingCSV(inputDir + "/train_2d_rad_segmentation.csv") parameters_temp = {} parameters_temp["data_preprocessing"] = {} parameters_temp["data_preprocessing"]["stain_normalizer"] = { @@ -1763,17 +1649,13 @@ def test_generic_preprocess_functions(): # adaptive histogram equalization parameters_temp = {} parameters_temp["data_preprocessing"] = {} - parameters_temp["data_preprocessing"]["histogram_matching"] = { - "target": "adaptive" - } + parameters_temp["data_preprocessing"]["histogram_matching"] = {"target": "adaptive"} non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( parameters_temp["data_preprocessing"]["histogram_matching"] ) input_transformed = non_zero_normalizer(input_tensor) # histogram matching - training_data, _ = parseTrainingCSV( - inputDir + "/train_3d_rad_segmentation.csv" - ) + training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_segmentation.csv") parameters_temp = {} parameters_temp["data_preprocessing"] = {} 
parameters_temp["data_preprocessing"]["histogram_matching"] = { @@ -1842,18 +1724,12 @@ def test_generic_preprocess_functions(): # test obtaining arrays input_tensor_3d = torch.rand(256, 256, 256) input_array = get_array_from_image_or_tensor(input_tensor_3d) - assert isinstance( - input_array, np.ndarray - ), "Array should be obtained from tensor" + assert isinstance(input_array, np.ndarray), "Array should be obtained from tensor" input_image = sitk.GetImageFromArray(input_array) input_array = get_array_from_image_or_tensor(input_image) - assert isinstance( - input_array, np.ndarray - ), "Array should be obtained from image" + assert isinstance(input_array, np.ndarray), "Array should be obtained from image" input_array = get_array_from_image_or_tensor(input_array) - assert isinstance( - input_array, np.ndarray - ), "Array should be obtained from array" + assert isinstance(input_array, np.ndarray), "Array should be obtained from array" with pytest.raises(Exception) as exc_info: input_list = [0, 1] @@ -1923,9 +1799,7 @@ def test_generic_preprocess_functions(): expected_output = [10, 10] output_size_dict = {"resize": expected_output} input_transformed = resize_image(input_image, output_size_dict) - assert ( - list(input_transformed.GetSize()) == expected_output - ), "Resize should work" + assert list(input_transformed.GetSize()) == expected_output, "Resize should work" sanitize_outputDir() @@ -1934,7 +1808,7 @@ def test_generic_preprocess_functions(): def test_generic_augmentation_functions(): print("29: Starting testing augmentation functions") - params_all_preprocessing_and_augs = parseConfig( + params_all_preprocessing_and_augs = ConfigManager( os.path.join(baseConfigDir, "config_all_options.yaml") ) @@ -1982,9 +1856,7 @@ def test_generic_augmentation_functions(): default_range = [-0.1, 0.1] for key in ranges: - params["data_augmentation"]["hed_transform"].setdefault( - key, default_range - ) + params["data_augmentation"]["hed_transform"].setdefault(key, default_range) params["data_augmentation"]["hed_transform"].setdefault( "cutoff_range", [0.05, 0.95] @@ -2014,16 +1886,12 @@ def test_generic_augmentation_functions(): output_tensor = None if aug_lower in global_augs_dict: output_tensor = global_augs_dict[aug]( - params_all_preprocessing_and_augs["data_augmentation"][ - aug_lower - ] + params_all_preprocessing_and_augs["data_augmentation"][aug_lower] )(input_tensor) assert output_tensor != None, "Augmentation should work" # additional test for elastic - params_elastic = params_all_preprocessing_and_augs["data_augmentation"][ - "elastic" - ] + params_elastic = params_all_preprocessing_and_augs["data_augmentation"]["elastic"] for key_to_pop in [ "num_control_points", "max_displacement", @@ -2031,18 +1899,17 @@ def test_generic_augmentation_functions(): ]: params_elastic.pop(key_to_pop, None) output_tensor = global_augs_dict["elastic"](params_elastic)(input_tensor) - assert ( - output_tensor != None - ), "Augmentation for base elastic transform should work" + assert output_tensor != None, "Augmentation for base elastic transform should work" sanitize_outputDir() print("passed") + def test_train_checkpointing_segmentation_rad_2d(device): print("30: Starting 2D Rad segmentation tests for metrics") # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) parameters["patch_sampler"] = { @@ -2051,9 +1918,7 @@ def test_train_checkpointing_segmentation_rad_2d(device): "biased_sampling": True, } 
file_config_temp = write_temp_config_path(parameters) - parameters = parseConfig( - file_config_temp, version_check_flag=False - ) + parameters = ConfigManager(file_config_temp, version_check_flag=False) training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_2d_rad_segmentation.csv" @@ -2085,9 +1950,7 @@ def test_train_checkpointing_segmentation_rad_2d(device): parameters["model"]["architecture"] = "unet" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -2116,7 +1979,7 @@ def test_train_checkpointing_segmentation_rad_2d(device): def test_generic_model_patch_divisibility(): print("31: Starting patch divisibility tests") - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) _, parameters["headers"] = parseTrainingCSV( @@ -2132,24 +1995,18 @@ def test_generic_model_patch_divisibility(): parameters["model"]["print_summary"] = False parameters["model"]["num_channels"] = 3 parameters["metrics"] = ["dice"] - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # this assertion should fail with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]]( - parameters=parameters - ) + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) parameters["model"]["architecture"] = "uinc" parameters["model"]["base_filters"] = 11 # this assertion should fail with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]]( - parameters=parameters - ) + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) sanitize_outputDir() @@ -2207,12 +2064,8 @@ def test_generic_one_hot_logic(): ) for key, value in parameters["data_postprocessing"]["mapping"].items(): - comparison = (img_tensor_oh_rev_array == key) == ( - mapped_output == value - ) - assert comparison.all(), "Arrays at {}:{} are not equal".format( - key, value - ) + comparison = (img_tensor_oh_rev_array == key) == (mapped_output == value) + assert comparison.all(), "Arrays at {}:{} are not equal".format(key, value) # check the case where 0 is present as an int in a special case class_list = [0, "1||2||3", np.max(random_array)] @@ -2263,9 +2116,7 @@ def test_generic_anonymizer(): assert os.path.exists(output_file), "Anonymized file does not exist" # test nifti conversion - config_file_for_nifti = os.path.join( - outputDir, "config_anonymizer_nifti.yaml" - ) + config_file_for_nifti = os.path.join(outputDir, "config_anonymizer_nifti.yaml") with open(config_file, "r") as file_data: yaml_data = file_data.read() parameters = yaml.safe_load(yaml_data) @@ -2276,32 +2127,22 @@ def test_generic_anonymizer(): # for nifti conversion, the input needs to be in a dir input_folder_for_nifti = os.path.join(outputDir, "nifti_input") Path(input_folder_for_nifti).mkdir(parents=True, exist_ok=True) - shutil.copyfile( - input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm") - ) + shutil.copyfile(input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm")) output_file = os.path.join(outputDir, "MR_small.nii.gz") - run_anonymizer( - input_folder_for_nifti, output_file, 
config_file_for_nifti, "rad"
-    )
+    run_anonymizer(input_folder_for_nifti, output_file, config_file_for_nifti, "rad")
     assert os.path.exists(output_file), "Anonymized file does not exist"

     if not os.path.exists(output_file):
         raise Exception("Output NIfTI file was not created")

-    input_file = os.path.join(
-        inputDir, "2d_histo_segmentation", "1", "image.tiff"
-    )
+    input_file = os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff")
     output_file_histo = os.path.join(outputDir, "histo_anon.tiff")
     # this assertion should fail since histo anonymizer is not implemented
     with pytest.raises(BaseException) as exc_info:
-        run_anonymizer(
-            input_folder_for_nifti, output_file_histo, None, "histo"
-        )
-        assert os.path.exists(
-            output_file_histo
-        ), "Anonymized file does not exist"
+        run_anonymizer(input_folder_for_nifti, output_file_histo, None, "histo")
+        assert os.path.exists(output_file_histo), "Anonymized file does not exist"
     print("Exception raised: ", exc_info.value)
     sanitize_outputDir()
@@ -2316,9 +2157,7 @@ def test_train_inference_segmentation_histology_2d(device):
     if os.path.isdir(output_dir_patches):
         shutil.rmtree(output_dir_patches)
     Path(output_dir_patches).mkdir(parents=True, exist_ok=True)
-    output_dir_patches_output = os.path.join(
-        output_dir_patches, "histo_patches_output"
-    )
+    output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output")
     Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True)

     parameters_patch = {}
@@ -2336,11 +2175,9 @@ def test_train_inference_segmentation_histology_2d(device):
         file_config_temp,
     )

-    file_for_Training = os.path.join(
-        output_dir_patches_output, "opm_train.csv"
-    )
+    file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv")
     # read and parse csv
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(file_for_Training)
@@ -2350,9 +2187,7 @@ def test_train_inference_segmentation_histology_2d(device):
     parameters["model"]["class_list"] = [0, 255]
     parameters["model"]["amp"] = True
     parameters["model"]["num_channels"] = 3
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     parameters["model"]["architecture"] = "resunet"
     parameters["nested_training"]["testing"] = 1
     parameters["nested_training"]["validation"] = -2
@@ -2385,6 +2220,7 @@ def test_train_inference_segmentation_histology_2d(device):
     print("passed")

+
 def test_train_inference_classification_histology_large_2d(device):
     print(
         "35: Starting histology train/inference classification tests for large images to check exception handling"
     )
@@ -2395,9 +2231,7 @@ def test_train_inference_classification_histology_large_2d(device):
     if os.path.isdir(output_dir_patches):
         shutil.rmtree(output_dir_patches)
     Path(output_dir_patches).mkdir(parents=True, exist_ok=True)
-    output_dir_patches_output = os.path.join(
-        output_dir_patches, "histo_patches_output"
-    )
+    output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output")
     Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True)

     for sub in ["1", "2"]:
@@ -2449,12 +2283,7 @@ def resize_for_ci(filename, scale):
             print("Trying vips:", ex1)
             try:
                 os.system(
-                    "vips resize "
-                    + filename
-                    + " "
-                    + new_filename
-                    + " "
-                    + str(scale)
+                    "vips resize " + filename + " " + new_filename + " " + str(scale)
                 )
             except Exception as ex2:
                 print("Resize 
could not be done:", ex2) @@ -2462,9 +2291,7 @@ def resize_for_ci(filename, scale): for _, row in input_df.iterrows(): # ensure opm mask size check is triggered - _, _ = generate_initial_mask( - resize_for_ci(row["Channel_0"], scale=2), 1 - ) + _, _ = generate_initial_mask(resize_for_ci(row["Channel_0"], scale=2), 1) for patch_size in [ [128, 128], @@ -2490,21 +2317,19 @@ def resize_for_ci(filename, scale): input_df.to_csv(resized_inference_data_list, index=False) files_to_delete.append(resized_inference_data_list) - file_for_Training = os.path.join( - output_dir_patches_output, "opm_train.csv" - ) + file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") temp_df = pd.read_csv(file_for_Training) temp_df.drop("Label", axis=1, inplace=True) temp_df["valuetopredict"] = np.random.randint(2, size=len(temp_df)) temp_df.to_csv(file_for_Training, index=False) # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "histo" parameters["patch_size"] = parameters_patch["patch_size"][0] file_config_temp = write_temp_config_path(parameters) - parameters = parseConfig(file_config_temp, version_check_flag=False) + parameters = ConfigManager(file_config_temp, version_check_flag=False) parameters["model"]["dimension"] = 2 # read and parse csv training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) @@ -2512,9 +2337,7 @@ def resize_for_ci(filename, scale): parameters["model"]["architecture"] = "densenet121" parameters["model"]["norm_type"] = "none" parameters["data_preprocessing"]["rgba2rgb"] = "" - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) parameters["nested_training"]["testing"] = 1 parameters["nested_training"]["validation"] = -2 parameters["model"]["print_summary"] = False @@ -2531,9 +2354,7 @@ def resize_for_ci(filename, scale): reset=True, ) parameters["output_dir"] = modelDir # this is in inference mode - parameters["data_preprocessing"]["resize_patch"] = parameters_patch[ - "patch_size" - ] + parameters["data_preprocessing"]["resize_patch"] = parameters_patch["patch_size"] parameters["patch_size"] = [ parameters_patch["patch_size"][0] * 10, parameters_patch["patch_size"][1] * 10, @@ -2574,6 +2395,7 @@ def resize_for_ci(filename, scale): print("passed") + def test_train_inference_classification_histology_2d(device): print("36: Starting histology train/inference classification tests") # overwrite previous results @@ -2582,9 +2404,7 @@ def test_train_inference_classification_histology_2d(device): if os.path.isdir(output_dir_patches): shutil.rmtree(output_dir_patches) Path(output_dir_patches).mkdir(parents=True, exist_ok=True) - output_dir_patches_output = os.path.join( - output_dir_patches, "histo_patches_output" - ) + output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") parameters_patch = {} # extracting minimal number of patches to ensure that the test does not take too long @@ -2604,21 +2424,19 @@ def test_train_inference_classification_histology_2d(device): file_config_temp, ) - file_for_Training = os.path.join( - output_dir_patches_output, "opm_train.csv" - ) + file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") temp_df = pd.read_csv(file_for_Training) temp_df.drop("Label", axis=1, inplace=True) temp_df["valuetopredict"] = np.random.randint(2, size=6) 
     temp_df.to_csv(file_for_Training, index=False)
     # read and parse csv
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_classification.yaml", version_check_flag=False
     )
     parameters["modality"] = "histo"
     parameters["patch_size"] = 128
     file_config_temp = write_temp_config_path(parameters)
-    parameters = parseConfig(file_config_temp, version_check_flag=False)
+    parameters = ConfigManager(file_config_temp, version_check_flag=False)
     parameters["model"]["dimension"] = 2
     # read and parse csv
     training_data, parameters["headers"] = parseTrainingCSV(file_for_Training)
@@ -2626,9 +2444,7 @@ def test_train_inference_classification_histology_2d(device):
     parameters["model"]["architecture"] = "densenet121"
     parameters["model"]["norm_type"] = "none"
     parameters["data_preprocessing"]["rgba2rgb"] = ""
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     parameters["nested_training"]["testing"] = 1
     parameters["nested_training"]["validation"] = -2
     parameters["model"]["print_summary"] = False
@@ -2673,7 +2489,7 @@ def test_train_segmentation_unet_layerchange_rad_2d(device):
     print("37: Starting 2D Rad segmentation tests for normtype")
     # read and parse csv
    # read and initialize parameters for specific data dimension
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -2696,9 +2512,7 @@ def test_train_segmentation_unet_layerchange_rad_2d(device):
     parameters["model"]["amp"] = True
     parameters["model"]["print_summary"] = False
     parameters["model"]["num_channels"] = 3
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     # loop through selected models and train for single epoch
     parameters["model"]["norm_type"] = "batch"
     parameters["nested_training"]["testing"] = -5
@@ -2722,7 +2536,7 @@ def test_train_segmentation_unet_layerchange_rad_2d(device):
 
 def test_train_segmentation_unetr_rad_3d(device):
     print("38: Testing UNETR for 3D segmentation")
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -2736,24 +2550,18 @@ def test_train_segmentation_unetr_rad_3d(device):
 
     # this assertion should fail
     with pytest.raises(BaseException) as _:
-        global_models_dict[parameters["model"]["architecture"]](
-            parameters=parameters
-        )
+        global_models_dict[parameters["model"]["architecture"]](parameters=parameters)
 
     parameters["model"]["dimension"] = 3
     parameters["patch_size"] = [32, 32, 32]
 
     with pytest.raises(BaseException) as _:
         parameters["model"]["inner_patch_size"] = 19
-        global_models_dict[parameters["model"]["architecture"]](
-            parameters=parameters
-        )
+        global_models_dict[parameters["model"]["architecture"]](parameters=parameters)
 
     with pytest.raises(BaseException) as _:
         parameters["model"]["inner_patch_size"] = 64
-        global_models_dict[parameters["model"]["architecture"]](
-            parameters=parameters
-        )
+        global_models_dict[parameters["model"]["architecture"]](parameters=parameters)
 
     for patch in [16, 8]:
         parameters["model"]["inner_patch_size"] = patch
@@ -2762,9 +2570,7 @@ def test_train_segmentation_unetr_rad_3d(device):
     parameters["model"]["num_channels"] = len(
         parameters["headers"]["channelHeaders"]
     )
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     # loop through selected models and train for single epoch
     parameters["model"]["norm_type"] = "batch"
     parameters["nested_training"]["testing"] = -5
@@ -2788,7 +2594,7 @@ def test_train_segmentation_unetr_rad_3d(device):
 
 def test_train_segmentation_unetr_rad_2d(device):
     print("39: Testing UNETR for 2D segmentation")
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -2804,9 +2610,7 @@ def test_train_segmentation_unetr_rad_2d(device):
     parameters["model"]["amp"] = True
     parameters["model"]["print_summary"] = False
     parameters["model"]["num_channels"] = 3
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     # loop through selected models and train for single epoch
     parameters["model"]["norm_type"] = "batch"
     parameters["nested_training"]["testing"] = -5
@@ -2830,7 +2634,7 @@ def test_train_segmentation_unetr_rad_2d(device):
 
 def test_train_segmentation_transunet_rad_2d(device):
     print("40: Testing TransUNet for 2D segmentation")
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -2844,16 +2648,12 @@ def test_train_segmentation_transunet_rad_2d(device):
     with pytest.raises(BaseException) as _:
         parameters["model"]["num_heads"] = 6
         parameters["model"]["embed_dim"] = 64
-        global_models_dict[parameters["model"]["architecture"]](
-            parameters=parameters
-        )
+        global_models_dict[parameters["model"]["architecture"]](parameters=parameters)
 
     with pytest.raises(BaseException) as _:
         parameters["model"]["num_heads"] = 3
         parameters["model"]["embed_dim"] = 50
-        global_models_dict[parameters["model"]["architecture"]](
-            parameters=parameters
-        )
+        global_models_dict[parameters["model"]["architecture"]](parameters=parameters)
 
     parameters["model"]["embed_dim"] = 64
     parameters["model"]["depth"] = 2
@@ -2861,9 +2661,7 @@ def test_train_segmentation_transunet_rad_2d(device):
     parameters["model"]["num_heads"] = 8
     parameters["model"]["amp"] = True
     parameters["model"]["num_channels"] = 3
-    parameters = populate_header_in_parameters(
-        parameters, parameters["headers"]
-    )
+    parameters = populate_header_in_parameters(parameters, parameters["headers"])
     # loop through selected models and train for single epoch
     parameters["model"]["norm_type"] = "batch"
     parameters["nested_training"]["testing"] = -5
@@ -2887,7 +2685,7 @@ def test_train_segmentation_transunet_rad_2d(device):
 
 def test_train_segmentation_transunet_rad_3d(device):
     print("41: Testing TransUNet for 3D segmentation")
-    parameters = parseConfig(
+    parameters = ConfigManager(
         testingDir + "/config_segmentation.yaml", version_check_flag=False
     )
     training_data, parameters["headers"] = parseTrainingCSV(
@@ -2900,44 +2698,32 @@ def test_train_segmentation_transunet_rad_3d(device):
 
     # this assertion should fail
     with pytest.raises(BaseException) as _:
-        global_models_dict[parameters["model"]["architecture"]](
-            parameters=parameters
-        )
+        global_models_dict[parameters["model"]["architecture"]](parameters=parameters)
 
     parameters["model"]["dimension"] = 3
     parameters["patch_size"] = [32, 32, 32]
 
     with pytest.raises(BaseException) as _:
parameters["model"]["depth"] = 1 - global_models_dict[parameters["model"]["architecture"]]( - parameters=parameters - ) + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) with pytest.raises(BaseException) as _: parameters["model"]["num_heads"] = 6 parameters["model"]["embed_dim"] = 64 - global_models_dict[parameters["model"]["architecture"]]( - parameters=parameters - ) + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) with pytest.raises(BaseException) as _: parameters["model"]["num_heads"] = 3 parameters["model"]["embed_dim"] = 50 - global_models_dict[parameters["model"]["architecture"]]( - parameters=parameters - ) + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) parameters["model"]["num_heads"] = 8 parameters["model"]["embed_dim"] = 64 parameters["model"]["depth"] = 2 parameters["model"]["class_list"] = [0, 255] parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # loop through selected models and train for single epoch parameters["model"]["norm_type"] = "batch" parameters["nested_training"]["testing"] = -5 @@ -2962,7 +2748,7 @@ def test_train_segmentation_transunet_rad_3d(device): def test_train_gradient_clipping_classification_rad_2d(device): print("42: Testing gradient clipping") # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_classification.yaml", version_check_flag=False ) parameters["modality"] = "rad" @@ -2976,9 +2762,7 @@ def test_train_gradient_clipping_classification_rad_2d(device): parameters["model"]["num_channels"] = 3 parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # ensure gradient clipping is getting tested for clip_mode in ["norm", "value", "agc"]: parameters["model"]["architecture"] = "imagenet_vgg11" @@ -3001,12 +2785,10 @@ def test_train_gradient_clipping_classification_rad_2d(device): def test_train_segmentation_unet_conversion_rad_3d(device): - print( - "43: Starting 3D Rad segmentation tests for unet with ACS conversion" - ) + print("43: Starting 3D Rad segmentation tests for unet with ACS conversion") # read and parse csv # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) training_data, parameters["headers"] = parseTrainingCSV( @@ -3020,14 +2802,10 @@ def test_train_segmentation_unet_conversion_rad_3d(device): parameters["model"]["amp"] = True parameters["in_memory"] = True parameters["verbose"] = False - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) # 
loop through selected models and train for single epoch for model in ["unet", "unet_multilayer", "lightunet_multilayer"]: for converter_type in ["acs", "soft", "conv3d"]: @@ -3064,12 +2842,10 @@ def test_generic_cli_function_configgenerator(): for file in all_files: parameters = None with suppress_stdout_stderr(): - parameters = parseConfig( + parameters = ConfigManager( os.path.join(outputDir, file), version_check_flag=False ) - assert ( - parameters - ), "config generator did not generate valid config files" + assert parameters, "config generator did not generate valid config files" sanitize_outputDir() generator_config = yaml.safe_load(open(generator_config_path, "r")) @@ -3096,7 +2872,7 @@ def test_generic_cli_function_recoverconfig(): # Train, then recover a config and see if it exists/is valid YAML # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) training_data, parameters["headers"] = parseTrainingCSV( @@ -3111,9 +2887,7 @@ def test_generic_cli_function_recoverconfig(): parameters["model"]["architecture"] = "sdnet" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -3129,20 +2903,21 @@ def test_generic_cli_function_recoverconfig(): ), "recover_config returned false" assert os.path.exists(output_config_path), "Didn't create a config file" - new_params = parseConfig(output_config_path, version_check_flag=False) - assert new_params, "Created YAML could not be parsed by parseConfig" + new_params = ConfigManager(output_config_path, version_check_flag=False) + assert new_params, "Created YAML could not be parsed by ConfigManager" sanitize_outputDir() print("passed") + def test_generic_deploy_docker(): print("46: Testing deployment of a model to Docker") # Train, then try deploying that model (requires an installed Docker engine) deploymentOutputDir = os.path.join(outputDir, "mlcube") # read and parse csv - parameters = parseConfig( + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) training_data, parameters["headers"] = parseTrainingCSV( @@ -3160,9 +2935,7 @@ def test_generic_deploy_docker(): parameters["data_preprocessing"]["resize_image"] = [224, 224] parameters["memory_save_mode"] = True - parameters = populate_header_in_parameters( - parameters, parameters["headers"] - ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -3199,10 +2972,8 @@ def test_generic_deploy_docker(): def test_collision_subjectid_test_segmentation_rad_2d(device): - print( - "47: Starting 2D Rad segmentation tests for collision of subjectID in test" - ) - parameters = parseConfig( + print("47: Starting 2D Rad segmentation tests for collision of subjectID in test") + parameters = ConfigManager( testingDir + "/config_segmentation.yaml", version_check_flag=False ) @@ -3264,14 +3035,10 @@ def test_generic_random_numbers_are_deterministic_on_cpu(): assert np.allclose(a, c) assert np.allclose(b, d) - e, f = [random.random() for _ in range(5)], [ - random.random() for _ in range(5) - ] + e, f = [random.random() for _ in range(5)], [random.random() for _ in range(5)] set_determinism(seed=42) - g, h = [random.random() for 
_ in range(5)], [ - random.random() for _ in range(5) - ] + g, h = [random.random() for _ in range(5)], [random.random() for _ in range(5)] # Check that the generated random numbers are the same with Python's built-in random module assert e == g @@ -3309,7 +3076,7 @@ def test_generic_cli_function_metrics_cli_rad_nd(): training_data["mask"] = training_data["Label"] # read and initialize parameters for specific data dimension - parameters = parseConfig( + parameters = ConfigManager( testingDir + f"/config_{problem_type_wrap}.yaml", version_check_flag=False, ) @@ -3334,12 +3101,11 @@ def test_generic_cli_function_metrics_cli_rad_nd(): # run the metrics calculation generate_metrics_dict(temp_infer_csv, temp_config, output_file) - assert os.path.isfile( - output_file - ), "Metrics output file was not generated" + assert os.path.isfile(output_file), "Metrics output file was not generated" sanitize_outputDir() + def test_generic_deploy_metrics_docker(): print("50: Testing deployment of a metrics generator to Docker") # requires an installed Docker engine
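
Note: a minimal usage sketch (an editor's illustration, not part of the patch) of the config-loading pattern the tests above share after the rename; the YAML and CSV file names here are placeholders:

    from GANDLF.config_manager import ConfigManager
    from GANDLF.utils import parseTrainingCSV, populate_header_in_parameters

    # ConfigManager takes the same arguments as the old parseConfig and returns
    # the parsed parameter dictionary, so call sites only change the name.
    parameters = ConfigManager("config_segmentation.yaml", version_check_flag=False)
    training_data, parameters["headers"] = parseTrainingCSV("train_data.csv")
    parameters = populate_header_in_parameters(parameters, parameters["headers"])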