From fb0f769fb35d303e17164674c5e9da0e26aa5b05 Mon Sep 17 00:00:00 2001 From: Joost van Griethuysen Date: Wed, 10 Jan 2018 10:29:57 -0500 Subject: [PATCH] ADD: Add PyRadiomics Model schema validation Update the PyKwalify validation in PyRadiomics to also allow validation of PyRadiomics model files. Add an example model file to explain the structure. Update requirements.txt to enforce PyKwalify version != 1.6.0, as this version contains a bug that breaks the usage of partial schemas, which is utilized to allow validation of the two use cases (parameter file and model file) Because partial schemas are used, load the schemas outside of pykwalify and pass them during validation as a dict. Update the testParams script to allow testing of both use cases. --- bin/testParams.py | 53 +++-- examples/exampleModels/exampleModel.yaml | 41 ++++ radiomics/__init__.py | 28 ++- radiomics/featureextractor.py | 4 +- radiomics/schemas/modelSchema.yaml | 22 +++ radiomics/schemas/paramSchema.yaml | 235 ++++++++++++----------- requirements.txt | 2 +- tests/test_exampleSettings.py | 6 +- 8 files changed, 251 insertions(+), 140 deletions(-) create mode 100644 examples/exampleModels/exampleModel.yaml create mode 100644 radiomics/schemas/modelSchema.yaml diff --git a/bin/testParams.py b/bin/testParams.py index 806d5c07..b87a0fb6 100644 --- a/bin/testParams.py +++ b/bin/testParams.py @@ -3,25 +3,54 @@ # custom parameters specified will be printed. If validation fails, an error message specifying cause of validation # error will be printed. -import sys +import argparse import pykwalify.core - from radiomics import getParameterValidationFiles -def main(paramsFile): - schemaFile, schemaFuncs = getParameterValidationFiles() - c = pykwalify.core.Core(source_file=paramsFile, schema_files=[schemaFile], extensions=[schemaFuncs]) +def main(paramsFile, is_model=False): + if is_model: + validate_model_file(paramsFile) + else: + validate_customization(paramsFile) + + +def validate_model_file(model_file,): + schema_data, schemaFuncs = getParameterValidationFiles(is_model_validation=True) + c = pykwalify.core.Core(source_file=model_file, schema_data=schema_data, extensions=[schemaFuncs]) + + try: + params = c.validate() + print('Model validation successfull!\n\n' + '###Model Type###\n%s\n' + % (params['model']['name'])) + except pykwalify.core.SchemaError as e: + print('Parameter validation failed!\n%s' % e.msg) + + +def validate_customization(parameter_file): + schema_data, schemaFuncs = getParameterValidationFiles() + c = pykwalify.core.Core(source_file=parameter_file, schema_data=schema_data, extensions=[schemaFuncs]) + try: params = c.validate() print('Parameter validation successfull!\n\n' - '###Enabled Features###\n%s\n' - '###Enabled Image Types###\n%s\n' - '###Settings###\n%s' % (params['featureClass'], params['imageType'], params['setting'])) - except Exception as e: - print('Parameter validation failed!\n%s' % e.message) + '###Enabled Features###\n%s\n' + '###Enabled Image Types###\n%s\n' + '###Settings###\n%s' % (params['featureClass'], params['imageType'], params['setting']) + ) + except pykwalify.core.SchemaError as e: + print('Parameter validation failed!\n%s' % e.msg) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('parameter_file', help='File representing the yaml or json structured configuration file to be ' + 'tested') + parser.add_argument('--model', '-m', action='store_true', + help='If this argument is specified, the configuration file is treated as a PyRadiomics Model, ' + 'otherwise, it is treated as an extraction parameter file') -if __name__ == '__main__' and len(sys.argv) > 1: - main(sys.argv[1]) + args = parser.parse_args() + main(args.parameter_file, args.model) diff --git a/examples/exampleModels/exampleModel.yaml b/examples/exampleModels/exampleModel.yaml new file mode 100644 index 00000000..7a1b0c3c --- /dev/null +++ b/examples/exampleModels/exampleModel.yaml @@ -0,0 +1,41 @@ +# This is an example of how a PyRadiomics Model looks like. +# It consists of 2 main parts: "extraction" and "model" +# +# "extraction": this part defines the settings PyRadiomics needs to extract the required features for the input. +# A model can incorporate features extracted from different images (e.g. multiple time points, or different MR +# sequences). For each image, a customized extraction may be defined by providing the image name as a key, and the +# customization as a value. This customization value must adhere to the same rules as a parameter file. +# In addition, extraction parameters that are common to all input images can be defined under "general". Be aware, +# "general" is therefore not allowed as an image name. +# If an image name is provided in the model, but not included in the extraction settings, no additional customization +# is applied for that image (just the general settings, if present) +# +# "model": this part provides all needed information to build the model. In this case, a simple linear regression model +# is shown. Both the "name" key (identifying the model type) and "parameters" key (providing model specific parameters) +# are required. What kind of parameters are possible/required depends on the model. In this case, only the intercept of +# the model and the betas (slopes) of the included features are required. + +extraction: + general: + setting: + binWidth: 25 + imageType: + Original: {} + + image1: + featureClass: + glcm: + - Dissimilarity + + image2: + featureClass: + firstorder: + - Mean + +model: + name: linearregression + parameters: + intercept: 0.334 + betas: + image1_original_glcm_Dissimilarity: 0.1 + image2_original_firstorder_Mean: 0.3 diff --git a/radiomics/__init__.py b/radiomics/__init__.py index 0b0dbcbb..d455d7e2 100644 --- a/radiomics/__init__.py +++ b/radiomics/__init__.py @@ -10,6 +10,7 @@ import tempfile import numpy # noqa: F401 +from pykwalify.compat import yaml from six.moves import urllib from . import imageoperations @@ -229,17 +230,36 @@ def getTestCase(testCase, repoDirectory=None): return imageFile, maskFile -def getParameterValidationFiles(): +def getParameterValidationFiles(is_model_validation=False): """ Returns file locations for the parameter schema and custom validation functions, which are needed when validating a parameter file using ``PyKwalify.core``. - This functions returns a tuple with the file location of the schema as first and python script with custom validation - functions as second element. + This functions returns a tuple with a dictionary representing the schema and the file location of a python script + containing the custom validation functions. """ dataDir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'schemas')) schemaFile = os.path.join(dataDir, 'paramSchema.yaml') + modelFile = os.path.join(dataDir, 'modelSchema.yaml') schemaFuncs = os.path.join(dataDir, 'schemaFuncs.py') - return schemaFile, schemaFuncs + + if not (os.path.isfile(schemaFile) and os.path.isfile(schemaFuncs)): + raise IOError('Customization Validation Files not Found!') + + with open(schemaFile) as schema: + schema_data = yaml.load(schema) + + if is_model_validation: + if not os.path.isfile(modelFile): + raise IOError('Model Validation File not Found!') + + # Add the additional validation requirements of the model schema + with open(modelFile) as model_schema: + schema_data.update(yaml.load(model_schema)) + else: + # Add the include to ensure that the customization_schema is applied + schema_data.update({'include': 'customization_schema'}) + + return schema_data, schemaFuncs class _DummyProgressReporter(object): diff --git a/radiomics/featureextractor.py b/radiomics/featureextractor.py index 52e593c4..1dfc8804 100644 --- a/radiomics/featureextractor.py +++ b/radiomics/featureextractor.py @@ -145,9 +145,9 @@ def _applyParams(self, paramsFile=None, paramsDict=None): # No handler available for either pykwalify or root logger, provide first radiomics handler (outputs to stderr) pykwalify.core.log.addHandler(logging.getLogger('radiomics').handlers[0]) - schemaFile, schemaFuncs = getParameterValidationFiles() + schema_data, schemaFuncs = getParameterValidationFiles() c = pykwalify.core.Core(source_file=paramsFile, source_data=paramsDict, - schema_files=[schemaFile], extensions=[schemaFuncs]) + schema_data=schema_data, extensions=[schemaFuncs]) params = c.validate() self.logger.debug('Parameters parsed, input is valid.') diff --git a/radiomics/schemas/modelSchema.yaml b/radiomics/schemas/modelSchema.yaml new file mode 100644 index 00000000..ed9fd1e1 --- /dev/null +++ b/radiomics/schemas/modelSchema.yaml @@ -0,0 +1,22 @@ +# include: customization_schema +name: model_schema +type: map +mapping: + extraction: + type: map + mapping: + regex;(.+): + include: customization_schema + model: + type: map + required: true + mapping: + name: + type: str + required: true + parameters: + type: map + required: true + mapping: + regex;(.+): + type: any diff --git a/radiomics/schemas/paramSchema.yaml b/radiomics/schemas/paramSchema.yaml index e50455bc..e8b9add0 100644 --- a/radiomics/schemas/paramSchema.yaml +++ b/radiomics/schemas/paramSchema.yaml @@ -1,120 +1,121 @@ # Parameters schema -name: Parameter schema -desc: This schema defines what arguments may be present in the parameters file that can be passed to the pyradiomics package. -type: map -mapping: - setting: &settings - type: map - mapping: - enableCExtensions: - type: bool - minimumROIDimensions: - type: int - range: - min: 1 - max: 3 - minimumROISize: - type: int - range: - min-ex: 0 - geometryTolerance: - type: float - range: - min-ex: 0 - correctMask: - type: bool - additionalInfo: - type: bool - label: - type: int - binWidth: - type: float - range: - min-ex: 0 - normalize: - type: bool - normalizeScale: - type: float - range: - min-ex: 0 - removeOutliers: - type: float - range: - min-ex: 0 - resampledPixelSpacing: - seq: - - type: float - range: - min: 0 - interpolator: - type: any - func: checkInterpolator - padDistance: - type: int - range: - min: 0 - distances: - seq: - - type: int - range: - min-ex: 0 - force2D: - type: bool - force2Ddimension: - type: int - range: - min: 0 - max: 2 - resegmentRange: - seq: - - type: float - preCrop: - type: bool - sigma: - seq: - - type: float - range: - min-ex: 0 - start_level: - type: int - range: - min: 0 - level: - type: int - range: - min-ex: 0 - wavelet: - type: str - func: checkWavelet - voxelArrayShift: - type: int - symmetricalGLCM: - type: bool - weightingNorm: - type: any - func: checkWeighting - gldm_a: - type: int - range: - min: 0 +schema;customization_schema: + name: Parameter schema + desc: This schema defines what arguments may be present in the parameters file that can be passed to the pyradiomics package. + type: map + mapping: + setting: &settings + type: map + mapping: + enableCExtensions: + type: bool + minimumROIDimensions: + type: int + range: + min: 1 + max: 3 + minimumROISize: + type: int + range: + min-ex: 0 + geometryTolerance: + type: float + range: + min-ex: 0 + correctMask: + type: bool + additionalInfo: + type: bool + label: + type: int + binWidth: + type: float + range: + min-ex: 0 + normalize: + type: bool + normalizeScale: + type: float + range: + min-ex: 0 + removeOutliers: + type: float + range: + min-ex: 0 + resampledPixelSpacing: + seq: + - type: float + range: + min: 0 + interpolator: + type: any + func: checkInterpolator + padDistance: + type: int + range: + min: 0 + distances: + seq: + - type: int + range: + min-ex: 0 + force2D: + type: bool + force2Ddimension: + type: int + range: + min: 0 + max: 2 + resegmentRange: + seq: + - type: float + preCrop: + type: bool + sigma: + seq: + - type: float + range: + min-ex: 0 + start_level: + type: int + range: + min: 0 + level: + type: int + range: + min-ex: 0 + wavelet: + type: str + func: checkWavelet + voxelArrayShift: + type: int + symmetricalGLCM: + type: bool + weightingNorm: + type: any + func: checkWeighting + gldm_a: + type: int + range: + min: 0 - featureClass: - type: map - func: checkFeatureClass - matching-rule: 'any' - mapping: - regex;(.+): - type: any + featureClass: + type: map + func: checkFeatureClass + matching-rule: 'any' + mapping: + regex;(.+): + type: any - imageType: - type: map - mapping: - # possible image types, it's value is a dictionary (holding image type specific settings) that follows the rules - # as in 'setting' - Original: *settings - LoG: *settings - Wavelet: *settings - Square: *settings - SquareRoot: *settings - Logarithm: *settings - Exponential: *settings + imageType: + type: map + mapping: + # possible image types, it's value is a dictionary (holding image type specific settings) that follows the rules + # as in 'setting' + Original: *settings + LoG: *settings + Wavelet: *settings + Square: *settings + SquareRoot: *settings + Logarithm: *settings + Exponential: *settings diff --git a/requirements.txt b/requirements.txt index 1d09b540..f7ed2993 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ numpy>=1.9.2 SimpleITK>=0.9.1 PyWavelets>=0.4.0 -pykwalify>=1.6.0 +pykwalify!=1.6.0 six>=1.10.0 diff --git a/tests/test_exampleSettings.py b/tests/test_exampleSettings.py index 7a0b7750..23e3a433 100644 --- a/tests/test_exampleSettings.py +++ b/tests/test_exampleSettings.py @@ -14,7 +14,7 @@ def exampleSettings_name_func(testcase_func, param_num, param): class TestExampleSettings: def __init__(self): - self.schemaFile, self.schemaFuncs = getParameterValidationFiles() + self.schema_data, self.schemaFuncs = getParameterValidationFiles() def generateScenarios(): dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'examples', 'exampleSettings') @@ -27,7 +27,5 @@ def generateScenarios(): @parameterized.expand(generateScenarios(), testcase_func_name=exampleSettings_name_func) def test_scenarios(self, settingsFile): - assert os.path.isfile(self.schemaFile) - assert os.path.isfile(self.schemaFuncs) - c = pykwalify.core.Core(source_file=settingsFile, schema_files=[self.schemaFile], extensions=[self.schemaFuncs]) + c = pykwalify.core.Core(source_file=settingsFile, schema_data=self.schema_data, extensions=[self.schemaFuncs]) c.validate()