diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 56925e024..30d4e2bd3 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -315,7 +315,7 @@ def _get_dataset_input_validator( Testing feature set y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set - resampling_strategy (Optional[RESAMPLING_STRATEGIES]): + resampling_strategy (Optional[ResamplingStrategies]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): @@ -355,7 +355,7 @@ def get_dataset( Testing feature set y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]): Testing target set - resampling_strategy (Optional[RESAMPLING_STRATEGIES]): + resampling_strategy (Optional[ResamplingStrategies]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): @@ -973,7 +973,7 @@ def _search( `SMAC `_. tae_func (Optional[Callable]): TargetAlgorithm to be optimised. If None, `eval_function` - available in autoPyTorch/evaluation/train_evaluator is used. + available in autoPyTorch/evaluation/evaluator is used. Must be child class of AbstractEvaluator. all_supported_metrics (bool: default=True): If True, all metrics supporting current task will be calculated @@ -1380,7 +1380,7 @@ def fit_pipeline( X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None, dataset_name: Optional[str] = None, - resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes, NoResamplingStrategyTypes]] = None, + resampling_strategy: Optional[ResamplingStrategies] = None, resampling_strategy_args: Optional[Dict[str, Any]] = None, run_time_limit_secs: int = 60, memory_limit: Optional[int] = None, @@ -1415,7 +1415,7 @@ def fit_pipeline( be provided to track the generalization performance of each stage. dataset_name (Optional[str]): Name of the dataset, if None, random value is used. - resampling_strategy (Optional[RESAMPLING_STRATEGIES]): + resampling_strategy (Optional[ResamplingStrategies]): Strategy to split the training data. if None, uses HoldoutValTypes.holdout_validation. resampling_strategy_args (Optional[Dict[str, Any]]): diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py index 684c22a7b..3e88a4a97 100644 --- a/autoPyTorch/api/tabular_classification.py +++ b/autoPyTorch/api/tabular_classification.py @@ -336,7 +336,7 @@ def search( `SMAC `_. tae_func (Optional[Callable]): TargetAlgorithm to be optimised. If None, `eval_function` - available in autoPyTorch/evaluation/train_evaluator is used. + available in autoPyTorch/evaluation/evaluator is used. Must be child class of AbstractEvaluator. all_supported_metrics (bool: default=True): If True, all metrics supporting current task will be calculated diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py index d766bad68..0d9028480 100644 --- a/autoPyTorch/api/tabular_regression.py +++ b/autoPyTorch/api/tabular_regression.py @@ -337,7 +337,7 @@ def search( `SMAC `_. tae_func (Optional[Callable]): TargetAlgorithm to be optimised. If None, `eval_function` - available in autoPyTorch/evaluation/train_evaluator is used. + available in autoPyTorch/evaluation/evaluator is used. Must be child class of AbstractEvaluator. 
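# --- editor's sketch (illustrative annotation, not part of the patch) ---------
# The docstring changes above standardise on the `ResamplingStrategies` alias,
# so the same keyword accepts holdout, cross-validation and no-resampling
# values alike. A minimal call, assuming an installed autoPyTorch, might look
# like this; names such as `api` are hypothetical.
from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.resampling_strategy import CrossValTypes

api = TabularClassificationTask(
    resampling_strategy=CrossValTypes.k_fold_cross_validation,
    resampling_strategy_args={'num_splits': 5},
    seed=42,
)
# ------------------------------------------------------------------------------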
all_supported_metrics (bool: default=True): If True, all metrics supporting current task will be calculated diff --git a/autoPyTorch/datasets/resampling_strategy.py b/autoPyTorch/datasets/resampling_strategy.py index 78447a04e..e09747258 100644 --- a/autoPyTorch/datasets/resampling_strategy.py +++ b/autoPyTorch/datasets/resampling_strategy.py @@ -93,6 +93,14 @@ def is_stratified(self) -> bool: # TODO: replace it with another way ResamplingStrategies = Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes] + +def check_resampling_strategy(resampling_strategy: Optional[ResamplingStrategies]) -> None: + choices = (CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes) + if not isinstance(resampling_strategy, choices): + rs_names = (rs.__mro__[0].__name__ for rs in choices) + raise ValueError(f'resampling_strategy must be in {rs_names}, but got {resampling_strategy}') + + DEFAULT_RESAMPLING_PARAMETERS: Dict[ ResamplingStrategies, Dict[str, Any] diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index b0d5a433f..0233b69a4 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -167,47 +167,87 @@ class FixedPipelineParams(NamedTuple): search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): An object used to fine tune the hyperparameter search space of the pipeline """ - def __init__(self, backend: Backend, - queue: Queue, - metric: autoPyTorchMetric, - budget: float, - configuration: Union[int, str, Configuration], - budget_type: str = None, - pipeline_config: Optional[Dict[str, Any]] = None, - seed: int = 1, - output_y_hat_optimization: bool = True, - num_run: Optional[int] = None, - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - init_params: Optional[Dict[str, Any]] = None, - logger_port: Optional[int] = None, - all_supported_metrics: bool = True, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None - ) -> None: - - self.starttime = time.time() - - self.configuration = configuration - self.backend: Backend = backend - self.queue = queue - - self.include = include - self.exclude = exclude - self.search_space_updates = search_space_updates - - self.metric = metric - - - self._init_datamanager_info() - - # Flag to save target for ensemble - self.output_y_hat_optimization = output_y_hat_optimization + backend: Backend + seed: int + metric: autoPyTorchMetric + budget_type: str # Literal['epochs', 'runtime'] + pipeline_config: Dict[str, Any] + save_y_opt: bool = True + include: Optional[Dict[str, Any]] = None + exclude: Optional[Dict[str, Any]] = None + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None + logger_port: Optional[int] = None + all_supported_metrics: bool = True + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + + @classmethod + def with_default_pipeline_config( + cls, + pipeline_config: Optional[Dict[str, Any]] = None, + choice: str = 'default', + **kwargs: Any + ) -> 'FixedPipelineParams': + + if 'budget_type' in kwargs: + raise TypeError( + f'{cls.__name__}.with_default_pipeline_config() got multiple values for argument `budget_type`' + ) + + budget_type_choices = ('epochs', 'runtime') + if pipeline_config is None: + pipeline_config = get_default_pipeline_config(choice=choice) + if 'budget_type' not in pipeline_config: + raise 
ValueError('pipeline_config must have `budget_type`') + + budget_type = pipeline_config['budget_type'] + if pipeline_config['budget_type'] not in budget_type_choices: + raise ValueError(f"budget_type must be in {budget_type_choices}, but got {budget_type}") + + kwargs.update(pipeline_config=pipeline_config, budget_type=budget_type) + return cls(**kwargs) + + +class EvaluatorParams(NamedTuple): + """ + Attributes: + configuration (Union[int, str, Configuration]): + Determines the pipeline to be constructed. A dummy estimator is created for + integer configurations, a traditional machine learning pipeline is created + for string based configuration, and NAS is performed when a configuration + object is passed. + num_run (Optional[int]): + An identifier of the current configuration being fit. This number is unique per + configuration. + init_params (Optional[Dict[str, Any]]): + Optional argument that is passed to each pipeline step. It is the equivalent of + kwargs for the pipeline steps. + """ + budget: float + configuration: Union[int, str, Configuration] + num_run: Optional[int] = None + init_params: Optional[Dict[str, Any]] = None + + @classmethod + def with_default_budget( + cls, + budget: float = 0, + choice: str = 'default', + **kwargs: Any + ) -> 'EvaluatorParams': + budget = get_default_budget(choice=choice) if budget == 0 else budget + kwargs.update(budget=budget) + return cls(**kwargs) + + +class AbstractEvaluator(object): + """ + This method defines the interface that pipeline evaluators should follow, when + interacting with SMAC through TargetAlgorithmQuery. An evaluator is an object that: + constructs a pipeline (i.e. a classification or regression estimator) for a given pipeline_config and run settings (budget, seed) - + Fits and trains this pipeline (TrainEvaluator) or tests a given + + Fits and trains this pipeline (Evaluator) or tests a given configuration (TestEvaluator) The provided configuration determines the type of pipeline created. 
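# --- editor's sketch (illustrative annotation, not part of the patch) ---------
# The two NamedTuples above replace the long evaluator __init__ signatures:
# run-invariant settings go into FixedPipelineParams, per-run settings into
# EvaluatorParams, and both are handed to the evaluator (see the Evaluator and
# eval_fn changes later in this diff). A minimal construction sketch, assuming
# `backend` (an automl_common Backend) and `config` (a Configuration) already
# exist:
from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams
from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy

fixed_params = FixedPipelineParams.with_default_pipeline_config(
    backend=backend,  # assumed: an automl_common Backend instance
    seed=42,
    metric=accuracy,
)  # fills pipeline_config and derives budget_type from the default pipeline config
eval_params = EvaluatorParams.with_default_budget(configuration=config)  # budget=0 -> default budget
# ------------------------------------------------------------------------------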
For more @@ -244,21 +284,33 @@ def _init_miscellaneous(self) -> None: DisableFileOutputParameters.check_compatibility(disable_file_output) self.disable_file_output = disable_file_output else: - if isinstance(self.configuration, int): - self.pipeline_class = DummyClassificationPipeline - elif isinstance(self.configuration, str): - if self.task_type in TABULAR_TASKS: - self.pipeline_class = MyTraditionalTabularClassificationPipeline - else: - raise ValueError("Only tabular tasks are currently supported with traditional methods") - elif isinstance(self.configuration, Configuration): - if self.task_type in TABULAR_TASKS: - self.pipeline_class = autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline - elif self.task_type in IMAGE_TASKS: - self.pipeline_class = autoPyTorch.pipeline.image_classification.ImageClassificationPipeline - else: - raise ValueError('task {} not available'.format(self.task_type)) - self.predict_function = self._predict_proba + self.disable_file_output = [] + + if self.num_folds == 1: # not save cv model when we perform holdout + self.disable_file_output.append('cv_model') + + def _init_dataset_properties(self) -> None: + datamanager: BaseDataset = self.fixed_pipeline_params.backend.load_datamanager() + if datamanager.task_type is None: + raise ValueError(f"Expected dataset {datamanager.__class__.__name__} to have task_type got None") + if datamanager.splits is None: + raise ValueError(f"cannot fit pipeline {self.__class__.__name__} with datamanager.splits None") + + self.splits = datamanager.splits + self.num_folds: int = len(self.splits) + # Since cv might not finish in time, we take self.pipelines as None by default + self.pipelines: List[Optional[BaseEstimator]] = [None] * self.num_folds + self.task_type = STRING_TO_TASK_TYPES[datamanager.task_type] + self.num_classes = getattr(datamanager, 'num_classes', 1) + self.output_type = datamanager.output_type + + search_space_updates = self.fixed_pipeline_params.search_space_updates + self.dataset_properties = datamanager.get_dataset_properties( + get_dataset_requirements(info=datamanager.get_required_dataset_info(), + include=self.fixed_pipeline_params.include, + exclude=self.fixed_pipeline_params.exclude, + search_space_updates=search_space_updates + )) self.X_train, self.y_train = datamanager.train_tensors self.unique_train_labels = [ @@ -271,6 +323,8 @@ def _init_miscellaneous(self) -> None: if datamanager.test_tensors is not None: self.X_test, self.y_test = datamanager.test_tensors + del datamanager # Delete datamanager to release the memory + def _init_additional_metrics(self) -> None: all_supported_metrics = self.fixed_pipeline_params.all_supported_metrics metric = self.fixed_pipeline_params.metric @@ -282,59 +336,7 @@ def _init_additional_metrics(self) -> None: all_supported_metrics=all_supported_metrics) self.metrics_dict = {'additional_metrics': [m.name for m in [metric] + self.additional_metrics]} - def _init_datamanager_info( - self, - ) -> None: - """ - Initialises instance attributes that come from the datamanager. - For example, - X_train, y_train, etc. 
- """ - - datamanager: BaseDataset = self.backend.load_datamanager() - - assert datamanager.task_type is not None, \ - "Expected dataset {} to have task_type got None".format(datamanager.__class__.__name__) - self.task_type = STRING_TO_TASK_TYPES[datamanager.task_type] - self.output_type = STRING_TO_OUTPUT_TYPES[datamanager.output_type] - self.issparse = datamanager.issparse - - self.X_train, self.y_train = datamanager.train_tensors - - if datamanager.val_tensors is not None: - self.X_valid, self.y_valid = datamanager.val_tensors - else: - self.X_valid, self.y_valid = None, None - - if datamanager.test_tensors is not None: - self.X_test, self.y_test = datamanager.test_tensors - else: - self.X_test, self.y_test = None, None - - self.resampling_strategy = datamanager.resampling_strategy - - self.num_classes: Optional[int] = getattr(datamanager, "num_classes", None) - - self.dataset_properties = datamanager.get_dataset_properties( - get_dataset_requirements(info=datamanager.get_required_dataset_info(), - include=self.include, - exclude=self.exclude, - search_space_updates=self.search_space_updates - )) - self.splits = datamanager.splits - if self.splits is None: - raise AttributeError(f"create_splits on {datamanager.__class__.__name__} must be called " - f"before the instantiation of {self.__class__.__name__}") - - # delete datamanager from memory - del datamanager - - def _init_fit_dictionary( - self, - logger_port: int, - pipeline_config: Dict[str, Any], - metrics_dict: Optional[Dict[str, List[str]]] = None, - ) -> None: + def _init_fit_dictionary(self) -> None: """ Initialises the fit dictionary @@ -617,36 +619,4 @@ def _is_output_possible( if y is not None and not np.all(np.isfinite(y)): return False # Model predictions contains NaNs - Args: - prediction (np.ndarray): - The un-formatted predictions of a pipeline - Y_train (np.ndarray): - The labels from the dataset to give an intuition of the expected - predictions dimensionality - Returns: - (np.ndarray): - The formatted prediction - """ - assert self.num_classes is not None, "Called function on wrong task" - - if self.output_type == MULTICLASS and \ - prediction.shape[1] < self.num_classes: - if Y_train is None: - raise ValueError('Y_train must not be None!') - classes = list(np.unique(Y_train)) - - mapping = dict() - for class_number in range(self.num_classes): - if class_number in classes: - index = classes.index(class_number) - mapping[index] = class_number - new_predictions = np.zeros((prediction.shape[0], self.num_classes), - dtype=np.float32) - - for index in mapping: - class_index = mapping[index] - new_predictions[:, class_index] = prediction[:, index] - - return new_predictions - - return prediction + return True diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/evaluator.py similarity index 69% rename from autoPyTorch/evaluation/train_evaluator.py rename to autoPyTorch/evaluation/evaluator.py index 62c02029f..887e1548b 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/evaluator.py @@ -7,12 +7,11 @@ from smac.tae import StatusType -from autoPyTorch.automl_common.common.utils.backend import Backend -from autoPyTorch.constants import ( - CLASSIFICATION_TASKS, - MULTICLASSMULTIOUTPUT, +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + NoResamplingStrategyTypes, + check_resampling_strategy ) -from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes from autoPyTorch.evaluation.abstract_evaluator import ( 
AbstractEvaluator, EvaluationResults, @@ -21,7 +20,8 @@ from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams from autoPyTorch.utils.common import dict_repr, subsampler -__all__ = ['TrainEvaluator', 'eval_train_function'] +__all__ = ['Evaluator', 'eval_fn'] + class _CrossValidationResultsManager: def __init__(self, num_folds: int): @@ -83,15 +83,13 @@ def get_result_dict(self) -> Dict[str, Any]: ) -class TrainEvaluator(AbstractEvaluator): +class Evaluator(AbstractEvaluator): """ This class builds a pipeline using the provided configuration. A pipeline implementing the provided configuration is fitted using the datamanager object retrieved from disc, via the backend. After the pipeline is fitted, it is save to disc and the performance estimate - is communicated to the main process via a Queue. It is only compatible - with `CrossValTypes`, `HoldoutValTypes`, i.e, when the training data - is split and the validation set is used for SMBO optimisation. + is communicated to the main process via a Queue. Args: queue (Queue): @@ -101,52 +99,27 @@ class TrainEvaluator(AbstractEvaluator): Fixed parameters for a pipeline evaluator_params (EvaluatorParams): The parameters for an evaluator. + + Attributes: + train (bool): + Whether the training data is split and the validation set is used for SMBO optimisation. + cross_validation (bool): + Whether we use cross validation or not. """ - def __init__(self, backend: Backend, queue: Queue, - metric: autoPyTorchMetric, - budget: float, - configuration: Union[int, str, Configuration], - budget_type: str = None, - pipeline_config: Optional[Dict[str, Any]] = None, - seed: int = 1, - output_y_hat_optimization: bool = True, - num_run: Optional[int] = None, - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - init_params: Optional[Dict[str, Any]] = None, - logger_port: Optional[int] = None, - keep_models: Optional[bool] = None, - all_supported_metrics: bool = True, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None) -> None: - super().__init__( - backend=backend, - queue=queue, - configuration=configuration, - metric=metric, - seed=seed, - output_y_hat_optimization=output_y_hat_optimization, - num_run=num_run, - include=include, - exclude=exclude, - disable_file_output=disable_file_output, - init_params=init_params, - budget=budget, - budget_type=budget_type, - logger_port=logger_port, - all_supported_metrics=all_supported_metrics, - pipeline_config=pipeline_config, - search_space_updates=search_space_updates - ) + def __init__(self, queue: Queue, fixed_pipeline_params: FixedPipelineParams, evaluator_params: EvaluatorParams): + resampling_strategy = fixed_pipeline_params.backend.load_datamanager().resampling_strategy + self.train = not isinstance(resampling_strategy, NoResamplingStrategyTypes) + self.cross_validation = isinstance(resampling_strategy, CrossValTypes) - if not isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)): - raise ValueError( - f'resampling_strategy for TrainEvaluator must be in ' - f'(CrossValTypes, HoldoutValTypes), but got {self.resampling_strategy}' - ) + if not self.train and fixed_pipeline_params.save_y_opt: + # TODO: Add the test to cover here + # No resampling can not be used for building ensembles. 
save_y_opt=False ensures it + fixed_pipeline_params = fixed_pipeline_params._replace(save_y_opt=False) + + super().__init__(queue=queue, fixed_pipeline_params=fixed_pipeline_params, evaluator_params=evaluator_params) - self.num_folds: int = len(self.splits) - self.logger.debug("Search space updates :{}".format(self.search_space_updates)) + if self.train: + self.logger.debug("Search space updates :{}".format(self.fixed_pipeline_params.search_space_updates)) def _evaluate_on_split(self, split_id: int) -> EvaluationResults: """ @@ -175,7 +148,7 @@ def _evaluate_on_split(self, split_id: int) -> EvaluationResults: return EvaluationResults( pipeline=pipeline, - opt_loss=self._loss(labels=self.y_train[opt_split], preds=opt_pred), + opt_loss=self._loss(labels=self.y_train[opt_split] if self.train else self.y_test, preds=opt_pred), train_loss=self._loss(labels=self.y_train[train_split], preds=train_pred), opt_pred=opt_pred, valid_pred=valid_pred, @@ -201,6 +174,7 @@ def _cross_validation(self) -> EvaluationResults: results = self._evaluate_on_split(split_id) self.pipelines[split_id] = results.pipeline + assert opt_split is not None # mypy redefinition cv_results.update(split_id, results, len(train_split), len(opt_split)) self.y_opt = np.concatenate([y_opt for y_opt in Y_opt if y_opt is not None]) @@ -212,15 +186,16 @@ def evaluate_loss(self) -> None: if self.splits is None: raise ValueError(f"cannot fit pipeline {self.__class__.__name__} with datamanager.splits None") - if self.num_folds == 1: + if self.cross_validation: + results = self._cross_validation() + else: _, opt_split = self.splits[0] results = self._evaluate_on_split(split_id=0) - self.y_opt, self.pipelines[0] = self.y_train[opt_split], results.pipeline - else: - results = self._cross_validation() + self.pipelines[0] = results.pipeline + self.y_opt = self.y_train[opt_split] if self.train else self.y_test self.logger.debug( - f"In train evaluator.evaluate_loss, num_run: {self.num_run}, loss:{results.opt_loss}," + f"In evaluate_loss, num_run: {self.num_run}, loss:{results.opt_loss}," f" status: {results.status},\nadditional run info:\n{dict_repr(results.additional_run_info)}" ) self.record_evaluation(results=results) @@ -240,41 +215,23 @@ def _fit_and_evaluate_loss( kwargs = {'pipeline': pipeline, 'unique_train_labels': self.unique_train_labels[split_id]} train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs) - opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) - valid_pred = self.predict(self.X_valid, **kwargs) test_pred = self.predict(self.X_test, **kwargs) + valid_pred = self.predict(self.X_valid, **kwargs) + + # No resampling ===> evaluate on test dataset + opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) if self.train else test_pred assert train_pred is not None and opt_pred is not None # mypy check return train_pred, opt_pred, valid_pred, test_pred -# create closure for evaluating an algorithm -def eval_train_function( - backend: Backend, - queue: Queue, - metric: autoPyTorchMetric, - budget: float, - config: Optional[Configuration], - seed: int, - output_y_hat_optimization: bool, - num_run: int, - include: Optional[Dict[str, Any]], - exclude: Optional[Dict[str, Any]], - disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - pipeline_config: Optional[Dict[str, Any]] = None, - budget_type: str = None, - init_params: Optional[Dict[str, Any]] = None, - logger_port: Optional[int] = None, - all_supported_metrics: bool = True, - 
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, - instance: str = None, -) -> None: +def eval_fn(queue: Queue, fixed_pipeline_params: FixedPipelineParams, evaluator_params: EvaluatorParams) -> None: """ This closure allows the communication between the TargetAlgorithmQuery and the - pipeline trainer (TrainEvaluator). + pipeline trainer (Evaluator). Fundamentally, smac calls the TargetAlgorithmQuery.run() method, which internally - builds a TrainEvaluator. The TrainEvaluator builds a pipeline, stores the output files + builds an Evaluator. The Evaluator builds a pipeline, stores the output files to disc via the backend, and puts the performance result of the run in the queue. Args: @@ -286,7 +243,11 @@ def eval_train_function( evaluator_params (EvaluatorParams): The parameters for an evaluator. """ - evaluator = TrainEvaluator( + resampling_strategy = fixed_pipeline_params.backend.load_datamanager().resampling_strategy + check_resampling_strategy(resampling_strategy) + + # NoResamplingStrategyTypes ==> test evaluator, otherwise ==> train evaluator + evaluator = Evaluator( queue=queue, evaluator_params=evaluator_params, fixed_pipeline_params=fixed_pipeline_params diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py index 2203e35a8..bded4b701 100644 --- a/autoPyTorch/evaluation/tae.py +++ b/autoPyTorch/evaluation/tae.py @@ -24,13 +24,8 @@ from smac.tae.execute_func import AbstractTAFunc from autoPyTorch.automl_common.common.utils.backend import Backend -from autoPyTorch.datasets.resampling_strategy import ( - CrossValTypes, - HoldoutValTypes, - NoResamplingStrategyTypes -) -from autoPyTorch.evaluation.test_evaluator import eval_test_function -from autoPyTorch.evaluation.train_evaluator import eval_train_function +from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams +from autoPyTorch.evaluation.evaluator import eval_fn from autoPyTorch.evaluation.utils import ( DisableFileOutputParameters, empty_queue, @@ -65,6 +60,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> PynisherResultsType: raise NotImplementedError +# Since PynisherFunctionWrapperLikeType is not the exact type, we added Any... 
PynisherFunctionWrapperType = Union[Any, PynisherFunctionWrapperLikeType] @@ -102,7 +98,7 @@ def _get_eval_fn(cost_for_crash: float, target_algorithm: Optional[Callable] = N else: return functools.partial( run_target_algorithm_with_exception_handling, - ta=autoPyTorch.evaluation.train_evaluator.eval_fn, + ta=eval_fn, cost_for_crash=cost_for_crash, ) @@ -272,28 +268,9 @@ def __init__( all_supported_metrics: bool = True, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None ): - - self.backend = backend - - dm = self.backend.load_datamanager() - if dm.val_tensors is not None: - self._get_validation_loss = True - else: - self._get_validation_loss = False - if dm.test_tensors is not None: - self._get_test_loss = True - else: - self._get_test_loss = False - - self.resampling_strategy = dm.resampling_strategy - self.resampling_strategy_args = dm.resampling_strategy_args - - if isinstance(self.resampling_strategy, (HoldoutValTypes, CrossValTypes)): - eval_function = eval_train_function - self.output_y_hat_optimization = output_y_hat_optimization - elif isinstance(self.resampling_strategy, NoResamplingStrategyTypes): - eval_function = eval_test_function - self.output_y_hat_optimization = False + dm = backend.load_datamanager() + self._exist_val_tensor = (dm.val_tensors is not None) + self._exist_test_tensor = (dm.test_tensors is not None) self.worst_possible_result = cost_for_crash @@ -306,43 +283,48 @@ def __init__( abort_on_first_run_crash=abort_on_first_run_crash, ) + # TODO: Modify so that we receive fixed_params from outside + self.fixed_pipeline_params = FixedPipelineParams.with_default_pipeline_config( + pipeline_config=pipeline_config, + backend=backend, + seed=seed, + metric=metric, + save_y_opt=save_y_opt, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + search_space_updates=search_space_updates, + ) self.pynisher_context = pynisher_context self.initial_num_run = initial_num_run - self.metric = metric - self.include = include - self.exclude = exclude - self.disable_file_output = disable_file_output self.init_params = init_params self.logger = _get_logger(logger_port, 'TAE') self.memory_limit = int(math.ceil(memory_limit)) if memory_limit is not None else memory_limit - dm = backend.load_datamanager() - self._exist_val_tensor = (dm.val_tensors is not None) - self._exist_test_tensor = (dm.test_tensors is not None) - @property def eval_fn(self) -> Callable: # this is a target algorithm defined in AbstractTAFunc during super().__init__(ta) return self.ta # type: ignore - self.search_space_updates = search_space_updates + @property + def budget_type(self) -> str: + # budget is defined by epochs by default + return self.fixed_pipeline_params.budget_type def _check_and_get_default_budget(self) -> float: budget_type_choices = ('epochs', 'runtime') + pipeline_config = self.fixed_pipeline_params.pipeline_config budget_choices = { - budget_type: float(self.pipeline_config.get(budget_type, np.inf)) + budget_type: float(pipeline_config.get(budget_type, np.inf)) for budget_type in budget_type_choices } - # budget is defined by epochs by default - budget_type = str(self.pipeline_config.get('budget_type', 'epochs')) - if self.budget_type is not None: - budget_type = self.budget_type - - if budget_type not in budget_type_choices: - raise ValueError(f"budget type must be in {budget_type_choices}, but got {budget_type}") + if self.budget_type not in budget_type_choices: + raise 
ValueError(f"budget type must be in {budget_type_choices}, but got {self.budget_type}") else: - return budget_choices[budget_type] + return budget_choices[self.budget_type] def run_wrapper(self, run_info: RunInfo) -> Tuple[RunInfo, RunValue]: """ @@ -363,12 +345,10 @@ def run_wrapper(self, run_info: RunInfo) -> Tuple[RunInfo, RunValue]: is_intensified = (run_info.budget != 0) default_budget = self._check_and_get_default_budget() - if self.budget_type is None and is_intensified: - raise ValueError(f'budget must be 0 (=no intensification) for budget_type=None, but got {run_info.budget}') - if self.budget_type is not None and run_info.budget < 0: + if run_info.budget < 0: raise ValueError(f'budget must be greater than zero but got {run_info.budget}') - if self.budget_type is not None and not is_intensified: + if not is_intensified: # The budget will be provided in train evaluator when budget_type is None run_info = run_info._replace(budget=default_budget) diff --git a/autoPyTorch/evaluation/test_evaluator.py b/autoPyTorch/evaluation/test_evaluator.py deleted file mode 100644 index 4d5b0ae91..000000000 --- a/autoPyTorch/evaluation/test_evaluator.py +++ /dev/null @@ -1,236 +0,0 @@ -from multiprocessing.queues import Queue -from typing import Any, Dict, List, Optional, Tuple, Union - -from ConfigSpace.configuration_space import Configuration - -import numpy as np - -from smac.tae import StatusType - -from autoPyTorch.automl_common.common.utils.backend import Backend -from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes -from autoPyTorch.evaluation.abstract_evaluator import ( - AbstractEvaluator, - fit_and_suppress_warnings -) -from autoPyTorch.evaluation.utils import DisableFileOutputParameters -from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric -from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates - - -__all__ = [ - 'eval_test_function', - 'TestEvaluator' -] - - -class TestEvaluator(AbstractEvaluator): - """ - This class builds a pipeline using the provided configuration. - A pipeline implementing the provided configuration is fitted - using the datamanager object retrieved from disc, via the backend. - After the pipeline is fitted, it is save to disc and the performance estimate - is communicated to the main process via a Queue. It is only compatible - with `NoResamplingStrategyTypes`, i.e, when the training data - is not split and the test set is used for SMBO optimisation. It can not - be used for building ensembles which is ensured by having - `output_y_hat_optimisation`=False - - Attributes: - backend (Backend): - An object to interface with the disk storage. In particular, allows to - access the train and test datasets - queue (Queue): - Each worker available will instantiate an evaluator, and after completion, - it will return the evaluation result via a multiprocessing queue - metric (autoPyTorchMetric): - A scorer object that is able to evaluate how good a pipeline was fit. It - is a wrapper on top of the actual score method (a wrapper on top of scikit - lean accuracy for example) that formats the predictions accordingly. - budget: (float): - The amount of epochs/time a configuration is allowed to run. - budget_type (str): - The budget type, which can be epochs or time - pipeline_config (Optional[Dict[str, Any]]): - Defines the content of the pipeline being evaluated. For example, it - contains pipeline specific settings like logging name, or whether or not - to use tensorboard. 
- configuration (Union[int, str, Configuration]): - Determines the pipeline to be constructed. A dummy estimator is created for - integer configurations, a traditional machine learning pipeline is created - for string based configuration, and NAS is performed when a configuration - object is passed. - seed (int): - A integer that allows for reproducibility of results - output_y_hat_optimization (bool): - Whether this worker should output the target predictions, so that they are - stored on disk. Fundamentally, the resampling strategy might shuffle the - Y_train targets, so we store the split in order to re-use them for ensemble - selection. - num_run (Optional[int]): - An identifier of the current configuration being fit. This number is unique per - configuration. - include (Optional[Dict[str, Any]]): - An optional dictionary to include components of the pipeline steps. - exclude (Optional[Dict[str, Any]]): - An optional dictionary to exclude components of the pipeline steps. - disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): - Used as a list to pass more fine-grained - information on what to save. Must be a member of `DisableFileOutputParameters`. - Allowed elements in the list are: - - + `y_optimization`: - do not save the predictions for the optimization set, - which would later on be used to build an ensemble. Note that SMAC - optimizes a metric evaluated on the optimization set. - + `pipeline`: - do not save any individual pipeline files - + `pipelines`: - In case of cross validation, disables saving the joint model of the - pipelines fit on each fold. - + `y_test`: - do not save the predictions for the test set. - + `all`: - do not save any of the above. - For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. - init_params (Optional[Dict[str, Any]]): - Optional argument that is passed to each pipeline step. It is the equivalent of - kwargs for the pipeline steps. - logger_port (Optional[int]): - Logging is performed using a socket-server scheme to be robust against many - parallel entities that want to write to the same file. This integer states the - socket port for the communication channel. If None is provided, a traditional - logger is used. - all_supported_metrics (bool): - Whether all supported metric should be calculated for every configuration. 
- search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): - An object used to fine tune the hyperparameter search space of the pipeline - """ - def __init__( - self, - backend: Backend, queue: Queue, - metric: autoPyTorchMetric, - budget: float, - configuration: Union[int, str, Configuration], - budget_type: str = None, - pipeline_config: Optional[Dict[str, Any]] = None, - seed: int = 1, - output_y_hat_optimization: bool = False, - num_run: Optional[int] = None, - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - init_params: Optional[Dict[str, Any]] = None, - logger_port: Optional[int] = None, - all_supported_metrics: bool = True, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None - ) -> None: - super().__init__( - backend=backend, - queue=queue, - configuration=configuration, - metric=metric, - seed=seed, - output_y_hat_optimization=output_y_hat_optimization, - num_run=num_run, - include=include, - exclude=exclude, - disable_file_output=disable_file_output, - init_params=init_params, - budget=budget, - budget_type=budget_type, - logger_port=logger_port, - all_supported_metrics=all_supported_metrics, - pipeline_config=pipeline_config, - search_space_updates=search_space_updates - ) - - if not isinstance(self.resampling_strategy, (NoResamplingStrategyTypes)): - raise ValueError( - f'resampling_strategy for TestEvaluator must be in ' - f'NoResamplingStrategyTypes, but got {self.resampling_strategy}' - ) - - def fit_predict_and_loss(self) -> None: - - split_id = 0 - train_indices, test_indices = self.splits[split_id] - - self.pipeline = self._get_pipeline() - X = {'train_indices': train_indices, - 'val_indices': test_indices, - 'split_id': split_id, - 'num_run': self.num_run, - **self.fit_dictionary} # fit dictionary - y = None - fit_and_suppress_warnings(self.logger, self.pipeline, X, y) - train_loss, _ = self.predict_and_loss(train=True) - test_loss, test_pred = self.predict_and_loss() - self.Y_optimization = self.y_test - self.finish_up( - loss=test_loss, - train_loss=train_loss, - opt_pred=test_pred, - valid_pred=None, - test_pred=test_pred, - file_output=True, - additional_run_info=None, - status=StatusType.SUCCESS, - ) - - def predict_and_loss( - self, train: bool = False - ) -> Tuple[Dict[str, float], np.ndarray]: - labels = self.y_train if train else self.y_test - feats = self.X_train if train else self.X_test - preds = self.predict_function( - X=feats, - pipeline=self.pipeline, - Y_train=self.y_train # Need this as we need to know all the classes in train splits - ) - loss_dict = self._loss(labels, preds) - - return loss_dict, preds - - -# create closure for evaluating an algorithm -def eval_test_function( - backend: Backend, - queue: Queue, - metric: autoPyTorchMetric, - budget: float, - config: Optional[Configuration], - seed: int, - output_y_hat_optimization: bool, - num_run: int, - include: Optional[Dict[str, Any]], - exclude: Optional[Dict[str, Any]], - disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - pipeline_config: Optional[Dict[str, Any]] = None, - budget_type: str = None, - init_params: Optional[Dict[str, Any]] = None, - logger_port: Optional[int] = None, - all_supported_metrics: bool = True, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None, - instance: str = None, -) -> None: - evaluator = TestEvaluator( - backend=backend, - queue=queue, - metric=metric, 
- configuration=config, - seed=seed, - num_run=num_run, - output_y_hat_optimization=output_y_hat_optimization, - include=include, - exclude=exclude, - disable_file_output=disable_file_output, - init_params=init_params, - budget=budget, - budget_type=budget_type, - logger_port=logger_port, - all_supported_metrics=all_supported_metrics, - pipeline_config=pipeline_config, - search_space_updates=search_space_updates) - - evaluator.fit_predict_and_loss() diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py index 1a13a048d..60d319d99 100644 --- a/autoPyTorch/optimizer/smbo.py +++ b/autoPyTorch/optimizer/smbo.py @@ -1,7 +1,7 @@ import copy import json import logging.handlers -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple import ConfigSpace from ConfigSpace.configuration_space import Configuration @@ -22,7 +22,7 @@ CrossValTypes, DEFAULT_RESAMPLING_PARAMETERS, HoldoutValTypes, - NoResamplingStrategyTypes + ResamplingStrategies ) from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager from autoPyTorch.evaluation.tae import TargetAlgorithmQuery @@ -98,9 +98,7 @@ def __init__(self, pipeline_config: Dict[str, Any], start_num_run: int = 1, seed: int = 1, - resampling_strategy: Union[HoldoutValTypes, - CrossValTypes, - NoResamplingStrategyTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy: ResamplingStrategies = HoldoutValTypes.holdout_validation, resampling_strategy_args: Optional[Dict[str, Any]] = None, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 747688168..4ace9ba0d 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -3,7 +3,7 @@ import pickle import tempfile import unittest -from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_train_function +from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_fn import ConfigSpace as CS from ConfigSpace.configuration_space import Configuration @@ -40,44 +40,9 @@ HOLDOUT_NUM_SPLITS = 1 -# Test -# ==== -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', - new=dummy_eval_train_function) -@pytest.mark.parametrize('openml_id', (40981, )) -@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', - ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) - )) -def test_tabular_classification(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): - - # Get the data and check that contents of data-manager make sense - X, y = sklearn.datasets.fetch_openml( - data_id=int(openml_id), - return_X_y=True, as_frame=True - ) - X, y = X.iloc[:n_samples], y.iloc[:n_samples] - - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, y, random_state=42) - - # Search for a good configuration - estimator = TabularClassificationTask( - backend=backend, - resampling_strategy=resampling_strategy, - resampling_strategy_args=resampling_strategy_args, - seed=42, - ) - - with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='accuracy', - total_walltime_limit=40, - func_eval_time_limit_secs=10, - enable_traditional_pipeline=False, - ) +def _get_dataset(openml_id: int, n_samples: int, 
seed: int = 42, split: bool = True): + X, y = sklearn.datasets.fetch_openml(data_id=int(openml_id), return_X_y=True, as_frame=True) + X, y = X[:n_samples], y[:n_samples] if split: X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=seed) @@ -86,24 +51,27 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl return X, y -def _check_created_files(estimator): +def _check_created_files(estimator, no_resampling): tmp_dir = estimator._backend.temporary_directory loaded_datamanager = estimator._backend.load_datamanager() assert len(loaded_datamanager.train_tensors) == len(estimator.dataset.train_tensors) expected_files = [ - 'smac3-output/run_42/configspace.json', - 'smac3-output/run_42/runhistory.json', - 'smac3-output/run_42/scenario.txt', - 'smac3-output/run_42/stats.json', - 'smac3-output/run_42/train_insts.txt', - 'smac3-output/run_42/trajectory.json', - '.autoPyTorch/datamanager.pkl', - '.autoPyTorch/ensemble_read_preds.pkl', - '.autoPyTorch/start_time_42', - '.autoPyTorch/ensemble_history.json', - '.autoPyTorch/ensemble_read_losses.pkl', - '.autoPyTorch/true_targets_ensemble.npy', + fn + for fn in [ + 'smac3-output/run_42/configspace.json', + 'smac3-output/run_42/runhistory.json', + 'smac3-output/run_42/scenario.txt', + 'smac3-output/run_42/stats.json', + 'smac3-output/run_42/train_insts.txt', + 'smac3-output/run_42/trajectory.json', + '.autoPyTorch/datamanager.pkl', + '.autoPyTorch/start_time_42', + '.autoPyTorch/ensemble_read_preds.pkl' if not no_resampling else None, + '.autoPyTorch/ensemble_history.json' if not no_resampling else None, + '.autoPyTorch/ensemble_read_losses.pkl' if not no_resampling else None, + '.autoPyTorch/true_targets_ensemble.npy' if not no_resampling else None, + ] if fn is not None ] for expected_file in expected_files: assert os.path.exists(os.path.join(tmp_dir, expected_file)) @@ -111,11 +79,16 @@ def _check_created_files(estimator): def _check_internal_dataset_settings(estimator, resampling_strategy, task_type: str): assert estimator.dataset.task_type == task_type - expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation \ - else CV_NUM_SPLITS assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy - assert len(estimator.dataset.splits) == expected_num_splits + + if isinstance(resampling_strategy, NoResamplingStrategyTypes): + if resampling_strategy == HoldoutValTypes.holdout_validation: + assert len(estimator.dataset.splits) == HOLDOUT_NUM_SPLITS + elif resampling_strategy == CrossValTypes.k_fold_cross_validation: + assert len(estimator.dataset.splits) == CV_NUM_SPLITS + else: + assert len(estimator.dataset.splits) == 1 # no resampling ==> no split, i.e. 
1 def _check_smac_success(estimator, n_successful_runs: int = 1): @@ -150,6 +123,10 @@ def _check_model_file(estimator, resampling_strategy, run_key, run_key_model_run assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) + elif resampling_strategy == NoResamplingStrategyTypes.no_resampling: + model_file = os.path.join(run_key_model_run_dir, + f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model") + assert os.path.exists(model_file), model_file elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, @@ -169,8 +146,6 @@ def _check_model_file(estimator, resampling_strategy, run_key, run_key_model_run else: pytest.fail(resampling_strategy) - return model - def _check_test_prediction(estimator, X_test, y_test, run_key, run_key_model_run_dir, successful_num_run): test_prediction = os.path.join(run_key_model_run_dir, @@ -231,39 +206,6 @@ def _check_incumbent(estimator, successful_num_run): successful_num_run) assert 'train_loss' in incumbent_results - # Check that we can pickle - dump_file = os.path.join(estimator._backend.temporary_directory, 'dump.pkl') - - with open(dump_file, 'wb') as f: - pickle.dump(estimator, f) - - with open(dump_file, 'rb') as f: - restored_estimator = pickle.load(f) - restored_estimator.predict(X_test) - - # Test refit on dummy data - estimator.refit(dataset=backend.load_datamanager()) - - # Make sure that a configuration space is stored in the estimator - assert isinstance(estimator.get_search_space(), CS.ConfigurationSpace) - - -@pytest.mark.parametrize('openml_name', ("boston", )) -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', - new=dummy_eval_train_function) -@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', - ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) - )) -def test_tabular_regression(openml_name, resampling_strategy, backend, resampling_strategy_args, n_samples): - - # Get the data and check that contents of data-manager make sense - X, y = sklearn.datasets.fetch_openml( - openml_name, - return_X_y=True, - as_frame=True - ) - X, y = X.iloc[:n_samples], y.iloc[:n_samples] def _get_estimator( backend, @@ -280,21 +222,27 @@ def _get_estimator( **kwargs ): + is_no_resample = isinstance(resampling_strategy, NoResamplingStrategyTypes) + # No resampling strategy must have ensemble_size == 0 + cls_kwargs = {key: 0 for key in ['ensemble_size'] if is_no_resample} # Search for a good configuration estimator = task_class( backend=backend, resampling_strategy=resampling_strategy, resampling_strategy_args=resampling_strategy_args, seed=42, + **cls_kwargs ) + # train size: 225, test size: 75 ==> 300 / 225 = 1.3333... 
+ mul_factor = 1.35 if is_no_resample else 1.0 # increase time for no resample with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): estimator.search( X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric=metric, - total_walltime_limit=total_walltime_limit, - func_eval_time_limit_secs=func_eval_time_limit_secs, + total_walltime_limit=total_walltime_limit * mul_factor, + func_eval_time_limit_secs=func_eval_time_limit_secs * mul_factor, enable_traditional_pipeline=False, **kwargs ) @@ -303,15 +251,24 @@ def _get_estimator( def _check_tabular_task(estimator, X_test, y_test, task_type, resampling_strategy, n_successful_runs): + no_resampling = isinstance(resampling_strategy, NoResamplingStrategyTypes) + _check_internal_dataset_settings(estimator, resampling_strategy, task_type=task_type) - _check_created_files(estimator) + _check_created_files(estimator, no_resampling) run_key_model_run_dir, run_key, successful_num_run = _check_smac_success(estimator, n_successful_runs=n_successful_runs) _check_model_file(estimator, resampling_strategy, run_key, run_key_model_run_dir, successful_num_run) _check_test_prediction(estimator, X_test, y_test, run_key, run_key_model_run_dir, successful_num_run) - _check_ensemble_prediction(estimator, run_key, run_key_model_run_dir, successful_num_run) + + if not no_resampling: + _check_ensemble_prediction(estimator, run_key, run_key_model_run_dir, successful_num_run) + _check_incumbent(estimator, successful_num_run) + if no_resampling: + # no ensemble for no resampling, so early-return + return + # Test refit on dummy data # This process yields a mysterious bug after _check_picklable # However, we can process it in the _check_picklable function. @@ -329,14 +286,16 @@ def _check_tabular_task(estimator, X_test, y_test, task_type, resampling_strateg # Test # ==== -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_fn', +@unittest.mock.patch('autoPyTorch.evaluation.tae.eval_fn', new=dummy_eval_fn) @pytest.mark.parametrize('openml_id', (40981, )) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}), + (NoResamplingStrategyTypes.no_resampling, None) )) def test_tabular_classification(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): + """NOTE: Check DummyEvaluator if something wrong""" X_train, X_test, y_train, y_test = _get_dataset(openml_id, n_samples, seed=42) estimator = _get_estimator( @@ -352,13 +311,15 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl @pytest.mark.parametrize('openml_id', (531, )) -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_fn', +@unittest.mock.patch('autoPyTorch.evaluation.tae.eval_fn', new=dummy_eval_fn) @pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', ((HoldoutValTypes.holdout_validation, None), - (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}), + (NoResamplingStrategyTypes.no_resampling, None) )) def test_tabular_regression(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): + """NOTE: Check DummyEvaluator if something wrong""" X, y = _get_dataset(openml_id, n_samples, split=False) # normalize values @@ -449,7 +410,7 @@ def 
test_do_dummy_prediction(dask_client, fit_dictionary_tabular): estimator._all_supported_metrics = False with pytest.raises(ValueError, match=r".*Dummy prediction failed with run state.*"): - with unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function') as dummy: + with unittest.mock.patch('autoPyTorch.evaluation.tae.eval_fn') as dummy: dummy.side_effect = MemoryError estimator._do_dummy_prediction() @@ -475,8 +436,8 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular): del estimator -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', - new=dummy_eval_train_function) +@unittest.mock.patch('autoPyTorch.evaluation.tae.eval_fn', + new=dummy_eval_fn) @pytest.mark.parametrize('openml_id', (40981, )) def test_portfolio_selection(openml_id, backend, n_samples): @@ -501,8 +462,8 @@ def test_portfolio_selection(openml_id, backend, n_samples): assert any(successful_config in portfolio_configs for successful_config in successful_configs) -@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_train_function', - new=dummy_eval_train_function) +@unittest.mock.patch('autoPyTorch.evaluation.tae.eval_fn', + new=dummy_eval_fn) @pytest.mark.parametrize('openml_id', (40981, )) def test_portfolio_selection_failure(openml_id, backend, n_samples): @@ -757,117 +718,6 @@ def test_pipeline_fit_error( assert pipeline is None -@pytest.mark.parametrize('openml_id', (40981, )) -def test_tabular_classification_test_evaluator(openml_id, backend, n_samples): - - # Get the data and check that contents of data-manager make sense - X, y = sklearn.datasets.fetch_openml( - data_id=int(openml_id), - return_X_y=True, as_frame=True - ) - X, y = X.iloc[:n_samples], y.iloc[:n_samples] - - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, y, random_state=42) - - # Search for a good configuration - estimator = TabularClassificationTask( - backend=backend, - resampling_strategy=NoResamplingStrategyTypes.no_resampling, - seed=42, - ensemble_size=0 - ) - - with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='accuracy', - total_walltime_limit=50, - func_eval_time_limit_secs=20, - enable_traditional_pipeline=False, - ) - - # Internal dataset has expected settings - assert estimator.dataset.task_type == 'tabular_classification' - - assert estimator.resampling_strategy == NoResamplingStrategyTypes.no_resampling - assert estimator.dataset.resampling_strategy == NoResamplingStrategyTypes.no_resampling - # Check for the created files - tmp_dir = estimator._backend.temporary_directory - loaded_datamanager = estimator._backend.load_datamanager() - assert len(loaded_datamanager.train_tensors) == len(estimator.dataset.train_tensors) - - expected_files = [ - 'smac3-output/run_42/configspace.json', - 'smac3-output/run_42/runhistory.json', - 'smac3-output/run_42/scenario.txt', - 'smac3-output/run_42/stats.json', - 'smac3-output/run_42/train_insts.txt', - 'smac3-output/run_42/trajectory.json', - '.autoPyTorch/datamanager.pkl', - '.autoPyTorch/start_time_42', - ] - for expected_file in expected_files: - assert os.path.exists(os.path.join(tmp_dir, expected_file)), "{}/{}/{}".format( - tmp_dir, - [data for data in pathlib.Path(tmp_dir).glob('*')], - expected_file, - ) - - # Check that smac was able to find proper models - succesful_runs = [run_value.status for run_value in 
estimator.run_history.data.values( - ) if 'SUCCESS' in str(run_value.status)] - assert len(succesful_runs) > 1, [(k, v) for k, v in estimator.run_history.data.items()] - - # Search for an existing run key in disc. A individual model might have - # a timeout and hence was not written to disc - successful_num_run = None - SUCCESS = False - for i, (run_key, value) in enumerate(estimator.run_history.data.items()): - if 'SUCCESS' in str(value.status): - run_key_model_run_dir = estimator._backend.get_numrun_directory( - estimator.seed, run_key.config_id + 1, run_key.budget) - successful_num_run = run_key.config_id + 1 - if os.path.exists(run_key_model_run_dir): - # Runkey config id is different from the num_run - # more specifically num_run = config_id + 1(dummy) - SUCCESS = True - break - - assert SUCCESS, f"Successful run was not properly saved for num_run: {successful_num_run}" - - model_file = os.path.join(run_key_model_run_dir, - f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model") - assert os.path.exists(model_file), model_file - - # Make sure that predictions on the test data are printed and make sense - test_prediction = os.path.join(run_key_model_run_dir, - estimator._backend.get_prediction_filename( - 'test', estimator.seed, successful_num_run, - run_key.budget)) - assert os.path.exists(test_prediction), test_prediction - assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] - - y_pred = estimator.predict(X_test) - assert np.shape(y_pred)[0] == np.shape(X_test)[0] - - # Make sure that predict proba has the expected shape - probabilites = estimator.predict_proba(X_test) - assert np.shape(probabilites) == (np.shape(X_test)[0], 2) - - score = estimator.score(y_pred, y_test) - assert 'accuracy' in score - - # check incumbent config and results - incumbent_config, incumbent_results = estimator.get_incumbent_results() - assert isinstance(incumbent_config, Configuration) - assert isinstance(incumbent_results, dict) - assert 'opt_loss' in incumbent_results, "run history: {}, successful_num_run: {}".format(estimator.run_history.data, - successful_num_run) - assert 'train_loss' in incumbent_results - - @pytest.mark.parametrize("ans,task_class", ( ("continuous", TabularRegressionTask), ("multiclass", TabularClassificationTask)) diff --git a/test/test_api/utils.py b/test/test_api/utils.py index 0e757015d..45b5af562 100644 --- a/test/test_api/utils.py +++ b/test/test_api/utils.py @@ -4,11 +4,11 @@ from autoPyTorch.constants import REGRESSION_TASKS from autoPyTorch.evaluation.abstract_evaluator import fit_pipeline +from autoPyTorch.evaluation.evaluator import Evaluator from autoPyTorch.evaluation.pipeline_class_collection import ( DummyClassificationPipeline, DummyRegressionPipeline ) -from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.pipeline.traditional_tabular_classification import TraditionalTabularClassificationPipeline from autoPyTorch.utils.common import subsampler @@ -28,7 +28,7 @@ def dummy_traditional_classification(self, time_left: int, func_eval_time_limit_ # ======== # Fixtures # ======== -class DummyTrainEvaluator(TrainEvaluator): +class DummyEvaluator(Evaluator): def _get_pipeline(self): if self.task_type in REGRESSION_TASKS: pipeline = DummyRegressionPipeline(config=1) @@ -44,37 +44,21 @@ def _fit_and_evaluate_loss(self, pipeline, split_id, train_indices, opt_indices) self.logger.info("Model fitted, now predicting") kwargs = {'pipeline': pipeline, 'unique_train_labels': self.unique_train_labels[split_id]} + 
train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs) - opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) - valid_pred = self.predict(self.X_valid, **kwargs) test_pred = self.predict(self.X_test, **kwargs) + valid_pred = self.predict(self.X_valid, **kwargs) + + # No resampling ===> evaluate on test dataset + opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) if self.train else test_pred assert train_pred is not None and opt_pred is not None # mypy check return train_pred, opt_pred, valid_pred, test_pred # create closure for evaluating an algorithm -def dummy_eval_train_function( - backend, - queue, - metric, - budget: float, - config, - seed: int, - output_y_hat_optimization: bool, - num_run: int, - include, - exclude, - disable_file_output, - pipeline_config=None, - budget_type=None, - init_params=None, - logger_port=None, - all_supported_metrics=True, - search_space_updates=None, - instance: str = None, -) -> None: - evaluator = DummyTrainEvaluator( +def dummy_eval_fn(queue, fixed_pipeline_params, evaluator_params): + evaluator = DummyEvaluator( queue=queue, fixed_pipeline_params=fixed_pipeline_params, evaluator_params=evaluator_params diff --git a/test/test_datasets/test_resampling_strategies.py b/test/test_datasets/test_resampling_strategies.py index 7f14275a3..473f17182 100644 --- a/test/test_datasets/test_resampling_strategies.py +++ b/test/test_datasets/test_resampling_strategies.py @@ -1,6 +1,15 @@ import numpy as np -from autoPyTorch.datasets.resampling_strategy import CrossValFuncs, HoldOutFuncs +import pytest + +from autoPyTorch.datasets.resampling_strategy import ( + CrossValFuncs, + CrossValTypes, + HoldOutFuncs, + HoldoutValTypes, + NoResamplingStrategyTypes, + check_resampling_strategy +) def test_holdoutfuncs(): @@ -40,3 +49,12 @@ def test_crossvalfuncs(): splits = split.stratified_k_fold_cross_validation(0, 10, X, stratify=y) assert len(splits) == 10 assert all([0 in y[s[1]] for s in splits]) + + +def test_check_resampling_strategy(): + for rs in (CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes): + for rs_func in rs: + check_resampling_strategy(rs_func) + + with pytest.raises(ValueError): + check_resampling_strategy(None) diff --git a/test/test_evaluation/test_evaluators.py b/test/test_evaluation/test_evaluators.py index aae259e08..2371522d8 100644 --- a/test/test_evaluation/test_evaluators.py +++ b/test/test_evaluation/test_evaluators.py @@ -18,8 +18,11 @@ from autoPyTorch.automl_common.common.utils.backend import create from autoPyTorch.datasets.resampling_strategy import CrossValTypes, NoResamplingStrategyTypes -from autoPyTorch.evaluation.test_evaluator import TestEvaluator -from autoPyTorch.evaluation.train_evaluator import TrainEvaluator +from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams +from autoPyTorch.evaluation.evaluator import ( + Evaluator, + _CrossValidationResultsManager, +) from autoPyTorch.evaluation.utils import read_queue from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy @@ -98,7 +101,7 @@ def test_merge_predictions(self): assert np.allclose(ans, cv_results._merge_predictions(preds)) -class TestTrainEvaluator(BaseEvaluatorTest, unittest.TestCase): +class TestEvaluator(BaseEvaluatorTest, unittest.TestCase): _multiprocess_can_split_ = True def setUp(self): @@ -140,26 +143,7 @@ def tearDown(self): if os.path.exists(self.ev_path): 
shutil.rmtree(self.ev_path) - def test_evaluate_loss(self): - D = get_binary_classification_datamanager() - backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') - backend_api.load_datamanager = lambda: D - fixed_params_dict = self.fixed_params._asdict() - fixed_params_dict.update(backend=backend_api) - evaluator = TrainEvaluator( - queue=multiprocessing.Queue(), - fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), - evaluator_params=self.eval_params - ) - evaluator.splits = None - with pytest.raises(ValueError): - evaluator.evaluate_loss() - - @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') - def test_holdout(self, pipeline_mock): - pipeline_mock.fit_dictionary = {'budget_type': 'epochs', 'epochs': 50} - # Binary iris, contains 69 train samples, 31 test samples - D = get_binary_classification_datamanager() + def _get_evaluator(self, pipeline_mock, data): pipeline_mock.predict_proba.side_effect = \ lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) pipeline_mock.side_effect = lambda **kwargs: pipeline_mock @@ -167,11 +151,11 @@ def test_holdout(self, pipeline_mock): _queue = multiprocessing.Queue() backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') - backend_api.load_datamanager = lambda: D + backend_api.load_datamanager = lambda: data fixed_params_dict = self.fixed_params._asdict() fixed_params_dict.update(backend=backend_api) - evaluator = TrainEvaluator( + evaluator = Evaluator( queue=_queue, fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), evaluator_params=self.eval_params @@ -181,58 +165,74 @@ def test_holdout(self, pipeline_mock): evaluator.evaluate_loss() + return evaluator + + def _check_results(self, evaluator, ans): rval = read_queue(evaluator.queue) self.assertEqual(len(rval), 1) result = rval[0]['loss'] self.assertEqual(len(rval[0]), 3) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) - + self.assertEqual(result, ans) self.assertEqual(evaluator._save_to_backend.call_count, 1) - self.assertEqual(result, 0.5652173913043479) - self.assertEqual(pipeline_mock.fit.call_count, 1) - # 3 calls because of train, holdout and test set - self.assertEqual(pipeline_mock.predict_proba.call_count, 3) - call_args = evaluator._save_to_backend.call_args - self.assertEqual(call_args[0][0].shape[0], len(D.splits[0][1])) - self.assertIsNone(call_args[0][1]) - self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0]) - self.assertEqual(evaluator.pipelines[0].fit.call_count, 1) - @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') - def test_cv(self, pipeline_mock): - D = get_binary_classification_datamanager(resampling_strategy=CrossValTypes.k_fold_cross_validation) + def _check_whether_save_y_opt_is_correct(self, resampling_strategy, ans): + backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') + D = get_binary_classification_datamanager(resampling_strategy) + backend_api.load_datamanager = lambda: D + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update(backend=backend_api, save_y_opt=True) + evaluator = Evaluator( + queue=multiprocessing.Queue(), + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=self.eval_params + ) + assert evaluator.fixed_pipeline_params.save_y_opt == ans - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) - pipeline_mock.side_effect = lambda 
**kwargs: pipeline_mock - pipeline_mock.get_additional_run_info.return_value = None + def test_whether_save_y_opt_is_correct_for_no_resampling(self): + self._check_whether_save_y_opt_is_correct(NoResamplingStrategyTypes.no_resampling, False) - _queue = multiprocessing.Queue() + def test_whether_save_y_opt_is_correct_for_resampling(self): + self._check_whether_save_y_opt_is_correct(CrossValTypes.k_fold_cross_validation, True) + + def test_evaluate_loss(self): + D = get_binary_classification_datamanager() backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') backend_api.load_datamanager = lambda: D - fixed_params_dict = self.fixed_params._asdict() fixed_params_dict.update(backend=backend_api) - evaluator = TrainEvaluator( - queue=_queue, + evaluator = Evaluator( + queue=multiprocessing.Queue(), fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), evaluator_params=self.eval_params ) - evaluator._save_to_backend = unittest.mock.Mock(spec=evaluator._save_to_backend) - evaluator._save_to_backend.return_value = True + evaluator.splits = None + with pytest.raises(ValueError): + evaluator.evaluate_loss() - evaluator.evaluate_loss() + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') + def test_holdout(self, pipeline_mock): + D = get_binary_classification_datamanager() + evaluator = self._get_evaluator(pipeline_mock, D) + self._check_results(evaluator, ans=0.5652173913043479) - rval = read_queue(evaluator.queue) - self.assertEqual(len(rval), 1) - result = rval[0]['loss'] - self.assertEqual(len(rval[0]), 3) - self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) + self.assertEqual(pipeline_mock.fit.call_count, 1) + # 3 calls because of train, holdout and test set + self.assertEqual(pipeline_mock.predict_proba.call_count, 3) + call_args = evaluator._save_to_backend.call_args + self.assertEqual(call_args[0][0].shape[0], len(D.splits[0][1])) + self.assertIsNone(call_args[0][1]) + self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0]) + self.assertEqual(evaluator.pipelines[0].fit.call_count, 1) + + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') + def test_cv(self, pipeline_mock): + D = get_binary_classification_datamanager(resampling_strategy=CrossValTypes.k_fold_cross_validation) + evaluator = self._get_evaluator(pipeline_mock, D) + self._check_results(evaluator, ans=0.463768115942029) - self.assertEqual(evaluator._save_to_backend.call_count, 1) - self.assertEqual(result, 0.463768115942029) self.assertEqual(pipeline_mock.fit.call_count, 5) - # 9 calls because of the training, holdout and + # 15 calls because of the training, holdout and # test set (3 sets x 5 folds = 15) self.assertEqual(pipeline_mock.predict_proba.call_count, 15) call_args = evaluator._save_to_backend.call_args @@ -246,68 +246,117 @@ def test_cv(self, pipeline_mock): self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0]) - @unittest.mock.patch.object(TrainEvaluator, '_loss') + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') + def test_no_resampling(self, pipeline_mock): + D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling) + evaluator = self._get_evaluator(pipeline_mock, D) + self._check_results(evaluator, ans=0.5806451612903225) + + self.assertEqual(pipeline_mock.fit.call_count, 1) + # 2 calls because of train and test set + self.assertEqual(pipeline_mock.predict_proba.call_count, 2) + 
call_args = evaluator._save_to_backend.call_args + self.assertIsNone(D.splits[0][1]) + self.assertIsNone(call_args[0][1]) + self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0]) + self.assertEqual(evaluator.pipelines[0].fit.call_count, 1) + + @unittest.mock.patch.object(Evaluator, '_loss') def test_save_to_backend(self, loss_mock): - D = get_regression_datamanager() - D.name = 'test' + call_counter = 0 + no_resample_counter = 0 + for rs in [None, NoResamplingStrategyTypes.no_resampling]: + no_resampling = isinstance(rs, NoResamplingStrategyTypes) + D = get_regression_datamanager() if rs is None else get_regression_datamanager(rs) + D.name = 'test' + self.backend_mock.load_datamanager.return_value = D + _queue = multiprocessing.Queue() + loss_mock.return_value = None + + evaluator = Evaluator( + queue=_queue, + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params + ) + evaluator.y_opt = D.train_tensors[1] + key_ans = {'seed', 'idx', 'budget', 'model', 'cv_model', + 'ensemble_predictions', 'valid_predictions', 'test_predictions'} + + for pl in [['model'], ['model2', 'model2']]: + call_counter += 1 + no_resample_counter += no_resampling + self.backend_mock.get_model_dir.return_value = True + evaluator.pipelines = pl + self.assertTrue(evaluator._save_to_backend(D.train_tensors[1], None, D.test_tensors[1])) + call_list = self.backend_mock.save_numrun_to_dir.call_args_list[-1][1] + + self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, call_counter - no_resample_counter) + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, call_counter) + self.assertEqual(call_list.keys(), key_ans) + self.assertIsNotNone(call_list['model']) + if len(pl) > 1: # ==> cross validation + # self.assertIsNotNone(call_list['cv_model']) + # TODO: Reflect the ravin's opinion + pass + else: # holdout ==> single thus no cv_model + self.assertIsNone(call_list['cv_model']) + + # Check for not containing NaNs - that the models don't predict nonsense + # for unseen data + D.train_tensors[1][0] = np.NaN + self.assertFalse(evaluator._save_to_backend(D.train_tensors[1], None, D.test_tensors[1])) + + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') + def test_predict_proba_binary_classification(self, mock): + D = get_binary_classification_datamanager() self.backend_mock.load_datamanager.return_value = D + mock.predict_proba.side_effect = lambda y, batch_size=None: np.array( + [[0.1, 0.9]] * y.shape[0] + ) + mock.side_effect = lambda **kwargs: mock + _queue = multiprocessing.Queue() - loss_mock.return_value = None - evaluator = TrainEvaluator( + evaluator = Evaluator( queue=_queue, fixed_pipeline_params=self.fixed_params, evaluator_params=self.eval_params ) - evaluator.y_opt = D.train_tensors[1] - key_ans = {'seed', 'idx', 'budget', 'model', 'cv_model', - 'ensemble_predictions', 'valid_predictions', 'test_predictions'} - - for cnt, pl in enumerate([['model'], ['model2', 'model2']], start=1): - self.backend_mock.get_model_dir.return_value = True - evaluator.pipelines = pl - self.assertTrue(evaluator._save_to_backend(D.train_tensors[1], None, D.test_tensors[1])) - call_list = self.backend_mock.save_numrun_to_dir.call_args_list[-1][1] - - self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, cnt) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, cnt) - self.assertEqual(call_list.keys(), key_ans) - self.assertIsNotNone(call_list['model']) - if len(pl) > 1: # ==> cross validation - # 
self.assertIsNotNone(call_list['cv_model']) - # TODO: Reflect the ravin's opinion - pass - else: # holdout ==> single thus no cv_model - self.assertIsNone(call_list['cv_model']) - - # Check for not containing NaNs - that the models don't predict nonsense - # for unseen data - D.train_tensors[1][0] = np.NaN - self.assertFalse(evaluator._save_to_backend(D.train_tensors[1], None, D.test_tensors[1])) + + evaluator.evaluate_loss() + Y_optimization_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][1][ + 'ensemble_predictions'] + + for i in range(7): + self.assertEqual(0.9, Y_optimization_pred[i][1]) @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') - def test_predict_proba_binary_classification(self, mock): - D = get_binary_classification_datamanager() + def test_predict_proba_binary_classification_no_resampling(self, mock): + D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling) self.backend_mock.load_datamanager.return_value = D mock.predict_proba.side_effect = lambda y, batch_size=None: np.array( [[0.1, 0.9]] * y.shape[0] ) mock.side_effect = lambda **kwargs: mock + backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') + backend_api.load_datamanager = lambda: D + + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update(backend=backend_api) _queue = multiprocessing.Queue() - evaluator = TrainEvaluator( + evaluator = Evaluator( queue=_queue, fixed_pipeline_params=self.fixed_params, evaluator_params=self.eval_params ) - evaluator.evaluate_loss() - Y_optimization_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][1][ + Y_test_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][-1][ 'ensemble_predictions'] for i in range(7): - self.assertEqual(0.9, Y_optimization_pred[i][1]) + self.assertEqual(0.9, Y_test_pred[i][1]) def test_get_results(self): _queue = multiprocessing.Queue() @@ -334,7 +383,7 @@ def test_additional_metrics_during_training(self, pipeline_mock): fixed_params_dict = self.fixed_params._asdict() fixed_params_dict.update(backend=backend_api) - evaluator = TrainEvaluator( + evaluator = Evaluator( queue=_queue, fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), evaluator_params=self.eval_params @@ -350,155 +399,3 @@ def test_additional_metrics_during_training(self, pipeline_mock): self.assertIn('additional_run_info', result) self.assertIn('opt_loss', result['additional_run_info']) self.assertGreater(len(result['additional_run_info']['opt_loss'].keys()), 1) - - -class TestTestEvaluator(BaseEvaluatorTest, unittest.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - """ - Creates a backend mock - """ - tmp_dir_name = self.id() - self.ev_path = os.path.join(this_directory, '.tmp_evaluations', tmp_dir_name) - if os.path.exists(self.ev_path): - shutil.rmtree(self.ev_path) - os.makedirs(self.ev_path, exist_ok=False) - dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] - dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)] - dummy_cv_model_files = [os.path.join(self.ev_path, str(n)) for n in range(200, 300)] - backend_mock = unittest.mock.Mock() - backend_mock.get_model_dir.return_value = self.ev_path - backend_mock.get_cv_model_dir.return_value = self.ev_path - backend_mock.get_model_path.side_effect = dummy_model_files - backend_mock.get_cv_model_path.side_effect = dummy_cv_model_files - backend_mock.get_prediction_output_path.side_effect = dummy_pred_files - 
backend_mock.temporary_directory = self.ev_path - self.backend_mock = backend_mock - - self.tmp_dir = os.path.join(self.ev_path, 'tmp_dir') - self.output_dir = os.path.join(self.ev_path, 'out_dir') - - def tearDown(self): - if os.path.exists(self.ev_path): - shutil.rmtree(self.ev_path) - - @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') - def test_no_resampling(self, pipeline_mock): - # Binary iris, contains 69 train samples, 31 test samples - D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling) - pipeline_mock.predict_proba.side_effect = \ - lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) - pipeline_mock.side_effect = lambda **kwargs: pipeline_mock - pipeline_mock.get_additional_run_info.return_value = None - pipeline_mock.get_default_pipeline_options.return_value = {'budget_type': 'epochs', 'epochs': 10} - - configuration = unittest.mock.Mock(spec=Configuration) - backend_api = create(self.tmp_dir, self.output_dir, 'autoPyTorch') - backend_api.load_datamanager = lambda: D - queue_ = multiprocessing.Queue() - - evaluator = TestEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0) - evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) - evaluator.file_output.return_value = (None, {}) - - evaluator.fit_predict_and_loss() - - rval = read_queue(evaluator.queue) - self.assertEqual(len(rval), 1) - result = rval[0]['loss'] - self.assertEqual(len(rval[0]), 3) - self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) - - self.assertEqual(evaluator.file_output.call_count, 1) - self.assertEqual(result, 0.5806451612903225) - self.assertEqual(pipeline_mock.fit.call_count, 1) - # 2 calls because of train and test set - self.assertEqual(pipeline_mock.predict_proba.call_count, 2) - self.assertEqual(evaluator.file_output.call_count, 1) - # Should be none as no val preds are mentioned - self.assertIsNone(evaluator.file_output.call_args[0][1]) - # Number of y_test_preds and Y_test should be the same - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], - D.test_tensors[1].shape[0]) - self.assertEqual(evaluator.pipeline.fit.call_count, 1) - - @unittest.mock.patch.object(TestEvaluator, '_loss') - def test_file_output(self, loss_mock): - - D = get_regression_datamanager(NoResamplingStrategyTypes.no_resampling) - D.name = 'test' - self.backend_mock.load_datamanager.return_value = D - configuration = unittest.mock.Mock(spec=Configuration) - queue_ = multiprocessing.Queue() - loss_mock.return_value = None - - evaluator = TestEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0) - - self.backend_mock.get_model_dir.return_value = True - evaluator.pipeline = 'model' - evaluator.Y_optimization = D.train_tensors[1] - rval = evaluator.file_output( - D.train_tensors[1], - None, - D.test_tensors[1], - ) - - self.assertEqual(rval, (None, {})) - # These targets are not saved as Fit evaluator is not used to make an ensemble - self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 0) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), - {'seed', 'idx', 'budget', 'model', 'cv_model', - 'ensemble_predictions', 'valid_predictions', 'test_predictions'}) - self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) - 
-        self.assertIsNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model'])
-
-        # Check for not containing NaNs - that the models don't predict nonsense
-        # for unseen data
-        D.test_tensors[1][0] = np.NaN
-        rval = evaluator.file_output(
-            D.train_tensors[1],
-            None,
-            D.test_tensors[1],
-        )
-        self.assertEqual(
-            rval,
-            (
-                1.0,
-                {
-                    'error':
-                        'Model predictions for test set contains NaNs.'
-                },
-            )
-        )
-
-    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
-    def test_predict_proba_binary_classification(self, mock):
-        D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling)
-        self.backend_mock.load_datamanager.return_value = D
-        mock.predict_proba.side_effect = lambda y, batch_size=None: np.array(
-            [[0.1, 0.9]] * y.shape[0]
-        )
-        mock.side_effect = lambda **kwargs: mock
-        mock.get_default_pipeline_options.return_value = {'budget_type': 'epochs', 'epochs': 10}
-        configuration = unittest.mock.Mock(spec=Configuration)
-        queue_ = multiprocessing.Queue()
-
-        evaluator = TestEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0)
-
-        evaluator.fit_predict_and_loss()
-        Y_test_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][-1][
-            'ensemble_predictions']
-
-        for i in range(7):
-            self.assertEqual(0.9, Y_test_pred[i][1])
-
-    def test_get_results(self):
-        queue_ = multiprocessing.Queue()
-        for i in range(5):
-            queue_.put((i * 1, 1 - (i * 0.2), 0, "", StatusType.SUCCESS))
-        result = read_queue(queue_)
-        self.assertEqual(len(result), 5)
-        self.assertEqual(result[0][0], 0)
-        self.assertAlmostEqual(result[0][1], 1.0)
diff --git a/test/test_evaluation/test_tae.py b/test/test_evaluation/test_tae.py
index 351e7b633..eaf505ad7 100644
--- a/test/test_evaluation/test_tae.py
+++ b/test/test_evaluation/test_tae.py
@@ -90,6 +90,7 @@ def _create_taq():
         backend=unittest.mock.Mock(),
         seed=1,
         metric=accuracy,
+        multi_objectives=["cost"],
         cost_for_crash=accuracy._cost_of_crash,
         abort_on_first_run_crash=True,
         pynisher_context=unittest.mock.Mock()
@@ -102,7 +103,16 @@ def test_check_run_info(self):
         run_info = unittest.mock.Mock()
         run_info.budget = -1
         with pytest.raises(ValueError):
-            taq._check_run_info(run_info)
+            taq.run_wrapper(run_info)
+
+    def test_check_and_get_default_budget(self):
+        taq = _create_taq()
+        budget = taq._check_and_get_default_budget()
+        assert isinstance(budget, float)
+
+        taq.fixed_pipeline_params = taq.fixed_pipeline_params._replace(budget_type='test')
+        with pytest.raises(ValueError):
+            taq._check_and_get_default_budget()
 
     def test_cutoff_update_in_run_wrapper(self):
         taq = _create_taq()
diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py
index adfe3241b..213671bb8 100644
--- a/test/test_pipeline/test_tabular_classification.py
+++ b/test/test_pipeline/test_tabular_classification.py
@@ -519,3 +519,16 @@ def test_train_pipeline_with_runtime_max_reached(fit_dictionary_tabular_dummy):
         patch.is_max_time_reached.return_value = True
         with pytest.raises(RuntimeError):
             pipeline.fit(fit_dictionary_tabular_dummy)
+
+
+def test_get_pipeline_representation():
+    pipeline = TabularClassificationPipeline(
+        dataset_properties={
+            'numerical_columns': [],
+            'categorical_columns': [],
+            'task_type': 'tabular_classification'
+        }
+    )
+    repr = pipeline.get_pipeline_representation()
+    assert isinstance(repr, dict)
+    assert all(word in repr for word in ['Preprocessing', 'Estimator'])
diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py
index e21eb961f..8ef8d26bd 100644
--- a/test/test_pipeline/test_tabular_regression.py
+++ b/test/test_pipeline/test_tabular_regression.py
@@ -322,8 +322,8 @@ def test_pipeline_score(fit_dictionary_tabular_dummy):
 def test_get_pipeline_representation():
     pipeline = TabularRegressionPipeline(
         dataset_properties={
-            'numerical_columns': None,
-            'categorical_columns': None,
+            'numerical_columns': [],
+            'categorical_columns': [],
             'task_type': 'tabular_classification'
         }
     )
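For reference, a minimal usage sketch of the check_resampling_strategy helper exercised by the new test_check_resampling_strategy above. The validate_resampling_strategy wrapper below is hypothetical and not part of this patch; it only illustrates the intended fail-fast behaviour.

from autoPyTorch.datasets.resampling_strategy import (
    CrossValTypes,
    HoldoutValTypes,
    NoResamplingStrategyTypes,
    check_resampling_strategy,
)


def validate_resampling_strategy(resampling_strategy):
    """Hypothetical wrapper: reject unsupported strategies before any dataset is built."""
    # check_resampling_strategy raises ValueError for anything that is not a
    # CrossValTypes, HoldoutValTypes or NoResamplingStrategyTypes member.
    check_resampling_strategy(resampling_strategy)
    return resampling_strategy


# Every member of the three supported enums passes the check ...
validate_resampling_strategy(HoldoutValTypes.holdout_validation)
validate_resampling_strategy(CrossValTypes.k_fold_cross_validation)
validate_resampling_strategy(NoResamplingStrategyTypes.no_resampling)
# ... while e.g. None raises ValueError, as asserted in the new test.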
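The evaluator tests above all follow the same pattern: build a single Evaluator from a queue plus FixedPipelineParams and EvaluatorParams, run evaluate_loss(), and read the result back from the queue. A minimal sketch of that shape is given below, assuming fixed_pipeline_params and evaluator_params are constructed elsewhere (as the test fixtures do); the eval_fn name is illustrative only.

from autoPyTorch.evaluation.evaluator import Evaluator
from autoPyTorch.evaluation.utils import read_queue


def eval_fn(queue, fixed_pipeline_params, evaluator_params):
    # One Evaluator now covers holdout, cross-validation and no-resampling runs;
    # without resampling, the "optimization" predictions fall back to the test split.
    evaluator = Evaluator(
        queue=queue,
        fixed_pipeline_params=fixed_pipeline_params,
        evaluator_params=evaluator_params,
    )
    evaluator.evaluate_loss()


# Callers then drain the queue exactly as the tests do:
#   rval = read_queue(queue)   # one entry per evaluation
#   loss = rval[0]['loss']     # alongside status and additional_run_info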