From 1e82b2127a5ab9e8f3328cbba37717bcd85405cc Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Mon, 10 May 2021 12:04:09 +0900
Subject: [PATCH] [back] Get back to the original name to make the PR review procedure easier

---
 autoPyTorch/api/base_task.py                  |  6 ++--
 autoPyTorch/api/tabular_classification.py     |  4 +--
 autoPyTorch/api/tabular_regression.py         |  4 +--
 autoPyTorch/datasets/base_dataset.py          | 12 ++++----
 autoPyTorch/datasets/image_dataset.py         |  8 +++---
 autoPyTorch/datasets/resampling_strategy.py   | 28 +++++++++----------
 autoPyTorch/datasets/tabular_dataset.py       |  8 +++---
 autoPyTorch/datasets/time_series_dataset.py   |  8 +++---
 autoPyTorch/optimizer/smbo.py                 |  4 +--
 .../example_resampling_strategy.py            | 12 ++++----
 test/test_api/test_api.py                     | 26 ++++++++---------
 test/test_evaluation/evaluation_util.py       | 12 ++++----
 test/test_evaluation/test_train_evaluator.py  |  2 +-
 13 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index 51ae6a46e..044cb86be 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -34,7 +34,7 @@
     STRING_TO_TASK_TYPES,
 )
 from autoPyTorch.datasets.base_dataset import BaseDataset
-from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutTypes
+from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
 from autoPyTorch.ensemble.ensemble_selection import EnsembleSelection
 from autoPyTorch.ensemble.singlebest_ensemble import SingleBest
@@ -138,7 +138,7 @@ def __init__(
         include_components: Optional[Dict] = None,
         exclude_components: Optional[Dict] = None,
         backend: Optional[Backend] = None,
-        resampling_strategy: Union[CrossValTypes, HoldoutTypes] = HoldoutTypes.holdout,
+        resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
         task_type: Optional[str] = None
@@ -1171,7 +1171,7 @@ def predict(
         assert self.ensemble_ is not None, "Load models should error out if no ensemble"
         self.ensemble_ = cast(Union[SingleBest, EnsembleSelection], self.ensemble_)

-        if isinstance(self.resampling_strategy, HoldoutTypes):
+        if isinstance(self.resampling_strategy, HoldoutValTypes):
             models = self.models_
         elif isinstance(self.resampling_strategy, CrossValTypes):
             models = self.cv_models_
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index 59a6266ed..deeb5244b 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -15,7 +15,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
-    HoldoutTypes,
+    HoldoutValTypes,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
@@ -72,7 +72,7 @@ def __init__(
         delete_output_folder_after_terminate: bool = True,
         include_components: Optional[Dict] = None,
         exclude_components: Optional[Dict] = None,
-        resampling_strategy: Union[CrossValTypes, HoldoutTypes] = HoldoutTypes.holdout,
+        resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         backend: Optional[Backend] = None,
         search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
index 256ba7728..afef8ce9f 100644
--- a/autoPyTorch/api/tabular_regression.py
+++ b/autoPyTorch/api/tabular_regression.py
@@ -15,7 +15,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
-    HoldoutTypes,
+    HoldoutValTypes,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline
@@ -64,7 +64,7 @@ def __init__(
         delete_output_folder_after_terminate: bool = True,
         include_components: Optional[Dict] = None,
         exclude_components: Optional[Dict] = None,
-        resampling_strategy: Union[CrossValTypes, HoldoutTypes] = HoldoutTypes.holdout,
+        resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         backend: Optional[Backend] = None,
         search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py
index 350f1bf71..a9f828473 100644
--- a/autoPyTorch/datasets/base_dataset.py
+++ b/autoPyTorch/datasets/base_dataset.py
@@ -14,7 +14,7 @@
 import torchvision

 from autoPyTorch.constants import CLASSIFICATION_OUTPUTS, STRING_TO_OUTPUT_TYPES
-from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutTypes
+from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
 from autoPyTorch.utils.common import FitRequirement

 BaseDatasetInputType = Union[Tuple[np.ndarray, np.ndarray], Dataset]
@@ -69,7 +69,7 @@ def __init__(
         dataset_name: Optional[str] = None,
         val_tensors: Optional[BaseDatasetInputType] = None,
         test_tensors: Optional[BaseDatasetInputType] = None,
-        resampling_strategy: Union[CrossValTypes, HoldoutTypes] = HoldoutTypes.holdout,
+        resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         seed: Optional[int] = 42,
         train_transforms: Optional[torchvision.transforms.Compose] = None,
@@ -85,8 +85,8 @@ def __init__(
                 validation data
             test_tensors (An optional tuple of objects that have a __len__ and a __getitem__ attribute):
                 test data
-            resampling_strategy (Union[CrossValTypes, HoldoutTypes]),
-                (default=HoldoutTypes.holdout):
+            resampling_strategy (Union[CrossValTypes, HoldoutValTypes]),
+                (default=HoldoutValTypes.holdout_validation):
                 strategy to split the training data.
             resampling_strategy_args (Optional[Dict[str, Any]]): arguments
                 required for the chosen resampling strategy.
@@ -196,7 +196,7 @@ def _get_indices(self) -> np.ndarray:

     def _process_resampling_strategy_args(self) -> None:
         if not any(isinstance(self.resampling_strategy, val_type)
-                   for val_type in [HoldoutTypes, CrossValTypes]):
+                   for val_type in [HoldoutValTypes, CrossValTypes]):
             raise ValueError(f"resampling_strategy {self.resampling_strategy} is not supported.")

         if self.resampling_strategy_args is not None and \
@@ -229,7 +229,7 @@ def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], List[int]]

         labels_to_stratify = self.train_tensors[-1] if self.is_stratify else None

-        if isinstance(self.resampling_strategy, HoldoutTypes):
+        if isinstance(self.resampling_strategy, HoldoutValTypes):
             val_share = self.resampling_strategy_args['val_share']

             return self.resampling_strategy(
diff --git a/autoPyTorch/datasets/image_dataset.py b/autoPyTorch/datasets/image_dataset.py
index 05ef0e3bb..5abeb6d8a 100644
--- a/autoPyTorch/datasets/image_dataset.py
+++ b/autoPyTorch/datasets/image_dataset.py
@@ -23,7 +23,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
-    HoldoutTypes,
+    HoldoutValTypes,
 )

 IMAGE_DATASET_INPUT = Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]
@@ -39,8 +39,8 @@ class ImageDataset(BaseDataset):
             validation data
         test (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]):
             testing data
-        resampling_strategy (Union[CrossValTypes, HoldoutTypes]),
-            (default=HoldoutTypes.holdout):
+        resampling_strategy (Union[CrossValTypes, HoldoutValTypes]),
+            (default=HoldoutValTypes.holdout_validation):
             strategy to split the training data.
         resampling_strategy_args (Optional[Dict[str, Any]]): arguments
             required for the chosen resampling strategy.
@@ -56,7 +56,7 @@ def __init__(self,
                  train: IMAGE_DATASET_INPUT,
                  val: Optional[IMAGE_DATASET_INPUT] = None,
                  test: Optional[IMAGE_DATASET_INPUT] = None,
-                 resampling_strategy: Union[CrossValTypes, HoldoutTypes] = HoldoutTypes.holdout,
+                 resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
                  resampling_strategy_args: Optional[Dict[str, Any]] = None,
                  seed: Optional[int] = 42,
                  train_transforms: Optional[torchvision.transforms.Compose] = None,
diff --git a/autoPyTorch/datasets/resampling_strategy.py b/autoPyTorch/datasets/resampling_strategy.py
index 2e01fcfeb..f6e6ae570 100644
--- a/autoPyTorch/datasets/resampling_strategy.py
+++ b/autoPyTorch/datasets/resampling_strategy.py
@@ -25,7 +25,7 @@ class _ResamplingStrategyArgs(NamedTuple):

 class HoldoutFuncs():
     @staticmethod
-    def holdout(
+    def holdout_validation(
         random_state: np.random.RandomState,
         val_share: float,
         indices: np.ndarray,
@@ -51,7 +51,7 @@ class CrossValFuncs():
     }

     @staticmethod
-    def k_fold(
+    def k_fold_cross_validation(
         random_state: np.random.RandomState,
         num_splits: int,
         indices: np.ndarray,
@@ -106,18 +106,18 @@ class CrossValTypes(Enum):
     and is not supposed to be instantiated.

     Examples: This class is supposed to be used as follows
-    >>> cv_type = CrossValTypes.k_fold
+    >>> cv_type = CrossValTypes.k_fold_cross_validation
     >>> print(cv_type.name)

-        k_fold
+        k_fold_cross_validation

     >>> for cross_val_type in CrossValTypes:
             print(cross_val_type.name, cross_val_type.value)

-        k_fold functools.partial()
+        k_fold_cross_validation functools.partial()
         time_series
     """
-    k_fold = partial(CrossValFuncs.k_fold)
+    k_fold_cross_validation = partial(CrossValFuncs.k_fold_cross_validation)
     time_series = partial(CrossValFuncs.time_series)

     def __call__(
@@ -153,31 +153,31 @@ def __call__(
     )


-class HoldoutTypes(Enum):
+class HoldoutValTypes(Enum):
     """The type of holdout validation

     This class is used to specify the holdout validation function
     and is not supposed to be instantiated.

     Examples: This class is supposed to be used as follows
-    >>> holdout_type = HoldoutTypes.holdout
+    >>> holdout_type = HoldoutValTypes.holdout_validation
     >>> print(holdout_type.name)

-        holdout
+        holdout_validation

     >>> print(holdout_type.value)

-        functools.partial()
+        functools.partial()

-    >>> for holdout_type in HoldoutTypes:
+    >>> for holdout_type in HoldoutValTypes:
             print(holdout_type.name)

-        holdout
+        holdout_validation

-    Additionally, HoldoutTypes. can be called directly.
+    Additionally, HoldoutValTypes. can be called directly.
     """

-    holdout = partial(HoldoutFuncs.holdout)
+    holdout_validation = partial(HoldoutFuncs.holdout_validation)

     def __call__(
         self,
diff --git a/autoPyTorch/datasets/tabular_dataset.py b/autoPyTorch/datasets/tabular_dataset.py
index 2133734eb..5c7ada2f7 100644
--- a/autoPyTorch/datasets/tabular_dataset.py
+++ b/autoPyTorch/datasets/tabular_dataset.py
@@ -20,7 +20,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
-    HoldoutTypes,
+    HoldoutValTypes,
 )


@@ -44,8 +44,8 @@ class TabularDataset(BaseDataset):
         Y (Union[np.ndarray, pd.Series]): training data targets.
         X_test (Optional[Union[np.ndarray, pd.DataFrame]]): input testing data.
         Y_test (Optional[Union[np.ndarray, pd.DataFrame]]): testing data targets
-        resampling_strategy (Union[CrossValTypes, HoldoutTypes]),
-            (default=HoldoutTypes.holdout):
+        resampling_strategy (Union[CrossValTypes, HoldoutValTypes]),
+            (default=HoldoutValTypes.holdout_validation):
             strategy to split the training data.
         resampling_strategy_args (Optional[Dict[str, Any]]): arguments
             required for the chosen resampling strategy.
@@ -66,7 +66,7 @@ def __init__(self,
                  Y: Union[np.ndarray, pd.Series],
                  X_test: Optional[Union[np.ndarray, pd.DataFrame]] = None,
                  Y_test: Optional[Union[np.ndarray, pd.DataFrame]] = None,
-                 resampling_strategy: Union[CrossValTypes, HoldoutTypes] = HoldoutTypes.holdout,
+                 resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
                  resampling_strategy_args: Optional[Dict[str, Any]] = None,
                  seed: Optional[int] = 42,
                  train_transforms: Optional[torchvision.transforms.Compose] = None,
diff --git a/autoPyTorch/datasets/time_series_dataset.py b/autoPyTorch/datasets/time_series_dataset.py
index c21a9ad7d..d5a21c550 100644
--- a/autoPyTorch/datasets/time_series_dataset.py
+++ b/autoPyTorch/datasets/time_series_dataset.py
@@ -5,7 +5,7 @@
 import torchvision.transforms

 from autoPyTorch.datasets.base_dataset import BaseDataset
-from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutTypes
+from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes

 TIME_SERIES_FORECASTING_INPUT = Tuple[np.ndarray, np.ndarray]  # currently only numpy arrays are supported
 TIME_SERIES_REGRESSION_INPUT = Tuple[np.ndarray, np.ndarray]
@@ -17,9 +17,9 @@ def _check_prohibited_resampling() -> None:

     Args:
         task_name (str): Typically the Dataset class name
-        resampling_strategy (Union[CrossValTypes, HoldoutTypes]):
+        resampling_strategy (Union[CrossValTypes, HoldoutValTypes]):
             The splitting function
-        args (Union[CrossValTypes, HoldoutTypes]):
+        args (Union[CrossValTypes, HoldoutValTypes]):
             The list of cross validation functions and holdout validation functions
             that are suitable for the given task
@@ -39,7 +39,7 @@ def __init__(self,
                  n_steps: int,
                  train: TIME_SERIES_FORECASTING_INPUT,
                  val: Optional[TIME_SERIES_FORECASTING_INPUT] = None,
-                 resampling_strategy: Union[CrossValTypes, HoldoutTypes] = HoldoutTypes.holdout,
+                 resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
                  resampling_strategy_args: Optional[Dict[str, Any]] = None,
                  seed: Optional[int] = 42,
                  train_transforms: Optional[torchvision.transforms.Compose] = None,
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index 413801ee7..2e0be66f4 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -19,7 +19,7 @@
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
-    HoldoutTypes,
+    HoldoutValTypes,
 )
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
 from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
@@ -92,7 +92,7 @@ def __init__(self,
                  pipeline_config: typing.Dict[str, typing.Any],
                  start_num_run: int = 1,
                  seed: int = 1,
-                 resampling_strategy: typing.Union[HoldoutTypes, CrossValTypes] = HoldoutTypes.holdout,
+                 resampling_strategy: typing.Union[HoldoutValTypes, CrossValTypes] = HoldoutValTypes.holdout_validation,
                  resampling_strategy_args: typing.Optional[typing.Dict[str, typing.Any]] = None,
                  include: typing.Optional[typing.Dict[str, typing.Any]] = None,
                  exclude: typing.Optional[typing.Dict[str, typing.Any]] = None,
diff --git a/examples/tabular/40_advanced/example_resampling_strategy.py b/examples/tabular/40_advanced/example_resampling_strategy.py
index d9ae3f57a..05564a4d4 100644
--- a/examples/tabular/40_advanced/example_resampling_strategy.py
+++ b/examples/tabular/40_advanced/example_resampling_strategy.py
@@ -24,7 +24,7 @@
 import sklearn.model_selection

 from autoPyTorch.api.tabular_classification import TabularClassificationTask
-from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutTypes
+from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes


 if __name__ == '__main__':
@@ -48,11 +48,11 @@
         # To maintain logs of the run, set the next two as False
         delete_tmp_folder_after_terminate=True,
         delete_output_folder_after_terminate=True,
-        # 'HoldoutTypes.holdout' with 'val_share': 0.33
+        # 'HoldoutValTypes.holdout_validation' with 'val_share': 0.33
         # is the default argument setting for TabularClassificationTask.
         # It is explicitly specified in this example for demonstrational
         # purpose.
-        resampling_strategy=HoldoutTypes.holdout,
+        resampling_strategy=HoldoutValTypes.holdout_validation,
         resampling_strategy_args={'val_share': 0.33}
     )

@@ -90,7 +90,7 @@
         # To maintain logs of the run, set the next two as False
         delete_tmp_folder_after_terminate=True,
         delete_output_folder_after_terminate=True,
-        resampling_strategy=CrossValTypes.k_fold,
+        resampling_strategy=CrossValTypes.k_fold_cross_validation,
         resampling_strategy_args={'num_splits': 3}
     )

@@ -130,8 +130,8 @@
         delete_output_folder_after_terminate=True,
         # For demonstration purposes, we use
         # Stratified hold out validation. However,
-        # one can also use CrossValTypes.k_fold.
-        resampling_strategy=HoldoutTypes.holdout,
+        # one can also use CrossValTypes.k_fold_cross_validation.
+        resampling_strategy=HoldoutValTypes.holdout_validation,
         resampling_strategy_args={'val_share': 0.33, 'stratify': True}
     )

diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index a880036da..59a6a3166 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -23,7 +23,7 @@
 from autoPyTorch.api.tabular_regression import TabularRegressionTask
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
-    HoldoutTypes,
+    HoldoutValTypes,
 )
 from autoPyTorch.optimizer.smbo import AutoMLSMBO
 from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy
@@ -36,8 +36,8 @@

 # Test
 # ========
 @pytest.mark.parametrize('openml_id', (40981, ))
-@pytest.mark.parametrize('resampling_strategy', (HoldoutTypes.holdout,
-                                                 CrossValTypes.k_fold,
+@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation,
+                                                 CrossValTypes.k_fold_cross_validation,
                                                  ))
 def test_tabular_classification(openml_id, resampling_strategy, backend):
@@ -72,7 +72,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):

     # Internal dataset has expected settings
     assert estimator.dataset.task_type == 'tabular_classification'
-    expected_num_splits = 1 if resampling_strategy == HoldoutTypes.holdout else 5
+    expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5
     assert estimator.resampling_strategy == resampling_strategy
     assert estimator.dataset.resampling_strategy == resampling_strategy
     assert len(estimator.dataset.splits) == expected_num_splits
@@ -123,14 +123,14 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):

     assert SUCCESS, f"Successful run was not properly saved for num_run: {successful_num_run}"

-    if resampling_strategy == HoldoutTypes.holdout:
+    if resampling_strategy == HoldoutValTypes.holdout_validation:
         model_file = os.path.join(run_key_model_run_dir,
                                   f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model")
         assert os.path.exists(model_file), model_file
         model = estimator._backend.load_model_by_seed_and_id_and_budget(
             estimator.seed, successful_num_run, run_key.budget)
         assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module)
-    elif resampling_strategy == CrossValTypes.k_fold:
+    elif resampling_strategy == CrossValTypes.k_fold_cross_validation:
         model_file = os.path.join(
             run_key_model_run_dir,
             f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model"
@@ -195,8 +195,8 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):


 @pytest.mark.parametrize('openml_name', ("boston", ))
-@pytest.mark.parametrize('resampling_strategy', (HoldoutTypes.holdout,
-                                                 CrossValTypes.k_fold,
+@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation,
+                                                 CrossValTypes.k_fold_cross_validation,
                                                  ))
 def test_tabular_regression(openml_name, resampling_strategy, backend):
@@ -243,7 +243,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):

     # Internal dataset has expected settings
     assert estimator.dataset.task_type == 'tabular_regression'
-    expected_num_splits = 1 if resampling_strategy == HoldoutTypes.holdout else 5
+    expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5
     assert estimator.resampling_strategy == resampling_strategy
     assert estimator.dataset.resampling_strategy == resampling_strategy
     assert len(estimator.dataset.splits) == expected_num_splits
@@ -294,14 +294,14 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):

     assert SUCCESS, f"Successful run was not properly saved for num_run: {successful_num_run}"

-    if resampling_strategy == HoldoutTypes.holdout:
+    if resampling_strategy == HoldoutValTypes.holdout_validation:
         model_file = os.path.join(run_key_model_run_dir,
                                   f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model")
         assert os.path.exists(model_file), model_file
         model = estimator._backend.load_model_by_seed_and_id_and_budget(
             estimator.seed, successful_num_run, run_key.budget)
         assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module)
-    elif resampling_strategy == CrossValTypes.k_fold:
+    elif resampling_strategy == CrossValTypes.k_fold_cross_validation:
         model_file = os.path.join(
             run_key_model_run_dir,
             f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model"
@@ -387,7 +387,7 @@ def test_tabular_input_support(openml_id, backend):
     # Search for a good configuration
     estimator = TabularClassificationTask(
         backend=backend,
-        resampling_strategy=HoldoutTypes.holdout,
+        resampling_strategy=HoldoutValTypes.holdout_validation,
         ensemble_size=0,
     )

@@ -411,7 +411,7 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular):
     backend = fit_dictionary_tabular['backend']
     estimator = TabularClassificationTask(
         backend=backend,
-        resampling_strategy=HoldoutTypes.holdout,
+        resampling_strategy=HoldoutValTypes.holdout_validation,
         ensemble_size=0,
     )

diff --git a/test/test_evaluation/evaluation_util.py b/test/test_evaluation/evaluation_util.py
index afda91584..088726963 100644
--- a/test/test_evaluation/evaluation_util.py
+++ b/test/test_evaluation/evaluation_util.py
@@ -12,7 +12,7 @@
 from sklearn import preprocessing

 from autoPyTorch.data.tabular_validator import TabularInputValidator
-from autoPyTorch.datasets.resampling_strategy import HoldoutTypes
+from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.pipeline.components.training.metrics.metrics import (
     accuracy,
@@ -132,7 +132,7 @@ def __fit(self, function_handle):
             raise e


-def get_multiclass_classification_datamanager(resampling_strategy=HoldoutTypes.holdout):
+def get_multiclass_classification_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation):
     X_train, Y_train, X_test, Y_test = get_dataset('iris')
     indices = list(range(X_train.shape[0]))
     np.random.seed(1)
@@ -150,7 +150,7 @@ def get_multiclass_classification_datamanager(resampling_strategy=HoldoutTypes.h
     return dataset


-def get_abalone_datamanager(resampling_strategy=HoldoutTypes.holdout):
+def get_abalone_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation):
     # https://www.openml.org/d/183
     X, y = sklearn.datasets.fetch_openml(data_id=183, return_X_y=True, as_frame=False)
     y = preprocessing.LabelEncoder().fit_transform(y)
@@ -168,7 +168,7 @@ def get_abalone_datamanager(resampling_strategy=HoldoutTypes.holdout):
     return dataset


-def get_binary_classification_datamanager(resampling_strategy=HoldoutTypes.holdout):
+def get_binary_classification_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation):
     X_train, Y_train, X_test, Y_test = get_dataset('iris')
     indices = list(range(X_train.shape[0]))
     np.random.seed(1)
@@ -194,7 +194,7 @@ def get_binary_classification_datamanager(resampling_strategy=HoldoutTypes.holdo
     return dataset


-def get_regression_datamanager(resampling_strategy=HoldoutTypes.holdout):
+def get_regression_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation):
     X_train, Y_train, X_test, Y_test = get_dataset('boston')
     indices = list(range(X_train.shape[0]))
     np.random.seed(1)
@@ -212,7 +212,7 @@ def get_regression_datamanager(resampling_strategy=HoldoutTypes.holdout):
     return dataset


-def get_500_classes_datamanager(resampling_strategy=HoldoutTypes.holdout):
+def get_500_classes_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation):
     weights = ([0.002] * 475) + ([0.001] * 25)
     X, Y = sklearn.datasets.make_classification(n_samples=1000,
                                                 n_features=20,
diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py
index 53db49188..952ec7c78 100644
--- a/test/test_evaluation/test_train_evaluator.py
+++ b/test/test_evaluation/test_train_evaluator.py
@@ -125,7 +125,7 @@ def test_holdout(self, pipeline_mock):

     @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
     def test_cv(self, pipeline_mock):
-        D = get_binary_classification_datamanager(resampling_strategy=CrossValTypes.k_fold)
+        D = get_binary_classification_datamanager(resampling_strategy=CrossValTypes.k_fold_cross_validation)
         pipeline_mock.predict_proba.side_effect = \
             lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1))
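
Reviewer note (not part of the patch): below is a minimal usage sketch of the restored names, adapted from
examples/tabular/40_advanced/example_resampling_strategy.py and test/test_api/test_api.py in this patch. The
OpenML dataset id and the omission of time-limit arguments to search() are illustrative assumptions, not
something the patch itself prescribes.

    import sklearn.datasets
    import sklearn.model_selection

    from autoPyTorch.api.tabular_classification import TabularClassificationTask
    from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes

    # Any tabular classification data works; id 40981 is the one used in test_api.py.
    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=42)

    # Default resampling: a single holdout split, with 33% of the training data used for validation.
    api_holdout = TabularClassificationTask(
        resampling_strategy=HoldoutValTypes.holdout_validation,
        resampling_strategy_args={'val_share': 0.33},
    )

    # Alternative: 3-fold cross-validation.
    api_cv = TabularClassificationTask(
        resampling_strategy=CrossValTypes.k_fold_cross_validation,
        resampling_strategy_args={'num_splits': 3},
    )

    # The search call is the same in both cases; time/budget arguments are left at their defaults here.
    api_holdout.search(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
                       optimize_metric='accuracy')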