From c6de3771fbba576f30308ed50515fe3f6768f44e Mon Sep 17 00:00:00 2001 From: Mengqi <2534671415@qq.com> Date: Sat, 20 Jul 2024 16:35:52 +0800 Subject: [PATCH] perf: change 'abnormal' to 'anomaly'. --- README.md | 2 +- .../Add New Model To Framework.md | 4 +- .../anomaly_detection.md} | 26 ++++++------ .../Data_Preprocessing/Data Preprocessing.md | 2 +- docs/source/Home/CHANGELOG.md | 4 +- docs/source/Home/Introduction.md | 2 +- docs/source/model example.rst | 2 +- ...ning.model.func.algo_anomalydetection.rst} | 4 +- .../geochemistrypi.data_mining.model.func.rst | 2 +- geochemistrypi/data_mining/cli_pipeline.py | 12 +++--- geochemistrypi/data_mining/constants.py | 6 +-- ...ection.xlsx => Data_AnomalyDetection.xlsx} | Bin geochemistrypi/data_mining/enum.py | 2 +- geochemistrypi/data_mining/model/detection.py | 37 +++++++++--------- .../__init__.py | 0 .../_iforest.py | 0 .../_local_outlier_factor.py | 0 geochemistrypi/data_mining/process/detect.py | 26 ++++++------ 18 files changed, 65 insertions(+), 66 deletions(-) rename docs/source/For User/Model Example/{Abnormal_Detection/abnormal_detection.md => Anomaly_Detection/anomaly_detection.md} (96%) rename docs/source/python_apis/{geochemistrypi.data_mining.model.func.algo_abnormaldetection.rst => geochemistrypi.data_mining.model.func.algo_anomalydetection.rst} (51%) rename geochemistrypi/data_mining/data/dataset/{Data_AbnormalDetection.xlsx => Data_AnomalyDetection.xlsx} (100%) rename geochemistrypi/data_mining/model/func/{algo_abnormaldetection => algo_anomalydetection}/__init__.py (100%) rename geochemistrypi/data_mining/model/func/{algo_abnormaldetection => algo_anomalydetection}/_iforest.py (100%) rename geochemistrypi/data_mining/model/func/{algo_abnormaldetection => algo_anomalydetection}/_local_outlier_factor.py (100%) diff --git a/README.md b/README.md index a8c64248..0eb5627d 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ https://docs.qq.com/document/DQ3BDeHhxRGNzSXZN) + Data_Decomposition.xlsx 
[[Google Drive]](https://docs.google.com/spreadsheets/d/1kix82qj5--vhnm8-KhuUBH9dqYH6zcY8/edit?usp=sharing&ouid=110717816678586054594&rtpof=true&sd=true) | [[Tencent Docs]](https://docs.qq.com/document/DQ29oZ0lhUGtZUmdN?&u=6868f96d4a384b309036e04e637e367a) -+ Data_AbnormalDetectioon.xlsx [[Google Drive]](https://docs.google.com/spreadsheets/d/1NqTQZCkv74Sn_iOJOKRc-QnJzpaWmnzC_lET_0ZreiQ/edit?usp=sharing) | [[Tencent Docs]]( ++ Data_AnomalyDetection.xlsx [[Google Drive]](https://docs.google.com/spreadsheets/d/1NqTQZCkv74Sn_iOJOKRc-QnJzpaWmnzC_lET_0ZreiQ/edit?usp=sharing) | [[Tencent Docs]]( https://docs.qq.com/document/DQ2hqQ2N2ZGlOUWlT) **Note**: For more detail on data preparation, please refer to our online documentation in **Model Example** under the section of **FOR USER**. diff --git a/docs/source/For Developer/Add New Model To Framework.md b/docs/source/For Developer/Add New Model To Framework.md index d2276ea9..5f4a2a08 100644 --- a/docs/source/For Developer/Add New Model To Framework.md +++ b/docs/source/For Developer/Add New Model To Framework.md @@ -749,7 +749,7 @@ def activate( """Train by Scikit-learn framework.""" ``` -For unsupervised learning (clustering, decomposition and abnormaly detection), the signature of `activate` method is: +For unsupervised learning (clustering, decomposition and anomaly detection), the signature of `activate` method is: ``` def activate( self, @@ -800,7 +800,7 @@ The differences above include the signature of @dispatch and the signature of `a There are two `activate` methods defined in the Regression and Classification mode, the first method uses the Scikit-learn framework, and the second method uses the FLAML and RAY frameworks. Decomposition and Clustering algorithms only use the Scikit-learn framework. The instantiation of model workflow class inside `activate` method builds the connnectioni between Layer 3 and Layer 2. 
-(1) The invocatioin of model workflow class in the first activate method (Used in classification, regression,decomposition, clustering, abnormaly detection) needs to pass the hyperparameters for manual ML: +(1) The invocation of model workflow class in the first activate method (Used in classification, regression, decomposition, clustering, anomaly detection) needs to pass the hyperparameters for manual ML: ``` elif self.model_name == "ModelName": hyper_parameters = ModelWorkflowClass.manual_hyper_parameters() diff --git a/docs/source/For User/Model Example/Abnormal_Detection/abnormal_detection.md b/docs/source/For User/Model Example/Anomaly_Detection/anomaly_detection.md similarity index 96% rename from docs/source/For User/Model Example/Abnormal_Detection/abnormal_detection.md rename to docs/source/For User/Model Example/Anomaly_Detection/anomaly_detection.md index 426e6ad6..ea789a2d 100644 --- a/docs/source/For User/Model Example/Abnormal_Detection/abnormal_detection.md +++ b/docs/source/For User/Model Example/Anomaly_Detection/anomaly_detection.md @@ -1,4 +1,4 @@ -# Abnomal Detection - Isolation Forest +# Anomaly Detection - Isolation Forest Anomaly detection is a broad problem-solving strategy that encompasses various algorithms, each with its own approach to identifying unusual data points. One such algorithm is the Isolation Forest, which distinguishes itself by constructing an ensemble of decision trees to isolate anomalies. The algorithm's core principle is that anomalies are more easily isolated, requiring fewer splits in the trees compared to normal data points. @@ -75,19 +75,19 @@ After pressing the Enter key, the program propts the following options to let yo 4 - Data For Dimensional Reduction -5 - Data For Abnormal Detection +5 - Data For Anomaly Detection (User) ➜ @Number: 5 ``` -Here, we choose *_5 - Data For Abnormal Detection_* and press the Enter key to move forward.
+Here, we choose *_5 - Data For Anomaly Detection_* and press the Enter key to move forward. Now, you should see the output below on your screen: ```bash Successfully loading the built-in training data set -'Data_AbnormalDetection.xlsx'. +'Data_AnomalyDetection.xlsx'. -------------------- @@ -778,7 +778,7 @@ Successfully store 'Data Selected Dropped-Imputed Feature-Engineering' in 'Data -We select **5 - Abnormal Detection** as our model: +We select **5 - Anomaly Detection** as our model: @@ -794,7 +794,7 @@ We select **5 - Abnormal Detection** as our model: 4 - Dimensional Reduction -5 - Abnormal Detection +5 - Anomaly Detection (Model) ➜ @Number: 5 (Press Enter key to move forward.) @@ -904,9 +904,9 @@ Expected Functionality: Successfully store 'Hyper Parameters - Isolation Forest' in 'Hyper Parameters - Isolation Forest.txt' in Users/geopi/geopi_output/GeoPi-Rock Isolation Forest/Algorithm - Test 1/parameters. ------* Abnormal Detection Data *----- +-----* Anomaly Detection Data *----- - SIO2(WT%) TIO2(WT%) ... MNO(WT%) NA2O(WT%) is_abnormal + SIO2(WT%) TIO2(WT%) ... MNO(WT%) NA2O(WT%) is_anomaly 0 53.536000 0.291000 ... 0.083000 0.861000 -1 @@ -932,11 +932,11 @@ Successfully store 'Hyper Parameters - Isolation Forest' in 'Hyper Parameters - [109 rows x 10 columns] -Successfully store 'X Abnormal Detection' in 'X Abnormal Detection.xlsx' in Users/geopi/geopi_output/GeoPi-Rock Isolation Forest/Algorithm - Test 1/data. +Successfully store 'X Anomaly Detection' in 'X Anomaly Detection.xlsx' in Users/geopi/geopi_output/GeoPi-Rock Isolation Forest/Algorithm - Test 1/data. -----* Normal Data *----- - SIO2(WT%) TIO2(WT%) ... MNO(WT%) NA2O(WT%) is_abnormal + SIO2(WT%) TIO2(WT%) ... MNO(WT%) NA2O(WT%) is_anomaly 2 50.873065 0.720622 ... 
0.102185 1.920395 1 @@ -965,9 +965,9 @@ Successfully store 'X Abnormal Detection' in 'X Abnormal Detection.xlsx' in User Successfully store 'X Normal' in 'X Normal.xlsx' in Users/geopi/geopi_output/GeoPi-Rock Isolation Forest/Algorithm - Test 1/data. ------* Abnormal Data *----- +-----* Anomaly Data *----- - SIO2(WT%) TIO2(WT%) ... MNO(WT%) NA2O(WT%) is_abnormal + SIO2(WT%) TIO2(WT%) ... MNO(WT%) NA2O(WT%) is_anomaly 0 53.536000 0.291000 ... 0.083000 0.861000 -1 @@ -1035,7 +1035,7 @@ Successfully store 'X Normal' in 'X Normal.xlsx' in Users/geopi/geopi_output/Geo 106 54.200000 0.100000 ... 0.130000 1.430000 -1 ``` -Successfully store 'X Abnormal' in 'X Abnormal.xlsx' in Users/geopi/geopi_output/GeoPi-Rock Isolation Forest/Algorithm - Test 1/data. +Successfully store 'X Anomaly' in 'X Anomaly.xlsx' in Users/geopi/geopi_output/GeoPi-Rock Isolation Forest/Algorithm - Test 1/data. -----* Model Persistence *----- Successfully store 'Isolation Forest' in 'Isolation Forest.pkl' in Users/geopi/geopi_output/GeoPi-Rock Isolation Forest/Algorithm - Test 1/artifacts/model. 
diff --git a/docs/source/For User/Model Example/Data_Preprocessing/Data Preprocessing.md b/docs/source/For User/Model Example/Data_Preprocessing/Data Preprocessing.md index ea905770..6344deba 100644 --- a/docs/source/For User/Model Example/Data_Preprocessing/Data Preprocessing.md +++ b/docs/source/For User/Model Example/Data_Preprocessing/Data Preprocessing.md @@ -39,7 +39,7 @@ https://docs.qq.com/document/DQ3BDeHhxRGNzSXZN) + Data_Decomposition.xlsx [[Google Drive]](https://docs.google.com/spreadsheets/d/1kix82qj5--vhnm8-KhuUBH9dqYH6zcY8/edit?usp=sharing&ouid=110717816678586054594&rtpof=true&sd=true) | [[Tencent Docs]](https://docs.qq.com/document/DQ29oZ0lhUGtZUmdN?&u=6868f96d4a384b309036e04e637e367a) -+ Data_AbnormalDetectioon.xlsx [[Google Drive]](https://docs.google.com/spreadsheets/d/1NqTQZCkv74Sn_iOJOKRc-QnJzpaWmnzC_lET_0ZreiQ/edit?usp=sharing) | [[Tencent Docs]]( ++ Data_AnomalyDetection.xlsx [[Google Drive]](https://docs.google.com/spreadsheets/d/1NqTQZCkv74Sn_iOJOKRc-QnJzpaWmnzC_lET_0ZreiQ/edit?usp=sharing) | [[Tencent Docs]]( https://docs.qq.com/document/DQ2hqQ2N2ZGlOUWlT) diff --git a/docs/source/Home/CHANGELOG.md b/docs/source/Home/CHANGELOG.md index e225674b..39b9c66e 100644 --- a/docs/source/Home/CHANGELOG.md +++ b/docs/source/Home/CHANGELOG.md @@ -40,12 +40,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), + Clustering Models + Affinity Propagation Clustering + New Mode: - + Abnormal Detection + + Anomaly Detection + Isolation Forest + Docs: + Mind map of all options in README + Citation info - + Abnormal detection algorithm example + + Anomaly detection algorithm example ### Changed diff --git a/docs/source/Home/Introduction.md b/docs/source/Home/Introduction.md index 7063c32e..469c3456 100644 --- a/docs/source/Home/Introduction.md +++ b/docs/source/Home/Introduction.md @@ -146,7 +146,7 @@ https://docs.qq.com/document/DQ3BDeHhxRGNzSXZN) + Data_Decomposition.xlsx [[Google 
Drive]](https://docs.google.com/spreadsheets/d/1kix82qj5--vhnm8-KhuUBH9dqYH6zcY8/edit?usp=sharing&ouid=110717816678586054594&rtpof=true&sd=true) | [[Tencent Docs]](https://docs.qq.com/document/DQ29oZ0lhUGtZUmdN?&u=6868f96d4a384b309036e04e637e367a) -+ Data_AbnormalDetectioon.xlsx [[Google Drive]](https://docs.google.com/spreadsheets/d/1NqTQZCkv74Sn_iOJOKRc-QnJzpaWmnzC_lET_0ZreiQ/edit?usp=sharing) | [[Tencent Docs]]( ++ Data_AnomalyDetection.xlsx [[Google Drive]](https://docs.google.com/spreadsheets/d/1NqTQZCkv74Sn_iOJOKRc-QnJzpaWmnzC_lET_0ZreiQ/edit?usp=sharing) | [[Tencent Docs]]( https://docs.qq.com/document/DQ2hqQ2N2ZGlOUWlT) **Note**: For more detail on data preparation, please refer to our online documentation in **Model Example** under the section of **FOR USER**. diff --git a/docs/source/model example.rst b/docs/source/model example.rst index 87ae671f..165b7ee9 100644 --- a/docs/source/model example.rst +++ b/docs/source/model example.rst @@ -9,5 +9,5 @@ Model Example Regression Clustering Decomposition - Abnormal Detection + Anomaly Detection Network Analysis \ No newline at end of file diff --git a/docs/source/python_apis/geochemistrypi.data_mining.model.func.algo_abnormaldetection.rst b/docs/source/python_apis/geochemistrypi.data_mining.model.func.algo_anomalydetection.rst similarity index 51% rename from docs/source/python_apis/geochemistrypi.data_mining.model.func.algo_abnormaldetection.rst rename to docs/source/python_apis/geochemistrypi.data_mining.model.func.algo_anomalydetection.rst index 9fc56457..011e4b83 100644 --- a/docs/source/python_apis/geochemistrypi.data_mining.model.func.algo_abnormaldetection.rst +++ b/docs/source/python_apis/geochemistrypi.data_mining.model.func.algo_anomalydetection.rst @@ -1,10 +1,10 @@ -geochemistrypi.data\_mining.model.func.algo\_abnormaldetection package +geochemistrypi.data\_mining.model.func.algo\_anomalydetection package ====================================================================== Module contents 
--------------- -.. automodule:: geochemistrypi.data_mining.model.func.algo_abnormaldetection +.. automodule:: geochemistrypi.data_mining.model.func.algo_anomalydetection :members: :undoc-members: :show-inheritance: diff --git a/docs/source/python_apis/geochemistrypi.data_mining.model.func.rst b/docs/source/python_apis/geochemistrypi.data_mining.model.func.rst index 3183d2dd..895fbbdf 100644 --- a/docs/source/python_apis/geochemistrypi.data_mining.model.func.rst +++ b/docs/source/python_apis/geochemistrypi.data_mining.model.func.rst @@ -7,7 +7,7 @@ Subpackages .. toctree:: :maxdepth: 4 - geochemistrypi.data_mining.model.func.algo_abnormaldetection + geochemistrypi.data_mining.model.func.algo_anomalydetection geochemistrypi.data_mining.model.func.algo_classification geochemistrypi.data_mining.model.func.algo_clustering geochemistrypi.data_mining.model.func.algo_decomposition diff --git a/geochemistrypi/data_mining/cli_pipeline.py b/geochemistrypi/data_mining/cli_pipeline.py index 3dd117b6..631d0ec6 100644 --- a/geochemistrypi/data_mining/cli_pipeline.py +++ b/geochemistrypi/data_mining/cli_pipeline.py @@ -9,7 +9,7 @@ from rich.prompt import Confirm, Prompt from .constants import ( - ABNORMALDETECTION_MODELS, + ANOMALYDETECTION_MODELS, CLASSIFICATION_MODELS, CLASSIFICATION_MODELS_WITH_MISSING_VALUES, CLUSTERING_MODELS, @@ -43,7 +43,7 @@ from .process.classify import ClassificationModelSelection from .process.cluster import ClusteringModelSelection from .process.decompose import DecompositionModelSelection -from .process.detect import AbnormalDetectionModelSelection +from .process.detect import AnomalyDetectionModelSelection from .process.regress import RegressionModelSelection from .utils.base import check_package, clear_output, copy_files, create_geopi_output_dir, get_os, install_package, log, save_data, show_warning from .utils.mlflow_utils import retrieve_previous_experiment_id @@ -198,7 +198,7 @@ def cli_pipeline(training_data_path: str, application_data_path: 
Optional[str] = elif built_in_training_data_num == 4: training_data_path = "Data_Decomposition.xlsx" elif built_in_training_data_num == 5: - training_data_path = "Data_AbnormalDetection.xlsx" + training_data_path = "Data_AnomalyDetection.xlsx" data = read_data(file_path=training_data_path) print(f"Successfully loading the built-in training data set '{training_data_path}'.") show_data_columns(data.columns) @@ -403,7 +403,7 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] = # If the selected data set is with missing values and is not been imputed, then only allow the user to choose regression, classification and clustering models. # Otherwise, allow the user to choose decomposition models. if missing_value_flag and not process_missing_value_flag: - # The abnormal detection mode and decomposition mode don't support missing values. + # The anomaly detection mode and decomposition mode don't support missing values. num2option(MODE_OPTION_WITH_MISSING_VALUES) mode_num = limit_num_input(MODE_OPTION_WITH_MISSING_VALUES, SECTION[2], num_input) else: @@ -545,13 +545,13 @@ def cli_pipeline(training_data_path: str, application_data_path: Optional[str] = Modes2Models = {1: REGRESSION_MODELS_WITH_MISSING_VALUES, 2: CLASSIFICATION_MODELS_WITH_MISSING_VALUES, 3: CLUSTERING_MODELS_WITH_MISSING_VALUES} Modes2Initiators = {1: RegressionModelSelection, 2: ClassificationModelSelection, 3: ClusteringModelSelection} else: - Modes2Models = {1: REGRESSION_MODELS, 2: CLASSIFICATION_MODELS, 3: CLUSTERING_MODELS, 4: DECOMPOSITION_MODELS, 5: ABNORMALDETECTION_MODELS} + Modes2Models = {1: REGRESSION_MODELS, 2: CLASSIFICATION_MODELS, 3: CLUSTERING_MODELS, 4: DECOMPOSITION_MODELS, 5: ANOMALYDETECTION_MODELS} Modes2Initiators = { 1: RegressionModelSelection, 2: ClassificationModelSelection, 3: ClusteringModelSelection, 4: DecompositionModelSelection, - 5: AbnormalDetectionModelSelection, + 5: AnomalyDetectionModelSelection, } MODELS = Modes2Models[mode_num] 
num2option(MODELS) diff --git a/geochemistrypi/data_mining/constants.py b/geochemistrypi/data_mining/constants.py index 1303f60e..9acece9d 100644 --- a/geochemistrypi/data_mining/constants.py +++ b/geochemistrypi/data_mining/constants.py @@ -26,8 +26,8 @@ OPTION = ["Yes", "No"] DATA_OPTION = ["Own Data", "Testing Data (Built-in)"] -TEST_DATA_OPTION = ["Data For Regression", "Data For Classification", "Data For Clustering", "Data For Dimensional Reduction", "Data For Abnormal Detection"] -MODE_OPTION = ["Regression", "Classification", "Clustering", "Dimensional Reduction", "Abnormal Detection"] +TEST_DATA_OPTION = ["Data For Regression", "Data For Classification", "Data For Clustering", "Data For Dimensional Reduction", "Data For Anomaly Detection"] +MODE_OPTION = ["Regression", "Classification", "Clustering", "Dimensional Reduction", "Anomaly Detection"] MODE_OPTION_WITH_MISSING_VALUES = ["Regression", "Classification", "Clustering"] # The model provided to use @@ -68,7 +68,7 @@ ] CLUSTERING_MODELS = ["KMeans", "DBSCAN", "Agglomerative", "AffinityPropagation"] DECOMPOSITION_MODELS = ["PCA", "T-SNE", "MDS"] -ABNORMALDETECTION_MODELS = ["Isolation Forest", "Local Outlier Factor"] +ANOMALYDETECTION_MODELS = ["Isolation Forest", "Local Outlier Factor"] # The model can deal with missing values # Reference: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values diff --git a/geochemistrypi/data_mining/data/dataset/Data_AbnormalDetection.xlsx b/geochemistrypi/data_mining/data/dataset/Data_AnomalyDetection.xlsx similarity index 100% rename from geochemistrypi/data_mining/data/dataset/Data_AbnormalDetection.xlsx rename to geochemistrypi/data_mining/data/dataset/Data_AnomalyDetection.xlsx diff --git a/geochemistrypi/data_mining/enum.py b/geochemistrypi/data_mining/enum.py index b14a5cbb..f861a8a1 100644 --- a/geochemistrypi/data_mining/enum.py +++ b/geochemistrypi/data_mining/enum.py @@ -6,7 +6,7 @@ class ModeOption(Enum): CLASSIFICATION = 
"Classification" CLUSTERING = "Clustering" DIMENSIONAL_REDUCTION = "Dimensional Reduction" - ABNORMAL_DETECTION = "Abnormal Detection" + ANOMALY_DETECTION = "Anomaly Detection" class ModeOptionWithMissingValues(Enum): diff --git a/geochemistrypi/data_mining/model/detection.py b/geochemistrypi/data_mining/model/detection.py index 9dff73f2..e73b818d 100644 --- a/geochemistrypi/data_mining/model/detection.py +++ b/geochemistrypi/data_mining/model/detection.py @@ -10,18 +10,17 @@ from ..utils.base import clear_output from ._base import WorkflowBase -from .func.algo_abnormaldetection._iforest import isolation_forest_manual_hyper_parameters -from .func.algo_abnormaldetection._local_outlier_factor import local_outlier_factor_manual_hyper_parameters +from .func.algo_anomalydetection._iforest import isolation_forest_manual_hyper_parameters, local_outlier_factor_manual_hyper_parameters -class AbnormalDetectionWorkflowBase(WorkflowBase): - """The base workflow class of abnormal detection algorithms.""" +class AnomalyDetectionWorkflowBase(WorkflowBase): + """The base workflow class of anomaly detection algorithms.""" # common_function = [] def __init__(self) -> None: super().__init__() - self.mode = "Abnormal Detection" + self.mode = "Anomaly Detection" def fit(self, X: pd.DataFrame, y: Optional[pd.DataFrame] = None) -> None: """Fit the model by Scikit-learn framework.""" @@ -29,7 +28,7 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.DataFrame] = None) -> None: self.model.fit(X) def predict(self, X: pd.DataFrame) -> np.ndarray: - """Perform Abnormal Detection on samples in X by Scikit-learn framework.""" + """Perform Anomaly Detection on samples in X by Scikit-learn framework.""" y_predict = self.model.predict(X) return y_predict @@ -52,29 +51,29 @@ def _detect_data(X: pd.DataFrame, detect_label: np.ndarray) -> tuple[pd.DataFram Returns ------- - X_abnormal_detection : pd.DataFrame + X_anomaly_detection : pd.DataFrame DataFrame containing the original data with detection 
results. X_normal : pd.DataFrame DataFrame containing the normal data points. - X_abnormal : pd.DataFrame - DataFrame containing the abnormal data points. + X_anomaly : pd.DataFrame + DataFrame containing the anomaly data points. """ - X_abnormal_detection = X.copy() + X_anomaly_detection = X.copy() # Merge detection results into the source data - X_abnormal_detection["is_abnormal"] = detect_label - X_normal = X_abnormal_detection[X_abnormal_detection["is_abnormal"] == 1] - X_abnormal = X_abnormal_detection[X_abnormal_detection["is_abnormal"] == -1] + X_anomaly_detection["is_anomaly"] = detect_label + X_normal = X_anomaly_detection[X_anomaly_detection["is_anomaly"] == 1] + X_anomaly = X_anomaly_detection[X_anomaly_detection["is_anomaly"] == -1] - return X_abnormal_detection, X_normal, X_abnormal + return X_anomaly_detection, X_normal, X_anomaly def common_components(self) -> None: - """Invoke all common application functions for abnormal detection algorithms by Scikit-learn framework.""" + """Invoke all common application functions for anomaly detection algorithms by Scikit-learn framework.""" pass -class IsolationForestAbnormalDetection(AbnormalDetectionWorkflowBase): +class IsolationForestAnomalyDetection(AnomalyDetectionWorkflowBase): """The automation workflow of using Isolation Forest algorithm to make insightful products.""" name = "Isolation Forest" @@ -212,7 +211,7 @@ def __init__( warm_start=self.warm_start, ) - self.naming = IsolationForestAbnormalDetection.name + self.naming = IsolationForestAnomalyDetection.name @classmethod def manual_hyper_parameters(cls) -> Dict: @@ -227,7 +226,7 @@ def special_components(self, **kwargs) -> None: pass -class LocalOutlierFactorAbnormalDetection(AbnormalDetectionWorkflowBase): +class LocalOutlierFactorAnomalyDetection(AnomalyDetectionWorkflowBase): """The automation workflow of using Local Outlier Factor algorithm to make insightful products.""" name = "Local Outlier Factor" @@ -371,7 +370,7 @@ def __init__( 
n_jobs=self.n_jobs, ) - self.naming = LocalOutlierFactorAbnormalDetection.name + self.naming = LocalOutlierFactorAnomalyDetection.name @classmethod def manual_hyper_parameters(cls) -> Dict: diff --git a/geochemistrypi/data_mining/model/func/algo_abnormaldetection/__init__.py b/geochemistrypi/data_mining/model/func/algo_anomalydetection/__init__.py similarity index 100% rename from geochemistrypi/data_mining/model/func/algo_abnormaldetection/__init__.py rename to geochemistrypi/data_mining/model/func/algo_anomalydetection/__init__.py diff --git a/geochemistrypi/data_mining/model/func/algo_abnormaldetection/_iforest.py b/geochemistrypi/data_mining/model/func/algo_anomalydetection/_iforest.py similarity index 100% rename from geochemistrypi/data_mining/model/func/algo_abnormaldetection/_iforest.py rename to geochemistrypi/data_mining/model/func/algo_anomalydetection/_iforest.py diff --git a/geochemistrypi/data_mining/model/func/algo_abnormaldetection/_local_outlier_factor.py b/geochemistrypi/data_mining/model/func/algo_anomalydetection/_local_outlier_factor.py similarity index 100% rename from geochemistrypi/data_mining/model/func/algo_abnormaldetection/_local_outlier_factor.py rename to geochemistrypi/data_mining/model/func/algo_anomalydetection/_local_outlier_factor.py diff --git a/geochemistrypi/data_mining/process/detect.py b/geochemistrypi/data_mining/process/detect.py index 0c3e45dd..c0424f40 100644 --- a/geochemistrypi/data_mining/process/detect.py +++ b/geochemistrypi/data_mining/process/detect.py @@ -4,16 +4,16 @@ import pandas as pd from ..constants import MLFLOW_ARTIFACT_DATA_PATH -from ..model.detection import AbnormalDetectionWorkflowBase, IsolationForestAbnormalDetection, LocalOutlierFactorAbnormalDetection +from ..model.detection import AnomalyDetectionWorkflowBase, IsolationForestAnomalyDetection, LocalOutlierFactorAnomalyDetection from ._base import ModelSelectionBase -class AbnormalDetectionModelSelection(ModelSelectionBase): - """Simulate the normal 
way of invoking scikit-learn abnormal detection algorithms.""" +class AnomalyDetectionModelSelection(ModelSelectionBase): + """Simulate the normal way of invoking scikit-learn anomaly detection algorithms.""" def __init__(self, model_name: str) -> None: self.model_name = model_name - self.ad_workflow = AbnormalDetectionWorkflowBase() + self.ad_workflow = AnomalyDetectionWorkflowBase() self.transformer_config = {} def activate( @@ -31,8 +31,8 @@ def activate( # Model option if self.model_name == "Isolation Forest": - hyper_parameters = IsolationForestAbnormalDetection.manual_hyper_parameters() - self.ad_workflow = IsolationForestAbnormalDetection( + hyper_parameters = IsolationForestAnomalyDetection.manual_hyper_parameters() + self.ad_workflow = IsolationForestAnomalyDetection( n_estimators=hyper_parameters["n_estimators"], contamination=hyper_parameters["contamination"], max_features=hyper_parameters["max_features"], @@ -41,8 +41,8 @@ def activate( ) if self.model_name == "Local Outlier Factor": - hyper_parameters = LocalOutlierFactorAbnormalDetection.manual_hyper_parameters() - self.ad_workflow = LocalOutlierFactorAbnormalDetection( + hyper_parameters = LocalOutlierFactorAnomalyDetection.manual_hyper_parameters() + self.ad_workflow = LocalOutlierFactorAnomalyDetection( n_neighbors=hyper_parameters["n_neighbors"], contamination=hyper_parameters["contamination"], leaf_size=hyper_parameters["leaf_size"], @@ -55,23 +55,23 @@ def activate( # Use Scikit-learn style API to process input data self.ad_workflow.fit(X) y_predict = self.ad_workflow.predict(X) - X_abnormal_detection, X_normal, X_abnormal = self.ad_workflow._detect_data(X, y_predict) + X_anomaly_detection, X_normal, X_anomaly = self.ad_workflow._detect_data(X, y_predict) self.ad_workflow.data_upload(X=X, y=y, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test) # Save the model hyper-parameters self.ad_workflow.save_hyper_parameters(hyper_parameters, self.model_name, 
os.getenv("GEOPI_OUTPUT_PARAMETERS_PATH")) - # Common components for every abnormal detection algorithm + # Common components for every anomaly detection algorithm self.ad_workflow.common_components() # special components of different algorithms self.ad_workflow.special_components() - # Save abnormal detection result - self.ad_workflow.data_save(X_abnormal_detection, "X Abnormal Detection", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Abnormal Detection Data") + # Save anomaly detection result + self.ad_workflow.data_save(X_anomaly_detection, "X Anomaly Detection", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Anomaly Detection Data") self.ad_workflow.data_save(X_normal, "X Normal", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Normal Data") - self.ad_workflow.data_save(X_abnormal, "X Abnormal", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Abnormal Data") + self.ad_workflow.data_save(X_anomaly, "X Anomaly", os.getenv("GEOPI_OUTPUT_ARTIFACTS_DATA_PATH"), MLFLOW_ARTIFACT_DATA_PATH, "Anomaly Data") # Save the trained model self.ad_workflow.model_save()