Skip to content

Commit

Permalink
Merge pull request ZJUEarthData#200 from ZJUEarthData/web
Browse files Browse the repository at this point in the history
perf: abstract LinearWorkflowMixin with formula display for all linear models both in regression and in classification.
  • Loading branch information
SanyHe authored Jul 28, 2023
2 parents 0299e42 + a5cf654 commit 49d5f0b
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 151 deletions.
3 changes: 1 addition & 2 deletions geochemistrypi/data_mining/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@
# the directory where the trained model saved
MODEL_PATH = os.path.join(OUTPUT_PATH, "trained_models")

# the directory where the data is saved within the MLflow run's artifact directory
# the directory where the artifact is saved within the MLflow run's artifact directory
MLFLOW_ARTIFACT_DATA_PATH = "data"

MLFLOW_ARTIFACT_IMAGE_STATISTIC_PATH = os.path.join("image", "statistic")
MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH = os.path.join("image", "model_output")
MLFLOW_ARTIFACT_IMAGE_MAP_PATH = os.path.join("image", "map")
Expand Down
17 changes: 15 additions & 2 deletions geochemistrypi/data_mining/model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ..constants import SECTION
from ..data.data_readiness import limit_num_input, num2option, num_input, show_data_columns
from ..utils.base import save_data, save_fig, save_text
from .func._common_supervised import plot_decision_tree, plot_feature_importance
from .func._common_supervised import plot_decision_tree, plot_feature_importance, show_formula


class WorkflowBase(metaclass=ABCMeta):
Expand Down Expand Up @@ -300,8 +300,21 @@ def _plot_feature_importance(X_train: pd.DataFrame, trained_model: object, image
save_fig(f"Feature Importance - {algorithm_name}", local_path, mlflow_path)
save_data(data, f"Feature Importance - {algorithm_name}", local_path, mlflow_path, True)

def _plot_tree(self, trained_model: object, image_config: dict, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
@staticmethod
def _plot_tree(
    trained_model: object,
    image_config: dict,
    algorithm_name: str,
    local_path: str,
    mlflow_path: str,
) -> None:
    """Draw a single decision tree diagram and hand the figure to ``save_fig``.

    Parameters
    ----------
    trained_model : object
        The fitted model forwarded to ``plot_decision_tree``.
    image_config : dict
        Plot settings forwarded to ``plot_decision_tree``.
    algorithm_name : str
        Name of the algorithm, used in the saved figure's title.
    local_path : str
        Forwarded to ``save_fig`` (presumably the local output directory).
    mlflow_path : str
        Forwarded to ``save_fig`` (presumably the MLflow artifact location).
    """
    print("-----* Single Tree Diagram *-----")
    plot_decision_tree(trained_model, image_config)
    figure_title = f"Tree Diagram - {algorithm_name}"
    save_fig(figure_title, local_path, mlflow_path)


class LinearWorkflowMixin:
    """Mixin providing formula display for workflows built on linear models."""

    @staticmethod
    def _show_formula(
        coef: np.ndarray,
        intercept: np.ndarray,
        features_name: np.ndarray,
        algorithm_name: str,
        local_path: str,
        mlflow_path: str,
    ) -> None:
        """Print the model formula and save it as JSON-formatted text.

        Parameters
        ----------
        coef : np.ndarray
            Coefficients forwarded to ``show_formula``.
        intercept : np.ndarray
            Intercept forwarded to ``show_formula``.
        features_name : np.ndarray
            Feature names forwarded to ``show_formula``.
        algorithm_name : str
            Name of the algorithm, used in the banner and the saved text's name.
        local_path : str
            Forwarded to ``save_text`` (presumably the local output directory).
        mlflow_path : str
            Forwarded to ``save_text`` (presumably the MLflow artifact location).
        """
        print(f"-----* {algorithm_name} Formula *-----")
        formula = show_formula(coef, intercept, features_name)
        # Serialise the formula dictionary with a 4-space indent before saving it.
        save_text(json.dumps(formula, indent=4), f"{algorithm_name} Formula", local_path, mlflow_path)
24 changes: 21 additions & 3 deletions geochemistrypi/data_mining/model/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from ..constants import MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, RAY_FLAML
from ..utils.base import save_data, save_fig, save_text
from ._base import TreeWorkflowMixin, WorkflowBase
from ._base import LinearWorkflowMixin, TreeWorkflowMixin, WorkflowBase
from .func.algo_classification._common import cross_validation, plot_2d_decision_boundary, plot_confusion_matrix, plot_precision_recall, plot_ROC, score
from .func.algo_classification._decision_tree import decision_tree_manual_hyper_parameters
from .func.algo_classification._deep_neural_network import deep_neural_network_manual_hyper_parameters
Expand Down Expand Up @@ -1485,11 +1485,11 @@ def special_components(self, is_automl: bool = False, **kwargs) -> None:
)


class LogisticRegressionClassification(ClassificationWorkflowBase):
class LogisticRegressionClassification(LinearWorkflowMixin, ClassificationWorkflowBase):
"""The automation workflow of using Logistic Regression algorithm to make insightful products."""

name = "Logistic Regression"
special_function = ["Feature Importance"]
special_function = ["Logistic Regression Formula", "Feature Importance Diagram"]

def __init__(
self,
Expand Down Expand Up @@ -1729,6 +1729,15 @@ def _plot_feature_importance(columns_name: np.ndarray, trained_model: any, algor
def special_components(self, **kwargs) -> None:
"""Invoke all special application functions for this algorithms by Scikit-learn framework."""
GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
GEOPI_OUTPUT_ARTIFACTS_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_PATH")
self._show_formula(
coef=self.model.coef_,
intercept=self.model.intercept_,
features_name=LogisticRegressionClassification.X.columns,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_PATH,
mlflow_path="root",
)
self._plot_feature_importance(
columns_name=LogisticRegressionClassification.X.columns,
trained_model=self.model,
Expand All @@ -1741,6 +1750,15 @@ def special_components(self, **kwargs) -> None:
def special_components(self, is_automl: bool = False, **kwargs) -> None:
"""Invoke all special application functions for this algorithms by FLAML framework."""
GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
GEOPI_OUTPUT_ARTIFACTS_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_PATH")
self._show_formula(
coef=self.auto_model.coef_,
intercept=self.auto_model.intercept_,
features_name=LogisticRegressionClassification.X.columns,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_PATH,
mlflow_path="root",
)
self._plot_feature_importance(
columns_name=LogisticRegressionClassification.X.columns,
trained_model=self.auto_model,
Expand Down
61 changes: 59 additions & 2 deletions geochemistrypi/data_mining/model/func/_common_supervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sklearn.tree import plot_tree

# <------
# Used by decision tree including classification and regression
# Used by tree-based models including classification and regression besides XGBoost


def plot_decision_tree(trained_model: object, image_config: Dict) -> None:
Expand Down Expand Up @@ -72,7 +72,7 @@ def plot_decision_tree(trained_model: object, image_config: Dict) -> None:
)


# Used by decision tree including classification and regression
# Used by tree-based models including classification and regression besides XGBoost
# ------>

# <------
Expand Down Expand Up @@ -139,3 +139,60 @@ def plot_feature_importance(columns_name: pd.Index, feature_importance: np.ndarr
)

return importance


# Used by tree-based models, like, random forest, extra-trees, xgboost including classification and regression
# ------>

# <------
# Used by linear models including classification and regression


def show_formula(coef: np.ndarray, intercept: np.ndarray, features_name: np.ndarray) -> Dict:
    """Build, print and return the formula of a linear model.

    Coefficients are rounded to three decimals and terms whose rounded
    coefficient is zero are left out. The intercept is appended unrounded.
    Only the first row of a 2-D ``coef`` and the first element of
    ``intercept`` are used.

    Parameters
    ----------
    coef : array
        Coefficient of the features in the decision function. Either a 2-D
        array of shape (n_rows, n_features) or a 1-D array of shape
        (n_features,).

    intercept : array or scalar
        Independent term in the decision function.

    features_name : np.ndarray
        Name of the features, indexed in the same order as the coefficients.

    Returns
    -------
    formula : dict
        The formula of the linear model, stored under the key ``"y"``.
    """
    # Promote 1-D coefficients / scalar intercepts so both layouts are accepted.
    coef_row = np.around(np.atleast_2d(coef), decimals=3).tolist()[0]
    bias = np.atleast_1d(intercept)[0]

    terms = []
    for i, value in enumerate(coef_row):
        # not append if zero
        if value == 0:
            continue
        # Add a plus symbol only when a positive term follows an earlier term;
        # the first *emitted* term never needs one, even if earlier coefficients were zero.
        sign = "+" if value > 0 and terms else ""
        terms.append(f"{sign}{value}{features_name[i]}")

    bias_text = str(bias)
    # Negative numbers carry their own minus sign; a lone intercept needs no plus.
    if terms and bias >= 0:
        bias_text = "+" + bias_text

    formula = "".join(terms) + bias_text
    print("y =", formula)

    return {"y": formula}


# Used by linear models including classification and regression
# ------>
Original file line number Diff line number Diff line change
Expand Up @@ -141,42 +141,6 @@ def cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.D
return scores_result


# def contour_data(X: pd.DataFrame, trained_model: object) -> Tuple[List[np.ndarray], np.ndarray]:
# """Build up coordinate matrices as the data of contour plot.

# Parameters
# ----------
# X : pd.DataFrame (n_samples, n_components)
# The complete feature data.

# trained_model : object
# The algorithm model class from sklearn is trained.

# Returns
# -------
# matrices : List[np.ndarray]
# Coordinate matrices.

# labels : np.ndarray
# Predicted value by the trained model with coordinate data as input data.
# """

# # build up coordinate matrices from coordinate vectors.
# xi = [np.arange(X.iloc[:, i].min(), X.iloc[:, i].max(), (X.iloc[:, i].max() - X.iloc[:, i].min()) / 50) for i in range(X.shape[1])]
# ndim = len(xi)
# s0 = (1,) * ndim
# matrices = [np.asanyarray(x).reshape(s0[:i] + (-1,) + s0[i + 1 :]) for i, x in enumerate(xi)]
# matrices[0].shape = (1, -1) + s0[2:]
# matrices[1].shape = (-1, 1) + s0[2:]
# matrices = np.broadcast_arrays(*matrices, subok=True)

# # get the labels of the coordinate matrices through the trained model
# input_array = np.column_stack((i.ravel() for i in matrices))
# labels = trained_model.predict(input_array).reshape(matrices[0].shape)

# return matrices, labels


def plot_precision_recall(X_test, y_test, trained_model: object, algorithm_name: str) -> tuple:
"""Plot the precision-recall curve.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from rich import print
Expand Down Expand Up @@ -30,33 +29,6 @@ def linear_regression_manual_hyper_parameters() -> Dict:
return hyper_parameters


def show_formula(coef, intercept, columns_name):
    """Print the formula of a fitted linear regression model.

    Coefficients are rounded to three decimals and terms whose rounded
    coefficient is zero are left out. The intercept is printed unrounded.
    Only the first row of a 2-D ``coef`` and the first element of
    ``intercept`` are used.

    Parameters
    ----------
    coef : array
        Coefficient of the features, either 2-D (n_rows, n_features) or
        1-D (n_features,).

    intercept : array or scalar
        Independent term of the model.

    columns_name : sequence
        Name of the features, indexed in the same order as the coefficients.
    """
    # Promote 1-D coefficients / scalar intercepts so both layouts are accepted.
    rounded = np.around(np.atleast_2d(coef), decimals=3).tolist()[0]
    constant = np.atleast_1d(intercept)[0]

    term = []
    for i, weight in enumerate(rounded):
        # not append if zero
        if weight == 0:
            continue
        # A plus symbol is only needed when a positive term follows another term,
        # so a zero leading coefficient no longer produces "y = +...".
        prefix = "+" if weight > 0 and term else ""
        term.append(f"{prefix}{weight}{columns_name[i]}")

    # formula of linear regression
    if term and constant >= 0:
        formula = "".join(term) + "+" + str(constant)
    else:
        formula = "".join(term) + str(constant)
    print("y =", formula)


def plot_2d_graph(feature_data: pd.DataFrame, target_data: pd.DataFrame = None) -> None:
"""Plot a 2D graph with the data set below.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
from typing import Dict, List
from typing import Dict

import numpy as np
from rich import print

from ....constants import SECTION
Expand All @@ -28,43 +27,3 @@ def polynomial_regression_manual_hyper_parameters() -> Dict:
include_bias = bool(str_input(include_biases, SECTION[2]))
hyper_parameters = {"degree": degree, "interaction_only": interaction_only, "include_bias": include_bias}
return hyper_parameters


def show_formula(coef: np.ndarray, intercept: np.ndarray, features_name: List) -> None:
    """Show the formula of polynomial regression.

    Coefficients are rounded to three decimals and terms whose rounded
    coefficient is zero are left out. The intercept is printed unrounded.
    Only the first row of a 2-D ``coef`` and the first element of
    ``intercept`` are used.

    Parameters
    ----------
    coef : array
        Coefficient of the features in the decision function, either 2-D
        (n_rows, n_features) or 1-D (n_features,).

    intercept : array or scalar
        Independent term in decision function.

    features_name : list
        Name of the features, indexed in the same order as the coefficients.
    """
    # Promote 1-D coefficients / scalar intercepts so both layouts are accepted.
    first_row = np.around(np.atleast_2d(coef), decimals=3).tolist()[0]
    offset = np.atleast_1d(intercept)[0]

    pieces = []
    for position, coefficient in enumerate(first_row):
        # not append if zero
        if coefficient == 0:
            continue
        text = str(coefficient) + str(features_name[position])
        # Join with a plus symbol only when a positive term follows an earlier one;
        # negative coefficients already carry their minus sign.
        if coefficient > 0 and pieces:
            text = "+" + text
        pieces.append(text)

    # formula of polynomial regression
    joiner = "+" if pieces and offset >= 0 else ""
    formula = "".join(pieces) + joiner + str(offset)
    print("y =", formula)
Loading

0 comments on commit 49d5f0b

Please sign in to comment.