Skip to content

Commit

Permalink
Merge pull request #381 from ZJUEarthData/dev/HaibinLai
Browse files Browse the repository at this point in the history
perf: Replacing output file name in PCA Decomposition to enum class
  • Loading branch information
SanyHe authored Sep 12, 2024
2 parents 9bcd22e + 2b91d6d commit a0dad6d
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 24 deletions.
63 changes: 39 additions & 24 deletions geochemistrypi/data_mining/model/decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ..utils.base import clear_output, save_data, save_fig
from ._base import WorkflowBase
from .func.algo_decomposition._common import plot_2d_scatter_diagram, plot_contour, plot_heatmap
from .func.algo_decomposition._enum import DecompositionCommonFunction, PCASpecialFunction
from .func.algo_decomposition._mds import mds_manual_hyper_parameters
from .func.algo_decomposition._pca import biplot, pca_manual_hyper_parameters, triplot
from .func.algo_decomposition._tsne import tsne_manual_hyper_parameters
Expand All @@ -21,7 +22,7 @@
class DecompositionWorkflowBase(WorkflowBase):
"""The base workflow class of decomposition algorithms."""

common_function = ["Model Persistence"] # 'Decomposition Result',
common_function = [func.value for func in DecompositionCommonFunction] # 'Decomposition Result',

def __init__(self) -> None:
super().__init__()
Expand Down Expand Up @@ -117,7 +118,7 @@ class PCADecomposition(DecompositionWorkflowBase):
"""The automation workflow of using PCA algorithm to make insightful products."""

name = "PCA"
special_function = ["Principal Components", "Explained Variance Ratio", "Compositional Bi-plot", "Compositional Tri-plot"]
special_function = [func.value for func in PCASpecialFunction]

def __init__(
self,
Expand Down Expand Up @@ -268,48 +269,57 @@ def manual_hyper_parameters(cls) -> Dict:
clear_output()
return hyper_parameters

def _get_principal_components(self) -> None:
@staticmethod
def _get_principal_components(graph_name: str, n_components: Optional[int], trained_model: object) -> None:
"""Get principal components."""
print("-----* Principal Components *-----")
print(f"-----* {graph_name} *-----")
print("Every column represents one principal component respectively.")
print("Every row represents how much that row feature contributes to each principal component respectively.")
print("The tabular data looks like in format: 'rows x columns = 'features x principal components'.")
pc_name = []
for i in range(self.n_components):
for i in range(n_components):
pc_name.append(f"PC{i+1}")
self.pc_data = pd.DataFrame(self.model.components_.T)
self.pc_data.columns = pc_name
self.pc_data.set_index(DecompositionWorkflowBase.X.columns, inplace=True)
print(self.pc_data)
pc_data = pd.DataFrame(trained_model.components_.T)
pc_data.columns = pc_name
pc_data.set_index(DecompositionWorkflowBase.X.columns, inplace=True)
print(pc_data)

def _get_explained_variance_ratio(self) -> None:
@staticmethod
def _get_explained_variance_ratio(graph_name: str, trained_model: object) -> None:
"""Get explained variance ratio."""
print("-----* Explained Variance Ratio *-----")
print(self.model.explained_variance_ratio_)
print(f"-----* {graph_name} *-----")
print(trained_model.explained_variance_ratio_)

@staticmethod
def _biplot(reduced_data: pd.DataFrame, pc_data: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _biplot(reduced_data: pd.DataFrame, pc_data: pd.DataFrame, graph_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
"""Draw bi-plot."""
print("-----* Compositional Bi-plot *-----")
print(f"-----* {graph_name} *-----")
biplot(reduced_data, pc_data, algorithm_name)
save_fig(f"Compositional Bi-plot - {algorithm_name}", local_path, mlflow_path)
save_data(reduced_data, "Compositional Bi-plot - Reduced Data", local_path, mlflow_path)
save_data(pc_data, "Compositional Bi-plot - PC Data", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(reduced_data, f"{graph_name} - Reduced Data", local_path, mlflow_path)
save_data(pc_data, f"{graph_name} - PC Data", local_path, mlflow_path)

@staticmethod
def _triplot(reduced_data: pd.DataFrame, pc_data: pd.DataFrame, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
def _triplot(reduced_data: pd.DataFrame, pc_data: pd.DataFrame, graph_name: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None:
"""Draw tri-plot."""
print("-----* Compositional Tri-plot *-----")
print(f"-----* {graph_name} *-----")
triplot(reduced_data, pc_data, algorithm_name)
save_fig(f"Compositional Tri-plot - {algorithm_name}", local_path, mlflow_path)
save_data(reduced_data, "Compositional Tri-plot - Reduced Data", local_path, mlflow_path)
save_data(pc_data, "Compositional Tri-plot - PC Data", local_path, mlflow_path)
save_fig(f"{graph_name} - {algorithm_name}", local_path, mlflow_path)
save_data(reduced_data, f"{graph_name} - Reduced Data", local_path, mlflow_path)
save_data(pc_data, f"{graph_name} - PC Data", local_path, mlflow_path)

def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
"""Invoke all special application functions for this algorithms by Scikit-learn framework."""
self._reduced_data2pd(kwargs["reduced_data"], kwargs["components_num"])
self._get_principal_components()
self._get_explained_variance_ratio()
self._get_principal_components(
graph_name=PCASpecialFunction.PRINCIPAL_COMPONENTS.value,
trained_model=self.model,
n_components=self.n_components,
)
self._get_explained_variance_ratio(
graph_name=PCASpecialFunction.EXPLAINED_VARIANCE_RATIO.value,
trained_model=self.model,
)

GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
# Draw graphs when the number of principal components > 3
Expand All @@ -320,6 +330,7 @@ def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
self._biplot(
reduced_data=two_dimen_reduced_data,
pc_data=two_dimen_pc_data,
graph_name=PCASpecialFunction.COMPOSITIONAL_BI_PLOT.value,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
Expand All @@ -331,6 +342,7 @@ def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
self._triplot(
reduced_data=three_dimen_reduced_data,
pc_data=three_dimen_pc_data,
graph_name=PCASpecialFunction.COMPOSITIONAL_TRI_PLOT.value,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
Expand All @@ -342,6 +354,7 @@ def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
self._biplot(
reduced_data=two_dimen_reduced_data,
pc_data=two_dimen_pc_data,
graph_name=PCASpecialFunction.COMPOSITIONAL_BI_PLOT.value,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
Expand All @@ -350,6 +363,7 @@ def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
self._triplot(
reduced_data=self.X_reduced,
pc_data=self.pc_data,
graph_name=PCASpecialFunction.COMPOSITIONAL_TRI_PLOT.value,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
Expand All @@ -358,6 +372,7 @@ def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
self._biplot(
reduced_data=self.X_reduced,
pc_data=self.pc_data,
graph_name=PCASpecialFunction.COMPOSITIONAL_BI_PLOT.value,
algorithm_name=self.naming,
local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH,
mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH,
Expand Down
12 changes: 12 additions & 0 deletions geochemistrypi/data_mining/model/func/algo_decomposition/_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from enum import Enum, unique


@unique
class DecompositionCommonFunction(Enum):
    """Names of the functions shared by every decomposition workflow.

    Each member's value is the human-readable label of one function; the
    decomposition workflow base class collects these values, in definition
    order, into its ``common_function`` list.

    ``@unique`` makes a duplicated label raise ``ValueError`` at import time
    instead of silently turning the second member into an alias, which would
    drop it from iteration and therefore from the generated list.
    """

    MODEL_PERSISTENCE = "Model Persistence"


@unique
class PCASpecialFunction(Enum):
    """Names of the PCA-specific functions and their output artifacts.

    Each member's value is a human-readable label. The PCA workflow collects
    these values, in definition order, into its ``special_function`` list and
    passes individual values as ``graph_name`` to print section headers and to
    build the names of saved figures and data files.

    ``@unique`` makes a duplicated label raise ``ValueError`` at import time
    instead of silently turning the second member into an alias, which would
    drop it from iteration and therefore from the generated list.
    """

    # Labels for the tabular / printed outputs.
    PRINCIPAL_COMPONENTS = "Principal Components"
    EXPLAINED_VARIANCE_RATIO = "Explained Variance Ratio"
    # Labels for the plots; also used as the prefix of the saved file names.
    COMPOSITIONAL_BI_PLOT = "Compositional Bi-plot"
    COMPOSITIONAL_TRI_PLOT = "Compositional Tri-plot"

0 comments on commit a0dad6d

Please sign in to comment.