Merge pull request #360 from ZJUEarthData/web
docs: rewrite the tutorial of framework.
SanyHe authored Jul 7, 2024
2 parents 170da01 + 67df2f5 commit 98ae7c9
Showing 8 changed files with 613 additions and 390 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -59,6 +59,8 @@ Geochemistry π was selected for featuring as an Editor’s Highlight in EOS

Eos Website: https://eos.org/editor-highlights/machine-learning-for-geochemists-who-dont-want-to-code.

![Geochemistry pi news](https://github.com/ZJUEarthData/geochemistrypi/assets/47497750/bdd33a31-824a-492e-adcf-e660da4eaf1d)

## Quick Installation

Our software is well tested on **macOS** and **Windows** systems with **Python 3.9**. Other systems and Python versions are not guaranteed.
955 changes: 579 additions & 376 deletions docs/source/For Developer/Add New Model To Framework.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion geochemistrypi/data_mining/model/_base.py
@@ -30,7 +30,7 @@ class WorkflowBase(metaclass=ABCMeta):

@classmethod
def show_info(cls) -> None:
"""Display how many functions the algorithm will provide."""
"""Display what application functions the algorithm will provide."""
print("*-*" * 2, cls.name, "is running ...", "*-*" * 2)
print("Expected Functionality:")
function = cls.common_function + cls.special_function
25 changes: 15 additions & 10 deletions geochemistrypi/data_mining/model/clustering.py
@@ -17,13 +17,14 @@
from .func.algo_clustering._agglomerative import agglomerative_manual_hyper_parameters
from .func.algo_clustering._common import plot_silhouette_diagram, plot_silhouette_value_diagram, scatter2d, scatter3d, score
from .func.algo_clustering._dbscan import dbscan_manual_hyper_parameters
from .func.algo_clustering._enum import ClusteringCommonFunction, KMeansSpecialFunction
from .func.algo_clustering._kmeans import kmeans_manual_hyper_parameters


class ClusteringWorkflowBase(WorkflowBase):
"""The base workflow class of clustering algorithms."""

common_function = ["Cluster Centers", "Cluster Labels", "Model Persistence"]
common_function = [func.value for func in ClusteringCommonFunction]

def __init__(self):
super().__init__()
@@ -58,12 +59,12 @@ def get_labels(self):
save_data(self.clustering_result, f"{self.naming} Result", GEOPI_OUTPUT_ARTIFACTS_DATA_PATH, MLFLOW_ARTIFACT_DATA_PATH)

@staticmethod
def _score(data: pd.DataFrame, labels: pd.DataFrame, algorithm_name: str, store_path: str) -> None:
def _score(data: pd.DataFrame, labels: pd.DataFrame, func_name: str, algorithm_name: str, store_path: str) -> None:
"""Calculate the score of the model."""
print("-----* Model Score *-----")
print(f"-----* {func_name} *-----")
scores = score(data, labels)
scores_str = json.dumps(scores, indent=4)
save_text(scores_str, f"Model Score - {algorithm_name}", store_path)
save_text(scores_str, f"{func_name} - {algorithm_name}", store_path)
mlflow.log_metrics(scores)

@staticmethod
@@ -112,6 +113,7 @@ def common_components(self) -> None:
self._score(
data=self.X,
labels=self.clustering_result["clustering result"],
func_name=ClusteringCommonFunction.MODEL_SCORE.value,
algorithm_name=self.naming,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)
@@ -190,7 +192,7 @@ class KMeansClustering(ClusteringWorkflowBase):
"""The automation workflow of using KMeans algorithm to make insightful products."""

name = "KMeans"
special_function = ["KMeans Score"]
special_function = [func.value for func in KMeansSpecialFunction]

def __init__(
self,
@@ -304,14 +306,15 @@ def __init__(

self.naming = KMeansClustering.name

def _get_inertia_scores(self, algorithm_name: str, store_path: str) -> None:
@staticmethod
def _get_inertia_scores(func_name: str, algorithm_name: str, trained_model: object, store_path: str) -> None:
"""Get the scores of the clustering result."""
print("-----* KMeans Inertia Scores *-----")
print("Inertia Score: ", self.model.inertia_)
inertia_scores = {"Inertia Score": self.model.inertia_}
print(f"-----* {func_name} *-----")
print(f"{func_name}: ", trained_model.inertia_)
inertia_scores = {f"{func_name}": trained_model.inertia_}
mlflow.log_metrics(inertia_scores)
inertia_scores_str = json.dumps(inertia_scores, indent=4)
save_text(inertia_scores_str, f"KMeans Inertia Scores - {algorithm_name}", store_path)
save_text(inertia_scores_str, f"{func_name} - {algorithm_name}", store_path)

@classmethod
def manual_hyper_parameters(cls) -> Dict:
@@ -325,7 +328,9 @@ def special_components(self, **kwargs: Union[Dict, np.ndarray, int]) -> None:
"""Invoke all special application functions for this algorithms by Scikit-learn framework."""
GEOPI_OUTPUT_METRICS_PATH = os.getenv("GEOPI_OUTPUT_METRICS_PATH")
self._get_inertia_scores(
func_name=KMeansSpecialFunction.INERTIA_SCORE.value,
algorithm_name=self.naming,
trained_model=self.model,
store_path=GEOPI_OUTPUT_METRICS_PATH,
)

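For orientation, here is a minimal standalone sketch of the refactor above (hypothetical stand-in names, no MLflow logging or file saving): `_get_inertia_scores` is now a static method, so the trained model and the display name are passed in explicitly instead of being read from `self`.

```python
import json

import numpy as np
from sklearn.cluster import KMeans


def get_inertia_scores(func_name: str, algorithm_name: str, trained_model: object) -> None:
    """Stand-in for the refactored static method: every input is passed in explicitly."""
    print(f"-----* {func_name} *-----")
    # float() guards against numeric types that json cannot serialize.
    scores = {func_name: float(trained_model.inertia_)}
    print(json.dumps(scores, indent=4))
    # The real method also saves this text (titled f"{func_name} - {algorithm_name}")
    # and logs the metrics to MLflow.
    print(f"(would be saved as: '{func_name} - {algorithm_name}')")


# Because nothing is read from a workflow instance, the scorer can be exercised
# directly against any fitted KMeans model.
X = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.1], [5.2, 4.9]])
model = KMeans(n_clusters=2, n_init=10, random_state=42).fit(X)
get_inertia_scores(func_name="Inertia Score", algorithm_name="KMeans", trained_model=model)
```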
12 changes: 12 additions & 0 deletions geochemistrypi/data_mining/model/func/algo_clustering/_enum.py
@@ -0,0 +1,12 @@
from enum import Enum


class ClusteringCommonFunction(Enum):
CLUSTER_CENTERS = "Cluster Centers"
CLUSTER_LABELS = "Cluster Labels"
MODEL_PERSISTENCE = "Model Persistence"
MODEL_SCORE = "Model Score"


class KMeansSpecialFunction(Enum):
INERTIA_SCORE = "Inertia Score"
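A minimal sketch (self-contained, with a print standing in for the real `save_text`/MLflow calls) of how these enum members are consumed by the clustering workflow above: the same enum value drives the advertised function list, the console banner, and the saved-artifact title, so each name lives in exactly one place.

```python
from enum import Enum


class ClusteringCommonFunction(Enum):
    CLUSTER_CENTERS = "Cluster Centers"
    CLUSTER_LABELS = "Cluster Labels"
    MODEL_PERSISTENCE = "Model Persistence"
    MODEL_SCORE = "Model Score"


# The workflow class derives its advertised functionality from the enum
# instead of a hand-maintained list of strings.
common_function = [func.value for func in ClusteringCommonFunction]
print(common_function)
# ['Cluster Centers', 'Cluster Labels', 'Model Persistence', 'Model Score']


def score_step(func_name: str, algorithm_name: str) -> None:
    # Banner and artifact title reuse the same enum-backed string.
    print(f"-----* {func_name} *-----")
    print(f"would save scores as: '{func_name} - {algorithm_name}'")


score_step(ClusteringCommonFunction.MODEL_SCORE.value, "KMeans")
```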
4 changes: 2 additions & 2 deletions geochemistrypi/data_mining/model/regression.py
@@ -200,7 +200,7 @@ def common_components(self) -> None:
)

@dispatch(bool)
def common_components(self, is_automl: bool) -> None:
def common_components(self, is_automl: bool = False) -> None:
"""Invoke all common application functions for regression algorithms by FLAML framework."""
GEOPI_OUTPUT_METRICS_PATH = os.getenv("GEOPI_OUTPUT_METRICS_PATH")
GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
@@ -904,7 +904,7 @@ def manual_hyper_parameters(cls) -> Dict:
return hyper_parameters

@dispatch()
def special_components(self):
def special_components(self, **kwargs):
"""Invoke all special application functions for this algorithms by Scikit-learn framework."""
GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH = os.getenv("GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH")
self._plot_feature_importance(
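The regression.py hunks lean on the `@dispatch` decorator (the multipledispatch-style overloading already used in the project) to keep two `common_components` variants under one name: the zero-argument overload serves the Scikit-learn path, while the bool overload serves the FLAML/AutoML path. A minimal standalone sketch, assuming the `multipledispatch` package and toy print bodies:

```python
from multipledispatch import dispatch


class RegressionWorkflowSketch:
    """Toy stand-in showing how the overload is selected at call time."""

    @dispatch()
    def common_components(self) -> None:
        # Selected when called with no extra arguments: Scikit-learn path.
        print("running Scikit-learn common components")

    @dispatch(bool)
    def common_components(self, is_automl: bool = False) -> None:
        # Selected when a bool is passed: FLAML/AutoML path.
        print(f"running FLAML common components (is_automl={is_automl})")


workflow = RegressionWorkflowSketch()
workflow.common_components()      # -> Scikit-learn overload
workflow.common_components(True)  # -> FLAML overload
```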
2 changes: 2 additions & 0 deletions geochemistrypi/data_mining/process/classify.py
@@ -41,6 +41,7 @@ def activate(
) -> None:
"""Train by Scikit-learn framework."""

# Load the required data into the base class's attributes
self.clf_workflow.data_upload(X=X, y=y, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

# Customize label
@@ -163,6 +164,7 @@ def activate(
validation_fraction=hyper_parameters["validation_fraction"],
n_iter_no_change=hyper_parameters["n_iter_no_change"],
)
# Display what application functions the algorithm will provide
self.clf_workflow.show_info()

# Use Scikit-learn style API to process input data
1 change: 0 additions & 1 deletion geochemistrypi/data_mining/process/detect.py
@@ -16,7 +16,6 @@ def __init__(self, model_name: str) -> None:
self.ad_workflow = AbnormalDetectionWorkflowBase()
self.transformer_config = {}

# @dispatch(object, object, object, object, object, object)
def activate(
self,
X: pd.DataFrame,
