Commit
DOC: add documentation to WFacer main modules.
qchempku2017 committed Sep 18, 2023
1 parent d678912 commit 985f402
Showing 18 changed files with 474 additions and 359 deletions.
13 changes: 8 additions & 5 deletions WFacer/convergence.py
@@ -9,16 +9,16 @@
def compare_min_energy_structures_by_composition(min_e1, min_e2, matcher=None):
"""Compare minimum energy and structure by composition for convergence check.
We will only compare keys that exist in both older and newer iterations.
If one composition appears in the older one but not the newer one, we will not
claim convergence.
Args:
min_e1 (defaultdict):
Minimum energies and structures from an earlier iteration.
min_e2 (defaultdict):
Minimum energies and structures from a later iteration.
See docs in WFacer.wrangling.
See documentation of :mod:`WFacer.wrangling`.
matcher (StructureMatcher): optional
A StructureMatcher used to compare structures.
wrangler.cluster_subspace._site_matcher is recommended.
@@ -56,7 +56,10 @@ def compare_fitted_coefs(cluster_subspace, coefs_prev, coefs_now):
Returns:
float:
|| ECI' - ECI ||_1 / ||ECI||_1.
:math:`|| J' - J ||_1 / ||J||_1`,
where :math:`J` represents the coefficients from the last
iteration and :math:`J'` represents coefficients from the
current iteration.
"""
# Get ECIs from coefficients.
eci_prev = ClusterExpansion(cluster_subspace, coefficients=coefs_prev).eci
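To make the returned quantity concrete, here is a minimal sketch of the convergence metric described above. It reuses the smol ClusterExpansion call visible in this hunk; the standalone helper and its name are illustrative, not part of WFacer's API.

import numpy as np
from smol.cofe import ClusterExpansion

def l1_relative_eci_change(cluster_subspace, coefs_prev, coefs_now):
    # Convert raw coefficients to ECIs, then report ||J' - J||_1 / ||J||_1,
    # where J are the previous-iteration ECIs and J' the current ones.
    eci_prev = ClusterExpansion(cluster_subspace, coefficients=coefs_prev).eci
    eci_now = ClusterExpansion(cluster_subspace, coefficients=coefs_now).eci
    return np.sum(np.abs(eci_now - eci_prev)) / np.sum(np.abs(eci_prev))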
46 changes: 24 additions & 22 deletions WFacer/enumeration.py
@@ -1,10 +1,10 @@
"""This module implements a StructureEnumerator class for CE sampling.
Algorithm based on:
The algorithm is based on the work of
`A. Seko et al <https://doi.org/10.1103/PhysRevB.80.165122>`_.
Ground state structures will also be added to the structure pool, but
they are not added here. They will be added in the convergence checker
module.
Ground state structures will also be included in the structure pool if not
included yet.
"""

__author__ = "Fengyu Xie"
@@ -26,7 +26,7 @@
from .utils.supercells import get_three_factors, is_duplicate_sc


# TODO: in the future, may employ mcsqs type algos.
# TODO: in the future, may employ mcsqs-like algos.
def enumerate_matrices(
objective_sc_size,
cluster_subspace,
@@ -44,12 +44,12 @@ def enumerate_matrices(
objective_sc_size(int):
Objective supercell size in the number of primitive cells.
Preferably a multiple of det(conv_mat).
cluster_subspace(smol.ClusterSubspace):
cluster_subspace(ClusterSubspace):
The cluster subspace. cluster_subspace.structure must
be pre-processed such that it is the true primitive cell
under its space group symmetry.
Note: The cluster_subspace.structure must be reduced to a
primitive cell!
.. note:: The structure of :class:`ClusterSubspace` must be reduced to a
primitive cell!
supercell_from_conventional(bool): optional
Whether to enumerate supercell matrices in the form M@T, where
M is an integer matrix, T is the primitive to conventional cell
@@ -61,11 +61,12 @@ def enumerate_matrices(
min_sc_angle(float):
Minimum allowed angle of the supercell lattice. By default, set
to 30, to prevent over-skewing.
kwargs:
keyword arguments to pass into SpaceGroupAnalyzer.
**kwargs:
keyword arguments to pass into :class:`SpaceGroupAnalyzer`.
Returns:
List of 2D lists.
List of 2D lists:
Enumerated super-cell matrices.
"""
if not supercell_from_conventional:
conv_mat = np.eye(3, dtype=int)
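A hedged usage sketch of enumerate_matrices with the parameters documented above; the primitive-cell file, the cluster cutoffs, and the target supercell size are illustrative assumptions rather than WFacer defaults.

from pymatgen.core import Structure
from smol.cofe import ClusterSubspace

from WFacer.enumeration import enumerate_matrices

prim = Structure.from_file("prim.cif")  # hypothetical; must already be a primitive cell
subspace = ClusterSubspace.from_cutoffs(prim, cutoffs={2: 7.0, 3: 4.0})

# Enumerate supercell matrices of roughly 32 primitive cells, built as M @ T on
# top of the conventional cell and rejecting overly skewed lattices.
sc_matrices = enumerate_matrices(
    objective_sc_size=32,
    cluster_subspace=subspace,
    supercell_from_conventional=True,
    min_sc_angle=30,
)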
@@ -184,7 +185,8 @@ def truncate_cluster_subspace(cluster_subspace, sc_matrices):
Enumerated super-cell matrices.
Returns:
ClusterSubspace: truncated subspace without aliased orbits.
ClusterSubspace:
Truncated subspace without aliased orbits.
"""
alias = []
for m in sc_matrices:
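Continuing the sketch above, the truncation documented here would typically be applied to the same subspace and matrices (a one-line illustration, not WFacer's internal call sequence):

from WFacer.enumeration import truncate_cluster_subspace

# Remove orbits that become aliased under the enumerated supercell matrices,
# so the fit stays well defined on all of them.
truncated_subspace = truncate_cluster_subspace(subspace, sc_matrices)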
@@ -221,30 +223,30 @@ def enumerate_compositions_as_counts(
):
"""Enumerate compositions in a given supercell size.
Results will be returned in "counts" format
(see smol.moca.CompositionSpace).
Results will be returned in the "counts" format;
see the documentation of :mod:`smol.moca.composition`.
Args:
sc_size(int):
The super-cell size in the number of prim cells.
comp_space(CompositionSpace): optional
Composition space in a primitive cell. If not given,
arguments "bits" and "sublattice_sizes" must be given.
arguments **bits** and **sublattice_sizes** must be given.
bits(List[List[Species|DummySpecies|Element|Vacancy]]):
Allowed species on each sub-lattice.
sublattice_sizes(List[int]):
Number of sites in each sub-lattice in a prim cell.
The number of sites in each sub-lattice in a prim cell.
comp_enumeration_step(int):
Step in returning the enumerated compositions.
If step = N > 1, on each dimension of the composition space,
we will only yield one composition every N compositions.
Default to 1.
kwargs:
Other keyword arguments to initialize CompositionSpace.
Other keyword arguments used to initialize a :class:`CompositionSpace`.
Returns:
Enumerated possible compositions in "counts" format, not normalized:
2D np.ndarray[int]
2D np.ndarray[int]:
Enumerated possible compositions in "counts" format (**NOT** normalized).
"""
if comp_space is None:
if bits is None or sublattice_sizes is None:
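A hedged sketch of enumerate_compositions_as_counts using the documented **bits**/**sublattice_sizes** route; the chemical system, supercell size, and step are illustrative, and the Species/Vacancy imports assume the usual pymatgen and smol locations.

from pymatgen.core import Species
from smol.cofe.space.domain import Vacancy

from WFacer.enumeration import enumerate_compositions_as_counts

# One cation sub-lattice (Li+/Mn3+/vacancy) and one anion sub-lattice (O2-),
# each with a single site per primitive cell.
bits = [[Species("Li", 1), Species("Mn", 3), Vacancy()], [Species("O", -2)]]
counts = enumerate_compositions_as_counts(
    sc_size=32,
    bits=bits,
    sublattice_sizes=[1, 1],
    comp_enumeration_step=2,
)
# Each row of `counts` is an un-normalized vector of species counts per composition.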
@@ -271,7 +273,7 @@ def get_num_structs_to_sample(
def get_num_structs_to_sample(
all_counts, num_structs_select, scale=3, min_num_per_composition=2
):
"""Get number of structures to sample in each McSampleGenerator.
"""Get number of structures to sample in each :class:`McSampleGenerator`.
Args:
all_counts(ArrayLike):
@@ -428,8 +430,8 @@ def generate_training_structures(
Note that option "structure" might be significantly slower since
it has to attempt reducing every structure to its primitive cell
before matching. It should be used with caution.
kwargs:
Keyword arguments for utils.selection.select_initial_rows.
**kwargs:
Keyword arguments for :func:`WFacer.utils.selection.select_initial_rows`.
Returns:
list[Structure], list[3*3 list[list[int]]], list[list[float]]:
33 changes: 18 additions & 15 deletions WFacer/fit.py
@@ -27,30 +27,32 @@ def fit_ecis_from_wrangler(
):
"""Fit ECIs from a fully processed wrangler.
No weights will be used.
.. note:: Currently, this function does not support adjusting sample weights.
Args:
wrangler(CeDataWrangler):
A CeDataWrangler storing all training structures.
A :class:`CeDataWrangler` to store all training structures.
estimator_name(str):
The name of the estimator, following the rules in
smol.utils.class_name_from_str.
:mod:`smol.utils.class_name_from_str`.
optimizer_name(str):
Name of hyperparameter optimizer. Currently, only supports GridSearch and
LineSearch.
The name of the hyperparameter optimizer. Currently, only
:class:`GridSearch` and :class:`LineSearch` from :mod:`sparselm` are supported.
param_grid(dict|list[tuple]):
Parameter grid to initialize the optimizer. See docs of
sparselm.model_selection.
Parameter grid to initialize the optimizer. See documentation of
:mod:`sparselm.model_selection`.
use_hierarchy(bool): optional
Whether to use cluster hierarchy constraints when available. Default to
true.
center_point_external(bool): optional
Whether to fit the point and external terms with linear regression
first, then fit the residue with regressor. Default to None, which means
when the feature matrix is full rank, will not use centering, otherwise
centers. If set to True, will force centering, but use at your own risk
because this may cause very large CV. If set to False, will never use
centering.
Whether to perform the centering operation: fit the point and external
terms with linear regression first, then fit the residuals with the
specified regressor. Defaults to None, which means centering is skipped
when the feature matrix has full rank and performed otherwise.
If set to True, centering is always performed, but use it at your own risk
because it may cause a very large CV when the feature matrix has full rank.
If set to False, centering is never performed.
filter_unique_correlations(bool):
If the wrangler has structures with duplicated correlation vectors,
whether to fit using only the one with the lowest energy.
@@ -59,8 +61,9 @@
Other keyword arguments to initialize an estimator.
optimizer_kwargs(dict): optional
Other keyword arguments to initialize an optimizer.
kwargs:
Keyword arguments used by estimator._fit. For example, solver arguments.
**kwargs:
Keyword arguments used by the estimator._fit method.
For example, solver specifications.
Returns:
Estimator, 1D np.ndarray, float, float, float, 1D np.ndarray:
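A hedged usage sketch of fit_ecis_from_wrangler. The six-value return unpacking mirrors the call shown later in jobs.py; the estimator/optimizer name strings and the parameter grid are illustrative assumptions.

from WFacer.fit import fit_ecis_from_wrangler

# `wrangler` is a fully processed CeDataWrangler (see WFacer.wrangling).
estimator, coefs, cv, cv_std, rmse, params = fit_ecis_from_wrangler(
    wrangler,
    estimator_name="lasso",        # resolved through smol's class_name_from_str
    optimizer_name="grid-search",  # only GridSearch and LineSearch are supported
    param_grid={"alpha": [1e-5, 1e-4, 1e-3, 1e-2]},
    use_hierarchy=True,
    filter_unique_correlations=True,
)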
37 changes: 21 additions & 16 deletions WFacer/jobs.py
@@ -1,4 +1,4 @@
"""Unitary jobs used by Maker."""
"""Unitary jobs used by an atomate2 workflow."""
import logging
from copy import deepcopy
from warnings import warn
@@ -110,7 +110,7 @@ def _enumerate_structures(


def _get_vasp_makers(options):
"""Get required vasp makers."""
"""Get the required VASP makers."""
relax_gen_kwargs = options["relax_generator_kwargs"]
relax_generator = RelaxSetGenerator(**relax_gen_kwargs)
relax_maker_kwargs = options["relax_maker_kwargs"]
@@ -140,7 +140,7 @@ def _get_vasp_makers(options):


def _check_flow_convergence(taskdoc):
"""Check vasp convergence for a single structure."""
"""Check VASP convergence for a single structure."""
try:
status = taskdoc.calcs_reversed[0].has_vasp_completed
if status == TaskState.FAILED:
@@ -320,7 +320,7 @@ def get_structure_calculation_flows(enum_output, last_ce_document):
def calculate_structures_job(enum_output, last_ce_document):
"""Calculate newly enumerated structures.
Note: it will replace itself with workflows to run for
.. note:: This job will replace itself with the calculation jobs to run for
each structure.
Args:
enum_output(dict):
@@ -330,7 +330,7 @@ def calculate_structures_job(enum_output, last_ce_document):
Returns:
list[TaskDoc]:
Results of VASP calculations as TaskDoc.
Results of VASP calculations, in the form of :class:`emmet.core.TaskDoc`.
"""
project_name = last_ce_document.project_name
iter_id = last_ce_document.last_iter_id + 1
@@ -344,23 +344,24 @@ def parse_calculations(taskdocs, enum_output, last_ce_document):


def parse_calculations(taskdocs, enum_output, last_ce_document):
"""Parse finished calculations into CeDataWrangler.
"""Parse finished calculations into :class:`CeDataWrangler`.
Gives CeDataEntry with full decoration. Each computed structure
will be re-decorated and re-inserted every iteration.
Args:
taskdocs(list[TaskDoc]):
Task documents generated by vasp computations of
added structures.
Task documents generated as results of VASP computations.
enum_output(dict):
Output by enumeration job.
last_ce_document(CeOutputsDocument):
The last cluster expansion outputs document.
Returns:
dict
Updated wrangler, all entries before decoration,
and all computed properties.
dict:
A dictionary containing the updated wrangler with successfully decorated
and mapped calculations, the computed structure entries of all structures
before decoration, and the computed properties for all structures.
"""
options = last_ce_document.ce_options
prim_specs = last_ce_document.prim_specs
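A minimal sketch of consuming the parsed output. Only the "wrangler" key is confirmed by the update_document call later in this diff; any other key names should be checked against the source.

parse_output = parse_calculations(taskdocs, enum_output, last_ce_document)
wrangler = parse_output["wrangler"]  # CeDataWrangler with decorated, mapped entries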
@@ -478,7 +479,8 @@ def fit_calculations(parse_output, last_ce_document):
Returns:
dict:
Dictionary containing fitted CE information.
A dictionary containing the CE coefficients, the cross-validation error,
the RMSE, and the optimal hyperparameters.
"""
options = last_ce_document.ce_options
_, coefs, cv, cv_std, rmse, params = fit_ecis_from_wrangler(
@@ -514,7 +516,8 @@ def update_document(enum_output, parse_output, fit_output, last_ce_document):
Returns:
CeOutputsDocument:
The updated document.
The updated :class:`CeOutputsDocument` upon finishing the current
iteration.
"""
ce_document = deepcopy(last_ce_document)
ce_document.data_wrangler = deepcopy(parse_output["wrangler"])
@@ -554,8 +557,9 @@ def update_document(enum_output, parse_output, fit_output, last_ce_document):
def initialize_document(prim, project_name="ace-work", options=None):
"""Initialize an empty cluster expansion document.
In this job, a cluster subspace will be created, super-cells
and compositions will also be enumerated.
In this job, a :class:`ClusterSubspace` instance will be created and trimmed of
duplicates; the supercell matrices and compositions to be used for structure
generation will also be enumerated.
Args:
prim(structure):
@@ -569,7 +573,8 @@ def initialize_document(prim, project_name="ace-work", options=None):
options(dict): optional
A dictionary including all options to set up the automatic
workflow.
For available options, see docs in preprocessing.py.
For available options, see documentation of
:mod:`WFacer.preprocessing`.
"""
# Pre-process options.
options = options or {}
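A hedged sketch of the documented call signature of initialize_document; the structure file is a placeholder, and whether the call returns the document directly or a jobflow job depends on decoration not shown in this hunk.

from pymatgen.core import Structure

from WFacer.jobs import initialize_document

prim = Structure.from_file("prim.cif")  # hypothetical primitive-cell file
# With options=None, the defaults documented in WFacer.preprocessing are used.
ce_document = initialize_document(prim, project_name="ace-work", options=None)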
8 changes: 4 additions & 4 deletions WFacer/maker.py
@@ -1,4 +1,4 @@
"""Automatic jobflow maker."""
"""Automatic cluster expansion workflow maker."""
from dataclasses import dataclass, field
from warnings import warn

@@ -16,7 +16,7 @@

@job
def ce_step_trigger(last_ce_document):
"""Trigger a step in CE iteration.
"""Triggers a CE iteration.
Args:
last_ce_document(CeOutputsDocument):
@@ -25,7 +25,7 @@ def ce_step_trigger(last_ce_document):
Returns:
Response:
Either a CeOutputsDocument if converged, or a
A :class:`CeOutputsDocument` if converged, or a
response to replace with another step.
"""
iter_id = last_ce_document.last_iter_id + 1
@@ -124,7 +124,7 @@ def make(self, prim, last_document=None, add_num_iterations=None):
Returns:
Flow:
The iterative cluster expansion workflow.
The iterative automatic cluster expansion workflow.
"""
if last_document is None:
initialize = initialize_document_job(
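A hedged end-to-end sketch of driving the workflow; the maker class name is not visible in this diff, so CeAutoMaker is only a placeholder for the dataclass that defines make(), and run_locally is jobflow's local runner.

from jobflow import run_locally
from pymatgen.core import Structure

from WFacer.maker import CeAutoMaker  # placeholder name; check WFacer.maker for the real class

prim = Structure.from_file("prim.cif")  # hypothetical primitive cell
maker = CeAutoMaker()
flow = maker.make(prim)  # the iterative automatic cluster expansion Flow
responses = run_locally(flow)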