Commit
DOC: add documentation to WFacer main modules.
qchempku2017 committed Sep 18, 2023
1 parent d678912 commit 985f402
Showing 18 changed files with 474 additions and 359 deletions.
13 changes: 8 additions & 5 deletions WFacer/convergence.py
@@ -9,16 +9,16 @@
def compare_min_energy_structures_by_composition(min_e1, min_e2, matcher=None):
"""Compare minimum energy and structure by composition for convergence check.
We will only compare keys that exist in both older and newer iterations.
If one composition appears in the older one but not the newer one, we will not
claim convergence.
Args:
min_e1 (defaultdict):
Minimum energies and structures from an earlier iteration.
min_e2 (defaultdict):
Minimum energies and structures from a later iteration.
See docs in WFacer.wrangling.
See documentation of :mod:`WFacer.wrangling`.
matcher (StructureMatcher): optional
A StructureMatcher used to compare structures.
wrangler.cluster_subspace._site_matcher is recommended.
@@ -56,7 +56,10 @@ def compare_fitted_coefs(cluster_subspace, coefs_prev, coefs_now):
Returns:
float:
|| ECI' - ECI ||_1 / ||ECI||_1.
:math:`|| J' - J ||_1 / ||J||_1`,
where :math:`J` represents the coefficients from the last
iteration and :math:`J'` represents coefficients from the
current iteration.
"""
# Get ECIs from coefficients.
eci_prev = ClusterExpansion(cluster_subspace, coefficients=coefs_prev).eci
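To make the returned quantity concrete, here is a minimal sketch of the convergence metric described above. It reuses the smol ClusterExpansion call visible in this hunk; the standalone helper and its name are illustrative, not part of WFacer's API.

import numpy as np
from smol.cofe import ClusterExpansion

def l1_relative_eci_change(cluster_subspace, coefs_prev, coefs_now):
    # Convert raw coefficients to ECIs, then report ||J' - J||_1 / ||J||_1,
    # where J are the previous-iteration ECIs and J' the current ones.
    eci_prev = ClusterExpansion(cluster_subspace, coefficients=coefs_prev).eci
    eci_now = ClusterExpansion(cluster_subspace, coefficients=coefs_now).eci
    return np.sum(np.abs(eci_now - eci_prev)) / np.sum(np.abs(eci_prev))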
46 changes: 24 additions & 22 deletions WFacer/enumeration.py
@@ -1,10 +1,10 @@
"""This module implements a StructureEnumerator class for CE sampling.
Algorithm based on:
The algorithm is based on the work of
`A. Seko et al <https://doi.org/10.1103/PhysRevB.80.165122>`_.
Ground state structures will also be added to the structure pool, but
they are not added here. They will be added in the convergence checker
module.
Ground state structures will also be included in the structure pool if not
included yet.
"""

__author__ = "Fengyu Xie"
@@ -26,7 +26,7 @@
from .utils.supercells import get_three_factors, is_duplicate_sc


# TODO: in the future, may employ mcsqs type algos.
# TODO: in the future, may employ mcsqs-like algos.
def enumerate_matrices(
objective_sc_size,
cluster_subspace,
@@ -44,12 +44,12 @@ def enumerate_matrices(
objective_sc_size(int):
Objective supercell size in the number of primitive cells.
Preferably a multiple of det(conv_mat).
cluster_subspace(smol.ClusterSubspace):
cluster_subspace(ClusterSubspace):
The cluster subspace. cluster_subspace.structure must
be pre-processed such that it is the true primitive cell
under its space group symmetry.
Note: The cluster_subspace.structure must be reduced to a
primitive cell!
.. note:: The structure of :class:`ClusterSubspace` must be reduced to a
primitive cell!
supercell_from_conventional(bool): optional
Whether to enumerate supercell matrices in the form M@T, where
M is an integer matrix, T is the primitive to conventional cell
@@ -61,11 +61,12 @@ def enumerate_matrices(
min_sc_angle(float):
Minimum allowed angle of the supercell lattice. By default, set
to 30, to prevent over-skewing.
kwargs:
keyword arguments to pass into SpaceGroupAnalyzer.
**kwargs:
keyword arguments to pass into :class:`SpaceGroupAnalyzer`.
Returns:
List of 2D lists.
List of 2D lists:
Enumerated super-cell matrices.
"""
if not supercell_from_conventional:
conv_mat = np.eye(3, dtype=int)
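A hedged usage sketch of enumerate_matrices with the parameters documented above; the primitive-cell file, the cluster cutoffs, and the target supercell size are illustrative assumptions rather than WFacer defaults.

from pymatgen.core import Structure
from smol.cofe import ClusterSubspace

from WFacer.enumeration import enumerate_matrices

prim = Structure.from_file("prim.cif")  # hypothetical; must already be a primitive cell
subspace = ClusterSubspace.from_cutoffs(prim, cutoffs={2: 7.0, 3: 4.0})

# Enumerate supercell matrices of roughly 32 primitive cells, built as M @ T on
# top of the conventional cell and rejecting overly skewed lattices.
sc_matrices = enumerate_matrices(
    objective_sc_size=32,
    cluster_subspace=subspace,
    supercell_from_conventional=True,
    min_sc_angle=30,
)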
@@ -184,7 +185,8 @@ def truncate_cluster_subspace(cluster_subspace, sc_matrices):
Enumerated super-cell matrices.
Returns:
ClusterSubspace: truncated subspace without aliased orbits.
ClusterSubspace:
Truncated subspace without aliased orbits.
"""
alias = []
for m in sc_matrices:
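Continuing the sketch above, the truncation documented here would typically be applied to the same subspace and matrices (a one-line illustration, not WFacer's internal call sequence):

from WFacer.enumeration import truncate_cluster_subspace

# Remove orbits that become aliased under the enumerated supercell matrices,
# so the fit stays well defined on all of them.
truncated_subspace = truncate_cluster_subspace(subspace, sc_matrices)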
@@ -221,30 +223,30 @@ def enumerate_compositions_as_counts(
):
"""Enumerate compositions in a given supercell size.
Results will be returned in "counts" format
(see smol.moca.CompositionSpace).
Results will be returned in the "counts" format;
see the documentation of :mod:`smol.moca.composition`.
Args:
sc_size(int):
The super-cell size in the number of prim cells.
comp_space(CompositionSpace): optional
Composition space in a primitive cell. If not given,
arguments "bits" and "sublattice_sizes" must be given.
arguments **bits** and **sublattice_sizes** must be given.
bits(List[List[Species|DummySpecies|Element|Vacancy]]):
Allowed species on each sub-lattice.
sublattice_sizes(List[int]):
Number of sites in each sub-lattice in a prim cell.
The number of sites in each sub-lattice in a prim cell.
comp_enumeration_step(int):
Step in returning the enumerated compositions.
If step = N > 1, on each dimension of the composition space,
we will only yield one composition every N compositions.
Default to 1.
kwargs:
Other keyword arguments to initialize CompositionSpace.
Other keyword arguments used to initialize a :class:`CompositionSpace`.
Returns:
Enumerated possible compositions in "counts" format, not normalized:
2D np.ndarray[int]
2D np.ndarray[int]:
Enumerated possible compositions in "counts" format (**NOT** normalized).
"""
if comp_space is None:
if bits is None or sublattice_sizes is None:
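A hedged sketch of enumerate_compositions_as_counts using the documented **bits**/**sublattice_sizes** route; the chemical system, supercell size, and step are illustrative, and the Species/Vacancy imports assume the usual pymatgen and smol locations.

from pymatgen.core import Species
from smol.cofe.space.domain import Vacancy

from WFacer.enumeration import enumerate_compositions_as_counts

# One cation sub-lattice (Li+/Mn3+/vacancy) and one anion sub-lattice (O2-),
# each with a single site per primitive cell.
bits = [[Species("Li", 1), Species("Mn", 3), Vacancy()], [Species("O", -2)]]
counts = enumerate_compositions_as_counts(
    sc_size=32,
    bits=bits,
    sublattice_sizes=[1, 1],
    comp_enumeration_step=2,
)
# Each row of `counts` is an un-normalized vector of species counts per composition.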
@@ -271,7 +273,7 @@ def get_num_structs_to_sample(
def get_num_structs_to_sample(
all_counts, num_structs_select, scale=3, min_num_per_composition=2
):
"""Get number of structures to sample in each McSampleGenerator.
"""Get number of structures to sample in each :class:`McSampleGenerator`.
Args:
all_counts(ArrayLike):
@@ -428,8 +430,8 @@ def generate_training_structures(
Note that option "structure" might be significantly slower since
it has to attempt reducing every structure to its primitive cell
before matching. It should be used with caution.
kwargs:
Keyword arguments for utils.selection.select_initial_rows.
**kwargs:
Keyword arguments for :func:`WFacer.utils.selection.select_initial_rows`.
Returns:
list[Structure], list[3*3 list[list[int]]], list[list[float]]:
33 changes: 18 additions & 15 deletions WFacer/fit.py
@@ -27,30 +27,32 @@ def fit_ecis_from_wrangler(
):
"""Fit ECIs from a fully processed wrangler.
No weights will be used.
.. note:: Currently, this function does not support adjusting sample weights.
Args:
wrangler(CeDataWrangler):
A CeDataWrangler storing all training structures.
A :class:`CeDataWrangler` to store all training structures.
estimator_name(str):
The name of the estimator, following the rules in
smol.utils.class_name_from_str.
:mod:`smol.utils.class_name_from_str`.
optimizer_name(str):
Name of hyperparameter optimizer. Currently, only supports GridSearch and
LineSearch.
The name of the hyperparameter optimizer. Currently, only
:class:`GridSearch` and :class:`LineSearch` from :mod:`sparselm` are supported.
param_grid(dict|list[tuple]):
Parameter grid to initialize the optimizer. See docs of
sparselm.model_selection.
Parameter grid to initialize the optimizer. See documentation of
:mod:`sparselm.model_selection`.
use_hierarchy(bool): optional
Whether to use cluster hierarchy constraints when available. Default to
true.
center_point_external(bool): optional
Whether to fit the point and external terms with linear regression
first, then fit the residue with regressor. Default to None, which means
when the feature matrix is full rank, will not use centering, otherwise
centers. If set to True, will force centering, but use at your own risk
because this may cause very large CV. If set to False, will never use
centering.
Whether to perform the centering operation: fit the point and external
terms with linear regression first, then fit the residuals with the
specified regressor. Defaults to None, which means centering is skipped
when the feature matrix has full rank and performed otherwise.
If set to True, centering is always performed, but use it at your own risk
because it may cause a very large CV when the feature matrix has full rank.
If set to False, centering is never performed.
filter_unique_correlations(bool):
If the wrangler has structures with duplicated correlation vectors,
whether to fit using only the one with the lowest energy.
@@ -59,8 +61,9 @@
Other keyword arguments to initialize an estimator.
optimizer_kwargs(dict): optional
Other keyword arguments to initialize an optimizer.
kwargs:
Keyword arguments used by estimator._fit. For example, solver arguments.
**kwargs:
Keyword arguments used by the estimator._fit method.
For example, solver specifications.
Returns:
Estimator, 1D np.ndarray, float, float, float, 1D np.ndarray:
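A hedged usage sketch of fit_ecis_from_wrangler. The six-value return unpacking mirrors the call shown later in jobs.py; the estimator/optimizer name strings and the parameter grid are illustrative assumptions.

from WFacer.fit import fit_ecis_from_wrangler

# `wrangler` is a fully processed CeDataWrangler (see WFacer.wrangling).
estimator, coefs, cv, cv_std, rmse, params = fit_ecis_from_wrangler(
    wrangler,
    estimator_name="lasso",        # resolved through smol's class_name_from_str
    optimizer_name="grid-search",  # only GridSearch and LineSearch are supported
    param_grid={"alpha": [1e-5, 1e-4, 1e-3, 1e-2]},
    use_hierarchy=True,
    filter_unique_correlations=True,
)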
37 changes: 21 additions & 16 deletions WFacer/jobs.py
@@ -1,4 +1,4 @@
"""Unitary jobs used by Maker."""
"""Unitary jobs used by an atomate2 workflow."""
import logging
from copy import deepcopy
from warnings import warn
@@ -110,7 +110,7 @@ def _enumerate_structures(


def _get_vasp_makers(options):
"""Get required vasp makers."""
"""Get the required VASP makers."""
relax_gen_kwargs = options["relax_generator_kwargs"]
relax_generator = RelaxSetGenerator(**relax_gen_kwargs)
relax_maker_kwargs = options["relax_maker_kwargs"]
@@ -140,7 +140,7 @@ def _get_vasp_makers(options):


def _check_flow_convergence(taskdoc):
"""Check vasp convergence for a single structure."""
"""Check VASP convergence for a single structure."""
try:
status = taskdoc.calcs_reversed[0].has_vasp_completed
if status == TaskState.FAILED:
@@ -320,7 +320,7 @@ def get_structure_calculation_flows(enum_output, last_ce_document):
def calculate_structures_job(enum_output, last_ce_document):
"""Calculate newly enumerated structures.
Note: it will replace itself with workflows to run for
.. note:: This job will replace itself with the calculation jobs to run for
each structure.
Args:
enum_output(dict):
@@ -330,7 +330,7 @@ def calculate_structures_job(enum_output, last_ce_document):
Returns:
list[TaskDoc]:
Results of VASP calculations as TaskDoc.
Results of VASP calculations, in the form of :class:`emmet.core.TaskDoc`.
"""
project_name = last_ce_document.project_name
iter_id = last_ce_document.last_iter_id + 1
@@ -344,23 +344,24 @@ def parse_calculations(taskdocs, enum_output, last_ce_document):


def parse_calculations(taskdocs, enum_output, last_ce_document):
"""Parse finished calculations into CeDataWrangler.
"""Parse finished calculations into :class:`CeDataWrangler`.
Gives CeDataEntry with full decoration. Each computed structure
will be re-decorated and re-inserted every iteration.
Args:
taskdocs(list[TaskDoc]):
Task documents generated by vasp computations of
added structures.
Task documents generated as results of VASP computations.
enum_output(dict):
Output by enumeration job.
last_ce_document(CeOutputsDocument):
The last cluster expansion outputs document.
Returns:
dict
Updated wrangler, all entries before decoration,
and all computed properties.
dict:
A dictionary containing the updated wrangler with successfully decorated
and mapped calculations, the computed structure entries of all structures
before decoration, and the computed properties for all structures.
"""
options = last_ce_document.ce_options
prim_specs = last_ce_document.prim_specs
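A minimal sketch of consuming the parsed output. Only the "wrangler" key is confirmed by the update_document call later in this diff; any other key names should be checked against the source.

parse_output = parse_calculations(taskdocs, enum_output, last_ce_document)
wrangler = parse_output["wrangler"]  # CeDataWrangler with decorated, mapped entries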
@@ -478,7 +479,8 @@ def fit_calculations(parse_output, last_ce_document):
Returns:
dict:
Dictionary containing fitted CE information.
A dictionary containing the CE coefficients, the cross-validation error,
the RMSE, and the optimal hyperparameters.
"""
options = last_ce_document.ce_options
_, coefs, cv, cv_std, rmse, params = fit_ecis_from_wrangler(
@@ -514,7 +516,8 @@ def update_document(enum_output, parse_output, fit_output, last_ce_document):
Returns:
CeOutputsDocument:
The updated document.
The updated :class:`CeOutputsDocument` upon finishing the current
iteration.
"""
ce_document = deepcopy(last_ce_document)
ce_document.data_wrangler = deepcopy(parse_output["wrangler"])
@@ -554,8 +557,9 @@ def update_document(enum_output, parse_output, fit_output, last_ce_document):
def initialize_document(prim, project_name="ace-work", options=None):
"""Initialize an empty cluster expansion document.
In this job, a cluster subspace will be created, super-cells
and compositions will also be enumerated.
In this job, a :class:`ClusterSubspace` instance will be created and trimmed of
duplicates; the supercell matrices and compositions to be used for structure
generation will also be enumerated.
Args:
prim(structure):
@@ -569,7 +573,8 @@ def initialize_document(prim, project_name="ace-work", options=None):
options(dict): optional
A dictionary including all options to set up the automatic
workflow.
For available options, see docs in preprocessing.py.
For available options, see documentation of
:mod:`WFacer.preprocessing`.
"""
# Pre-process options.
options = options or {}
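A hedged sketch of the documented call signature of initialize_document; the structure file is a placeholder, and whether the call returns the document directly or a jobflow job depends on decoration not shown in this hunk.

from pymatgen.core import Structure

from WFacer.jobs import initialize_document

prim = Structure.from_file("prim.cif")  # hypothetical primitive-cell file
# With options=None, the defaults documented in WFacer.preprocessing are used.
ce_document = initialize_document(prim, project_name="ace-work", options=None)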
8 changes: 4 additions & 4 deletions WFacer/maker.py
@@ -1,4 +1,4 @@
"""Automatic jobflow maker."""
"""Automatic cluster expansion workflow maker."""
from dataclasses import dataclass, field
from warnings import warn

@@ -16,7 +16,7 @@

@job
def ce_step_trigger(last_ce_document):
"""Trigger a step in CE iteration.
"""Triggers a CE iteration.
Args:
last_ce_document(CeOutputsDocument):
@@ -25,7 +25,7 @@ def ce_step_trigger(last_ce_document):
Returns:
Response:
Either a CeOutputsDocument if converged, or a
A :class:`CeOutputsDocument` if converged, or a
response to replace with another step.
"""
iter_id = last_ce_document.last_iter_id + 1
@@ -124,7 +124,7 @@ def make(self, prim, last_document=None, add_num_iterations=None):
Returns:
Flow:
The iterative cluster expansion workflow.
The iterative automatic cluster expansion workflow.
"""
if last_document is None:
initialize = initialize_document_job(
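A hedged end-to-end sketch of driving the workflow; the maker class name is not visible in this diff, so CeAutoMaker is only a placeholder for the dataclass that defines make(), and run_locally is jobflow's local runner.

from jobflow import run_locally
from pymatgen.core import Structure

from WFacer.maker import CeAutoMaker  # placeholder name; check WFacer.maker for the real class

prim = Structure.from_file("prim.cif")  # hypothetical primitive cell
maker = CeAutoMaker()
flow = maker.make(prim)  # the iterative automatic cluster expansion Flow
responses = run_locally(flow)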