Skip to content

Commit

Permalink
Added encoding and parsing for TeraChem optimization calculations.
Browse files Browse the repository at this point in the history
  • Loading branch information
coltonbh committed Sep 13, 2024
1 parent 198fcd6 commit 1b261df
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 12 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ repos:
- id: mypy
additional_dependencies:
[tokenize-rt==3.2.0, pydantic>=1.0.0, types-paramiko, types-toml, qcio>=0.11.8]
- repo: https://github.com/crate-ci/typos
rev: v1.24.5
hooks:
- id: typos
- repo: local
hooks:
- id: tests
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,9 @@ init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
warn_untyped_fields = true

[tool.typos]
# Exclude specific files or directories
files.extend-exclude = [
"tests/data/terachem_opt/tc.out", # Single file
]
5 changes: 5 additions & 0 deletions qcparse/encoders/terachem.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ def encode(inp_obj: ProgramInput) -> NativeInput:
calctype = "frequencies"
elif inp_obj.calctype.value == CalcType.optimization:
calctype = "minimize"
if not inp_obj.keywords.get("new_minimizer", "no") == "yes":
raise EncoderError(
"Only the new_minimizer is supported for optimizations. Add "
"'new_minimizer': 'yes' to the keywords."
)
elif inp_obj.calctype.value == CalcType.transition_state:
calctype = "ts"
else:
Expand Down
7 changes: 4 additions & 3 deletions qcparse/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import warnings
from importlib import import_module
from pathlib import Path
from typing import List, Union
from typing import List, Optional, Union

from qcio import ProgramInput, SinglePointResults
from qcio import CalcType, ProgramInput, SinglePointResults

from .exceptions import EncoderError, MatchNotFoundError, ParserError
from .models import NativeInput, ParserSpec, registry, single_point_results_namespace
Expand All @@ -20,6 +20,7 @@ def parse(
data_or_path: Union[str, bytes, Path],
program: str,
filetype: str = "stdout",
calctype: Optional[CalcType] = None,
) -> SinglePointResults:
"""Parse a file using the parsers registered for the given program.
Expand Down Expand Up @@ -53,7 +54,7 @@ def parse(

# Get the calctype if filetype is 'stdout'
if filetype == "stdout":
calctype = parsers.parse_calctype(file_content)
calctype = calctype if calctype else parsers.parse_calctype(file_content)

# Get all the parsers for the program, filetype, and calctype
parser_specs: List[ParserSpec] = registry.get_parsers(program, filetype, calctype)
Expand Down
25 changes: 17 additions & 8 deletions qcparse/parsers/terachem.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,14 @@ def parse_optimization_dir(
directory: Union[Path, str],
stdout: str,
*,
input_data: ProgramInput,
inp_obj: ProgramInput,
) -> OptimizationResults:
"""Parse the output directory of a TeraChem optimization calculation.
Args:
directory: Path to the directory containing the TeraChem output files.
stdout: The contents of the TeraChem stdout file.
inp_obj: The input object used for the calculation.
Returns:
OptimizationResults object
Expand All @@ -200,7 +201,12 @@ def parse_optimization_dir(
structures = Structure.open(directory / "optim.xyz")
assert isinstance(structures, list), "Expected multiple structures in optim.xyz"

# Parse their gradients
# Parse Values
from qcparse import parse

# Parse all the values from the stdout file
spr = parse(stdout, "terachem", "stdout", CalcType.energy)

gradients = parse_gradients(stdout)
program_version = parse_version_string(stdout)

Expand All @@ -210,14 +216,17 @@ def parse_optimization_dir(
input_data=ProgramInput(
calctype=CalcType.gradient,
structure=structure,
model=input_data.model,
keywords=input_data.keywords,
model=inp_obj.model,
keywords=inp_obj.keywords,
),
results=SinglePointResults(
# TeraChem places the energy as the first comment in the xyz file
energy=structure.extras[Structure._xyz_comment_key][0],
# Will be coerced by Pydantic to np.ndarray
gradient=gradient, # type: ignore
**{
**spr.model_dump(),
# TeraChem places the energy as the first comment in the xyz file
"energy": structure.extras[Structure._xyz_comment_key][0],
# Will be coerced by Pydantic to np.ndarray
"gradient": gradient, # type: ignore
}
),
success=True,
provenance=Provenance(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_terachem.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def test_parse_optimization_dir(test_data_dir, prog_inp):
opt_inp = prog_inp("optimization")
stdout = (test_data_dir / "terachem_opt" / "tc.out").read_text()
opt_results = parse_optimization_dir(
test_data_dir / "terachem_opt", stdout, input_data=opt_inp
test_data_dir / "terachem_opt", stdout, inp_obj=opt_inp
)
for prog_output in opt_results.trajectory:
assert prog_output.input_data.calctype == CalcType.gradient
Expand Down

0 comments on commit 1b261df

Please sign in to comment.