diff --git a/README.md b/README.md index 66f1c61..4b32c50 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,14 @@ Install with `pip install fastlogfileparser` or `conda` (forthcoming!). 4. Retrieves values at every step, not just convergence ## Usage -### Gaussian +The best way to see how `fastlogfileparser` works is to check out the [tests](./test/gaussian_test.py)! +They show the syntax for importing, calling, and then accessing the values. +A brief summary of overall workflow and usage is provided below. -There is a single function `fast_gaussian_logfile_parser` inside `fastlogfileparser.gaussian` which reads logfiles and returns the result as a [namedtuple](https://docs.python.org/3/library/collections.html#collections.namedtuple) (which prevents accidentally changing the values and allows using `.` syntax to access them). +### Design +There is a single function `fast_{software}_logfile_parser` inside `fastlogfileparser.{software}` (where `{software}` is the name of the corresponding package like `gaussian` or `orca`) which reads log files and returns the result as a [namedtuple](https://docs.python.org/3/library/collections.html#collections.namedtuple) (which prevents accidentally changing the values and allows using `.` syntax to access them). 
-#### Usage Example +### Usage Example ```python from fastlogfileparser.gaussian import fast_gaussian_logfile_parser as fglp @@ -28,7 +31,7 @@ print(job_1.frequency_modes) print(job_1._fields) # can also be accessed via -from fastlogfileparser.gaussian import FIELDS +from fastlogfileparser.gaussian import ALL_FIELDS ``` Fast logfile parser is fastest when you ask it to retrieve only the fields you want, i.e.: @@ -36,22 +39,50 @@ Fast logfile parser is fastest when you ask it to retrieve only the fields you w job_1, job_2, job_3 = fglp(FNAME, get=("gibbs", "scf")) ``` -#### Retrieved Values +### Retrieved Values + +#### Gaussian | Quantity | Key | Type | Frequency | | -------- | --- | ---- | --------- | +| Route Section | `route_section` | string | 1/job | +| Normal Termination | `normal_termination` | boolean | 1/job | +| Error | `error_string` | str | 1/job | +| Maximum Allowed Steps | `max_steps` | int | 1/job | +| CPU Time | `cpu_time` | float | 1/job | +| Wall Time | `wall_time` | float | 1/job | | Gibbs free energy at 298K | `gibbs` | float | 1/job | | Gibbs free energy at 0K | `e0_zpe` | float | 1/job | | Enthalpy at 298K | `e0_h` | float | 1/job | -| E0 $^1$ | `E0` | float | 1/job | +| HF $^1$ | `hf` | float | 1/job | | Per-atom Zero Point Energy | `zpe_per_atom` | float | 1/job | -| Standardized xyz coords | `std_xyz` | list[float] | 1/step/job | -| ... | ... | ... | ... 
| +| Wavefunction Energy $^3$ | `wavefunction_energy` | float | 1/job | +| SCF Energy | `scf` | list[float] | 1/job | +| Vibrational Frequencies | `frequencies` | list[float] | 1/job | +| Frequency Modes | `frequency_modes` | list[list[float]] | 1/job | +| Standardized xyz coords | `std_xyz` | list[list[float]] | 1/step/job | +| Input xyz coords | `xyz` | list[list[float]] | 1/step/job | +| Standardized forces | `std_forces` | list[list[float]] | 1/step/job | +| Mulliken Charges (Summed into Heavy) | `mulliken_charges_summed` | list[list[float]] | 2/job | +| Charge and Multiplicity | `charge_and_multiplicity` | list[int] | 1/job | | Number of Atoms $^2$ | `number_of_atoms` | int | 1/job | | Number of Optimization Steps $^2$ | `number_of_optimization_steps` | int | 1/job | $1$ equals E0 only for non-wavefunction methods
$2$ requires `std_xyz` to be parsed to find these values
+$3$ equals E0 only for wavefunction methods
+ +#### Orca + +| Quantity | Key | Type | Frequency | +| -------- | --- | ---- | --------- | +| Route Section | `route_section` | string | 1/job | +| Total Run Time $^1$ | `run_time` | float | 1/job | +| Charge and Multiplicity | `charge_and_multiplicity` | list[int] | 1/job | +| Final Single Point Energy | `energy` | float | 1/job | +| Input xyz coords | `input_coordinates` | list[list[float]] | 1/job | + +$1$ ignores milliseconds
## How much fast-ly-er? `FastLogfileParser` uses REGEX and only REGEX to retrieve data from logfiles, spending as much time in Python's excellent C-based REGEX library as possible. diff --git a/fastlogfileparser/gaussian/utils/postprocessing.py b/fastlogfileparser/gaussian/utils/postprocessing.py index 36ce5a6..5a3be8d 100644 --- a/fastlogfileparser/gaussian/utils/postprocessing.py +++ b/fastlogfileparser/gaussian/utils/postprocessing.py @@ -7,6 +7,18 @@ _unix_time_to_seconds, ) + +def _mulliken(in_list): + out = [] + for i in in_list: + inner_out = [] + for row in i.split(sep="\n"): + atom_idx, _, mulliken_charge, _ = row.split() + inner_out.append([int(atom_idx), float(mulliken_charge)]) + out.append(inner_out) + return out + + POSTPROCESSING_FUNCTIONS = { "cpu_time": _unix_time_to_seconds, "wall_time": _unix_time_to_seconds, @@ -27,4 +39,5 @@ "xyz": _columns_to_floats, "route_section": lambda in_list: in_list[0], "charge_and_multiplicity": _charge_and_multiplicity, + "mulliken_charges_summed": _mulliken, } diff --git a/fastlogfileparser/gaussian/utils/regexes.py b/fastlogfileparser/gaussian/utils/regexes.py index 3429391..c6b3a6b 100644 --- a/fastlogfileparser/gaussian/utils/regexes.py +++ b/fastlogfileparser/gaussian/utils/regexes.py @@ -42,6 +42,12 @@ r"([\s+\d+\s+\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d]+)\n" r"(?:\s+\d+\s+\d+\s+\d+)?\n" ), + "mulliken_charges_summed": ( + r" Mulliken charges and spin densities with hydrogens summed into heavy atoms:\n" + r" 1 2\n" + r"((?:\s+\d+\s+[a-zA-Z]{1,3}\s+-?\d+\.\d+\s+-?\d+\.\d+)+)\n" + r" APT charges:" + ), "charge_and_multiplicity": r" Charge = {1,2}(-?\d) Multiplicity = (\d)", } @@ -57,7 +63,7 @@ RETRIEVAL_PATTERNS = {**DATA, **METADATA} # other options: -# homo-lumo gap, polarizability, dipole moment, mulliken and APT partial charges, occupancy +# homo-lumo gap, polarizability, dipole moment, APT partial charges, occupancy 
COMPILED_PATTERNS = {pattern_name: re.compile(pattern) for (pattern_name, pattern) in RETRIEVAL_PATTERNS.items()} diff --git a/pyproject.toml b/pyproject.toml index 596d462..433078d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "fastlogfileparser" -version = "1.0.0a6" +version = "1.0.0a7" authors = [ { name = "Jackson Burns" }, ] diff --git a/test/gaussian_test.py b/test/gaussian_test.py index c533ce2..196c0b4 100644 --- a/test/gaussian_test.py +++ b/test/gaussian_test.py @@ -18,6 +18,48 @@ def test_duplicated_frequencies_handling(): assert result.frequencies == [355.51, 1029.1913, 1349.894, 1491.2134, 3851.6427, 3853.9729] +@pytest.mark.dependency(**pytest_dep_args) +def test_mulliken_charges(): + """ + Mulliken charges summed into heavy atoms. + """ + + file = os.path.join(os.path.dirname(__file__), "data", "rxn_233.log") + result, _, _ = fast_gaussian_logfile_parser(file) + assert result.mulliken_charges_summed == [ + [ + [2, -0.022831], + [3, 0.023347], + [13, 0.264456], + [14, 0.279877], + [15, 0.244798], + [16, 0.135469], + [17, 0.178793], + [18, 0.254435], + [19, -0.293571], + [20, -0.302496], + [21, -0.278843], + [22, -0.245202], + [23, -0.238233], + ], + [ + [2, -0.050449], + [3, 0.010743], + [13, 0.283808], + [14, 0.299583], + [15, 0.220744], + [16, 0.157101], + [17, 0.138796], + [18, 0.243924], + [19, -0.225737], + [20, -0.287798], + [21, -0.292178], + [22, -0.25534], + [23, -0.243197], + ], + ] + + @pytest.mark.dependency(**pytest_dep_args) def test_fast_gaussian_logfile_parser(): """