diff --git a/README.md b/README.md
index 66f1c61..4b32c50 100644
--- a/README.md
+++ b/README.md
@@ -9,11 +9,14 @@ Install with `pip install fastlogfileparser` or `conda` (forthcoming!).
4. Retrieves values at every step, not just convergence
## Usage
-### Gaussian
+The best way to see how `fastlogfileparser` works is to check out the [tests](./test/gaussian_test.py)!
+They show the syntax for importing, calling, and then accessing the values.
+A brief summary of overall workflow and usage is provided below.
-There is a single function `fast_gaussian_logfile_parser` inside `fastlogfileparser.gaussian` which reads logfiles and returns the result as a [namedtuple](https://docs.python.org/3/library/collections.html#collections.namedtuple) (which prevents accidentally changing the values and allows using `.` syntax to access them).
+### Design
+There is a single function `fast_{software}_logfile_parser` inside `fastlogfileparser.{software}` (where `{software}` is the name of the corresponding package like `gaussian` or `orca`) which reads log files and returns the result as a [namedtuple](https://docs.python.org/3/library/collections.html#collections.namedtuple) (which prevents accidentally changing the values and allows using `.` syntax to access them).
-#### Usage Example
+### Usage Example
```python
from fastlogfileparser.gaussian import fast_gaussian_logfile_parser as fglp
@@ -28,7 +31,7 @@ print(job_1.frequency_modes)
print(job_1._fields)
# can also be accessed via
-from fastlogfileparser.gaussian import FIELDS
+from fastlogfileparser.gaussian import ALL_FIELDS
```
Fast logfile parser is fastest when you ask it to retrieve only the fields you want, i.e.:
@@ -36,22 +39,50 @@ Fast logfile parser is fastest when you ask it to retrieve only the fields you w
job_1, job_2, job_3 = fglp(FNAME, get=("gibbs", "scf"))
```
-#### Retrieved Values
+### Retrieved Values
+
+#### Gaussian
| Quantity | Key | Type | Frequency |
| -------- | --- | ---- | --------- |
+| Route Section | `route_section` | string | 1/job |
+| Normal Termination | `normal_termination` | boolean | 1/job |
+| Error | `error_string` | string | 1/job |
+| Maximum Allowed Steps | `max_steps` | int | 1/job |
+| CPU Time | `cpu_time` | float | 1/job |
+| Wall Time | `wall_time` | float | 1/job |
| Gibbs free energy at 298K | `gibbs` | float | 1/job |
| Gibbs free energy at 0K | `e0_zpe` | float | 1/job |
| Enthalpy at 298K | `e0_h` | float | 1/job |
-| E0 $^1$ | `E0` | float | 1/job |
+| HF $^1$ | `hf` | float | 1/job |
| Per-atom Zero Point Energy | `zpe_per_atom` | float | 1/job |
-| Standardized xyz coords | `std_xyz` | list[float] | 1/step/job |
-| ... | ... | ... | ... |
+| Wavefunction Energy $^3$ | `wavefunction_energy` | float | 1/job |
+| SCF Energy | `scf` | list[float] | 1/job |
+| Vibrational Frequencies | `frequencies` | list[float] | 1/job |
+| Frequency Modes | `frequency_modes` | list[list[float]] | 1/job |
+| Standardized xyz coords | `std_xyz` | list[list[float]] | 1/step/job |
+| Input xyz coords | `xyz` | list[list[float]] | 1/step/job |
+| Standardized forces | `std_forces` | list[list[float]] | 1/step/job |
+| Mulliken Charges (Summed into Heavy Atoms) | `mulliken_charges_summed` | list[list[float]] | 2/job |
+| Charge and Multiplicity | `charge_and_multiplicity` | list[int] | 1/job |
| Number of Atoms $^2$ | `number_of_atoms` | int | 1/job |
| Number of Optimization Steps $^2$ | `number_of_optimization_steps` | int | 1/job |
$1$ equals E0 only for non-wavefunction methods
$2$ requires `std_xyz` to be parsed to find these values
+$3$ equals E0 for wavefunction methods
+
+#### Orca
+
+| Quantity | Key | Type | Frequency |
+| -------- | --- | ---- | --------- |
+| Route Section | `route_section` | string | 1/job |
+| Total Run Time $^1$ | `run_time` | float | 1/job |
+| Charge and Multiplicity | `charge_and_multiplicity` | list[int] | 1/job |
+| Final Single Point Energy | `energy` | float | 1/job |
+| Input xyz coords | `input_coordinates` | list[list[float]] | 1/job |
+
+$1$ ignores milliseconds
## How much fast-ly-er?
`FastLogfileParser` uses REGEX and only REGEX to retrieve data from logfiles, spending as much time in Python's excellent C-based REGEX library as possible.
diff --git a/fastlogfileparser/gaussian/utils/postprocessing.py b/fastlogfileparser/gaussian/utils/postprocessing.py
index 36ce5a6..5a3be8d 100644
--- a/fastlogfileparser/gaussian/utils/postprocessing.py
+++ b/fastlogfileparser/gaussian/utils/postprocessing.py
@@ -7,6 +7,18 @@
_unix_time_to_seconds,
)
+
+def _mulliken(in_list):
+ out = []
+ for i in in_list:
+ inner_out = []
+ for row in i.split(sep="\n"):
+ atom_idx, _, mulliken_charge, _ = row.split()
+ inner_out.append([int(atom_idx), float(mulliken_charge)])
+ out.append(inner_out)
+ return out
+
+
POSTPROCESSING_FUNCTIONS = {
"cpu_time": _unix_time_to_seconds,
"wall_time": _unix_time_to_seconds,
@@ -27,4 +39,5 @@
"xyz": _columns_to_floats,
"route_section": lambda in_list: in_list[0],
"charge_and_multiplicity": _charge_and_multiplicity,
+ "mulliken_charges_summed": _mulliken,
}
diff --git a/fastlogfileparser/gaussian/utils/regexes.py b/fastlogfileparser/gaussian/utils/regexes.py
index 3429391..c6b3a6b 100644
--- a/fastlogfileparser/gaussian/utils/regexes.py
+++ b/fastlogfileparser/gaussian/utils/regexes.py
@@ -42,6 +42,12 @@
r"([\s+\d+\s+\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d\s+-?\d\.\d\d]+)\n"
r"(?:\s+\d+\s+\d+\s+\d+)?\n"
),
+ "mulliken_charges_summed": (
+ r" Mulliken charges and spin densities with hydrogens summed into heavy atoms:\n"
+ r" 1 2\n"
+ r"((?:\s+\d+\s+[a-zA-Z]{1,3}\s+-?\d+\.\d+\s+-?\d+\.\d+)+)\n"
+ r" APT charges:"
+ ),
"charge_and_multiplicity": r" Charge = {1,2}(-?\d) Multiplicity = (\d)",
}
@@ -57,7 +63,7 @@
RETRIEVAL_PATTERNS = {**DATA, **METADATA}
# other options:
-# homo-lumo gap, polarizability, dipole moment, mulliken and APT partial charges, occupancy
+# homo-lumo gap, polarizability, dipole moment, APT partial charges, occupancy
COMPILED_PATTERNS = {pattern_name: re.compile(pattern) for (pattern_name, pattern) in RETRIEVAL_PATTERNS.items()}
diff --git a/pyproject.toml b/pyproject.toml
index 596d462..433078d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "fastlogfileparser"
-version = "1.0.0a6"
+version = "1.0.0a7"
authors = [
{ name = "Jackson Burns" },
]
diff --git a/test/gaussian_test.py b/test/gaussian_test.py
index c533ce2..196c0b4 100644
--- a/test/gaussian_test.py
+++ b/test/gaussian_test.py
@@ -18,6 +18,48 @@ def test_duplicated_frequencies_handling():
assert result.frequencies == [355.51, 1029.1913, 1349.894, 1491.2134, 3851.6427, 3853.9729]
+@pytest.mark.dependency(**pytest_dep_args)
+def test_mulliken_charges():
+ """
+ Mulliken charges summed into heavy atoms.
+ """
+
+ file = os.path.join(os.path.dirname(__file__), "data", "rxn_233.log")
+ result, _, _ = fast_gaussian_logfile_parser(file)
+ assert result.mulliken_charges_summed == [
+ [
+ [2, -0.022831],
+ [3, 0.023347],
+ [13, 0.264456],
+ [14, 0.279877],
+ [15, 0.244798],
+ [16, 0.135469],
+ [17, 0.178793],
+ [18, 0.254435],
+ [19, -0.293571],
+ [20, -0.302496],
+ [21, -0.278843],
+ [22, -0.245202],
+ [23, -0.238233],
+ ],
+ [
+ [2, -0.050449],
+ [3, 0.010743],
+ [13, 0.283808],
+ [14, 0.299583],
+ [15, 0.220744],
+ [16, 0.157101],
+ [17, 0.138796],
+ [18, 0.243924],
+ [19, -0.225737],
+ [20, -0.287798],
+ [21, -0.292178],
+ [22, -0.25534],
+ [23, -0.243197],
+ ],
+ ]
+
+
@pytest.mark.dependency(**pytest_dep_args)
def test_fast_gaussian_logfile_parser():
"""