diff --git a/.vscode/settings.json b/.vscode/settings.json index ccf10a5..7418e60 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,8 @@ "python.formatting.provider": "black", "cSpell.words": [ "calcinfo", + "calctype", + "calctypes", "CUDA", "Hartree", "htmlcov", diff --git a/CHANGELOG.md b/CHANGELOG.md index bc85ae2..9358346 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [unreleased] +### Changed + +- Updated to use `qcio>=0.3.0` flattened models and the `SinglePointResults` object. + ## [0.3.1] ### Fixed diff --git a/README.md b/README.md index 83aa5ae..6d3003f 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ A library for parsing Quantum Chemistry output files into structured data object ## ☝️ NOTE -This package was originally designed to run as a standalone parser to generate `SinglePointSuccessfulOutput` and `SinglePointFailedOutput` objects parsing all input and provenance data in addition to computed output data; however, once [qcop](https://github.com/coltonbh/qcop) was built to power quantum chemistry programs the only parsing needed was for the simpler `SinglePointComputedProperties` values. There are still remnants of the original `parse` function in the repo and I've left them for now in case I find a use for the general purpose parsing. +This package was originally designed to run as a standalone parser to generate `SinglePointOutput` and `ProgramFailure` objects parsing all input and provenance data in addition to computed output data; however, once [qcop](https://github.com/coltonbh/qcop) was built to power quantum chemistry programs the only parsing needed was for the simpler `SinglePointResults` values. There are still remnants of the original `parse` function in the repo and I've left them for now in case I find a use for the general purpose parsing. 
## ✨ Basic Usage @@ -21,41 +21,41 @@ This package was originally designed to run as a standalone parser to generate ` python -m pip install qcparse ``` -- Parse a file into a `SinglePointComputedProperties` object with a single line of code. +- Parse a file into a `SinglePointResults` object with a single line of code. ```python - from qcparse import parse_computed_props + from qcparse import parse_results - computed = parse_computed_props("/path/to/tc.out", "terachem") + results = parse_results("/path/to/tc.out", "terachem") ``` -- The `computed` object will be a `SinglePointComputedProperties` object. Run `dir(computed)` inside a Python interpreter to see the various values you can access. A few prominent values are shown here as an example: +- The `results` object will be a `SinglePointResults` object. Run `dir(results)` inside a Python interpreter to see the various values you can access. A few prominent values are shown here as an example: ```python - from qcparse import parse_computed_props + from qcparse import parse_results - computed = parse_computed_props("/path/to/tc.out", "terachem") + results = parse_results("/path/to/tc.out", "terachem") - computed.energy - computed.gradient # If a gradient calc - computed.hessian # If a hessian calc + results.energy + results.gradient # If a gradient calc + results.hessian # If a hessian calc - computed.calcinfo_nmo # Number of molecular orbitals + results.calcinfo_nmo # Number of molecular orbitals ``` - Parsed values can be written to disk like this: ```py - with open("computed.json", "w") as f: + with open("results.json", "w") as f: f.write(result.json()) ``` - And read from disk like this: ```py - from qcio import SinglePointComputedProperties as SPProps + from qcio import SinglePointResults - computed = SPProps.open("myresult.json") + results = SinglePointResults.open("results.json") ``` - You can also run `qcparse` from the command line like this: @@ -63,30 +63,9 @@ This package was originally designed to run 
as a standalone parser to generate ` ```sh qcparse -h # Get help message for cli - qcparse terachem ./path/to/tc.out > computed.json # Parse TeraChem stdout to json + qcparse terachem ./path/to/tc.out > results.json # Parse TeraChem stdout to json ``` -## 🤩 Next Steps - -This package is integrated into [qcop](https://github.com/coltonbh/qcop). This means you can use `qcop` to power your QC programs using standard input data structures in pure Python and get back standardized Python output objects. - -```python -from qcop import compute -from qcio import Molecule, SinglePointInput - -molecule = Molecule.open("mymolecule.xyz") -sp_input = SinglePointInput( - molecule=molecule, - program_args={ - "calc_type": "gradient", # "energy" | "gradient" | "hessian" - "model": {"method": "b3lyp", "basis": "6-31gs"}, - "keywords": {"restricted": True, "purify": "no"} # Keywords are optional - }) - -# result will be SinglePointSuccessfulOutput or SinglePointFailedOutput -result = compute(sp_input, "terachem") -``` - ## 💻 Contributing If there's data you'd like parsed fromI output files, please open an issue in this repo explaining the data items you'd like parsed and include an example output file containing the data, like [this](https://github.com/coltonbh/qcparse/issues/2). diff --git a/docs/dev-decisions.md b/docs/dev-decisions.md index 1bc4313..9f376fe 100644 --- a/docs/dev-decisions.md +++ b/docs/dev-decisions.md @@ -6,7 +6,7 @@ ## UPDATE DESIGN DECISION: -- I don't see a strong reason for making this package a standalone package that parses everything required for a `SinglePointSuccessfulResult` object including input data, provenance data, xyz files, etc... While the original idea was to have a cli tool to run on TeraChem files, now that I've build my own data structures and driver program, there's no reason to parse anything but `SinglePointComputedProperties` values because we should just be driving the programs with `qcop/qcpilot`. 
So why waste time parsing a bunch of extra data? I've left the original `parse` function and some basic `cli` functionality in case I change my mind, but perhaps I just strip this down to the bare bones and K.I.S.S? The only downside would be walking in to someone else's old data and wanting to slurp it all in, but perhaps there's no reason to build for that use case now... Just go with SIMPLE and keep the code maintainable. All the logic for parsing inputs and handling failed computations was making the package quite complex (cases where .xyz file not available, or determining if output was a success/failure), this should be the SIMPLEST package of the `qc` suite, yet it was become the most complex and difficult to reason about. +- I don't see a strong reason for making this package a standalone package that parses everything required for a `SinglePointSuccessfulResult` object including input data, provenance data, xyz files, etc... While the original idea was to have a cli tool to run on TeraChem files, now that I've built my own data structures and driver program, there's no reason to parse anything but `SinglePointComputedProps` values because we should just be driving the programs with `qcop/qcpilot`. So why waste time parsing a bunch of extra data? I've left the original `parse` function and some basic `cli` functionality in case I change my mind, but perhaps I just strip this down to the bare bones and K.I.S.S? The only downside would be walking in to someone else's old data and wanting to slurp it all in, but perhaps there's no reason to build for that use case now... Just go with SIMPLE and keep the code maintainable. All the logic for parsing inputs and handling failed computations was making the package quite complex (cases where .xyz file not available, or determining if output was a success/failure), this should be the SIMPLEST package of the `qc` suite, yet it has become the most complex and difficult to reason about. 
## Basic Architectural Overview and Program Flow @@ -21,7 +21,7 @@ 1. Create a file in the `parsers` named after the quantum chemistry program, e.g., `qchem.py`. 2. Create `class FileType(str, Enum)` in the file registering the file types the parsers support. -3. If `stdout` is a file type then create a `def get_calc_type(string: str) -> CalcType` function that returns the `CalcType` for the file. One of `CalcType.energy`, `CalcType.gradient`, or `CalcType.hessian`. +3. If `stdout` is a file type then create a `def get_calctype(string: str) -> CalcType` function that returns the `CalcType` for the file. One of `CalcType.energy`, `CalcType.gradient`, or `CalcType.hessian`. 4. Create simple parser functions that accept file data and an output object. The parser should parse a single piece of data from the file and set it on the output object at its corresponding location found on the `qcio.SinglePointOutput` object. Register this parser by decorating it with the `@parser` decorator. The decorator must declare `filetype` and can optionally declare `required` (`True` by default), `input_data` (`False` by default), and `only` (`None` by default). See the `qcparse.decorators` for details on what these mean. 
```py diff --git a/poetry.lock b/poetry.lock index b444c84..9010180 100644 --- a/poetry.lock +++ b/poetry.lock @@ -458,48 +458,48 @@ virtualenv = ">=20.10.0" [[package]] name = "pydantic" -version = "1.10.9" +version = "1.10.10" description = "Data validation and settings management using python type hints" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e692dec4a40bfb40ca530e07805b1208c1de071a18d26af4a2a0d79015b352ca"}, - {file = "pydantic-1.10.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c52eb595db83e189419bf337b59154bdcca642ee4b2a09e5d7797e41ace783f"}, - {file = "pydantic-1.10.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:939328fd539b8d0edf244327398a667b6b140afd3bf7e347cf9813c736211896"}, - {file = "pydantic-1.10.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b48d3d634bca23b172f47f2335c617d3fcb4b3ba18481c96b7943a4c634f5c8d"}, - {file = "pydantic-1.10.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f0b7628fb8efe60fe66fd4adadd7ad2304014770cdc1f4934db41fe46cc8825f"}, - {file = "pydantic-1.10.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e1aa5c2410769ca28aa9a7841b80d9d9a1c5f223928ca8bec7e7c9a34d26b1d4"}, - {file = "pydantic-1.10.9-cp310-cp310-win_amd64.whl", hash = "sha256:eec39224b2b2e861259d6f3c8b6290d4e0fbdce147adb797484a42278a1a486f"}, - {file = "pydantic-1.10.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d111a21bbbfd85c17248130deac02bbd9b5e20b303338e0dbe0faa78330e37e0"}, - {file = "pydantic-1.10.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e9aec8627a1a6823fc62fb96480abe3eb10168fd0d859ee3d3b395105ae19a7"}, - {file = "pydantic-1.10.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07293ab08e7b4d3c9d7de4949a0ea571f11e4557d19ea24dd3ae0c524c0c334d"}, - {file = 
"pydantic-1.10.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ee829b86ce984261d99ff2fd6e88f2230068d96c2a582f29583ed602ef3fc2c"}, - {file = "pydantic-1.10.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4b466a23009ff5cdd7076eb56aca537c745ca491293cc38e72bf1e0e00de5b91"}, - {file = "pydantic-1.10.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7847ca62e581e6088d9000f3c497267868ca2fa89432714e21a4fb33a04d52e8"}, - {file = "pydantic-1.10.9-cp311-cp311-win_amd64.whl", hash = "sha256:7845b31959468bc5b78d7b95ec52fe5be32b55d0d09983a877cca6aedc51068f"}, - {file = "pydantic-1.10.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:517a681919bf880ce1dac7e5bc0c3af1e58ba118fd774da2ffcd93c5f96eaece"}, - {file = "pydantic-1.10.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67195274fd27780f15c4c372f4ba9a5c02dad6d50647b917b6a92bf00b3d301a"}, - {file = "pydantic-1.10.9-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2196c06484da2b3fded1ab6dbe182bdabeb09f6318b7fdc412609ee2b564c49a"}, - {file = "pydantic-1.10.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6257bb45ad78abacda13f15bde5886efd6bf549dd71085e64b8dcf9919c38b60"}, - {file = "pydantic-1.10.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3283b574b01e8dbc982080d8287c968489d25329a463b29a90d4157de4f2baaf"}, - {file = "pydantic-1.10.9-cp37-cp37m-win_amd64.whl", hash = "sha256:5f8bbaf4013b9a50e8100333cc4e3fa2f81214033e05ac5aa44fa24a98670a29"}, - {file = "pydantic-1.10.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b9cd67fb763248cbe38f0593cd8611bfe4b8ad82acb3bdf2b0898c23415a1f82"}, - {file = "pydantic-1.10.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f50e1764ce9353be67267e7fd0da08349397c7db17a562ad036aa7c8f4adfdb6"}, - {file = "pydantic-1.10.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:73ef93e5e1d3c8e83f1ff2e7fdd026d9e063c7e089394869a6e2985696693766"}, - {file = "pydantic-1.10.9-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:128d9453d92e6e81e881dd7e2484e08d8b164da5507f62d06ceecf84bf2e21d3"}, - {file = "pydantic-1.10.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ad428e92ab68798d9326bb3e5515bc927444a3d71a93b4a2ca02a8a5d795c572"}, - {file = "pydantic-1.10.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fab81a92f42d6d525dd47ced310b0c3e10c416bbfae5d59523e63ea22f82b31e"}, - {file = "pydantic-1.10.9-cp38-cp38-win_amd64.whl", hash = "sha256:963671eda0b6ba6926d8fc759e3e10335e1dc1b71ff2a43ed2efd6996634dafb"}, - {file = "pydantic-1.10.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:970b1bdc6243ef663ba5c7e36ac9ab1f2bfecb8ad297c9824b542d41a750b298"}, - {file = "pydantic-1.10.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7e1d5290044f620f80cf1c969c542a5468f3656de47b41aa78100c5baa2b8276"}, - {file = "pydantic-1.10.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83fcff3c7df7adff880622a98022626f4f6dbce6639a88a15a3ce0f96466cb60"}, - {file = "pydantic-1.10.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0da48717dc9495d3a8f215e0d012599db6b8092db02acac5e0d58a65248ec5bc"}, - {file = "pydantic-1.10.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0a2aabdc73c2a5960e87c3ffebca6ccde88665616d1fd6d3db3178ef427b267a"}, - {file = "pydantic-1.10.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9863b9420d99dfa9c064042304868e8ba08e89081428a1c471858aa2af6f57c4"}, - {file = "pydantic-1.10.9-cp39-cp39-win_amd64.whl", hash = "sha256:e7c9900b43ac14110efa977be3da28931ffc74c27e96ee89fbcaaf0b0fe338e1"}, - {file = "pydantic-1.10.9-py3-none-any.whl", hash = "sha256:6cafde02f6699ce4ff643417d1a9223716ec25e228ddc3b436fe7e2d25a1f305"}, - {file = "pydantic-1.10.9.tar.gz", hash = 
"sha256:95c70da2cd3b6ddf3b9645ecaa8d98f3d80c606624b6d245558d202cd23ea3be"}, + {file = "pydantic-1.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adad1ee4ab9888f12dac2529276704e719efcf472e38df7813f5284db699b4ec"}, + {file = "pydantic-1.10.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a7db03339893feef2092ff7b1afc9497beed15ebd4af84c3042a74abce02d48"}, + {file = "pydantic-1.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b3714b97ff84b2689654851c2426389bcabfac9080617bcf4306c69db606f6"}, + {file = "pydantic-1.10.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edfdf0a5abc5c9bf2052ebaec20e67abd52e92d257e4f2d30e02c354ed3e6030"}, + {file = "pydantic-1.10.10-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20a3b30fd255eeeb63caa9483502ba96b7795ce5bf895c6a179b3d909d9f53a6"}, + {file = "pydantic-1.10.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db4c7f7e60ca6f7d6c1785070f3e5771fcb9b2d88546e334d2f2c3934d949028"}, + {file = "pydantic-1.10.10-cp310-cp310-win_amd64.whl", hash = "sha256:a2d5be50ac4a0976817144c7d653e34df2f9436d15555189f5b6f61161d64183"}, + {file = "pydantic-1.10.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:566a04ba755e8f701b074ffb134ddb4d429f75d5dced3fbd829a527aafe74c71"}, + {file = "pydantic-1.10.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f79db3652ed743309f116ba863dae0c974a41b688242482638b892246b7db21d"}, + {file = "pydantic-1.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c62376890b819bebe3c717a9ac841a532988372b7e600e76f75c9f7c128219d5"}, + {file = "pydantic-1.10.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4870f13a4fafd5bc3e93cff3169222534fad867918b188e83ee0496452978437"}, + {file = "pydantic-1.10.10-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:990027e77cda6072a566e433b6962ca3b96b4f3ae8bd54748e9d62a58284d9d7"}, + {file = "pydantic-1.10.10-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8c40964596809eb616d94f9c7944511f620a1103d63d5510440ed2908fc410af"}, + {file = "pydantic-1.10.10-cp311-cp311-win_amd64.whl", hash = "sha256:ea9eebc2ebcba3717e77cdeee3f6203ffc0e78db5f7482c68b1293e8cc156e5e"}, + {file = "pydantic-1.10.10-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:762aa598f79b4cac2f275d13336b2dd8662febee2a9c450a49a2ab3bec4b385f"}, + {file = "pydantic-1.10.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dab5219659f95e357d98d70577b361383057fb4414cfdb587014a5f5c595f7b"}, + {file = "pydantic-1.10.10-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3d4ee957a727ccb5a36f1b0a6dbd9fad5dedd2a41eada99a8df55c12896e18d"}, + {file = "pydantic-1.10.10-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b69f9138dec566962ec65623c9d57bee44412d2fc71065a5f3ebb3820bdeee96"}, + {file = "pydantic-1.10.10-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7aa75d1bd9cc275cf9782f50f60cddaf74cbaae19b6ada2a28e737edac420312"}, + {file = "pydantic-1.10.10-cp37-cp37m-win_amd64.whl", hash = "sha256:9f62a727f5c590c78c2d12fda302d1895141b767c6488fe623098f8792255fe5"}, + {file = "pydantic-1.10.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aac218feb4af73db8417ca7518fb3bade4534fcca6e3fb00f84966811dd94450"}, + {file = "pydantic-1.10.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:88546dc10a40b5b52cae87d64666787aeb2878f9a9b37825aedc2f362e7ae1da"}, + {file = "pydantic-1.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c41bbaae89e32fc582448e71974de738c055aef5ab474fb25692981a08df808a"}, + {file = "pydantic-1.10.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b71bd504d1573b0b722ae536e8ffb796bedeef978979d076bf206e77dcc55a5"}, + {file = 
"pydantic-1.10.10-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e088e3865a2270ecbc369924cd7d9fbc565667d9158e7f304e4097ebb9cf98dd"}, + {file = "pydantic-1.10.10-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3403a090db45d4027d2344859d86eb797484dfda0706cf87af79ace6a35274ef"}, + {file = "pydantic-1.10.10-cp38-cp38-win_amd64.whl", hash = "sha256:e0014e29637125f4997c174dd6167407162d7af0da73414a9340461ea8573252"}, + {file = "pydantic-1.10.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9965e49c6905840e526e5429b09e4c154355b6ecc0a2f05492eda2928190311d"}, + {file = "pydantic-1.10.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:748d10ab6089c5d196e1c8be9de48274f71457b01e59736f7a09c9dc34f51887"}, + {file = "pydantic-1.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86936c383f7c38fd26d35107eb669c85d8f46dfceae873264d9bab46fe1c7dde"}, + {file = "pydantic-1.10.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a26841be620309a9697f5b1ffc47dce74909e350c5315ccdac7a853484d468a"}, + {file = "pydantic-1.10.10-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:409b810f387610cc7405ab2fa6f62bdf7ea485311845a242ebc0bd0496e7e5ac"}, + {file = "pydantic-1.10.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ce937a2a2c020bcad1c9fde02892392a1123de6dda906ddba62bfe8f3e5989a2"}, + {file = "pydantic-1.10.10-cp39-cp39-win_amd64.whl", hash = "sha256:37ebddef68370e6f26243acc94de56d291e01227a67b2ace26ea3543cf53dd5f"}, + {file = "pydantic-1.10.10-py3-none-any.whl", hash = "sha256:a5939ec826f7faec434e2d406ff5e4eaf1716eb1f247d68cd3d0b3612f7b4c8a"}, + {file = "pydantic-1.10.10.tar.gz", hash = "sha256:3b8d5bd97886f9eb59260594207c9f57dce14a6f869c6ceea90188715d29921a"}, ] [package.dependencies] @@ -615,21 +615,23 @@ files = [ [[package]] name = "qcio" -version = "0.2.0" +version = "0.2.1" description = "Beautiful and user friendly data structures for quantum chemistry." 
category = "main" optional = false -python-versions = ">=3.8,<4.0" -files = [ - {file = "qcio-0.2.0-py3-none-any.whl", hash = "sha256:bd85ddddb3d4711652207e311ff935d85f25a9c684336f10c5db3a11e60b83bb"}, - {file = "qcio-0.2.0.tar.gz", hash = "sha256:683687462b97e3ecf82d5c1d6885f6bc0a33e158fee46ef4fcaa8dacf8151671"}, -] +python-versions = "^3.8" +files = [] +develop = true [package.dependencies] numpy = ">=1.20" -pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0" -pyyaml = ">=6.0,<7.0" -toml = ">=0.10.2,<0.11.0" +pydantic = ">=1.7.4,!=1.8,!=1.8.1,<2.0.0" +pyyaml = "^6.0" +toml = "^0.10.2" + +[package.source] +type = "directory" +url = "../qcio" [[package]] name = "ruff" @@ -735,4 +737,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "3f6888dda759be7840f30c8c2eb77582fd856c710a78630e80822a6226c73324" +content-hash = "94f674f95cf65eed47303b1cdec70de9dea44c3d806e9da466a84583d929452f" diff --git a/pyproject.toml b/pyproject.toml index 0f0fa99..393cb20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,8 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.8.1" pydantic = ">=1.7.4,!=1.8,!=1.8.1,<2.0.0" -qcio = ">=0.2.0" - +qcio = { path = "../qcio", develop = true } +# qcio = ">=0.2.0" [tool.poetry.group.dev.dependencies] mypy = "^1.1.1" diff --git a/qcparse/__init__.py b/qcparse/__init__.py index 81cf71b..101738f 100644 --- a/qcparse/__init__.py +++ b/qcparse/__init__.py @@ -4,7 +4,7 @@ __version__ = metadata.version(__name__) -from .main import parse, parse_computed_props # noqa: F401 +from .main import parse, parse_results # noqa: F401 from .registry import registry # noqa: F401 -__all__ = ["parse", "parse_computed_props", "registry"] +__all__ = ["parse", "parse_results", "registry"] diff --git a/qcparse/cli.py b/qcparse/cli.py index 422341f..f10d589 100644 --- a/qcparse/cli.py +++ b/qcparse/cli.py @@ -2,7 +2,7 @@ import json from 
pathlib import Path -from .main import parse_computed_props +from .main import parse_results def main(): @@ -18,9 +18,7 @@ def main(): args = parser.parse_args() - computed_props = parse_computed_props( - Path(args.filepath), args.program, args.filetype - ) + computed_props = parse_results(Path(args.filepath), args.program, args.filetype) # Hacking in pretty print since probably preferred for most users # Can update to result.json(indent=4) when this PR accepted # https://github.com/MolSSI/QCElemental/pull/307 diff --git a/qcparse/main.py b/qcparse/main.py index 7e35058..ae8591c 100644 --- a/qcparse/main.py +++ b/qcparse/main.py @@ -4,12 +4,7 @@ from pathlib import Path from typing import List, Optional, Union -from qcio import ( - SinglePointComputedProperties, - SinglePointFailedOutput, - SinglePointInput, - SinglePointSuccessfulOutput, -) +from qcio import ProgramFailure, ProgramInput, SinglePointOutput, SinglePointResults from .exceptions import MatchNotFoundError from .models import single_point_data_collector @@ -17,16 +12,18 @@ from .registry import ParserSpec, registry from .utils import get_file_content -__all__ = ["parse", "parse_computed_props", "registry"] +__all__ = ["parse", "parse_results", "registry"] -def parse_computed_props( +def parse_results( data_or_path: Union[str, bytes, Path], program: str, filetype: str = "stdout", -) -> SinglePointComputedProperties: +) -> SinglePointResults: """Parse a file using the parsers registered for the given program. + Can expand function to return other Results objects in the future. + Args: data_or_path: File contents (str or bytes) or path to the file to parse. program: The QC program that generated the output file. @@ -39,7 +36,7 @@ def parse_computed_props( >>> registry.supported_filetypes('program_name') Returns: - A SinglePointComputedProperties object containing the parsed data. + A SinglePointResults object containing the parsed data. 
Raises: MatchNotFoundError: If a required parser fails to parse its data. @@ -51,18 +48,18 @@ def parse_computed_props( # Get the calculation type if filetype is 'stdout' if filetype == "stdout": - parse_calc_type = import_module(f"qcparse.parsers.{program}").parse_calc_type - calc_type = parse_calc_type(file_content) + parse_calctype = import_module(f"qcparse.parsers.{program}").parse_calctype + calctype = parse_calctype(file_content) else: - calc_type = None + calctype = None # Get all the parsers for the program and filetype parsers: List[ParserSpec] = registry.get_parsers( program, filetype=filetype, collect_inputs=False, - calc_type=calc_type, + calctype=calctype, ) # Apply parsers to the file content. @@ -80,7 +77,7 @@ def parse_computed_props( # Remove scratch data del data_collector.scratch - return SinglePointComputedProperties(**data_collector.computed.dict()) + return SinglePointResults(**data_collector.computed.dict()) # TODO: Finish out this function for parsing full inputs, outputs, and failures @@ -90,8 +87,8 @@ def parse( data_or_path: Union[str, bytes, Path], program: str, filetype: str = "stdout", - input_data: Optional[SinglePointInput] = None, -) -> Union[SinglePointSuccessfulOutput, SinglePointFailedOutput]: + input_data: Optional[ProgramInput] = None, +) -> Union[SinglePointOutput, ProgramFailure]: """Parse a file using the parsers registered for the given program. Args: @@ -110,7 +107,7 @@ def parse( Returns: - SinglePointOutput or SinglePointFailure object encapsulating the parsed data. + SinglePointOutput or ProgramFailure object encapsulating the parsed data. 
""" raise NotImplementedError("This function needs work before it is ready to use.") # file_content, filepath = get_file_content(data_or_path) @@ -123,9 +120,9 @@ def parse( # # Get the calculation type if filetype is 'stdout' # if filetype == "stdout": - # parse_calc_type = import_module(f"qcparse.parsers.{program}").parse_calc_type - # calc_type = parse_calc_type(file_content) - # data_collector.input_data.program_args.calc_type = calc_type + # parse_calctype = import_module(f"qcparse.parsers.{program}").parse_calctype + # calctype = parse_calctype(file_content) + # data_collector.input_data.calctype = calctype # # Determine if calculation succeeded # parse_calculation_succeeded = import_module( @@ -137,14 +134,14 @@ def parse( # data_collector.stdout = file_content # else: - # calc_type = None + # calctype = None # # Get all the parsers for the program and filetype # parsers: List[ParserSpec] = registry.get_parsers( # program, # filetype=filetype, # collect_inputs=collect_inputs, - # calc_type=calc_type, + # calctype=calctype, # ) # # Apply parsers to the file content. 
@@ -175,12 +172,10 @@ def parse( # # Remove scratch data # del data_collector.scratch - # return SinglePointSuccessfulOutput(**data_collector.dict()) + # return SinglePointOutput(**data_collector.dict()) if __name__ == "__main__": # output = parse("./tests/data/water.gradient.out", "terachem", "stdout") - props, prov = parse_computed_props( - "./tests/data/water.gradient.out", "terachem", "stdout" - ) + props, prov = parse_results("./tests/data/water.gradient.out", "terachem", "stdout") print(props) diff --git a/qcparse/models.py b/qcparse/models.py index aeef0e5..fafcccb 100644 --- a/qcparse/models.py +++ b/qcparse/models.py @@ -55,8 +55,7 @@ def single_point_data_collector(collect_inputs: bool = True) -> ParsedDataCollec if collect_inputs: # Input Objects output_obj.input_data = ParsedDataCollector() - output_obj.input_data.program_args = ParsedDataCollector() - output_obj.input_data.program_args.model = ParsedDataCollector() + output_obj.input_data.model = ParsedDataCollector() # Output Objects output_obj.computed = ParsedDataCollector() diff --git a/qcparse/parsers/terachem.py b/qcparse/parsers/terachem.py index e9cb42d..d3de2c5 100644 --- a/qcparse/parsers/terachem.py +++ b/qcparse/parsers/terachem.py @@ -18,7 +18,7 @@ from pathlib import Path from typing import Optional, Union -from qcio import Molecule, SinglePointInput, SPCalcType +from qcio import CalcType, Molecule, ProgramInput from qcparse.exceptions import MatchNotFoundError from qcparse.models import ParsedDataCollector @@ -30,17 +30,17 @@ class FileType(str, Enum): stdout = "stdout" -def parse_calc_type(string: str) -> SPCalcType: - """Parse the calc_type from TeraChem stdout.""" - calc_types = ( - (SPCalcType.energy, r"SINGLE POINT ENERGY CALCULATIONS"), - (SPCalcType.gradient, r"SINGLE POINT GRADIENT CALCULATIONS"), - (SPCalcType.hessian, r"FREQUENCY ANALYSIS"), +def parse_calctype(string: str) -> CalcType: + """Parse the calctype from TeraChem stdout.""" + calctypes = ( + (CalcType.energy, 
r"SINGLE POINT ENERGY CALCULATIONS"), + (CalcType.gradient, r"SINGLE POINT GRADIENT CALCULATIONS"), + (CalcType.hessian, r"FREQUENCY ANALYSIS"), ) - for calc_type, regex in calc_types: + for calctype, regex in calctypes: match = re.search(regex, string) if match: - return calc_type + return calctype raise MatchNotFoundError(regex, string) @@ -49,7 +49,7 @@ def post_process( file_content: Union[str, bytes], filetype: str, filepath: Optional[Path] = None, - input_data: Optional[SinglePointInput] = None, + input_data: Optional[ProgramInput] = None, ): """Any post processing required after parsing is done here. @@ -85,9 +85,7 @@ def parse_energy(string: str, data_collector: ParsedDataCollector): def parse_method(string: str, data_collector: ParsedDataCollector): """Parse the method from TeraChem stdout.""" regex = r"Method: (\S+)" - data_collector.input_data.program_args.model.method = regex_search( - regex, string - ).group(1) + data_collector.input_data.model.method = regex_search(regex, string).group(1) @parser(filetype=FileType.stdout) @@ -101,9 +99,7 @@ def parse_working_directory(string: str, data_collector: ParsedDataCollector): def parse_basis(string: str, data_collector: ParsedDataCollector): """Parse the basis from TeraChem stdout.""" regex = r"Using basis set: (\S+)" - data_collector.input_data.program_args.model.basis = regex_search( - regex, string - ).group(1) + data_collector.input_data.model.basis = regex_search(regex, string).group(1) def parse_git_commit(string: str) -> str: @@ -149,7 +145,7 @@ def calculation_succeeded(string: str) -> bool: return True -@parser(filetype=FileType.stdout, only=[SPCalcType.gradient, SPCalcType.hessian]) +@parser(filetype=FileType.stdout, only=[CalcType.gradient, CalcType.hessian]) def parse_gradient(string: str, data_collector: ParsedDataCollector): """Parse gradient from TeraChem stdout.""" # This will match all floats after the dE/dX dE/dY dE/dZ header and stop at the @@ -168,7 +164,7 @@ def parse_gradient(string: 
str, data_collector: ParsedDataCollector): data_collector.computed.gradient = gradient -@parser(filetype=FileType.stdout, only=[SPCalcType.hessian]) +@parser(filetype=FileType.stdout, only=[CalcType.hessian]) def parse_hessian(string: str, data_collector: ParsedDataCollector): """Parse Hessian Matrix from TeraChem stdout diff --git a/qcparse/parsers/utils.py b/qcparse/parsers/utils.py index c64dcd4..61f495f 100644 --- a/qcparse/parsers/utils.py +++ b/qcparse/parsers/utils.py @@ -3,7 +3,7 @@ import re from typing import List, Optional -from qcio import SPCalcType +from qcio import CalcType from qcparse.exceptions import MatchNotFoundError from qcparse.registry import registry @@ -14,7 +14,7 @@ def parser( *, required: bool = True, input_data: bool = False, - only: Optional[List[SPCalcType]] = None, + only: Optional[List[CalcType]] = None, ): """Decorator to register a function as a parser for program output filetype. @@ -26,8 +26,8 @@ def parser( molecular structure, instead of computed output data. If True the parser will be not be called if a SinglePointInput object is passed as input_data to the top-level parse function. - only: Only register the parser on these SPCalcTypes. If None the parser will be - registered for all SPCalcTypes. + only: Only register the parser on these CalcTypes. If None the parser will be + registered for all CalcTypes. """ def decorator(func): diff --git a/qcparse/registry.py b/qcparse/registry.py index 7e4621e..6be801f 100644 --- a/qcparse/registry.py +++ b/qcparse/registry.py @@ -2,7 +2,7 @@ from typing import Callable, Dict, List, Optional from pydantic import BaseModel -from qcio import SPCalcType +from qcio import CalcType from .exceptions import RegistryError @@ -16,14 +16,14 @@ class ParserSpec(BaseModel): required: Whether the parser is required to be successful for the parsing to be considered successful. If True and the parser fails a MatchNotFoundError will be raised. If False and the parser fails the value will be ignored. 
- calc_types: The calculation types that the parser work on. + calctypes: The calculation types that the parser work on. """ parser: Callable filetype: str required: bool input_data: bool = False - calc_types: List[SPCalcType] + calctypes: List[CalcType] class ParserRegistry(BaseModel): @@ -38,7 +38,7 @@ def register( filetype: str, required: bool, input_data: bool, - only: Optional[List[SPCalcType]], + only: Optional[List[CalcType]] = None, ) -> None: """Register a new parser function. @@ -59,8 +59,7 @@ def register( required=required, input_data=input_data, # If only not passed then register for all calculation types - calc_types=only - or [SPCalcType.energy, SPCalcType.gradient, SPCalcType.hessian], + calctypes=only or [CalcType.energy, CalcType.gradient, CalcType.hessian], ) self.registry[program].append(parser_info) @@ -69,7 +68,7 @@ def get_parsers( program: str, filetype: Optional[str] = None, collect_inputs: bool = True, - calc_type: Optional[SPCalcType] = None, + calctype: Optional[CalcType] = None, ) -> List[ParserSpec]: """Get all parser functions for a given program. @@ -77,7 +76,7 @@ def get_parsers( program: The program to get parsers for. filetype: If given only return parsers for this filetype. collect_inputs: If False return only parsers for output data. - calc_type: Filter parsers for a given calculation type. + calctype: Filter parsers for a given calculation type. Returns: List of ParserSpec objects. 
@@ -94,8 +93,8 @@ def get_parsers( if not collect_inputs: parsers = [p_spec for p_spec in parsers if not p_spec.input_data] - if calc_type: - parsers = [p_spec for p_spec in parsers if calc_type in p_spec.calc_types] + if calctype: + parsers = [p_spec for p_spec in parsers if calctype in p_spec.calctypes] return parsers def supported_programs(self) -> List[str]: diff --git a/tests/test_main.py b/tests/test_main.py index 064e895..794c632 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,6 +1,6 @@ -from qcparse.main import parse_computed_props +from qcparse.main import parse_results def test_pcp_terachem_energy(terachem_energy_stdout): - computed_props = parse_computed_props(terachem_energy_stdout, "terachem") + computed_props = parse_results(terachem_energy_stdout, "terachem") assert computed_props.energy == -76.3861099088 diff --git a/tests/test_main_terachem.py b/tests/test_main_terachem.py index 3db1cf2..61e1774 100644 --- a/tests/test_main_terachem.py +++ b/tests/test_main_terachem.py @@ -1,43 +1,43 @@ import pytest -from qcio import SinglePointFailedOutput, SinglePointSuccessfulOutput, SPCalcType +from qcio import CalcType, ProgramFailure, SinglePointOutput from qcparse import parse @pytest.mark.skip @pytest.mark.parametrize( - "filename,return_type,calc_type", + "filename,return_type,calctype", ( - ("water.energy.out", SinglePointSuccessfulOutput, SPCalcType.energy.value), - ("water.gradient.out", SinglePointSuccessfulOutput, SPCalcType.gradient.value), + ("water.energy.out", SinglePointOutput, CalcType.energy.value), + ("water.gradient.out", SinglePointOutput, CalcType.gradient.value), ( "water.frequencies.out", - SinglePointSuccessfulOutput, - SPCalcType.hessian.value, + SinglePointOutput, + CalcType.hessian.value, ), ( "caffeine.gradient.out", - SinglePointSuccessfulOutput, - SPCalcType.gradient.value, + SinglePointOutput, + CalcType.gradient.value, ), ( "caffeine.frequencies.out", - SinglePointSuccessfulOutput, - SPCalcType.hessian.value, + 
SinglePointOutput, + CalcType.hessian.value, ), - ("failure.basis.out", SinglePointFailedOutput, SPCalcType.gradient.value), - ("failure.nocuda.out", SinglePointFailedOutput, SPCalcType.gradient.value), + ("failure.basis.out", ProgramFailure, CalcType.gradient.value), + ("failure.nocuda.out", ProgramFailure, CalcType.gradient.value), ), ) -def test_parse(test_data_dir, filename, return_type, calc_type, data_collector): +def test_parse(test_data_dir, filename, return_type, calctype, data_collector): """Test that the parser returns the correct type of output object""" data_collector = parse(test_data_dir / filename, "terachem", "stdout") assert isinstance(data_collector, return_type) - assert data_collector.input_data.program_args.calc_type == calc_type + assert data_collector.input_data.calctype == calctype - if isinstance(data_collector, SinglePointFailedOutput): + if isinstance(data_collector, ProgramFailure): # Guarantee stdout included assert isinstance(data_collector.error.extras["stdout"], str) @@ -54,7 +54,7 @@ def test_parse(test_data_dir, filename, return_type, calc_type, data_collector): def test_parse_ignore_xyz(test_data_dir, filename, driver): # This tests that qcel still does correct validation on the gradient and output = parse(test_data_dir / filename, ignore_xyz=True) - assert isinstance(output, SinglePointSuccessfulOutput) + assert isinstance(output, SinglePointOutput) assert output.driver == driver # Using a hydrogen atoms as dummy assert output.molecule.symbols[0] == "H" diff --git a/tests/test_parser_registry.py b/tests/test_parser_registry.py index 4b9eb63..b5ea3d1 100644 --- a/tests/test_parser_registry.py +++ b/tests/test_parser_registry.py @@ -30,12 +30,12 @@ def test_get_parsers_program_collect_inputs(): assert not parser.input_data -def test_get_parsers_program_calc_type(): - parsers = registry.get_parsers("terachem", calc_type="gradient") +def test_get_parsers_program_calctype(): + parsers = registry.get_parsers("terachem", 
calctype="gradient") assert parsers for parser in parsers: assert isinstance(parser, ParserSpec) - assert "gradient" in parser.calc_types + assert "gradient" in parser.calctypes def test_supported_programs(): diff --git a/tests/test_terachem.py b/tests/test_terachem.py index 3ea26d2..bfdd10f 100644 --- a/tests/test_terachem.py +++ b/tests/test_terachem.py @@ -1,13 +1,13 @@ from pathlib import Path import pytest -from qcio import SPCalcType +from qcio import CalcType from qcparse.exceptions import MatchNotFoundError from qcparse.parsers.terachem import ( calculation_succeeded, parse_basis, - parse_calc_type, + parse_calctype, parse_energy, parse_git_commit, parse_gradient, @@ -60,22 +60,22 @@ def test_parse_energy_raises_exception(data_collector): @pytest.mark.parametrize( - "filename,calc_type", + "filename,calctype", ( - ("water.energy.out", SPCalcType.energy), - ("water.gradient.out", SPCalcType.gradient), - ("water.frequencies.out", SPCalcType.hessian), + ("water.energy.out", CalcType.energy), + ("water.gradient.out", CalcType.gradient), + ("water.frequencies.out", CalcType.hessian), ), ) -def test_parse_calc_type(test_data_dir, filename, calc_type): +def test_parse_calctype(test_data_dir, filename, calctype): with open(test_data_dir / filename) as f: string = f.read() - assert parse_calc_type(string) == calc_type + assert parse_calctype(string) == calctype -def test_parse_calc_type_raises_exception(): +def test_parse_calctype_raises_exception(): with pytest.raises(MatchNotFoundError): - parse_calc_type("No driver here") + parse_calctype("No driver here") @pytest.mark.parametrize( @@ -103,12 +103,12 @@ def test_parse_method(test_data_dir, filename, method, data_collector): with open(test_data_dir / filename) as f: tcout = f.read() parse_method(tcout, data_collector) - assert data_collector.input_data.program_args.model.method == method + assert data_collector.input_data.model.method == method def test_parse_basis(terachem_energy_stdout, data_collector): 
parse_basis(terachem_energy_stdout, data_collector) - assert data_collector.input_data.program_args.model.basis == "6-31g" + assert data_collector.input_data.model.basis == "6-31g" def test_parse_version(terachem_energy_stdout, data_collector):