Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

quipGapXYZ: add unit convert and tag synonym matching #678

Open
wants to merge 7 commits into
base: devel
Choose a base branch
from
238 changes: 186 additions & 52 deletions dpdata/xyz/quip_gap_xyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@

import numpy as np

from ..unit import EnergyConversion, ForceConversion

e_conv_kcalpermol2eV = EnergyConversion("kcal_mol", "eV").value()
e_conv_au2eV = EnergyConversion("hartree", "eV").value()
f_conv_kcalpermolperang2eVperang = ForceConversion(
"kcal_mol/angstrom", "eV/angstrom"
).value()
f_conv_auperang2eVperang = ForceConversion("hartree/angstrom", "eV/angstrom").value()
f_conv_kcalpermolperbohr2eVperang = ForceConversion(
"kcal_mol/bohr", "eV/angstrom"
).value()
f_conv_au2eVperang = ForceConversion("hartree/bohr", "eV/angstrom").value()


class QuipGapxyzSystems:
"""deal with QuipGapxyzFile."""
Expand Down Expand Up @@ -82,56 +95,72 @@ def handle_single_xyz_frame(lines):
force_array = None
virials = None
for kv_dict in prop_list:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
try:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
coords_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
coords_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "force":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "tags":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for tags must be 'I' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
force_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
field_length = int(kv_dict["value"])
used_colomn += field_length
continue
elif kv_dict["key"] == "force" or kv_dict["key"] == "forces":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
force_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
except Exception as e:
Comment on lines +98 to +161
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improved field handling and error management.

The changes improve handling of fields like species, pos, Z, tags, and forces. The logic is sound, but avoid using bare except statements for better debugging.

-  except Exception as e:
+  except KeyError as e:
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
coords_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
coords_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "force":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "tags":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for tags must be 'I' instead of {}".format(
kv_dict["datatype"]
)
)
)
field_length = int(kv_dict["value"])
force_array = data_array[:, used_colomn : used_colomn + field_length]
used_colomn += field_length
field_length = int(kv_dict["value"])
used_colomn += field_length
continue
elif kv_dict["key"] == "force" or kv_dict["key"] == "forces":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
force_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
except Exception as e:
try:
if kv_dict["key"] == "species":
if kv_dict["datatype"] != "S":
raise RuntimeError(
"datatype for species must be 'S' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
type_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "pos":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
coords_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
elif kv_dict["key"] == "Z":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
Z_array = data_array[
:, used_colomn : used_colomn + field_length
].flatten()
used_colomn += field_length
continue
elif kv_dict["key"] == "tags":
if kv_dict["datatype"] != "I":
raise RuntimeError(
"datatype for tags must be 'I' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
used_colomn += field_length
continue
elif kv_dict["key"] == "force" or kv_dict["key"] == "forces":
if kv_dict["datatype"] != "R":
raise RuntimeError(
"datatype for pos must be 'R' instead of {}".format(
kv_dict["datatype"]
)
)
field_length = int(kv_dict["value"])
force_array = data_array[
:, used_colomn : used_colomn + field_length
]
used_colomn += field_length
continue
except KeyError as e:
Tools
Ruff

133-133: Local variable Z_array is assigned to but never used

Remove assignment to unused variable Z_array

(F841)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the error it is expected to catch?

print("unknown field {}".format(kv_dict["key"]), e)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason not to raise an error?

continue
else:
raise RuntimeError("unknown field {}".format(kv_dict["key"]))

type_num_dict = OrderedDict()
atom_type_list = []
Expand Down Expand Up @@ -165,21 +194,126 @@ def handle_single_xyz_frame(lines):
else:
virials = None

try:
e_units = np.array([field_dict["energy-unit"].lower()])
f_units = np.array([field_dict["force-unit"].lower()])
except Exception:
pass
# print('No units information contained.')

Comment on lines +197 to +203
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Proper handling of missing unit information.

The try-except block for handling missing unit information should raise an exception or log a warning if units are not found, rather than passing silently.

-  except Exception:
-    pass
-    # print('No units information contained.')
+  except KeyError as e:
+    raise ValueError("No units information contained.") from e
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
e_units = np.array([field_dict["energy-unit"].lower()])
f_units = np.array([field_dict["force-unit"].lower()])
except Exception:
pass
# print('No units information contained.')
try:
e_units = np.array([field_dict["energy-unit"].lower()])
f_units = np.array([field_dict["force-unit"].lower()])
except KeyError as e:
raise ValueError("No units information contained.") from e

info_dict = {}
info_dict["nopbc"] = False
info_dict["atom_names"] = list(type_num_array[:, 0])
info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int))
info_dict["atom_types"] = np.array(atom_type_list).astype(int)
info_dict["cells"] = np.array(
[
np.array(list(filter(bool, field_dict["Lattice"].split(" ")))).reshape(
3, 3
)
]
).astype("float32")

info_dict["coords"] = np.array([coords_array]).astype("float32")
"""
info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32")
info_dict["forces"] = np.array([force_array]).astype("float32")
"""
Comment on lines +211 to +214
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is not used anymore, it should be removed.


try:
if e_units == "kcal/mol":
info_dict["energies"] = (
np.array([field_dict["energy"]]).astype("float32")
* e_conv_kcalpermol2eV
)
elif e_units in ["hartree", "au", "a.u."]:
info_dict["energies"] = (
np.array([field_dict["energy"]]).astype("float32") * e_conv_au2eV
)
elif e_units == "ev":
info_dict["energies"] = np.array([field_dict["energy"]]).astype(
"float32"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it converted to float32?

)
else:
info_dict["energies"] = np.array([field_dict["energy"]]).astype(
"float32"
)
except Exception:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the expected error to catch here? It's hard to read.

try:
possible_fields = [
"energy",
"energies",
"Energies",
"potential-energy.energy",
"Energy",
]
for key in possible_fields:
if key in field_dict:
info_dict["energies"] = np.array(
[field_dict[key]], dtype="float32"
)
break
else:
raise ValueError("No valid energy field found in field_dict.")
except KeyError:
raise ValueError("Error while accessing energy fields in field_dict.")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raise exceptions with raise ... from None.

Within an except clause, raise exceptions with raise ... from None to distinguish them from errors in exception handling.

-  raise ValueError("Error while accessing energy fields in field_dict.")
+  raise ValueError("Error while accessing energy fields in field_dict.") from None
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
raise ValueError("Error while accessing energy fields in field_dict.")
raise ValueError("Error while accessing energy fields in field_dict.") from None
Tools
Ruff

252-252: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)


Comment on lines +235 to +253
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improved handling of possible energy fields and raising exceptions.

The changes improve handling of possible energy fields and raising exceptions. Use raise ... from None to distinguish exceptions from errors in exception handling.

-  raise ValueError("Error while accessing energy fields in field_dict.")
+  raise ValueError("Error while accessing energy fields in field_dict.") from None
Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
possible_fields = [
"energy",
"energies",
"Energies",
"potential-energy.energy",
"Energy",
]
for key in possible_fields:
if key in field_dict:
info_dict["energies"] = np.array(
[field_dict[key]], dtype="float32"
)
break
else:
raise ValueError("No valid energy field found in field_dict.")
except KeyError:
raise ValueError("Error while accessing energy fields in field_dict.")
try:
possible_fields = [
"energy",
"energies",
"Energies",
"potential-energy.energy",
"Energy",
]
for key in possible_fields:
if key in field_dict:
info_dict["energies"] = np.array(
[field_dict[key]], dtype="float32"
)
break
else:
raise ValueError("No valid energy field found in field_dict.")
except KeyError:
raise ValueError("Error while accessing energy fields in field_dict.") from None
Tools
Ruff

252-252: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)

try:
if f_units == "kcal/mol/angstrom":
<<<<<<< HEAD
info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperang2eVperang
elif f_units == "hartree/angstrom" or f_units == "hartree/ang" or f_units == "hartree/ang.":
info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_auperang2eVperang
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_kcalpermolperbohr2eVperang
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = np.array([force_array]).astype("float32") * f_conv_au2eVperang
elif f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang.":
info_dict["forces"] = np.array([force_array]).astype("float32")
else: info_dict["forces"] = np.array([force_array]).astype("float32")
except Exception:
info_dict["forces"] = np.array([force_array]).astype("float32")
=======
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperang2eVperang
)
elif (
f_units == "hartree/angstrom"
or f_units == "hartree/ang"
or f_units == "hartree/ang."
):
info_dict["forces"] = (
np.array([force_array]).astype("float32") * f_conv_auperang2eVperang
)
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = (
np.array([force_array]).astype("float32")
* f_conv_kcalpermolperbohr2eVperang
)
elif f_units == "kcal/mol/bohr":
info_dict["forces"] = (
np.array([force_array]).astype("float32") * f_conv_au2eVperang
Comment on lines +273 to +275
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove redundant conditional block for kcal/mol/bohr.

The conditional block for kcal/mol/bohr is redundant and should be removed.

-  elif f_units == "kcal/mol/bohr":
-    info_dict["forces"] = (
-      np.array([force_array]).astype("float32") * f_conv_au2eVperang
-    )

Committable suggestion was skipped due to low confidence.

)
elif (
f_units == "ev/angstrom" or f_units == "ev/ang" or f_units == "ev/ang."
):
info_dict["forces"] = np.array([force_array]).astype("float32")
else:
info_dict["forces"] = np.array([force_array]).astype("float32")
except:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid using bare except statements.

Using bare except statements can catch unexpected errors and make debugging difficult. Specify the exception type or use except Exception as e to handle specific exceptions.

-  except:
+  except KeyError:

Committable suggestion was skipped due to low confidence.

Tools
Ruff

283-283: Do not use bare except

(E722)

info_dict["forces"] = np.array([force_array]).astype("float32")
>>>>>>> origin/devel
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unit conversion for forces and redundant conditional block.

The new logic for force unit conversion is correctly implemented but contains a redundant conditional block for kcal/mol/bohr and uses a bare except statement.

-  elif f_units == "kcal/mol/bohr":
-    info_dict["forces"] = (
-      np.array([force_array]).astype("float32") * f_conv_au2eVperang
-    )
-  except:
-    info_dict["forces"] = np.array([force_array]).astype("float32")
+  except KeyError as e:
+    raise ValueError("Error while accessing force fields in field_dict.") from e

Committable suggestion was skipped due to low confidence.

Tools
Ruff

255-256: SyntaxError: Expected an indented block after if statement


256-256: SyntaxError: Expected except or finally after try block


256-256: SyntaxError: Expected a statement


256-256: SyntaxError: Expected a statement


256-256: SyntaxError: Expected a statement


257-257: SyntaxError: Unexpected indentation


258-258: SyntaxError: unindent does not match any outer indentation level


258-258: SyntaxError: Expected a statement


258-258: SyntaxError: Invalid annotated assignment target


258-259: SyntaxError: Expected an expression


259-259: SyntaxError: Unexpected indentation


260-260: SyntaxError: unindent does not match any outer indentation level


260-260: SyntaxError: Expected a statement


260-260: SyntaxError: Invalid annotated assignment target


260-261: SyntaxError: Expected an expression


261-261: SyntaxError: Unexpected indentation


262-262: SyntaxError: unindent does not match any outer indentation level


262-262: SyntaxError: Expected a statement


262-262: SyntaxError: Invalid annotated assignment target


262-263: SyntaxError: Expected an expression


263-263: SyntaxError: Unexpected indentation


264-264: SyntaxError: unindent does not match any outer indentation level


264-264: SyntaxError: Expected a statement


264-264: SyntaxError: Invalid annotated assignment target


264-265: SyntaxError: Expected an expression


265-265: SyntaxError: Unexpected indentation


266-266: SyntaxError: unindent does not match any outer indentation level


266-266: SyntaxError: Expected a statement


266-266: SyntaxError: Expected a statement


267-267: SyntaxError: Unexpected indentation


267-267: SyntaxError: Expected a statement


267-268: SyntaxError: Expected an expression


268-268: SyntaxError: Unexpected indentation


269-269: SyntaxError: Expected a statement


269-269: SyntaxError: Expected a statement


269-269: SyntaxError: Expected a statement


269-269: SyntaxError: Expected a statement


269-270: SyntaxError: Expected a statement


270-270: SyntaxError: Unexpected indentation


274-274: SyntaxError: unindent does not match any outer indentation level


274-274: SyntaxError: Expected a statement


275-275: SyntaxError: Invalid annotated assignment target


278-279: SyntaxError: Expected an expression


279-279: SyntaxError: Unexpected indentation


282-282: SyntaxError: unindent does not match any outer indentation level


282-282: SyntaxError: Expected a statement


282-282: SyntaxError: Invalid annotated assignment target


282-283: SyntaxError: Expected an expression


283-283: SyntaxError: Unexpected indentation


287-287: SyntaxError: unindent does not match any outer indentation level


287-287: SyntaxError: Expected a statement


287-287: SyntaxError: Invalid annotated assignment target


287-288: SyntaxError: Expected an expression


288-288: SyntaxError: Unexpected indentation


291-291: SyntaxError: unindent does not match any outer indentation level


291-291: SyntaxError: Expected a statement


292-292: SyntaxError: Invalid annotated assignment target


293-294: SyntaxError: Expected an expression


294-294: SyntaxError: Unexpected indentation


295-295: SyntaxError: unindent does not match any outer indentation level


295-295: SyntaxError: Expected a statement


295-295: SyntaxError: Expected a statement


295-296: SyntaxError: Expected a statement


296-296: SyntaxError: Unexpected indentation


297-297: SyntaxError: unindent does not match any outer indentation level


297-297: SyntaxError: Expected a statement


297-297: SyntaxError: Expected a statement


297-298: SyntaxError: Expected a statement


298-298: SyntaxError: Unexpected indentation


299-299: SyntaxError: Expected a statement


299-299: SyntaxError: Expected a statement


299-299: SyntaxError: Expected a statement


299-299: SyntaxError: Expected a statement


if virials is not None:
info_dict["virials"] = virials
info_dict["orig"] = np.zeros(3)
if "Lattice" in field_dict and field_dict["Lattice"].strip():
lattice_values = list(filter(bool, field_dict["Lattice"].split(" ")))
info_dict["cells"] = np.array(
[np.array(lattice_values).reshape(3, 3)]
).astype("float32")
else:
lattice_values = np.array(
[[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]
)

info_dict["nopbc"] = True
info_dict["cells"] = np.array(
[np.array(lattice_values).reshape(3, 3)]
).astype("float32")

return info_dict
Loading