Skip to content

Commit

Permalink
Add solvent interface placement code (#765)
Browse files Browse the repository at this point in the history
* Add solvent interface placement code

* circular import

* adding new notebook for using fairchem models with NEBs without CatTSunami enumeration (#764)

* adding new notebook for using fairchem models with NEBs

* adding md tutorials

* blocking code cells that arent needed or take too long

* packmol install fix

* add packmol to github actions path

* speed up random slab generation

* support for pymatgen update

* save ion id when randomly sampled

* vasp 6.3 ml flags are slightly different

* add vdw to bulks

* add lasph

* ncore=4

* typing, docstring, cleanup geometry

* typing

---------

Co-authored-by: Brook Wander <[email protected]>
  • Loading branch information
mshuaibii and brookwander authored Aug 9, 2024
1 parent 21d936f commit 917056a
Show file tree
Hide file tree
Showing 14 changed files with 853 additions and 44 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ jobs:
pip install -e packages/fairchem-demo-ocpapi[dev]
pip install -e packages/fairchem-applications-cattsunami
- name: Install additional dependencies
run: |
wget https://github.com/m3g/packmol/archive/refs/tags/v20.15.0.tar.gz
tar -xzvf v20.15.0.tar.gz
cd packmol-20.15.0
./configure
make
echo "$(readlink -f .)" >> $GITHUB_PATH
- name: Test core with pytest
run: |
pytest tests -vv --ignore=tests/demo/ocpapi/tests/integration/ --cov-report=xml --cov=fairchem -c ./packages/fairchem-core/pyproject.toml
Expand Down
3 changes: 3 additions & 0 deletions src/fairchem/data/oc/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@
from .adsorbate import Adsorbate
from .adsorbate_slab_config import AdsorbateSlabConfig
from .bulk import Bulk
from .interface_config import InterfaceConfig
from .ion import Ion
from .multi_adsorbate_slab_config import MultipleAdsorbateSlabConfig
from .slab import Slab
from .solvent import Solvent
269 changes: 269 additions & 0 deletions src/fairchem/data/oc/core/interface_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
from __future__ import annotations

import os
import subprocess
import tempfile
from shutil import which
from typing import TYPE_CHECKING

import ase.io
import numpy as np
from fairchem.data.oc.core.multi_adsorbate_slab_config import (
MultipleAdsorbateSlabConfig,
)

if TYPE_CHECKING:
from fairchem.data.oc.core.adsorbate import Adsorbate
from fairchem.data.oc.core.ion import Ion
from fairchem.data.oc.core.slab import Slab
from fairchem.data.oc.core.solvent import Solvent

from fairchem.data.oc.utils.geometry import (
BoxGeometry,
Geometry,
PlaneBoundTriclinicGeometry,
)

# Code adapted from https://github.com/henriasv/molecular-builder/tree/master


class InterfaceConfig(MultipleAdsorbateSlabConfig):
"""
Class to represent a solvent, adsorbate, slab, ion config. This class only
returns a fixed combination of adsorbates placed on the surface. Solvent
placement is performed by packmol
(https://m3g.github.io/packmol/userguide.shtml), with the number of solvent
molecules controlled by its corresponding density. Ion placement is random
within the desired volume.
Arguments
---------
slab: Slab
Slab object.
adsorbates: List[Adsorbate]
List of adsorbate objects to place on the slab.
solvent: Solvent
Solvent object
ions: List[Ion] = []
List of ion objects to place
num_sites: int
Number of sites to sample.
num_configurations: int
Number of configurations to generate per slab+adsorbate(s) combination.
This corresponds to selecting different site combinations to place
the adsorbates on.
interstitial_gap: float
Minimum distance, in Angstroms, between adsorbate and slab atoms as
well as the inter-adsorbate distance.
vacuum_size: int
Size of vacuum layer to add to both ends of the resulting atoms object.
solvent_interstitial_gap: float
Minimum distance, in Angstroms, between the solvent environment and the
adsorbate-slab environment.
solvent_depth: float
Volume depth to be used to pack solvents inside.
pbc_shift: float
Cushion to add to the packmol volume to avoid overlapping atoms over pbc.
packmol_tolerance: float
Packmol minimum distance to impose between molecules.
mode: str
"random", "heuristic", or "random_site_heuristic_placement".
This affects surface site sampling and adsorbate placement on each site.
In "random", we do a Delaunay triangulation of the surface atoms, then
sample sites uniformly at random within each triangle. When placing the
adsorbate, we randomly rotate it along xyz, and place it such that the
center of mass is at the site.
In "heuristic", we use Pymatgen's AdsorbateSiteFinder to find the most
energetically favorable sites, i.e., ontop, bridge, or hollow sites.
When placing the adsorbate, we randomly rotate it along z with only
slight rotation along x and y, and place it such that the binding atom
is at the site.
In "random_site_heuristic_placement", we do a Delaunay triangulation of
the surface atoms, then sample sites uniformly at random within each
triangle. When placing the adsorbate, we randomly rotate it along z with
only slight rotation along x and y, and place it such that the binding
atom is at the site.
In all cases, the adsorbate is placed at the closest position of no
overlap with the slab plus `interstitial_gap` along the surface normal.
"""

def __init__(
self,
slab: Slab,
adsorbates: list[Adsorbate],
solvent: Solvent,
ions: list[Ion] | None = None,
num_sites: int = 100,
num_configurations: int = 1,
interstitial_gap: float = 0.1,
vacuum_size: int = 15,
solvent_interstitial_gap: float = 2,
solvent_depth: float = 8,
pbc_shift: float = 0.0,
packmol_tolerance: float = 2,
mode: str = "random_site_heuristic_placement",
):
super().__init__(
slab=slab,
adsorbates=adsorbates,
num_sites=num_sites,
num_configurations=num_configurations,
interstitial_gap=interstitial_gap,
mode=mode,
)

self.solvent = solvent
self.ions = ions
self.vacuum_size = vacuum_size
self.solvent_depth = solvent_depth
self.solvent_interstitial_gap = solvent_interstitial_gap
self.pbc_shift = pbc_shift
self.packmol_tolerance = packmol_tolerance

self.n_mol_per_volume = solvent.molecules_per_volume

self.atoms_list, self.metadata_list = self.create_interface_on_sites(
self.atoms_list, self.metadata_list
)

def create_interface_on_sites(
self, atoms_list: list[ase.Atoms], metadata_list: list[dict]
):
"""
Given adsorbate+slab configurations generated from
(Multi)AdsorbateSlabConfig and its corresponding metadata, create the
solvent/ion interface on top of the provided atoms objects.
"""
atoms_interface_list = []
metadata_interface_list = []

for atoms, adsorbate_metadata in zip(atoms_list, metadata_list):
cell = atoms.cell.copy()
unit_normal = cell[2] / np.linalg.norm(cell[2])
cell[2] = self.solvent_depth * unit_normal
volume = cell.volume
n_solvent_mols = int(self.n_mol_per_volume * volume)

if cell.orthorhombic:
box_length = cell.lengths()
geometry = BoxGeometry(
center=box_length / 2, length=box_length - self.pbc_shift
)
else:
geometry = PlaneBoundTriclinicGeometry(cell, pbc=self.pbc_shift)

solvent_ions_atoms = self.create_packmol_atoms(geometry, n_solvent_mols)
solvent_ions_atoms.set_cell(cell)

max_z = atoms.positions[:, 2].max() + self.solvent_interstitial_gap
translation_vec = cell[2]
translation_vec[2] = max_z
solvent_ions_atoms.translate(translation_vec)

interface_atoms = atoms + solvent_ions_atoms
interface_atoms.center(vacuum=self.vacuum_size, axis=2)
interface_atoms.wrap()

atoms_interface_list.append(interface_atoms)

metadata = {
"adsorbates": adsorbate_metadata,
"solvent": self.solvent.name,
"ions": [x.name for x in self.ions],
"ion_concentrations": [
x.get_ion_concentration(volume) for x in self.ions
],
"solvent_depth": self.solvent_depth,
"solvent_volume": volume,
"slab_millers": self.slab.millers,
"slab_shift": self.slab.shift,
"slab_top": self.slab.top,
"bulk_idx": self.slab.bulk.bulk_id_from_db,
}
metadata_interface_list.append(metadata)

return atoms_interface_list, metadata_interface_list

def create_packmol_atoms(self, geometry: Geometry, n_solvent_mols: int):
"""
Pack solvent molecules in a provided unit cell volume. Packmol is used
to randomly pack solvent molecules in the desired volume.
Arguments:
geometry (Geometry): Geometry object corresponding to the desired cell.
n_solvent_mols (int): Number of solvent molecules to pack in the volume.
"""
cell = geometry.cell
with tempfile.TemporaryDirectory() as tmp_dir:
output_path = os.path.join(tmp_dir, "out.pdb")
self.solvent.atoms.write(f"{tmp_dir}/solvent.pdb", format="proteindatabank")

# When placing a single ion, packmol strangely always places it at
# the boundary of the cell. This hacky fix manually places
# the ion in a random location in the cell. Packmol then will fix
# these atoms and not optimize them during its optimization, only
# optimizing solvent molecules arround the ion.
for i, ion in enumerate(self.ions):
ion_atoms = ion.atoms.copy()
ion_atoms.set_cell(cell)
self.randomize_coords(ion_atoms)
ion_atoms.write(f"{tmp_dir}/ion_{i}.pdb", format="proteindatabank")

# write packmol input
packmol_input = os.path.join(tmp_dir, "input.inp")
with open(packmol_input, "w") as f:
f.write(f"tolerance {self.packmol_tolerance}\n")
f.write("filetype pdb\n")
f.write(f"output {output_path}\n")

# write solvent
f.write(
geometry.packmol_structure(
f"{tmp_dir}/solvent.pdb", n_solvent_mols, "inside"
)
)

for i in range(len(self.ions)):
f.write(f"structure {tmp_dir}/ion_{i}.pdb\n")
f.write(" number 1\n")
f.write(" fixed 0 0 0 0 0 0\n")
f.write("end structure\n\n")

self.run_packmol(packmol_input)

solvent_ions_atoms = ase.io.read(output_path, format="proteindatabank")
solvent_ions_atoms.set_pbc(True)
solvent_ions_atoms.set_tags([3] * len(solvent_ions_atoms))

return solvent_ions_atoms

def run_packmol(self, packmol_input: str):
"""
Run packmol.
"""
packmol_cmd = which("packmol")
if not packmol_cmd:
raise OSError("packmol not found.")

ps = subprocess.Popen(
f"{packmol_cmd} < {packmol_input}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
out, err = ps.communicate()
if err:
raise OSError(err.decode("utf-8"))

def randomize_coords(self, atoms: ase.Atoms):
"""
Randomly place the atoms in its unit cell.
"""
cell_weights = np.random.rand(3)
cell_weights /= np.sum(cell_weights)
xyz = np.dot(cell_weights, atoms.cell)
atoms.set_center_of_mass(xyz)
69 changes: 69 additions & 0 deletions src/fairchem/data/oc/core/ion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

import pickle

import ase
import ase.units as units
import numpy as np
from fairchem.data.oc.databases.pkls import ION_PKL_PATH


class Ion:
"""
Initializes an ion object in one of 2 ways:
- Directly pass in an ase.Atoms object.
- Pass in index of ion to select from ion database.
Arguments
---------
ion_atoms: ase.Atoms
ion structure.
ion_id_from_db: int
Index of ion to select.
ion_db_path: str
Path to ion database.
"""

def __init__(
self,
ion_atoms: ase.Atoms = None,
ion_id_from_db: int | None = None,
ion_db_path: str = ION_PKL_PATH,
):
self.ion_id_from_db = ion_id_from_db
self.ion_db_path = ion_db_path

if ion_atoms is not None:
self.atoms = ion_atoms.copy()
self.name = str(self.atoms.symbols)
else:
with open(ion_db_path, "rb") as fp:
ion_db = pickle.load(fp)
if ion_id_from_db is not None:
self._load_ion(ion_db[ion_id_from_db])
else:
self.ion_id_from_db = np.random.randint(len(ion_db))
self._load_ion(ion_db[self.ion_id_from_db])

def __len__(self):
return len(self.atoms)

def __str__(self):
return self.name

def _load_ion(self, ion: dict) -> None:
"""
Saves the fields from an ion stored in a database. Fields added
after the first revision are conditionally added for backwards
compatibility with older database files.
"""
self.atoms = ion["atoms"]
self.name = ion["name"]
self.charge = ion["charge"]

def get_ion_concentration(self, volume):
"""
Compute the ion concentration units of M, given a volume in units of
Angstrom^3.
"""
return 1e27 / (units._Nav * volume)
18 changes: 9 additions & 9 deletions src/fairchem/data/oc/core/slab.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,17 @@ def from_bulk_get_random_slab(
else:
# If we're not saving all slabs, just tile and tag one
assert bulk is not None
untiled_slabs = compute_slabs(
bulk.atoms,
max_miller=max_miller,
bulk_struct = standardize_bulk(bulk.atoms)
avail_millers = get_symmetrically_distinct_miller_indices(
bulk_struct, max_miller
)
sampled_miller_idx = np.random.randint(len(avail_millers))
slabs = Slab.from_bulk_get_specific_millers(
avail_millers[sampled_miller_idx], bulk
)
slab_idx = np.random.randint(len(untiled_slabs))
unit_slab_struct, millers, shift, top, oriented_bulk = untiled_slabs[
slab_idx
]
slab_atoms = tile_and_tag_atoms(unit_slab_struct, bulk.atoms, min_ab=min_ab)

return cls(bulk, slab_atoms, millers, shift, top, oriented_bulk)
# If multiple shifts exist, sample one randomly
return np.random.choice(slabs)

@classmethod
def from_bulk_get_specific_millers(
Expand Down
Loading

0 comments on commit 917056a

Please sign in to comment.