Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add solvent interface placement code #765

Merged
merged 18 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ jobs:
pip install -e packages/fairchem-demo-ocpapi[dev]
pip install -e packages/fairchem-applications-cattsunami

- name: Install additional dependencies
run: |
wget https://github.com/m3g/packmol/archive/refs/tags/v20.15.0.tar.gz
tar -xzvf v20.15.0.tar.gz
cd packmol-20.15.0
./configure
make
echo "$(readlink -f .)" >> $GITHUB_PATH
mshuaibii marked this conversation as resolved.
Show resolved Hide resolved

- name: Test core with pytest
run: |
pytest tests -vv --ignore=tests/demo/ocpapi/tests/integration/ --cov-report=xml --cov=fairchem -c ./packages/fairchem-core/pyproject.toml
Expand Down
3 changes: 3 additions & 0 deletions src/fairchem/data/oc/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@
from .adsorbate import Adsorbate
from .adsorbate_slab_config import AdsorbateSlabConfig
from .bulk import Bulk
from .interface_config import InterfaceConfig
from .ion import Ion
from .multi_adsorbate_slab_config import MultipleAdsorbateSlabConfig
from .slab import Slab
from .solvent import Solvent
244 changes: 244 additions & 0 deletions src/fairchem/data/oc/core/interface_config.py
mshuaibii marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
from __future__ import annotations

import os
import subprocess
import tempfile
from shutil import which
from typing import TYPE_CHECKING

import ase.io
import numpy as np
from fairchem.data.oc.core.multi_adsorbate_slab_config import (
MultipleAdsorbateSlabConfig,
)

if TYPE_CHECKING:
from fairchem.data.oc.core.adsorbate import Adsorbate
from fairchem.data.oc.core.ion import Ion
from fairchem.data.oc.core.slab import Slab
from fairchem.data.oc.core.solvent import Solvent

from fairchem.data.oc.utils.geometry import BoxGeometry, PlaneBoundTriclinicGeometry

# Code adapted from https://github.com/henriasv/molecular-builder/tree/master


class InterfaceConfig(MultipleAdsorbateSlabConfig):
"""
Class to represent a solvent, adsorbate, slab, ion config. This class only
returns a fixed combination of adsorbates placed on the surface. Solvent
placement is performed by packmol
(https://m3g.github.io/packmol/userguide.shtml), with the number of solvent
molecules controlled by its corresponding density. Ion placement is random
within the desired volume.

Arguments
---------
slab: Slab
Slab object.
adsorbates: List[Adsorbate]
List of adsorbate objects to place on the slab.
solvent: Solvent
Solvent object
ions: List[Ion] = []
List of ion objects to place
num_sites: int
Number of sites to sample.
num_configurations: int
Number of configurations to generate per slab+adsorbate(s) combination.
This corresponds to selecting different site combinations to place
the adsorbates on.
interstitial_gap: float
Minimum distance, in Angstroms, between adsorbate and slab atoms as
well as the inter-adsorbate distance.
vacuum_size: int
Size of vacuum layer to add to both ends of the resulting atoms object.
solvent_interstitial_gap: float
Minimum distance, in Angstroms, between the solvent environment and the
adsorbate-slab environment.
solvent_depth: float
Volume depth to be used to pack solvents inside.
pbc_shift: float
Cushion to add to the packmol volume to avoid overlapping atoms over pbc.
packmol_tolerance: float
Packmol minimum distance to impose between molecules.
mode: str
"random", "heuristic", or "random_site_heuristic_placement".
This affects surface site sampling and adsorbate placement on each site.

In "random", we do a Delaunay triangulation of the surface atoms, then
sample sites uniformly at random within each triangle. When placing the
adsorbate, we randomly rotate it along xyz, and place it such that the
center of mass is at the site.

In "heuristic", we use Pymatgen's AdsorbateSiteFinder to find the most
energetically favorable sites, i.e., ontop, bridge, or hollow sites.
When placing the adsorbate, we randomly rotate it along z with only
slight rotation along x and y, and place it such that the binding atom
is at the site.

In "random_site_heuristic_placement", we do a Delaunay triangulation of
the surface atoms, then sample sites uniformly at random within each
triangle. When placing the adsorbate, we randomly rotate it along z with
only slight rotation along x and y, and place it such that the binding
atom is at the site.

In all cases, the adsorbate is placed at the closest position of no
overlap with the slab plus `interstitial_gap` along the surface normal.
"""

def __init__(
self,
slab: Slab,
adsorbates: list[Adsorbate],
solvent: Solvent,
ions: list[Ion] | None = None,
num_sites: int = 100,
num_configurations: int = 1,
interstitial_gap: float = 0.1,
vacuum_size: int = 15,
solvent_interstitial_gap: float = 2,
solvent_depth: float = 8,
pbc_shift: float = 0.0,
packmol_tolerance: float = 2,
mode: str = "random_site_heuristic_placement",
):
super().__init__(
slab=slab,
adsorbates=adsorbates,
num_sites=num_sites,
num_configurations=num_configurations,
interstitial_gap=interstitial_gap,
mode=mode,
)

self.solvent = solvent
self.ions = ions
self.vacuum_size = vacuum_size
self.solvent_depth = solvent_depth
self.solvent_interstitial_gap = solvent_interstitial_gap
self.pbc_shift = pbc_shift
self.packmol_tolerance = packmol_tolerance

self.n_mol_per_volume = solvent.get_molecules_per_volume

self.atoms_list, self.metadata_list = self.create_interface_on_sites(
self.atoms_list, self.metadata_list
)

def create_interface_on_sites(self, atoms_list, metadata_list):
atoms_interface_list = []
metadata_interface_list = []

for atoms, adsorbate_metadata in zip(atoms_list, metadata_list):
cell = atoms.cell.copy()
unit_normal = cell[2] / np.linalg.norm(cell[2])
cell[2] = self.solvent_depth * unit_normal
volume = cell.volume
n_solvent_mols = int(self.n_mol_per_volume * volume)

if cell.orthorhombic:
box_length = cell.lengths()
geometry = BoxGeometry(
center=box_length / 2, length=box_length - self.pbc_shift
)
else:
geometry = PlaneBoundTriclinicGeometry(cell, pbc=self.pbc_shift)

solvent_ions_atoms = self.create_packmol_atoms(geometry, n_solvent_mols)
solvent_ions_atoms.set_cell(cell)

max_z = atoms.positions[:, 2].max() + self.solvent_interstitial_gap
translation_vec = cell[2]
translation_vec[2] = max_z
solvent_ions_atoms.translate(translation_vec)

interface_atoms = atoms + solvent_ions_atoms
interface_atoms.center(vacuum=self.vacuum_size, axis=2)
interface_atoms.wrap()

atoms_interface_list.append(interface_atoms)

metadata = {
"adsorbates": adsorbate_metadata,
"solvent": self.solvent.name,
"ions": [x.name for x in self.ions],
"ion_concentrations": [
x.get_ion_concentration(volume) for x in self.ions
],
"solvent_depth": self.solvent_depth,
"solvent_volume": volume,
"slab_millers": self.slab.millers,
"slab_shift": self.slab.shift,
"slab_top": self.slab.top,
"bulk_idx": self.slab.bulk.bulk_id_from_db,
}
metadata_interface_list.append(metadata)

return atoms_interface_list, metadata_interface_list

def create_packmol_atoms(self, geometry, n_solvent_mols):
cell = geometry.cell
with tempfile.TemporaryDirectory() as tmp_dir:
output_path = os.path.join(tmp_dir, "out.pdb")
self.solvent.atoms.write(f"{tmp_dir}/solvent.pdb", format="proteindatabank")

# When placing a single ion, packmol strangely always places it at
# the boundary of the cell. This hacky fix manually places
# the ion in a random location in the cell. Packmol then will fix
# these atoms and not optimize them during its optimization, only
# optimizing solvent molecules arround the ion.
for i, ion in enumerate(self.ions):
ion_atoms = ion.atoms.copy()
ion_atoms.set_cell(cell)
self.randomize_coords(ion_atoms)
ion_atoms.write(f"{tmp_dir}/ion_{i}.pdb", format="proteindatabank")

# write packmol input
packmol_input = os.path.join(tmp_dir, "input.inp")
with open(packmol_input, "w") as f:
f.write(f"tolerance {self.packmol_tolerance}\n")
f.write("filetype pdb\n")
f.write(f"output {output_path}\n")

# write solvent
f.write(
geometry.packmol_structure(
f"{tmp_dir}/solvent.pdb", n_solvent_mols, "inside"
)
)

for i in range(len(self.ions)):
f.write(f"structure {tmp_dir}/ion_{i}.pdb\n")
f.write(" number 1\n")
f.write(" fixed 0 0 0 0 0 0\n")
f.write("end structure\n\n")

self.run_packmol(packmol_input)

solvent_ions_atoms = ase.io.read(output_path, format="proteindatabank")
solvent_ions_atoms.set_pbc(True)
solvent_ions_atoms.set_tags([3] * len(solvent_ions_atoms))

return solvent_ions_atoms

def run_packmol(self, packmol_input):
packmol_cmd = which("packmol")
if not packmol_cmd:
raise OSError("packmol not found.")

ps = subprocess.Popen(
f"{packmol_cmd} < {packmol_input}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
out, err = ps.communicate()
if err:
raise OSError(err.decode("utf-8"))

def randomize_coords(self, atoms):
cell_weights = np.random.rand(3)
mshuaibii marked this conversation as resolved.
Show resolved Hide resolved
cell_weights /= np.sum(cell_weights)
xyz = np.dot(cell_weights, atoms.cell)
atoms.set_center_of_mass(xyz)
69 changes: 69 additions & 0 deletions src/fairchem/data/oc/core/ion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

import pickle

import ase
import ase.units as units
import numpy as np
from fairchem.data.oc.databases.pkls import ION_PKL_PATH


class Ion:
"""
Initializes an ion object in one of 2 ways:
- Directly pass in an ase.Atoms object.
- Pass in index of ion to select from ion database.

Arguments
---------
ion_atoms: ase.Atoms
ion structure.
ion_id_from_db: int
Index of ion to select.
ion_db_path: str
Path to ion database.
"""

def __init__(
self,
ion_atoms: ase.Atoms = None,
ion_id_from_db: int | None = None,
ion_db_path: str = ION_PKL_PATH,
):
self.ion_id_from_db = ion_id_from_db
self.ion_db_path = ion_db_path

if ion_atoms is not None:
self.atoms = ion_atoms.copy()
self.name = str(self.atoms.symbols)
else:
with open(ion_db_path, "rb") as fp:
ion_db = pickle.load(fp)
if ion_id_from_db is not None:
self._load_ion(ion_db[ion_id_from_db])
else:
self.ion_id_from_db = np.random.randint(len(ion_db))
self._load_ion(ion_db[self.ion_id_from_db])

def __len__(self):
return len(self.atoms)

def __str__(self):
return self.name

def _load_ion(self, ion: dict) -> None:
"""
Saves the fields from an ion stored in a database. Fields added
after the first revision are conditionally added for backwards
compatibility with older database files.
"""
self.atoms = ion["atoms"]
self.name = ion["name"]
self.charge = ion["charge"]

def get_ion_concentration(self, volume):
"""
Compute the ion concentration units of M, given a volume in units of
Angstrom^3.
"""
return 1e27 / (units._Nav * volume)
18 changes: 9 additions & 9 deletions src/fairchem/data/oc/core/slab.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,17 @@ def from_bulk_get_random_slab(
else:
# If we're not saving all slabs, just tile and tag one
assert bulk is not None
untiled_slabs = compute_slabs(
bulk.atoms,
max_miller=max_miller,
bulk_struct = standardize_bulk(bulk.atoms)
avail_millers = get_symmetrically_distinct_miller_indices(
bulk_struct, max_miller
)
sampled_miller_idx = np.random.randint(len(avail_millers))
slabs = Slab.from_bulk_get_specific_millers(
avail_millers[sampled_miller_idx], bulk
)
slab_idx = np.random.randint(len(untiled_slabs))
unit_slab_struct, millers, shift, top, oriented_bulk = untiled_slabs[
slab_idx
]
slab_atoms = tile_and_tag_atoms(unit_slab_struct, bulk.atoms, min_ab=min_ab)

return cls(bulk, slab_atoms, millers, shift, top, oriented_bulk)
# If multiple shifts exist, sample one randomly
return np.random.choice(slabs)
mshuaibii marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def from_bulk_get_specific_millers(
Expand Down
Loading