Skip to content

Commit

Permalink
refact: refactor merging iso using VF2 (#2221)
Browse files Browse the repository at this point in the history
Fix #2165.

The previous implementation is not reliable (see #2165). The new
implementation is reliable but a bit slower.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit


- **New Features**
- Added support for handling molecular isomer distinctions in the
molecule naming and graph generation processes.

- **Refactor**
- Removed the "Merge isomers" step from the process flow, simplifying
the handling of molecular structures.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Jinzhe Zeng <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
njzjz and pre-commit-ci[bot] authored May 8, 2024
1 parent 37b0bef commit 4b5b988
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 62 deletions.
54 changes: 0 additions & 54 deletions reacnetgenerator/_mergeiso.py

This file was deleted.

34 changes: 33 additions & 1 deletion reacnetgenerator/_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class _CollectPaths(SharedRNGData, metaclass=ABCMeta):
atomtype: np.ndarray
selectatoms: list
split: int
miso: int
mname: np.ndarray

def __init__(self, rng):
Expand All @@ -76,6 +77,7 @@ def __init__(self, rng):
"atomtype",
"selectatoms",
"split",
"miso",
],
["mname", "atomnames", "allmoleculeroute", "splitmoleculeroute"],
)
Expand Down Expand Up @@ -314,6 +316,8 @@ class _CollectSMILESPaths(_CollectPaths):
def _printmoleculename(self):
mname = []
d = defaultdict(list)
name_mapping = {}
name_mapping_graph = defaultdict(dict)
em = iso.numerical_edge_match(["atom", "level"], ["None", 1])
self.n_unknown = 0
with WriteBuffer(open(self.moleculefilename, "w"), sep="\n") as fm, open(
Expand Down Expand Up @@ -346,6 +350,23 @@ def _raise_anyway(*args, **kwargs):
else:
d[str(molecule)].append(molecule)
name = molecule.smiles
if self.miso > 0:
if name in name_mapping:
name = name_mapping[name]
else:
# check if the name is isomorphic to the previous molecules
molecule = Molecule(self, atoms, bonds)
# the formula should be the same
mng = name_mapping_graph[molecule.name]
for isomer, mol in mng.items():
if mol.isomorphic(molecule, em):
# use the previous SMILES
name_mapping[name] = isomer
name = isomer
break
else:
mng[name] = molecule
name_mapping[name] = name
mname.append(name)
fm.append(listtostirng((name, atoms, bonds), sep=(" ", ";", ",")))
self.mname = np.array(mname)
Expand All @@ -370,6 +391,7 @@ def __init__(self, cmp, atoms, bonds):
self.bonds = bonds
self._atomtypes = cmp.atomtype[atoms]
self._atomnames = cmp.atomnames[atoms]
self._miso = cmp.miso
self.graph = self._makemoleculegraph()
counter = Counter(self._atomnames)
self.name = "".join(
Expand Down Expand Up @@ -402,7 +424,17 @@ def smiles(self, value):
def _makemoleculegraph(self):
graph = nx.Graph()
for line in self.bonds:
graph.add_edge(line[0], line[1], level=line[2])
if self._miso == 0:
# normal mode
graph.add_edge(line[0], line[1], level=line[2])
elif self._miso == 1:
# merge the isomers with same atoms and same bond-network but different bond orders
graph.add_edge(line[0], line[1], level=1)
elif self._miso == 2:
# merge the isomers with same atoms with different bond-network
pass
else:
raise ValueError(f"Unknown isomer identification method: {self._miso}.")
for atomnumber, atomtype in zip(self.atoms, self._atomtypes):
graph.add_node(atomnumber, atom=atomtype)
return graph
Expand Down
7 changes: 0 additions & 7 deletions reacnetgenerator/reacnetgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
from ._hmmfilter import _HMMFilter
from ._logging import logger
from ._matrix import _GenerateMatrix
from ._mergeiso import _mergeISO
from ._path import _CollectPaths
from ._reachtml import _HTMLResult
from .utils import must_be_list
Expand Down Expand Up @@ -258,7 +257,6 @@ def runanddraw(
processthing.extend(
(
self.Status.DETECT,
self.Status.MISO,
self.Status.HMM,
self.Status.PATH,
self.Status.MATRIX,
Expand All @@ -278,7 +276,6 @@ def run(self) -> None:
processthing.extend(
(
self.Status.DETECT,
self.Status.MISO,
self.Status.HMM,
self.Status.PATH,
self.Status.MATRIX,
Expand All @@ -303,7 +300,6 @@ class Status(Enum):
- DOWNLOAD: Download trajectory from urls
- DETECT: Read bond information and detect molecules
- HMM: HMM filter
- MISO: Merge isomers
- PATH: Indentify isomers and collect reaction paths
- MATRIX: Reaction matrix generation
- NETWORK: Draw reaction network
Expand All @@ -312,7 +308,6 @@ class Status(Enum):

INIT = "Init"
DETECT = "Read bond information and detect molecules"
MISO = "Merge isomers"
HMM = "HMM filter"
PATH = "Indentify isomers and collect reaction paths"
MATRIX = "Reaction matrix generation"
Expand All @@ -336,8 +331,6 @@ def _process(self, steps: Union[List[Status], Tuple[Status, ...]]) -> None:
for i, runstep in enumerate(steps, 1):
if runstep == self.Status.DETECT:
_Detect.gettype(self).detect()
elif runstep == self.Status.MISO:
_mergeISO(self).merge()
elif runstep == self.Status.HMM:
_HMMFilter(self).filter()
elif runstep == self.Status.PATH:
Expand Down

0 comments on commit 4b5b988

Please sign in to comment.