From 50b853338bd4a301ac4ad2f8753bf55db8d75b1f Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 5 Oct 2017 14:35:34 -0400 Subject: [PATCH 01/57] Create molecule.converter module for RDKit and OB conversion Move to/from RDKit/OB methods to new module Update affected imports --- rmgpy/molecule/converter.pxd | 37 +++++ rmgpy/molecule/converter.py | 275 ++++++++++++++++++++++++++++++++ rmgpy/molecule/generator.pxd | 4 - rmgpy/molecule/generator.py | 110 +------------ rmgpy/molecule/generatorTest.py | 1 + rmgpy/molecule/molecule.py | 9 +- rmgpy/molecule/moleculeTest.py | 2 +- rmgpy/molecule/parser.pxd | 4 - rmgpy/molecule/parser.py | 123 +------------- setup.py | 1 + 10 files changed, 322 insertions(+), 244 deletions(-) create mode 100644 rmgpy/molecule/converter.pxd create mode 100644 rmgpy/molecule/converter.py diff --git a/rmgpy/molecule/converter.pxd b/rmgpy/molecule/converter.pxd new file mode 100644 index 0000000000..9a28f2a877 --- /dev/null +++ b/rmgpy/molecule/converter.pxd @@ -0,0 +1,37 @@ +############################################################################### +# # +# RMG - Reaction Mechanism Generator # +# # +# Copyright (c) 2002-2018 Prof. William H. Green (whgreen@mit.edu), # +# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # +# # +# Permission is hereby granted, free of charge, to any person obtaining a # +# copy of this software and associated documentation files (the 'Software'), # +# to deal in the Software without restriction, including without limitation # +# the rights to use, copy, modify, merge, publish, distribute, sublicense, # +# and/or sell copies of the Software, and to permit persons to whom the # +# Software is furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in # +# all copies or substantial portions of the Software. 
# +# # +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # +# DEALINGS IN THE SOFTWARE. # +# # +############################################################################### + +from .molecule cimport Atom, Bond, Molecule + + +cpdef toRDKitMol(Molecule mol, bint removeHs=*, bint returnMapping=*, bint sanitize=*) + +cpdef Molecule fromRDKitMol(Molecule mol, object rdkitmol) + +cpdef toOBMol(Molecule mol, bint returnMapping=*) + +cpdef Molecule fromOBMol(Molecule mol, object obmol) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py new file mode 100644 index 0000000000..338beff8fb --- /dev/null +++ b/rmgpy/molecule/converter.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +# # +# RMG - Reaction Mechanism Generator # +# # +# Copyright (c) 2002-2018 Prof. William H. Green (whgreen@mit.edu), # +# Prof. Richard H. 
West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # +# # +# Permission is hereby granted, free of charge, to any person obtaining a # +# copy of this software and associated documentation files (the 'Software'), # +# to deal in the Software without restriction, including without limitation # +# the rights to use, copy, modify, merge, publish, distribute, sublicense, # +# and/or sell copies of the Software, and to permit persons to whom the # +# Software is furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in # +# all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # +# DEALINGS IN THE SOFTWARE. # +# # +############################################################################### + +""" +This module provides methods for converting molecules between RMG, RDKit, and OpenBabel. +""" +import logging +import sys + +import cython +import openbabel +from rdkit import Chem + +from rmgpy.molecule import element as elements +from rmgpy.molecule.molecule import Atom, Bond + + +def toRDKitMol(mol, removeHs=True, returnMapping=False, sanitize=True): + """ + Convert a molecular structure to a RDKit rdmol object. Uses + `RDKit `_ to perform the conversion. + Perceives aromaticity and, unless removeHs==False, removes Hydrogen atoms. + + If returnMapping==True then it also returns a dictionary mapping the + atoms to RDKit's atom indices. 
+ """ + + # Sort the atoms before converting to ensure output is consistent + # between different runs + mol.sortAtoms() + atoms = mol.vertices + rdAtomIndices = {} # dictionary of RDKit atom indices + rdkitmol = Chem.rdchem.EditableMol(Chem.rdchem.Mol()) + for index, atom in enumerate(mol.vertices): + rdAtom = Chem.rdchem.Atom(atom.element.symbol) + rdAtom.SetNumRadicalElectrons(atom.radicalElectrons) + if atom.element.symbol == 'C' and atom.lonePairs == 1 and mol.multiplicity == 1: rdAtom.SetNumRadicalElectrons(2) + rdkitmol.AddAtom(rdAtom) + if removeHs and atom.symbol == 'H': + pass + else: + rdAtomIndices[atom] = index + + rdBonds = Chem.rdchem.BondType + orders = {'S': rdBonds.SINGLE, 'D': rdBonds.DOUBLE, 'T': rdBonds.TRIPLE, 'B': rdBonds.AROMATIC} + # Add the bonds + for atom1 in mol.vertices: + for atom2, bond in atom1.edges.iteritems(): + index1 = atoms.index(atom1) + index2 = atoms.index(atom2) + if index1 < index2: + order_string = bond.getOrderStr() + order = orders[order_string] + rdkitmol.AddBond(index1, index2, order) + + # Make editable mol into a mol and rectify the molecule + rdkitmol = rdkitmol.GetMol() + if sanitize: + Chem.SanitizeMol(rdkitmol) + if removeHs: + rdkitmol = Chem.RemoveHs(rdkitmol, sanitize=sanitize) + if returnMapping: + return rdkitmol, rdAtomIndices + return rdkitmol + + +def fromRDKitMol(mol, rdkitmol): + """ + Convert a RDKit Mol object `rdkitmol` to a molecular structure. Uses + `RDKit `_ to perform the conversion. + This Kekulizes everything, removing all aromatic atom types. 
+ """ + cython.declare(i=cython.int, + radicalElectrons=cython.int, + charge=cython.int, + lonePairs=cython.int, + number=cython.int, + order=cython.float, + atom=Atom, + atom1=Atom, + atom2=Atom, + bond=Bond) + + mol.vertices = [] + + # Add hydrogen atoms to complete molecule if needed + rdkitmol.UpdatePropertyCache(strict=False) + rdkitmol = Chem.AddHs(rdkitmol) + Chem.rdmolops.Kekulize(rdkitmol, clearAromaticFlags=True) + + # iterate through atoms in rdkitmol + for i in xrange(rdkitmol.GetNumAtoms()): + rdkitatom = rdkitmol.GetAtomWithIdx(i) + + # Use atomic number as key for element + number = rdkitatom.GetAtomicNum() + element = elements.getElement(number) + + # Process charge + charge = rdkitatom.GetFormalCharge() + radicalElectrons = rdkitatom.GetNumRadicalElectrons() + + atom = Atom(element, radicalElectrons, charge, '', 0) + mol.vertices.append(atom) + + # Add bonds by iterating again through atoms + for j in xrange(0, i): + rdkitatom2 = rdkitmol.GetAtomWithIdx(j + 1) + rdkitbond = rdkitmol.GetBondBetweenAtoms(i, j) + if rdkitbond is not None: + order = 0 + + # Process bond type + rdbondtype = rdkitbond.GetBondType() + if rdbondtype.name == 'SINGLE': order = 1 + elif rdbondtype.name == 'DOUBLE': order = 2 + elif rdbondtype.name == 'TRIPLE': order = 3 + elif rdbondtype.name == 'AROMATIC': order = 1.5 + + bond = Bond(mol.vertices[i], mol.vertices[j], order) + mol.addBond(bond) + + # Set atom types and connectivity values + mol.update() + + # Assume this is always true + # There are cases where 2 radicalElectrons is a singlet, but + # the triplet is often more stable, + mol.multiplicity = mol.getRadicalCount() + 1 + # mol.updateAtomTypes() + + return mol + + +def debugRDKitMol(rdmol, level=logging.INFO): + """ + Takes an rdkit molecule object and logs some debugging information + equivalent to calling rdmol.Debug() but uses our logging framework. + Default logging level is INFO but can be controlled with the `level` parameter. 
+ Also returns the message as a string, should you want it for something. + """ + import tempfile + import os + my_temp_file = tempfile.NamedTemporaryFile() + try: + old_stdout_file_descriptor = os.dup(sys.stdout.fileno()) + except: + message = "Can't access the sys.stdout file descriptor, so can't capture RDKit debug info" + print message + rdmol.Debug() + return message + os.dup2(my_temp_file.fileno(), sys.stdout.fileno()) + rdmol.Debug() + os.dup2(old_stdout_file_descriptor, sys.stdout.fileno()) + my_temp_file.file.seek(0) + message = my_temp_file.file.read() + message = "RDKit Molecule debugging information:\n" + message + logging.log(level, message) + return message + + +def toOBMol(mol, returnMapping=False): + """ + Convert a molecular structure to an OpenBabel OBMol object. Uses + `OpenBabel `_ to perform the conversion. + """ + + # Sort the atoms to ensure consistent output + mol.sortAtoms() + atoms = mol.vertices + + obAtomIds = {} # dictionary of OB atom IDs + obmol = openbabel.OBMol() + for atom in atoms: + a = obmol.NewAtom() + a.SetAtomicNum(atom.number) + a.SetFormalCharge(atom.charge) + obAtomIds[atom] = a.GetId() + orders = {1: 1, 2: 2, 3: 3, 1.5: 5} + for atom1 in mol.vertices: + for atom2, bond in atom1.edges.iteritems(): + index1 = atoms.index(atom1) + index2 = atoms.index(atom2) + if index1 < index2: + order = orders[bond.order] + obmol.AddBond(index1+1, index2+1, order) + + obmol.AssignSpinMultiplicity(True) + + if returnMapping: + return obmol, obAtomIds + + return obmol + + +def fromOBMol(mol, obmol): + """ + Convert a OpenBabel Mol object `obmol` to a molecular structure. Uses + `OpenBabel `_ to perform the conversion. 
+ """ + # Below are the declared variables for cythonizing the module + # cython.declare(i=cython.int) + # cython.declare(radicalElectrons=cython.int, charge=cython.int, lonePairs=cython.int) + # cython.declare(atom=Atom, atom1=Atom, atom2=Atom, bond=Bond) + + mol.vertices = [] + + # Add hydrogen atoms to complete molecule if needed + obmol.AddHydrogens() + # TODO Chem.rdmolops.Kekulize(obmol, clearAromaticFlags=True) + + # iterate through atoms in obmol + for obatom in openbabel.OBMolAtomIter(obmol): + idx = obatom.GetIdx()#openbabel idx starts at 1! + + # Use atomic number as key for element + number = obatom.GetAtomicNum() + element = elements.getElement(number) + # Process charge + charge = obatom.GetFormalCharge() + obatom_multiplicity = obatom.GetSpinMultiplicity() + radicalElectrons = obatom_multiplicity - 1 if obatom_multiplicity != 0 else 0 + + atom = Atom(element, radicalElectrons, charge, '', 0) + mol.vertices.append(atom) + + # iterate through bonds in obmol + for obbond in openbabel.OBMolBondIter(obmol): + # Process bond type + oborder = obbond.GetBondOrder() + if oborder not in [1,2,3] and obbond.IsAromatic() : + oborder = 1.5 + + bond = Bond(mol.vertices[obbond.GetBeginAtomIdx() - 1], mol.vertices[obbond.GetEndAtomIdx() - 1], oborder)#python array indices start at 0 + mol.addBond(bond) + + + # Set atom types and connectivity values + mol.updateConnectivityValues() + mol.updateAtomTypes() + mol.updateMultiplicity() + + # Assume this is always true + # There are cases where 2 radicalElectrons is a singlet, but + # the triplet is often more stable, + mol.multiplicity = mol.getRadicalCount() + 1 + + return mol \ No newline at end of file diff --git a/rmgpy/molecule/generator.pxd b/rmgpy/molecule/generator.pxd index db4d84659a..4abdbab005 100644 --- a/rmgpy/molecule/generator.pxd +++ b/rmgpy/molecule/generator.pxd @@ -44,10 +44,6 @@ cpdef str toSMARTS(Molecule mol) cpdef str toSMILES(Molecule mol) -cpdef toOBMol(Molecule mol, bint returnMapping=*) - 
-cpdef toRDKitMol(Molecule mol, bint removeHs=*, bint returnMapping=*, bint sanitize=*) - cpdef bint is_valid_combo(list combo, Molecule mol, list distances) cpdef list find_lowest_u_layer(Molecule mol, list u_layer, list equivalent_atoms) diff --git a/rmgpy/molecule/generator.py b/rmgpy/molecule/generator.py index a10cae3a87..c3cb94f95a 100644 --- a/rmgpy/molecule/generator.py +++ b/rmgpy/molecule/generator.py @@ -31,9 +31,7 @@ # global imports import cython -import logging import itertools -import sys # local imports try: @@ -45,6 +43,7 @@ from .molecule import Atom, Bond, Molecule from .pathfinder import compute_atom_distance from .util import partition, agglomerate, generate_combo +from rmgpy.molecule.converter import toOBMol, toRDKitMol import rmgpy.molecule.element as element import rmgpy.molecule.inchi as inchiutil @@ -308,113 +307,6 @@ def toSMILES(mol): return Chem.MolToSmiles(rdkitmol, kekuleSmiles=True) return Chem.MolToSmiles(rdkitmol) -def toOBMol(mol, returnMapping=False): - """ - Convert a molecular structure to an OpenBabel OBMol object. Uses - `OpenBabel `_ to perform the conversion. - """ - - # Sort the atoms to ensure consistent output - mol.sortAtoms() - atoms = mol.vertices - - obAtomIds = {} # dictionary of OB atom IDs - obmol = openbabel.OBMol() - for atom in atoms: - a = obmol.NewAtom() - a.SetAtomicNum(atom.number) - a.SetFormalCharge(atom.charge) - obAtomIds[atom] = a.GetId() - orders = {1: 1, 2: 2, 3: 3, 1.5: 5} - for atom1 in mol.vertices: - for atom2, bond in atom1.edges.iteritems(): - index1 = atoms.index(atom1) - index2 = atoms.index(atom2) - if index1 < index2: - order = orders[bond.order] - obmol.AddBond(index1+1, index2+1, order) - - obmol.AssignSpinMultiplicity(True) - - if returnMapping: - return obmol, obAtomIds - - return obmol - -def debugRDKitMol(rdmol, level=logging.INFO): - """ - Takes an rdkit molecule object and logs some debugging information - equivalent to calling rdmol.Debug() but uses our logging framework. 
- Default logging level is INFO but can be controlled with the `level` parameter. - Also returns the message as a string, should you want it for something. - """ - import tempfile - import os - my_temp_file = tempfile.NamedTemporaryFile() - try: - old_stdout_file_descriptor = os.dup(sys.stdout.fileno()) - except: - message = "Can't access the sys.stdout file descriptor, so can't capture RDKit debug info" - print message - rdmol.Debug() - return message - os.dup2(my_temp_file.fileno(), sys.stdout.fileno()) - rdmol.Debug() - os.dup2(old_stdout_file_descriptor, sys.stdout.fileno()) - my_temp_file.file.seek(0) - message = my_temp_file.file.read() - message = "RDKit Molecule debugging information:\n" + message - logging.log(level, message) - return message - - -def toRDKitMol(mol, removeHs=True, returnMapping=False, sanitize=True): - """ - Convert a molecular structure to a RDKit rdmol object. Uses - `RDKit `_ to perform the conversion. - Perceives aromaticity and, unless removeHs==False, removes Hydrogen atoms. - - If returnMapping==True then it also returns a dictionary mapping the - atoms to RDKit's atom indices. 
- """ - - # Sort the atoms before converting to ensure output is consistent - # between different runs - mol.sortAtoms() - atoms = mol.vertices - rdAtomIndices = {} # dictionary of RDKit atom indices - rdkitmol = Chem.rdchem.EditableMol(Chem.rdchem.Mol()) - for index, atom in enumerate(mol.vertices): - rdAtom = Chem.rdchem.Atom(atom.element.symbol) - rdAtom.SetNumRadicalElectrons(atom.radicalElectrons) - if atom.element.symbol == 'C' and atom.lonePairs == 1 and mol.multiplicity == 1: rdAtom.SetNumRadicalElectrons(2) - rdkitmol.AddAtom(rdAtom) - if removeHs and atom.symbol == 'H': - pass - else: - rdAtomIndices[atom] = index - - rdBonds = Chem.rdchem.BondType - orders = {'S': rdBonds.SINGLE, 'D': rdBonds.DOUBLE, 'T': rdBonds.TRIPLE, 'B': rdBonds.AROMATIC} - # Add the bonds - for atom1 in mol.vertices: - for atom2, bond in atom1.edges.iteritems(): - index1 = atoms.index(atom1) - index2 = atoms.index(atom2) - if index1 < index2: - order_string = bond.getOrderStr() - order = orders[order_string] - rdkitmol.AddBond(index1, index2, order) - - # Make editable mol into a mol and rectify the molecule - rdkitmol = rdkitmol.GetMol() - if sanitize: - Chem.SanitizeMol(rdkitmol) - if removeHs: - rdkitmol = Chem.RemoveHs(rdkitmol, sanitize=sanitize) - if returnMapping: - return rdkitmol, rdAtomIndices - return rdkitmol def is_valid_combo(combo, mol, distances): """ diff --git a/rmgpy/molecule/generatorTest.py b/rmgpy/molecule/generatorTest.py index 018bc33230..9a06c92756 100644 --- a/rmgpy/molecule/generatorTest.py +++ b/rmgpy/molecule/generatorTest.py @@ -36,6 +36,7 @@ from .molecule import Atom, Molecule from .inchi import P_LAYER_PREFIX, U_LAYER_PREFIX from .generator import * +from rmgpy.molecule.converter import debugRDKitMol class RDKitTest(unittest.TestCase): def testDebugger(self): diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 4c722abc20..f56c80228f 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -45,7 +45,6 @@ 
import itertools from copy import deepcopy -import element as elements try: import openbabel except: @@ -55,6 +54,8 @@ from rmgpy.molecule.pathfinder import find_shortest_path from .atomtype import AtomType, atomTypes, getAtomType, AtomTypeError import rmgpy.constants as constants +import rmgpy.molecule.element as elements +import rmgpy.molecule.converter as converter import rmgpy.molecule.parser as parser import rmgpy.molecule.generator as generator import rmgpy.molecule.resonance as resonance @@ -1480,7 +1481,7 @@ def toRDKitMol(self, *args, **kwargs): """ Convert a molecular structure to a RDKit rdmol object. """ - return generator.toRDKitMol(self, *args, **kwargs) + return converter.toRDKitMol(self, *args, **kwargs) def toAdjacencyList(self, label='', removeH=False, removeLonePairs=False, oldStyle=False): """ @@ -1856,7 +1857,7 @@ def getAromaticRings(self, rings=None): return [], [] try: - rdkitmol, rdAtomIndices = generator.toRDKitMol(self, removeHs=False, returnMapping=True) + rdkitmol, rdAtomIndices = converter.toRDKitMol(self, removeHs=False, returnMapping=True) except ValueError: logging.warning('Unable to check aromaticity by converting to RDKit Mol.') else: @@ -1883,7 +1884,7 @@ def getAromaticRings(self, rings=None): logging.info('Trying to use OpenBabel to check aromaticity.') try: - obmol, obAtomIds = generator.toOBMol(self, returnMapping=True) + obmol, obAtomIds = converter.toOBMol(self, returnMapping=True) except ImportError: logging.warning('Unable to check aromaticity by converting for OB Mol.') return [], [] diff --git a/rmgpy/molecule/moleculeTest.py b/rmgpy/molecule/moleculeTest.py index c04dd035b1..c34dd845b6 100644 --- a/rmgpy/molecule/moleculeTest.py +++ b/rmgpy/molecule/moleculeTest.py @@ -1532,7 +1532,7 @@ def testRDKitMolAtomMapping(self): Test that the atom mapping returned by toRDKitMol contains the correct atom indices of the atoms of the molecule when hydrogens are removed. 
""" - from .generator import toRDKitMol + from rmgpy.molecule.converter import toRDKitMol adjlist = ''' 1 H u0 p0 c0 {2,S} diff --git a/rmgpy/molecule/parser.pxd b/rmgpy/molecule/parser.pxd index 75dcfb4371..6e4fef104c 100644 --- a/rmgpy/molecule/parser.pxd +++ b/rmgpy/molecule/parser.pxd @@ -62,10 +62,6 @@ cpdef Molecule fromSMARTS(Molecule mol, str smartsstr, str backend=*) cpdef Molecule fromAugmentedInChI(Molecule mol, aug_inchi) -cpdef Molecule fromRDKitMol(Molecule mol, object rdkitmol) - -cpdef Molecule fromOBMol(Molecule mol, object obmol) - cdef Molecule __lookup(Molecule mol, str identifier, str type_identifier) # parser helper functions: diff --git a/rmgpy/molecule/parser.py b/rmgpy/molecule/parser.py index be1be1a1b7..a1e91b955b 100644 --- a/rmgpy/molecule/parser.py +++ b/rmgpy/molecule/parser.py @@ -52,11 +52,11 @@ from rmgpy.molecule import element as elements from .molecule import Atom, Bond, Molecule from .adjlist import ConsistencyChecker +from rmgpy.molecule.converter import fromRDKitMol, fromOBMol import rmgpy.molecule.inchi as inchiutil import rmgpy.molecule.util as util import rmgpy.molecule.pathfinder as pathfinder -import rmgpy.molecule.generator as generator # constants @@ -365,127 +365,6 @@ def fromSMARTS(mol, smartsstr, backend = 'rdkit'): return __parse(mol, smartsstr, 'sma', backend) -def fromRDKitMol(mol, rdkitmol): - """ - Convert a RDKit Mol object `rdkitmol` to a molecular structure. Uses - `RDKit `_ to perform the conversion. - This Kekulizes everything, removing all aromatic atom types. 
- """ - cython.declare(i=cython.int, - radicalElectrons=cython.int, - charge=cython.int, - lonePairs=cython.int, - number=cython.int, - order=cython.float, - atom=Atom, - atom1=Atom, - atom2=Atom, - bond=Bond) - - mol.vertices = [] - - # Add hydrogen atoms to complete molecule if needed - rdkitmol.UpdatePropertyCache(strict=False) - rdkitmol = Chem.AddHs(rdkitmol) - Chem.rdmolops.Kekulize(rdkitmol, clearAromaticFlags=True) - - # iterate through atoms in rdkitmol - for i in xrange(rdkitmol.GetNumAtoms()): - rdkitatom = rdkitmol.GetAtomWithIdx(i) - - # Use atomic number as key for element - number = rdkitatom.GetAtomicNum() - element = elements.getElement(number) - - # Process charge - charge = rdkitatom.GetFormalCharge() - radicalElectrons = rdkitatom.GetNumRadicalElectrons() - - atom = Atom(element, radicalElectrons, charge, '', 0) - mol.vertices.append(atom) - - # Add bonds by iterating again through atoms - for j in xrange(0, i): - rdkitatom2 = rdkitmol.GetAtomWithIdx(j + 1) - rdkitbond = rdkitmol.GetBondBetweenAtoms(i, j) - if rdkitbond is not None: - order = 0 - - # Process bond type - rdbondtype = rdkitbond.GetBondType() - if rdbondtype.name == 'SINGLE': order = 1 - elif rdbondtype.name == 'DOUBLE': order = 2 - elif rdbondtype.name == 'TRIPLE': order = 3 - elif rdbondtype.name == 'AROMATIC': order = 1.5 - - bond = Bond(mol.vertices[i], mol.vertices[j], order) - mol.addBond(bond) - - # Set atom types and connectivity values - mol.update() - - # Assume this is always true - # There are cases where 2 radicalElectrons is a singlet, but - # the triplet is often more stable, - mol.multiplicity = mol.getRadicalCount() + 1 - # mol.updateAtomTypes() - - return mol - -def fromOBMol(mol, obmol): - """ - Convert a OpenBabel Mol object `obmol` to a molecular structure. Uses - `OpenBabel `_ to perform the conversion. 
- """ - # Below are the declared variables for cythonizing the module - # cython.declare(i=cython.int) - # cython.declare(radicalElectrons=cython.int, charge=cython.int, lonePairs=cython.int) - # cython.declare(atom=Atom, atom1=Atom, atom2=Atom, bond=Bond) - - mol.vertices = [] - - # Add hydrogen atoms to complete molecule if needed - obmol.AddHydrogens() - # TODO Chem.rdmolops.Kekulize(obmol, clearAromaticFlags=True) - - # iterate through atoms in obmol - for obatom in openbabel.OBMolAtomIter(obmol): - idx = obatom.GetIdx()#openbabel idx starts at 1! - - # Use atomic number as key for element - number = obatom.GetAtomicNum() - element = elements.getElement(number) - # Process charge - charge = obatom.GetFormalCharge() - obatom_multiplicity = obatom.GetSpinMultiplicity() - radicalElectrons = obatom_multiplicity - 1 if obatom_multiplicity != 0 else 0 - - atom = Atom(element, radicalElectrons, charge, '', 0) - mol.vertices.append(atom) - - # iterate through bonds in obmol - for obbond in openbabel.OBMolBondIter(obmol): - # Process bond type - oborder = obbond.GetBondOrder() - if oborder not in [1,2,3] and obbond.IsAromatic() : - oborder = 1.5 - - bond = Bond(mol.vertices[obbond.GetBeginAtomIdx() - 1], mol.vertices[obbond.GetEndAtomIdx() - 1], oborder)#python array indices start at 0 - mol.addBond(bond) - - - # Set atom types and connectivity values - mol.updateConnectivityValues() - mol.updateAtomTypes() - mol.updateMultiplicity() - - # Assume this is always true - # There are cases where 2 radicalElectrons is a singlet, but - # the triplet is often more stable, - mol.multiplicity = mol.getRadicalCount() + 1 - - return mol - def fixCharge(mol, u_indices): """ Tries to fix a number of structural features in the molecule related to charge, diff --git a/setup.py b/setup.py index 58cf01b0d2..4d461521e8 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,7 @@ def getMainExtensionModules(): Extension('rmgpy.molecule.molecule', ['rmgpy/molecule/molecule.py'], 
include_dirs=['.']), Extension('rmgpy.molecule.symmetry', ['rmgpy/molecule/symmetry.py'], include_dirs=['.']), Extension('rmgpy.molecule.vf2', ['rmgpy/molecule/vf2.pyx'], include_dirs=['.']), + Extension('rmgpy.molecule.converter', ['rmgpy/molecule/converter.py'], include_dirs=['.']), Extension('rmgpy.molecule.parser', ['rmgpy/molecule/parser.py'], include_dirs=['.']), Extension('rmgpy.molecule.generator', ['rmgpy/molecule/generator.py'], include_dirs=['.']), Extension('rmgpy.molecule.util', ['rmgpy/molecule/util.py'], include_dirs=['.']), From 670f1d92470dc368345038bdb0b2a557200122a7 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 5 Oct 2017 16:09:05 -0400 Subject: [PATCH 02/57] Move InChI related methods from generator/parser to inchi module --- rmgpy/molecule/generator.pxd | 14 - rmgpy/molecule/generator.py | 351 +------------ rmgpy/molecule/generatorTest.py | 2 +- rmgpy/molecule/inchi.pxd | 30 ++ rmgpy/molecule/inchi.py | 889 ++++++++++++++++++++++++++++++++ rmgpy/molecule/parser.pxd | 19 +- rmgpy/molecule/parser.py | 520 +------------------ rmgpy/molecule/parserTest.py | 4 +- 8 files changed, 929 insertions(+), 900 deletions(-) diff --git a/rmgpy/molecule/generator.pxd b/rmgpy/molecule/generator.pxd index 4abdbab005..fdceb8b8a0 100644 --- a/rmgpy/molecule/generator.pxd +++ b/rmgpy/molecule/generator.pxd @@ -32,8 +32,6 @@ cpdef dict _known_smiles_radicals cpdef str toInChI(Molecule mol) -cpdef str create_U_layer(Molecule mol, str auxinfo) - cpdef str toAugmentedInChI(Molecule mol) cpdef str toInChIKey(Molecule mol) @@ -43,15 +41,3 @@ cpdef str toAugmentedInChIKey(Molecule mol) cpdef str toSMARTS(Molecule mol) cpdef str toSMILES(Molecule mol) - -cpdef bint is_valid_combo(list combo, Molecule mol, list distances) - -cpdef list find_lowest_u_layer(Molecule mol, list u_layer, list equivalent_atoms) - -cpdef Molecule generate_minimum_resonance_isomer(Molecule mol) - -cpdef list get_unpaired_electrons(Molecule mol) - -cpdef list compute_agglomerate_distance(list 
agglomerates, Molecule mol) - -cpdef str create_P_layer(Molecule mol, str auxinfo) \ No newline at end of file diff --git a/rmgpy/molecule/generator.py b/rmgpy/molecule/generator.py index c3cb94f95a..02083d3f0a 100644 --- a/rmgpy/molecule/generator.py +++ b/rmgpy/molecule/generator.py @@ -31,7 +31,6 @@ # global imports import cython -import itertools # local imports try: @@ -40,14 +39,11 @@ pass from rdkit import Chem -from .molecule import Atom, Bond, Molecule -from .pathfinder import compute_atom_distance -from .util import partition, agglomerate, generate_combo +from .molecule import Atom from rmgpy.molecule.converter import toOBMol, toRDKitMol -import rmgpy.molecule.element as element import rmgpy.molecule.inchi as inchiutil -import rmgpy.molecule.resonance as resonance + # global variables: #: This dictionary is used to shortcut lookups of a molecule's SMILES string from its chemical formula. @@ -126,55 +122,6 @@ def toInChI(mol): obConversion.SetOptions('w', openbabel.OBConversion.OUTOPTIONS) return obConversion.WriteString(obmol).strip() -def create_U_layer(mol, auxinfo): - """ - Creates a string with the positions of the atoms that bear unpaired electrons. The string - can be used to complement the InChI with an additional layer that allows for the differentiation - between structures with multiple unpaired electrons. - - The string is composed of a prefix ('u') followed by the positions of each of the unpaired electrons, - sorted in numerical order. - - Example: - - methyl radical ([CH3]) : u1 - - triplet methylene biradical ([CH2]) : u1,1 - - ethane-1,2-diyl biradical ([CH2][CH2]): u1,2 - - When the molecule does not bear any unpaired electrons, None is returned. 
- - """ - - cython.declare( - minmol=Molecule, - #rdkitmol=, - u_layer=list, - i=int, - at=Atom, - equivalent_atoms=list, - ) - - if mol.getRadicalCount() == 0: - return None - elif mol.getFormula() == 'H': - return inchiutil.U_LAYER_PREFIX + '1' - - - # find the resonance isomer with the lowest u index: - minmol = generate_minimum_resonance_isomer(mol) - - # create preliminary u-layer: - u_layer = [] - for i, at in enumerate(minmol.atoms): - u_layer.extend([i+1] * at.radicalElectrons) - - # extract equivalent atom pairs from E-layer of auxiliary info: - equivalent_atoms = inchiutil.parse_E_layer(auxinfo) - if equivalent_atoms: - # select lowest u-layer: - u_layer = find_lowest_u_layer(minmol, u_layer, equivalent_atoms) - - return (inchiutil.U_LAYER_PREFIX + ','.join(map(str, u_layer))) - def toAugmentedInChI(mol): """ @@ -193,7 +140,7 @@ def toAugmentedInChI(mol): ) inchi = toInChI(mol) - ulayer, player = create_augmented_layers(mol) + ulayer, player = inchiutil.create_augmented_layers(mol) aug_inchi = inchiutil.compose_aug_inchi(inchi, ulayer, player) @@ -243,7 +190,7 @@ def toAugmentedInChIKey(mol): key = toInChIKey(mol) - ulayer, player = create_augmented_layers(mol) + ulayer, player = inchiutil.create_augmented_layers(mol) return inchiutil.compose_aug_inchi_key(key, ulayer, player) @@ -308,293 +255,3 @@ def toSMILES(mol): return Chem.MolToSmiles(rdkitmol) -def is_valid_combo(combo, mol, distances): - """ - Check if the combination of atom indices refers to - atoms that are adjacent in the molecule. 
- """ - cython.declare( - agglomerates=list, - new_distances=list, - orig_dist=dict, - new_dist=dict, - ) - - # compute shortest path between atoms - agglomerates = agglomerate(combo) - new_distances = compute_agglomerate_distance(agglomerates, mol) - - # combo is valid if the distance is equal to the parameter distance - - if len(distances) != len(new_distances): return False - - for orig_dist, new_dist in zip(distances, new_distances): - # only compare the values of the dictionaries: - if sorted(orig_dist.values()) != sorted(new_dist.values()): - return False - - return True - -def find_lowest_u_layer(mol, u_layer, equivalent_atoms): - """ - Searches for the "minimum" combination of indices of atoms that bear unpaired electrons. - - It does so by using the information on equivalent atoms to permute equivalent atoms to - obtain a combination of atoms that is the (numerically) lowest possible combination. - - Each possible combination is valid if and only if the distances between the atoms of the - combination is identical to the distances between the original combination. - - First, the algorithm partitions equivalent atoms that bear an unpaired electron. - Next, the combinations are generated, and for each combination it is verified whether - it pertains to a "valid" combination. - - Returns a list of indices corresponding to the lowest combination of atom indices bearing - unpaired electrons. 
- """ - - cython.declare( - new_u_layer=list, - grouped_electrons=list, - corresponding_E_layers=list, - group=list, - e_layer=list, - combos=list, - orig_agglomerates=list, - orig_distances=list, - selected_group=list, - combo=list, - ) - if not equivalent_atoms: - return u_layer - - new_u_layer = [] - - grouped_electrons, corresponding_E_layers = partition(u_layer, equivalent_atoms) - - # don't process atoms that do not belong to an equivalence layer - for group, e_layer in zip(grouped_electrons[:], corresponding_E_layers[:]): - if not e_layer: - new_u_layer.extend(group) - grouped_electrons.remove(group) - corresponding_E_layers.remove(e_layer) - - - combos = generate_combo(grouped_electrons, corresponding_E_layers) - # compute original distance: - orig_agglomerates = agglomerate(grouped_electrons) - orig_distances = compute_agglomerate_distance(orig_agglomerates, mol) - - # deflate the list of lists to be able to numerically compare them - selected_group = sorted(itertools.chain.from_iterable(grouped_electrons)) - - # see if any of the combos is valid and results in a lower numerical combination than the original - for combo in combos: - if is_valid_combo(combo, mol, orig_distances): - combo = sorted(itertools.chain.from_iterable(combo)) - if combo < selected_group: - selected_group = combo - - # add the minimized unpaired electron positions to the u-layer: - new_u_layer.extend(selected_group) - - return sorted(new_u_layer) - -def generate_minimum_resonance_isomer(mol): - """ - Select the resonance isomer that is isomorphic to the parameter isomer, with the lowest unpaired - electrons descriptor, unless this unnecessarily forms a charged molecule. - - First, we generate all isomorphic resonance isomers. - Next, we return the candidate with the lowest unpaired electrons metric. 
- - The metric is a sorted list with indices of the atoms that bear an unpaired electron - """ - - cython.declare( - atom=Atom, - candidates=list, - sel=Molecule, - cand=Molecule, - metric_sel=list, - charge_sel=int, - charge_cand=int, - metric_cand=list, - ) - - candidates = resonance.generate_isomorphic_resonance_structures(mol) - - sel = mol - metric_sel = get_unpaired_electrons(sel) - charge_sel = sum([abs(atom.charge) for atom in sel.vertices]) - for cand in candidates: - metric_cand = get_unpaired_electrons(cand) - if metric_cand < metric_sel: - charge_cand = sum([abs(atom.charge) for atom in cand.vertices]) - if charge_cand <= charge_sel: - sel = cand - metric_sel = metric_cand - charge_sel = charge_cand - return sel - - -def get_unpaired_electrons(mol): - """ - Returns a sorted list of the indices of the atoms that bear one or more - unpaired electrons. - """ - - cython.declare( - locations=list, - index=int, - at=Atom, - ) - locations = [] - for index, at in enumerate(mol.atoms): - if at.radicalElectrons >= 1: - locations.append(index) - - return sorted(locations) - -def compute_agglomerate_distance(agglomerates, mol): - """ - Iterates over a list of lists containing atom indices. - For each list the distances between the atoms is computed. - A list of distances is returned. - - """ - - cython.declare( - distances=list, - agglomerate=list, - dist=dict, - ) - - distances = [] - for agglomerate in agglomerates: - dist = compute_atom_distance(agglomerate, mol) - distances.append(dist) - - return distances - -def has_unexpected_lone_pairs(mol): - """ - Iterates over the atoms of the Molecule and returns whether - at least one atom bears an unexpected number of lone pairs. - - E.g. 
- carbon with > 0 lone pairs - nitrogen with > 1 lone pairs - oxygen with > 2 lone pairs - - The expected number of lone pairs of an element is equal to - """ - - for at in mol.atoms: - try: - exp = element.PeriodicSystem.lone_pairs[at.symbol] - except KeyError: - raise Exception("Unrecognized element: {}".format(at.symbol)) - else: - if at.lonePairs != element.PeriodicSystem.lone_pairs[at.symbol]: return True - - return False - -def create_augmented_layers(mol): - """ - - The indices in the string refer to the atom indices in the molecule, according to the atom order - obtained by sorting the atoms using the InChI canonicalization algorithm. - - First a deep copy is created of the original molecule and hydrogen atoms are removed from the molecule. - Next, the molecule is converted into an InChI string, and the auxiliary information of the inchification - procedure is retrieved. - - The N-layer is parsed and used to sort the atoms of the original order according - to the order in the InChI. In case, the molecule contains atoms that cannot be distinguished - with the InChI algorithm ('equivalent atoms'), the position of the unpaired electrons is changed - as to ensure the atoms with the lowest indices are used to compose the string. 
- - """ - - if mol.getRadicalCount() == 0 and not has_unexpected_lone_pairs(mol): - return None, None - elif mol.getFormula() == 'H': - return inchiutil.U_LAYER_PREFIX + '1', None - else: - molcopy = mol.copy(deep=True) - - hydrogens = filter(lambda at: at.number == 1, molcopy.atoms) - [molcopy.removeAtom(h) for h in hydrogens] - - rdkitmol = toRDKitMol(molcopy) - _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon')# suppress stereo warnings - - # extract the atom numbers from N-layer of auxiliary info: - atom_indices = inchiutil.parse_N_layer(auxinfo) - atom_indices = [atom_indices.index(i + 1) for i, atom in enumerate(molcopy.atoms)] - - # sort the atoms based on the order of the atom indices - molcopy.atoms = [x for (y,x) in sorted(zip(atom_indices, molcopy.atoms), key=lambda pair: pair[0])] - - ulayer = create_U_layer(molcopy, auxinfo) - - player = create_P_layer(molcopy, auxinfo) - - return ulayer, player - -def create_P_layer(mol, auxinfo): - """ - - Creates a string with the positions of the atoms that bear an unexpected number of lone pairs. The string - can be used to complement the InChI with an additional layer that allows for the differentiation - between structures with lone pairs. - - The string is composed of a prefix ('P_LAYER_PREFIX') followed by the positions of each of the atoms with an - unexpected number of lone pairs, sorted in numerical order. - - Example: - - singlet methylene biradical ([CH2]) : 'P_LAYER_PREFIX'1 - - When the molecule does not bear any atoms with an unexpected number of lone pairs, - None is returned. 
- - - """ - - # TODO: find the resonance isomer with the lowest p index: - minmol = mol - - # create preliminary p-layer: - p_layer = [] - for i, at in enumerate(mol.atoms): - try: - exp = element.PeriodicSystem.lone_pairs[at.symbol] - except KeyError: - raise Exception("Unrecognized element: {}".format(at.symbol)) - else: - if at.lonePairs != element.PeriodicSystem.lone_pairs[at.symbol]: - if at.lonePairs == 0: - p_layer.append('{}{}'.format(i, '(0)')) - else: - p_layer.extend([i+1] * at.lonePairs) - - # extract equivalent atom pairs from E-layer of auxiliary info: - equivalent_atoms = inchiutil.parse_E_layer(auxinfo) - if equivalent_atoms: - # select lowest u-layer: - u_layer = find_lowest_p_layer(minmol, p_layer, equivalent_atoms) - - if p_layer: - return (inchiutil.P_LAYER_PREFIX + inchiutil.P_LAYER_SEPARATOR.join(map(str, p_layer))) - else: - return None - -def find_lowest_p_layer(minmol, p_layer, equivalent_atoms): - """ - Permute the equivalent atoms and return the combination with the - lowest p-layer. - - TODO: The presence of unpaired electrons complicates stuff. 
- """ - return minmol diff --git a/rmgpy/molecule/generatorTest.py b/rmgpy/molecule/generatorTest.py index 9a06c92756..8e8226019a 100644 --- a/rmgpy/molecule/generatorTest.py +++ b/rmgpy/molecule/generatorTest.py @@ -34,7 +34,7 @@ from rmgpy.species import Species from .molecule import Atom, Molecule -from .inchi import P_LAYER_PREFIX, U_LAYER_PREFIX +from .inchi import P_LAYER_PREFIX, U_LAYER_PREFIX, create_augmented_layers, has_unexpected_lone_pairs from .generator import * from rmgpy.molecule.converter import debugRDKitMol diff --git a/rmgpy/molecule/inchi.pxd b/rmgpy/molecule/inchi.pxd index 23d7ec2292..228c79cad2 100644 --- a/rmgpy/molecule/inchi.pxd +++ b/rmgpy/molecule/inchi.pxd @@ -25,6 +25,8 @@ # # ############################################################################### +from .molecule cimport Atom, Bond, Molecule + cpdef tuple decompose(string) cpdef str ignore_prefix(str string) @@ -38,3 +40,31 @@ cpdef list parse_H_layer(str inchi) cpdef list parse_E_layer(str auxinfo) cpdef list parse_N_layer(str auxinfo) + +cpdef str create_U_layer(Molecule mol, str auxinfo) + +cpdef bint is_valid_combo(list combo, Molecule mol, list distances) + +cpdef list find_lowest_u_layer(Molecule mol, list u_layer, list equivalent_atoms) + +cpdef Molecule generate_minimum_resonance_isomer(Molecule mol) + +cpdef list get_unpaired_electrons(Molecule mol) + +cpdef list compute_agglomerate_distance(list agglomerates, Molecule mol) + +cpdef str create_P_layer(Molecule mol, str auxinfo) + +cpdef reset_lone_pairs(Molecule mol, list p_indices) + +cdef Molecule fix_unsaturated_bond_to_biradical(Molecule mol, str inchi, list u_indices) + +cpdef bint isUnsaturated(Molecule mol) + +cpdef check(Molecule mol, aug_inchi) + +cpdef fix_oxygen_unsaturated_bond(Molecule mol, list u_indices) + +cpdef fixCharge(Molecule mol, list u_indices) + +cpdef fix_triplet_to_singlet(Molecule mol, list p_indices) diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index 04db0fd098..922a30cd19 
100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -30,9 +30,21 @@ import cython import re +import itertools + +from rdkit import Chem + from rmgpy.exceptions import InchiException # search for (*) PARENTHESES +from rmgpy.molecule.adjlist import ConsistencyChecker +from rmgpy.molecule.molecule import Atom, Bond, Molecule +from rmgpy.molecule.converter import toRDKitMol +from rmgpy.molecule.util import agglomerate, partition, generate_combo, swap +import rmgpy.molecule.resonance as resonance +import rmgpy.molecule.element as elements +import rmgpy.molecule.pathfinder as pathfinder + PARENTHESES = re.compile( r'\((.[^\(\)]*)\)') INCHI_PREFIX = 'InChI=1' @@ -303,6 +315,883 @@ def parse_N_layer(auxinfo): return indices + +def create_U_layer(mol, auxinfo): + """ + Creates a string with the positions of the atoms that bear unpaired electrons. The string + can be used to complement the InChI with an additional layer that allows for the differentiation + between structures with multiple unpaired electrons. + + The string is composed of a prefix ('u') followed by the positions of each of the unpaired electrons, + sorted in numerical order. + + Example: + - methyl radical ([CH3]) : u1 + - triplet methylene biradical ([CH2]) : u1,1 + - ethane-1,2-diyl biradical ([CH2][CH2]): u1,2 + + When the molecule does not bear any unpaired electrons, None is returned. 
+ + """ + + cython.declare( + minmol=Molecule, + #rdkitmol=, + u_layer=list, + i=int, + at=Atom, + equivalent_atoms=list, + ) + + if mol.getRadicalCount() == 0: + return None + elif mol.getFormula() == 'H': + return U_LAYER_PREFIX + '1' + + + # find the resonance isomer with the lowest u index: + minmol = generate_minimum_resonance_isomer(mol) + + # create preliminary u-layer: + u_layer = [] + for i, at in enumerate(minmol.atoms): + u_layer.extend([i+1] * at.radicalElectrons) + + # extract equivalent atom pairs from E-layer of auxiliary info: + equivalent_atoms = parse_E_layer(auxinfo) + if equivalent_atoms: + # select lowest u-layer: + u_layer = find_lowest_u_layer(minmol, u_layer, equivalent_atoms) + + return (U_LAYER_PREFIX + ','.join(map(str, u_layer))) + + +def is_valid_combo(combo, mol, distances): + """ + Check if the combination of atom indices refers to + atoms that are adjacent in the molecule. + """ + cython.declare( + agglomerates=list, + new_distances=list, + orig_dist=dict, + new_dist=dict, + ) + + # compute shortest path between atoms + agglomerates = agglomerate(combo) + new_distances = compute_agglomerate_distance(agglomerates, mol) + + # combo is valid if the distance is equal to the parameter distance + + if len(distances) != len(new_distances): return False + + for orig_dist, new_dist in zip(distances, new_distances): + # only compare the values of the dictionaries: + if sorted(orig_dist.values()) != sorted(new_dist.values()): + return False + + return True + + +def find_lowest_u_layer(mol, u_layer, equivalent_atoms): + """ + Searches for the "minimum" combination of indices of atoms that bear unpaired electrons. + + It does so by using the information on equivalent atoms to permute equivalent atoms to + obtain a combination of atoms that is the (numerically) lowest possible combination. 
+ + Each possible combination is valid if and only if the distances between the atoms of the + combination is identical to the distances between the original combination. + + First, the algorithm partitions equivalent atoms that bear an unpaired electron. + Next, the combinations are generated, and for each combination it is verified whether + it pertains to a "valid" combination. + + Returns a list of indices corresponding to the lowest combination of atom indices bearing + unpaired electrons. + """ + + cython.declare( + new_u_layer=list, + grouped_electrons=list, + corresponding_E_layers=list, + group=list, + e_layer=list, + combos=list, + orig_agglomerates=list, + orig_distances=list, + selected_group=list, + combo=list, + ) + if not equivalent_atoms: + return u_layer + + new_u_layer = [] + + grouped_electrons, corresponding_E_layers = partition(u_layer, equivalent_atoms) + + # don't process atoms that do not belong to an equivalence layer + for group, e_layer in zip(grouped_electrons[:], corresponding_E_layers[:]): + if not e_layer: + new_u_layer.extend(group) + grouped_electrons.remove(group) + corresponding_E_layers.remove(e_layer) + + + combos = generate_combo(grouped_electrons, corresponding_E_layers) + # compute original distance: + orig_agglomerates = agglomerate(grouped_electrons) + orig_distances = compute_agglomerate_distance(orig_agglomerates, mol) + + # deflate the list of lists to be able to numerically compare them + selected_group = sorted(itertools.chain.from_iterable(grouped_electrons)) + + # see if any of the combos is valid and results in a lower numerical combination than the original + for combo in combos: + if is_valid_combo(combo, mol, orig_distances): + combo = sorted(itertools.chain.from_iterable(combo)) + if combo < selected_group: + selected_group = combo + + # add the minimized unpaired electron positions to the u-layer: + new_u_layer.extend(selected_group) + + return sorted(new_u_layer) + + +def 
generate_minimum_resonance_isomer(mol): + """ + Select the resonance isomer that is isomorphic to the parameter isomer, with the lowest unpaired + electrons descriptor. + + First, we generate all isomorphic resonance isomers. + Next, we return the candidate with the lowest unpaired electrons metric. + + The metric is a sorted list with indices of the atoms that bear an unpaired electron + """ + + cython.declare( + candidates=list, + sel=Molecule, + cand=Molecule, + metric_sel=list, + metric_cand=list, + ) + + + candidates = resonance.generate_isomorphic_resonance_structures(mol) + + sel = candidates[0] + metric_sel = get_unpaired_electrons(sel) + for cand in candidates[1:]: + metric_cand = get_unpaired_electrons(cand) + if metric_cand < metric_sel: + sel = cand + metric_sel = metric_cand + + return sel + + +def get_unpaired_electrons(mol): + """ + Returns a sorted list of the indices of the atoms that bear one or more + unpaired electrons. + """ + + cython.declare( + locations=list, + index=int, + at=Atom, + ) + locations = [] + for index, at in enumerate(mol.atoms): + if at.radicalElectrons >= 1: + locations.append(index) + + return sorted(locations) + + +def compute_agglomerate_distance(agglomerates, mol): + """ + Iterates over a list of lists containing atom indices. + For each list the distances between the atoms is computed. + A list of distances is returned. + + """ + + cython.declare( + distances=list, + agglomerate=list, + dist=dict, + ) + + distances = [] + for agglomerate in agglomerates: + dist = pathfinder.compute_atom_distance(agglomerate, mol) + distances.append(dist) + + return distances + + +def has_unexpected_lone_pairs(mol): + """ + Iterates over the atoms of the Molecule and returns whether + at least one atom bears an unexpected number of lone pairs. + + E.g. 
+ carbon with > 0 lone pairs + nitrogen with > 1 lone pairs + oxygen with > 2 lone pairs + + The expected number of lone pairs of an element is equal to + """ + + for at in mol.atoms: + try: + exp = elements.PeriodicSystem.lone_pairs[at.symbol] + except KeyError: + raise Exception("Unrecognized element: {}".format(at.symbol)) + else: + if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: return True + + return False + + +def create_augmented_layers(mol): + """ + + The indices in the string refer to the atom indices in the molecule, according to the atom order + obtained by sorting the atoms using the InChI canonicalization algorithm. + + First a deep copy is created of the original molecule and hydrogen atoms are removed from the molecule. + Next, the molecule is converted into an InChI string, and the auxiliary information of the inchification + procedure is retrieved. + + The N-layer is parsed and used to sort the atoms of the original order according + to the order in the InChI. In case, the molecule contains atoms that cannot be distinguished + with the InChI algorithm ('equivalent atoms'), the position of the unpaired electrons is changed + as to ensure the atoms with the lowest indices are used to compose the string. 
+ + """ + + if mol.getRadicalCount() == 0 and not has_unexpected_lone_pairs(mol): + return None, None + elif mol.getFormula() == 'H': + return U_LAYER_PREFIX + '1', None + else: + molcopy = mol.copy(deep=True) + + hydrogens = filter(lambda at: at.number == 1, molcopy.atoms) + [molcopy.removeAtom(h) for h in hydrogens] + + rdkitmol = toRDKitMol(molcopy) + _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon')# suppress stereo warnings + + # extract the atom numbers from N-layer of auxiliary info: + atom_indices = parse_N_layer(auxinfo) + atom_indices = [atom_indices.index(i + 1) for i, atom in enumerate(molcopy.atoms)] + + # sort the atoms based on the order of the atom indices + molcopy.atoms = [x for (y,x) in sorted(zip(atom_indices, molcopy.atoms), key=lambda pair: pair[0])] + + ulayer = create_U_layer(molcopy, auxinfo) + + player = create_P_layer(molcopy, auxinfo) + + return ulayer, player + + +def create_P_layer(mol, auxinfo): + """ + + Creates a string with the positions of the atoms that bear an unexpected number of lone pairs. The string + can be used to complement the InChI with an additional layer that allows for the differentiation + between structures with lone pairs. + + The string is composed of a prefix ('P_LAYER_PREFIX') followed by the positions of each of the atoms with an + unexpected number of lone pairs, sorted in numerical order. + + Example: + - singlet methylene biradical ([CH2]) : 'P_LAYER_PREFIX'1 + + When the molecule does not bear any atoms with an unexpected number of lone pairs, + None is returned. 
+ + + """ + + # TODO: find the resonance isomer with the lowest p index: + minmol = mol + + # create preliminary p-layer: + p_layer = [] + for i, at in enumerate(mol.atoms): + try: + exp = elements.PeriodicSystem.lone_pairs[at.symbol] + except KeyError: + raise Exception("Unrecognized element: {}".format(at.symbol)) + else: + if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: + if at.lonePairs == 0: + p_layer.append('{}{}'.format(i, '(0)')) + else: + p_layer.extend([i+1] * at.lonePairs) + + # extract equivalent atom pairs from E-layer of auxiliary info: + equivalent_atoms = parse_E_layer(auxinfo) + if equivalent_atoms: + # select lowest u-layer: + u_layer = find_lowest_p_layer(minmol, p_layer, equivalent_atoms) + + if p_layer: + return (P_LAYER_PREFIX + P_LAYER_SEPARATOR.join(map(str, p_layer))) + else: + return None + + +def find_lowest_p_layer(minmol, p_layer, equivalent_atoms): + """ + Permute the equivalent atoms and return the combination with the + lowest p-layer. + + TODO: The presence of unpaired electrons complicates stuff. + """ + return minmol + + +def check(mol, aug_inchi): + """ + Check if the molecular structure is correct. + + Checks whether the multiplicity contained in the augmented inchi, + corresponds to the number of unpaired electrons + 1 found in the molecule. + + Checks whether the valence of each atom is compatible with the bond order, + number of unpaired electrons, lone pairs and charge. + + """ + cython.declare(inchi=str, + at=Atom + ) + + ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) + inchi, u_indices, p_indices = decompose(str(aug_inchi)) + assert(mol.getRadicalCount() == len(u_indices)) + + for at in mol.atoms: + ConsistencyChecker.check_partial_charge(at) + + +def fix_oxygen_unsaturated_bond(mol, u_indices): + """ + Searches for a radical or a charged oxygen atom connected to + a closed-shell carbon via an unsatured bond. 
+ + Decrements the unsatured bond, + transfers the unpaired electron from O to C or + converts the charge from O to an unpaired electron on C, + increases the lone pair count of O to 2. + + Only do this once per molecule. + """ + + for at in mol.atoms: + if at.isOxygen() and at.radicalElectrons == 1 and at.lonePairs == 1: + bonds = mol.getBonds(at) + oxygen = at + for atom2, bond in bonds.iteritems(): + if bond.isTriple(): + bond.decrementOrder() + oxygen.radicalElectrons -= 1 + atom2.radicalElectrons += 1 + oxygen.lonePairs += 1 + return + elif at.isOxygen() and at.charge == 1 and at.lonePairs == 1: + bonds = mol.getBonds(at) + oxygen = at + + start = oxygen + # search for 3-atom-2-bond [X=X-X] paths + paths = pathfinder.find_allyl_end_with_charge(start) + for path in paths: + end = path[-1] + start.charge += 1 if start.charge < 0 else -1 + end.charge += 1 if end.charge < 0 else -1 + start.lonePairs += 1 + # filter bonds from path and convert bond orders: + bonds = path[1::2] # odd elements + for bond in bonds[::2]: # even bonds + assert isinstance(bond, Bond) + bond.decrementOrder() + for bond in bonds[1::2]: # odd bonds + assert isinstance(bond, Bond) + bond.incrementOrder() + return + else: + for atom2, bond in bonds.iteritems(): + if not bond.isSingle() and atom2.charge == 0: + oxygen.charge -= 1 + if (mol.atoms.index(atom2) + 1) in u_indices: + bond.decrementOrder() + atom2.radicalElectrons += 1 + u_indices.remove(mol.atoms.index(atom2) + 1) + oxygen.lonePairs += 1 + return + + +def fixCharge(mol, u_indices): + """ + Tries to fix a number of structural features in the molecule related to charge, + based on the information from the parameter list of atom indices with unpaired electrons. 
+ """ + + if not u_indices: + return + + is_charged = sum([abs(at.charge) for at in mol.atoms]) != 0 + is_correct = mol.getRadicalCount() == (mol.multiplicity - 1) + if mol.multiplicity < 3 or is_correct or not is_charged: + return + + # converting charges to unpaired electrons for atoms in the u-layer + convert_charge_to_unpaired_electron(mol, u_indices) + + # convert neighboring atoms (or delocalized paths) to unpaired electrons + convert_delocalized_charge_to_unpaired_electron(mol, u_indices) + + fix_adjacent_charges(mol) + + +def check_bond_order_oxygen(mol): + """Check if total bond order of oxygen atoms is smaller than 4.""" + from rmgpy.molecule.util import ORDERS + + for at in mol.atoms: + if at.number == 8: + order = sum([ORDERS[b.order] for _, b in at.bonds.iteritems()]) + not_correct = order >= 4 + if not_correct: + return False + + return True + + +def find_mobile_h_system(mol, all_mobile_h_atoms_couples, test_indices): + """ + + """ + dummy = test_indices[:] + + for mobile_h_atom_couple in all_mobile_h_atoms_couples: + for test_index in test_indices: + if test_index in mobile_h_atom_couple: + original_atom = test_index + dummy.remove(test_index) + mobile_h_atom_couple.remove(test_index) + new_partner = mobile_h_atom_couple[0] + central = dummy[0] + return mol.atoms[central - 1], mol.atoms[original_atom - 1], mol.atoms[new_partner - 1] + + raise Exception('We should always have found the mobile-H system. All mobile H couples: {}, test indices: {}' + .format(all_mobile_h_atoms_couples, test_indices)) + + +def fix_adjacent_charges(mol): + """ + Searches for pairs of charged atoms. + Neutralizes one unit of charge on each atom, + and increments the bond order of the bond in between + the atoms. 
+ """ + for at in mol.atoms: + if at.charge != 0: + for neigh, bond in at.bonds.iteritems(): + if neigh.charge != 0: + bond.incrementOrder() + at.charge += 1 if at.charge < 0 else -1 + neigh.charge += 1 if neigh.charge < 0 else -1 + + +def convert_charge_to_unpaired_electron(mol, u_indices): + """ + Iterates over the atoms foundin the parameter list and + converts a unit of charge on atoms into an unpaired electron. + + Removes treated atoms from the parameter list. + """ + for at in mol.atoms: + at_index = mol.atoms.index(at) + 1 + if at.charge != 0 and at_index in u_indices: + at.charge += 1 if at.charge < 0 else -1 + at.radicalElectrons += 1 + u_indices.remove(at_index) + + +def convert_delocalized_charge_to_unpaired_electron(mol, u_indices): + """ + Iterates over the atom indices of the parameter list and searches + a charged atom that is connected to that atom via some kind of + delocalization path. + + """ + u_indices_copy = u_indices[:] + for index in u_indices_copy: + start = mol.atoms[index - 1] + + found = convert_4_atom_3_bond_path(start) + if found: + u_indices.remove(index) + continue + + found = convert_3_atom_2_bond_path(start, mol) + if found: + u_indices.remove(index) + continue + + +def convert_4_atom_3_bond_path(start): + """ + Searches for 4-atom-3-bond [X=X-X=X+] paths starting from the parameter atom. + If a path is found, the starting atom receives an unpaired electron while + the bonds in the delocalization path are "inverted". A unit of charge on the + end atom is neutralized and a lone pair is added. 
+ """ + path = pathfinder.find_butadiene_end_with_charge(start) + + if path is not None: + start.radicalElectrons += 1 + end = path[-1] + end.charge += 1 if end.charge < 0 else -1 + end.lonePairs += 1 + + # filter bonds from path and convert bond orders: + bonds = path[1::2] # odd + for bond in bonds[::2]: # even + assert isinstance(bond, Bond) + bond.decrementOrder() + for bond in bonds[1::2]: # odd bonds + assert isinstance(bond, Bond) + bond.incrementOrder() + + return True + + return False + + +def convert_3_atom_2_bond_path(start, mol): + """ + Searches for 3-atom-2-bond [X=X-X+] paths paths starting from the parameter atom. + If a correct path is found, the starting atom receives an unpaired electron while + the bonds in the delocalization path are "inverted". A unit of charge on the + end atom is neutralized and a lone pair is added. + + If it turns out the path was invalid, the actions are reverted, and another path + is tried instead. + + To facilitate reverting the changes, we use a reaction recipe and populate it + with a number of actions that reflect the changes in bond orders and unpaired + electrons that the molecule should undergo. 
+ """ + from rmgpy.data.kinetics.family import ReactionRecipe + + def is_valid(mol): + """Check if total bond order of oxygen atoms is smaller than 4.""" + + for at in mol.atoms: + if at.number == 8: + order = at.getBondOrdersForAtom() + not_correct = order >= 4 + if not_correct: + return False + + return True + + index = mol.atoms.index(start) + 1 + + paths = pathfinder.find_allyl_end_with_charge(start) + + for path in paths: + # label atoms so that we can use the labels in the actions of the recipe + for i, at in enumerate(path[::2]): + at.label = str(i) + # we have found the atom we are looking for + recipe = ReactionRecipe() + recipe.addAction(['GAIN_RADICAL', start.label, 1]) + + end = path[-1] + end_original_charge = end.charge + + # filter bonds from path and convert bond orders: + bonds = path[1::2] # odd elements + for bond in bonds[::2]: # even + recipe.addAction(['CHANGE_BOND', bond.atom1.label, -1, bond.atom2.label]) + for bond in bonds[1::2]: # odd + recipe.addAction(['CHANGE_BOND', bond.atom1.label, 1, bond.atom2.label]) + + end.charge += 1 if end.charge < 0 else -1 + recipe.applyForward(mol) + + if is_valid(mol): + # unlabel atoms so that they never cause trouble downstream + for i, at in enumerate(path[::2]): + at.label = '' + return True + else: + recipe.applyReverse(mol) + end.charge = end_original_charge + + # unlabel atoms so that they never cause trouble downstream + for i, at in enumerate(path[::2]): + assert isinstance(at, Atom) + at.label = '' + + return False + + +def fix(mol, aug_inchi): + """ + Fixes a number of structural features of the erroneous Molecule + parsed by the backends, based on multiplicity and unpaired electron information + stored in the augmented inchi. 
+ """ + + u_indices = aug_inchi.u_indices[:] if aug_inchi.u_indices else [] + p_indices = aug_inchi.p_indices[:] if aug_inchi.p_indices else [] + + # ignore atoms that bear already unpaired electrons: + for i in set(u_indices[:]): + atom = mol.atoms[i - 1] + [u_indices.remove(i) for _ in range(atom.radicalElectrons)] + + # ignore atoms that bear already lone pairs: + for i in set(p_indices[:]): + atom = mol.atoms[i - 1] + [p_indices.remove(i) for _ in range(atom.lonePairs)] + + fix_triplet_to_singlet(mol, p_indices) + + fixCharge(mol, u_indices) + + reset_lone_pairs(mol, p_indices) + + fix_oxygen_unsaturated_bond(mol, u_indices) + + fix_unsaturated_bond(mol, u_indices, aug_inchi) + + check(mol, aug_inchi) + + +def fix_triplet_to_singlet(mol, p_indices): + """ + Iterates over the atoms and checks whether atoms bearing two unpaired electrons are + also present in the p_indices list. + + If so, convert to the two unpaired electrons into a lone pair, and remove that atom + index from the p_indices list. + """ + + for at in mol.atoms: + index = mol.atoms.index(at) + 1 + if mol.getRadicalCount() == 2 and index in p_indices: + at.lonePairs += 1 + at.radicalElectrons -= 2 + p_indices.remove(index) + + +def fix_butadiene_path(start, end): + """ + Searches for a 1,3-butadiene path between the start and end atom. + Adds an unpaired electron to start and end atom, and "inverts" the bonds + in between them. 
+ """ + path = pathfinder.find_butadiene(start, end) + if path is not None: + start.radicalElectrons += 1 + end.radicalElectrons += 1 + # filter bonds from path and convert bond orders: + bonds = path[1::2] # odd elements + for bond in bonds[::2]: # even bonds + assert isinstance(bond, Bond) + bond.decrementOrder() + for bond in bonds[1::2]: # odd bonds + assert isinstance(bond, Bond) + bond.incrementOrder() + + return True + + return False + + +def fix_mobile_h(mol, inchi, u1, u2): + """ + + Identifies a system of atoms bearing unpaired electrons and mobile hydrogens + at the same time. + + The system will consist of a central atom that does not bear any mobile hydrogens, + but that is bound to an atom that does bear a mobile hydrogen, called the "original atom". + + The algorithm identifies the "new partner" atom that is part of the mobile hydrogen + system. + + Next, the mobile hydrogen is transferred from the original atom, to the new partner, + and a bond is removed and added respectively. + + Finally, the central atom and the original atom will each receive an unpaired electron, + and the bond between them will decrease in order. 
+ """ + + mobile_hydrogens = parse_H_layer(inchi) + + if mobile_hydrogens: + # WIP: only consider the first system of mobile hydrogens: + mobile_hydrogens = mobile_hydrogens[0] + + # find central atom: + central, original, new_partner = swap(mobile_hydrogens, [u1, u2]) + + central, original, new_partner = \ + mol.atoms[central - 1], mol.atoms[original - 1], mol.atoms[new_partner - 1] + + # search hydrogen atom and bond + hydrogen = None + for at, bond in original.bonds.iteritems(): + if at.number == 1: + hydrogen = at + mol.removeBond(bond) + break + + new_h_bond = Bond(new_partner, hydrogen, order='S') + mol.addBond(new_h_bond) + + mol.getBond(central, new_partner).decrementOrder() + + central.radicalElectrons += 1 + original.radicalElectrons += 1 + return True + + return False + + +def convert_unsaturated_bond_to_triplet(bond): + """ + Decrements the bond if it is unsatured, and adds an unpaired + electron to each of the atoms connected by the bond. + """ + if not bond.isSingle(): + for at in (bond.atom1, bond.atom2): + at.radicalElectrons += 1 + bond.decrementOrder() + return True + return False + + +def reset_lone_pairs(mol, p_indices): + """ + Iterates over the atoms of the molecule and + resets the atom's lone pair count to the value stored in the p_indices list, + or to the default value. + + """ + for at in mol.atoms: + index = mol.atoms.index(at) + 1 # 1-based index + count = p_indices.count(index) + if count != 0: + at.lonePairs = count + else: + order = at.getBondOrdersForAtom() + at.lonePairs = (elements.PeriodicSystem.valence_electrons[ + at.symbol] - order - at.radicalElectrons - at.charge) / 2 + + +def fix_unsaturated_bond_to_biradical(mol, inchi, u_indices): + """ + Convert an unsaturated bond (double, triple) into a bond + with a lower bond order (single, double), and give an unpaired electron + to each of the neighboring atoms, with indices referring to the 1-based + index in the InChI string. 
+ """ + cython.declare(u1=cython.int, u2=cython.int) + cython.declare(atom1=Atom, atom2=Atom) + cython.declare(b=Bond) + + combos = itertools.combinations(u_indices, 2) + + isFixed = False + for u1, u2 in combos: + atom1 = mol.atoms[u1 - 1] # convert to 0-based index for atoms in molecule + atom2 = mol.atoms[u2 - 1] # convert to 0-based index for atoms in molecule + if mol.hasBond(atom1, atom2): + b = mol.getBond(atom1, atom2) + isFixed = convert_unsaturated_bond_to_triplet(b) + if isFixed: + break + + else: + isFixed = fix_mobile_h(mol, inchi, u1, u2) + if isFixed: + break + else: + isFixed = fix_butadiene_path(atom1, atom2) + if isFixed: + break + + if isFixed: + u_indices.remove(u1) + u_indices.remove(u2) + return mol + else: + raise Exception( + 'Could not convert an unsaturated bond into a biradical for the \ + indices {} provided in the molecule: {}.' + .format(u_indices, mol.toAdjacencyList()) + ) + + +def isUnsaturated(mol): + """ + Does the molecule have a bond that's not single? + Eg. a bond that is double or triple or benzene + """ + cython.declare(atom1=Atom, + atom2=Atom, + bonds=dict, + bond=Bond) + for atom1 in mol.atoms: + bonds = mol.getBonds(atom1) + for atom2, bond in bonds.iteritems(): + if not bond.isSingle(): + return True + + return False + + +def fix_unsaturated_bond(mol, indices, aug_inchi): + """ + Adds unpaired electrons to the molecule by converting unsaturated bonds into triplets. + + It does so by converting an unsaturated bond into a triplet, and verifying whether + the total number of unpaired electrons matches the multiplicity. + + Finishes when all unsaturated bonds have been tried, or when there are no pairs + of atoms that should be unpaired electrons left. + """ + + correct = mol.getRadicalCount() == (mol.multiplicity - 1) + + if not correct and not indices: + raise Exception('Cannot correct {} based on {} by converting unsaturated bonds into unpaired electrons...' 
\ + .format(mol.toAdjacencyList(), aug_inchi)) + + unsaturated = isUnsaturated(mol) + + while not correct and unsaturated and len(indices) > 1: + mol = fix_unsaturated_bond_to_biradical(mol, aug_inchi.inchi, indices) + correct = mol.getRadicalCount() == (mol.multiplicity - 1) + unsaturated = isUnsaturated(mol) + + class InChI(str): """InChI is a type of string in which the InChI=1 prefix is ignored.""" def __new__(self, inchi): diff --git a/rmgpy/molecule/parser.pxd b/rmgpy/molecule/parser.pxd index 6e4fef104c..28e3f53593 100644 --- a/rmgpy/molecule/parser.pxd +++ b/rmgpy/molecule/parser.pxd @@ -54,6 +54,8 @@ cdef Molecule __parse(Molecule mol, str identifier, str type_identifier, str bac cpdef Molecule parse_openbabel(Molecule mol, str identifier, str type_identifier) +cpdef isCorrectlyParsed(Molecule mol, str identifier) + cpdef Molecule fromInChI(Molecule mol, str inchistr, backend=*) cpdef Molecule fromSMILES(Molecule mol, str smilesstr, str backend=*) @@ -64,20 +66,3 @@ cpdef Molecule fromAugmentedInChI(Molecule mol, aug_inchi) cdef Molecule __lookup(Molecule mol, str identifier, str type_identifier) -# parser helper functions: - -cpdef reset_lone_pairs(Molecule mol, list p_indices) - -cdef Molecule fix_unsaturated_bond_to_biradical(Molecule mol, str inchi, list u_indices) - -cpdef bint isUnsaturated(Molecule mol) - -cpdef isCorrectlyParsed(Molecule mol, str identifier) - -cpdef check(Molecule mol, aug_inchi) - -cpdef fix_oxygen_unsaturated_bond(Molecule mol, list u_indices) - -cpdef fixCharge(Molecule mol, list u_indices) - -cpdef fix_triplet_to_singlet(Molecule mol, list p_indices) diff --git a/rmgpy/molecule/parser.py b/rmgpy/molecule/parser.py index a1e91b955b..310ffc7184 100644 --- a/rmgpy/molecule/parser.py +++ b/rmgpy/molecule/parser.py @@ -49,14 +49,10 @@ from rdkit import Chem -from rmgpy.molecule import element as elements -from .molecule import Atom, Bond, Molecule -from .adjlist import ConsistencyChecker from rmgpy.molecule.converter import 
fromRDKitMol, fromOBMol import rmgpy.molecule.inchi as inchiutil import rmgpy.molecule.util as util -import rmgpy.molecule.pathfinder as pathfinder # constants @@ -224,83 +220,6 @@ def __lookup(mol, identifier, type_identifier): except KeyError: return None -def check(mol, aug_inchi): - """ - Check if the molecular structure is correct. - - Checks whether the multiplicity contained in the augmented inchi, - corresponds to the number of unpaired electrons + 1 found in the molecule. - - Checks whether the valence of each atom is compatible with the bond order, - number of unpaired electrons, lone pairs and charge. - - """ - cython.declare(inchi=str, - at=Atom - ) - - ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) - inchi, u_indices, p_indices = inchiutil.decompose(str(aug_inchi)) - assert(mol.getRadicalCount() == len(u_indices)) - - for at in mol.atoms: - ConsistencyChecker.check_partial_charge(at) - -def fix_oxygen_unsaturated_bond(mol, u_indices): - """ - Searches for a radical or a charged oxygen atom connected to - a closed-shell carbon via an unsatured bond. - - Decrements the unsatured bond, - transfers the unpaired electron from O to C or - converts the charge from O to an unpaired electron on C, - increases the lone pair count of O to 2. - - Only do this once per molecule. 
- """ - - for at in mol.atoms: - if at.isOxygen() and at.radicalElectrons == 1 and at.lonePairs == 1: - bonds = mol.getBonds(at) - oxygen = at - for atom2, bond in bonds.iteritems(): - if bond.isTriple(): - bond.decrementOrder() - oxygen.radicalElectrons -= 1 - atom2.radicalElectrons += 1 - oxygen.lonePairs += 1 - return - elif at.isOxygen() and at.charge == 1 and at.lonePairs == 1: - bonds = mol.getBonds(at) - oxygen = at - - start = oxygen - # search for 3-atom-2-bond [X=X-X] paths - paths = pathfinder.find_allyl_end_with_charge(start) - for path in paths: - end = path[-1] - start.charge += 1 if start.charge < 0 else -1 - end.charge += 1 if end.charge < 0 else -1 - start.lonePairs += 1 - # filter bonds from path and convert bond orders: - bonds = path[1::2]#odd elements - for bond in bonds[::2]:# even bonds - assert isinstance(bond, Bond) - bond.decrementOrder() - for bond in bonds[1::2]:# odd bonds - assert isinstance(bond, Bond) - bond.incrementOrder() - return - else: - for atom2, bond in bonds.iteritems(): - if not bond.isSingle() and atom2.charge == 0: - oxygen.charge -= 1 - if (mol.atoms.index(atom2) + 1) in u_indices: - bond.decrementOrder() - atom2.radicalElectrons += 1 - u_indices.remove(mol.atoms.index(atom2) + 1) - oxygen.lonePairs += 1 - return def fromInChI(mol, inchistr, backend='try-all'): """ @@ -340,7 +259,7 @@ def fromAugmentedInChI(mol, aug_inchi): mol.multiplicity = len(aug_inchi.u_indices) + 1 if aug_inchi.u_indices else 1 - fix(mol, aug_inchi) + inchiutil.fix(mol, aug_inchi) mol.updateAtomTypes() @@ -365,440 +284,3 @@ def fromSMARTS(mol, smartsstr, backend = 'rdkit'): return __parse(mol, smartsstr, 'sma', backend) -def fixCharge(mol, u_indices): - """ - Tries to fix a number of structural features in the molecule related to charge, - based on the information from the parameter list of atom indices with unpaired electrons. 
- """ - - if not u_indices: - return - - is_charged = sum([abs(at.charge) for at in mol.atoms]) != 0 - is_correct = mol.getRadicalCount() == (mol.multiplicity - 1) - if mol.multiplicity < 3 or is_correct or not is_charged: - return - - # converting charges to unpaired electrons for atoms in the u-layer - convert_charge_to_unpaired_electron(mol, u_indices) - - # convert neighboring atoms (or delocalized paths) to unpaired electrons - convert_delocalized_charge_to_unpaired_electron(mol, u_indices) - - fix_adjacent_charges(mol) - -def check_bond_order_oxygen(mol): - """Check if total bond order of oxygen atoms is smaller than 4.""" - from rmgpy.molecule.util import ORDERS - - for at in mol.atoms: - if at.number == 8: - order = sum([ORDERS[b.order] for _, b in at.bonds.iteritems()]) - not_correct = order >= 4 - if not_correct: - return False - - return True - -def find_mobile_h_system(mol, all_mobile_h_atoms_couples, test_indices): - """ - - """ - dummy = test_indices[:] - - for mobile_h_atom_couple in all_mobile_h_atoms_couples: - for test_index in test_indices: - if test_index in mobile_h_atom_couple: - original_atom = test_index - dummy.remove(test_index) - mobile_h_atom_couple.remove(test_index) - new_partner = mobile_h_atom_couple[0] - central = dummy[0] - return mol.atoms[central - 1], mol.atoms[original_atom - 1], mol.atoms[new_partner - 1] - - raise Exception('We should always have found the mobile-H system. All mobile H couples: {}, test indices: {}' - .format(all_mobile_h_atoms_couples, test_indices)) - -def fix_adjacent_charges(mol): - """ - Searches for pairs of charged atoms. - Neutralizes one unit of charge on each atom, - and increments the bond order of the bond in between - the atoms. 
- """ - for at in mol.atoms: - if at.charge != 0: - for neigh, bond in at.bonds.iteritems(): - if neigh.charge != 0: - bond.incrementOrder() - at.charge += 1 if at.charge < 0 else -1 - neigh.charge += 1 if neigh.charge < 0 else -1 - -def convert_charge_to_unpaired_electron(mol, u_indices): - """ - Iterates over the atoms foundin the parameter list and - converts a unit of charge on atoms into an unpaired electron. - - Removes treated atoms from the parameter list. - """ - for at in mol.atoms: - at_index = mol.atoms.index(at) + 1 - if at.charge != 0 and at_index in u_indices: - at.charge += 1 if at.charge < 0 else -1 - at.radicalElectrons += 1 - u_indices.remove(at_index) - -def convert_delocalized_charge_to_unpaired_electron(mol, u_indices): - """ - Iterates over the atom indices of the parameter list and searches - a charged atom that is connected to that atom via some kind of - delocalization path. - - """ - u_indices_copy = u_indices[:] - for index in u_indices_copy: - start = mol.atoms[index -1] - - found = convert_4_atom_3_bond_path(start) - if found: - u_indices.remove(index) - continue - - found = convert_3_atom_2_bond_path(start, mol) - if found: - u_indices.remove(index) - continue - -def convert_4_atom_3_bond_path(start): - """ - Searches for 4-atom-3-bond [X=X-X=X+] paths starting from the parameter atom. - If a path is found, the starting atom receives an unpaired electron while - the bonds in the delocalization path are "inverted". A unit of charge on the - end atom is neutralized and a lone pair is added. 
- """ - path = pathfinder.find_butadiene_end_with_charge(start) - - if path is not None: - start.radicalElectrons += 1 - end = path[-1] - end.charge += 1 if end.charge < 0 else -1 - end.lonePairs += 1 - - # filter bonds from path and convert bond orders: - bonds = path[1::2]#odd - for bond in bonds[::2]:# even - assert isinstance(bond, Bond) - bond.decrementOrder() - for bond in bonds[1::2]:# odd bonds - assert isinstance(bond, Bond) - bond.incrementOrder() - - return True - - return False - -def convert_3_atom_2_bond_path(start, mol): - """ - Searches for 3-atom-2-bond [X=X-X+] paths paths starting from the parameter atom. - If a correct path is found, the starting atom receives an unpaired electron while - the bonds in the delocalization path are "inverted". A unit of charge on the - end atom is neutralized and a lone pair is added. - - If it turns out the path was invalid, the actions are reverted, and another path - is tried instead. - - To facilitate reverting the changes, we use a reaction recipe and populate it - with a number of actions that reflect the changes in bond orders and unpaired - electrons that the molecule should undergo. 
- """ - from rmgpy.data.kinetics.family import ReactionRecipe - - def is_valid(mol): - """Check if total bond order of oxygen atoms is smaller than 4.""" - - for at in mol.atoms: - if at.number == 8: - order = at.getBondOrdersForAtom() - not_correct = order >= 4 - if not_correct: - return False - - return True - - index = mol.atoms.index(start) + 1 - - paths = pathfinder.find_allyl_end_with_charge(start) - - for path in paths: - # label atoms so that we can use the labels in the actions of the recipe - for i, at in enumerate(path[::2]): - at.label = str(i) - # we have found the atom we are looking for - recipe = ReactionRecipe() - recipe.addAction(['GAIN_RADICAL', start.label, 1]) - - end = path[-1] - end_original_charge = end.charge - - # filter bonds from path and convert bond orders: - bonds = path[1::2]#odd elements - for bond in bonds[::2]:# even - recipe.addAction(['CHANGE_BOND', bond.atom1.label, -1, bond.atom2.label]) - for bond in bonds[1::2]:# odd - recipe.addAction(['CHANGE_BOND', bond.atom1.label, 1, bond.atom2.label]) - - end.charge += 1 if end.charge < 0 else -1 - recipe.applyForward(mol) - - if is_valid(mol): - # unlabel atoms so that they never cause trouble downstream - for i, at in enumerate(path[::2]): - at.label = '' - return True - else: - recipe.applyReverse(mol) - end.charge = end_original_charge - - # unlabel atoms so that they never cause trouble downstream - for i, at in enumerate(path[::2]): - assert isinstance(at, Atom) - at.label = '' - - return False - -def fix(mol, aug_inchi): - """ - Fixes a number of structural features of the erroneous Molecule - parsed by the backends, based on multiplicity and unpaired electron information - stored in the augmented inchi. 
- """ - - u_indices = aug_inchi.u_indices[:] if aug_inchi.u_indices else [] - p_indices = aug_inchi.p_indices[:] if aug_inchi.p_indices else [] - - # ignore atoms that bear already unpaired electrons: - for i in set(u_indices[:]): - atom = mol.atoms[i - 1] - [u_indices.remove(i) for _ in range(atom.radicalElectrons)] - - # ignore atoms that bear already lone pairs: - for i in set(p_indices[:]): - atom = mol.atoms[i - 1] - [p_indices.remove(i) for _ in range(atom.lonePairs)] - - - fix_triplet_to_singlet(mol, p_indices) - - fixCharge(mol, u_indices) - - reset_lone_pairs(mol, p_indices) - - fix_oxygen_unsaturated_bond(mol, u_indices) - - fix_unsaturated_bond(mol, u_indices, aug_inchi) - - check(mol, aug_inchi) - - -def fix_triplet_to_singlet(mol, p_indices): - """ - Iterates over the atoms and checks whether atoms bearing two unpaired electrons are - also present in the p_indices list. - - If so, convert to the two unpaired electrons into a lone pair, and remove that atom - index from the p_indices list. - """ - - for at in mol.atoms: - index = mol.atoms.index(at) + 1 - if mol.getRadicalCount() == 2 and index in p_indices: - at.lonePairs += 1 - at.radicalElectrons -= 2 - p_indices.remove(index) - - -def fix_butadiene_path(start, end): - """ - Searches for a 1,3-butadiene path between the start and end atom. - Adds an unpaired electron to start and end atom, and "inverts" the bonds - in between them. 
- """ - path = pathfinder.find_butadiene(start, end) - if path is not None: - start.radicalElectrons += 1 - end.radicalElectrons += 1 - # filter bonds from path and convert bond orders: - bonds = path[1::2]#odd elements - for bond in bonds[::2]:# even bonds - assert isinstance(bond, Bond) - bond.decrementOrder() - for bond in bonds[1::2]:# odd bonds - assert isinstance(bond, Bond) - bond.incrementOrder() - - return True - - return False - -def fix_mobile_h(mol, inchi, u1, u2): - """ - - Identifies a system of atoms bearing unpaired electrons and mobile hydrogens - at the same time. - - The system will consist of a central atom that does not bear any mobile hydrogens, - but that is bound to an atom that does bear a mobile hydrogen, called the "original atom". - - The algorithm identifies the "new partner" atom that is part of the mobile hydrogen - system. - - Next, the mobile hydrogen is transferred from the original atom, to the new partner, - and a bond is removed and added respectively. - - Finally, the central atom and the original atom will each receive an unpaired electron, - and the bond between them will decrease in order. 
- """ - - mobile_hydrogens = inchiutil.parse_H_layer(inchi) - - if mobile_hydrogens: - # WIP: only consider the first system of mobile hydrogens: - mobile_hydrogens = mobile_hydrogens[0] - - #find central atom: - central, original, new_partner = util.swap(mobile_hydrogens, [u1, u2]) - - central, original, new_partner = \ - mol.atoms[central - 1], mol.atoms[original - 1], mol.atoms[new_partner - 1] - - # search hydrogen atom and bond - hydrogen = None - for at, bond in original.bonds.iteritems(): - if at.number == 1: - hydrogen = at - mol.removeBond(bond) - break - - new_h_bond = Bond(new_partner, hydrogen, order='S') - mol.addBond(new_h_bond) - - mol.getBond(central, new_partner).decrementOrder() - - central.radicalElectrons += 1 - original.radicalElectrons += 1 - return True - - return False - -def convert_unsaturated_bond_to_triplet(bond): - """ - Decrements the bond if it is unsatured, and adds an unpaired - electron to each of the atoms connected by the bond. - """ - if not bond.isSingle(): - for at in (bond.atom1, bond.atom2): - at.radicalElectrons += 1 - bond.decrementOrder() - return True - return False - -def reset_lone_pairs(mol, p_indices): - """ - Iterates over the atoms of the molecule and - resets the atom's lone pair count to the value stored in the p_indices list, - or to the default value. - - """ - for at in mol.atoms: - index = mol.atoms.index(at) + 1 #1-based index - count = p_indices.count(index) - if count != 0: - at.lonePairs = count - else: - order = at.getBondOrdersForAtom() - at.lonePairs = (elements.PeriodicSystem.valence_electrons[at.symbol] - order - at.radicalElectrons - at.charge) / 2 - -def fix_unsaturated_bond_to_biradical(mol, inchi, u_indices): - """ - Convert an unsaturated bond (double, triple) into a bond - with a lower bond order (single, double), and give an unpaired electron - to each of the neighboring atoms, with indices referring to the 1-based - index in the InChI string. 
- """ - cython.declare(u1=cython.int, u2=cython.int) - cython.declare(atom1=Atom, atom2=Atom) - cython.declare(b=Bond) - - combos = itertools.combinations(u_indices, 2) - - isFixed = False - for u1, u2 in combos: - atom1 = mol.atoms[u1 - 1] # convert to 0-based index for atoms in molecule - atom2 = mol.atoms[u2 - 1] # convert to 0-based index for atoms in molecule - if mol.hasBond(atom1, atom2): - b = mol.getBond(atom1, atom2) - isFixed = convert_unsaturated_bond_to_triplet(b) - if isFixed: - break - - else: - isFixed = fix_mobile_h(mol, inchi, u1, u2) - if isFixed: - break - else: - isFixed = fix_butadiene_path(atom1, atom2) - if isFixed: - break - - if isFixed: - u_indices.remove(u1) - u_indices.remove(u2) - return mol - else: - raise Exception( - 'Could not convert an unsaturated bond into a biradical for the \ - indices {} provided in the molecule: {}.' - .format(u_indices, mol.toAdjacencyList()) - ) - -def isUnsaturated(mol): - """ - Does the molecule have a bond that's not single? - Eg. a bond that is double or triple or benzene - """ - cython.declare(atom1=Atom, - atom2=Atom, - bonds=dict, - bond=Bond) - for atom1 in mol.atoms: - bonds = mol.getBonds(atom1) - for atom2, bond in bonds.iteritems(): - if not bond.isSingle(): - return True - - return False - - -def fix_unsaturated_bond(mol, indices, aug_inchi): - """ - Adds unpaired electrons to the molecule by converting unsaturated bonds into triplets. - - It does so by converting an unsaturated bond into a triplet, and verifying whether - the total number of unpaired electrons matches the multiplicity. - - Finishes when all unsaturated bonds have been tried, or when there are no pairs - of atoms that should be unpaired electrons left. 
- """ - - correct = mol.getRadicalCount() == (mol.multiplicity - 1) - - if not correct and not indices: - raise Exception( 'Cannot correct {} based on {} by converting unsaturated bonds into unpaired electrons...'\ - .format(mol.toAdjacencyList(), aug_inchi)) - - unsaturated = isUnsaturated(mol) - - while not correct and unsaturated and len(indices) > 1: - mol = fix_unsaturated_bond_to_biradical(mol, aug_inchi.inchi, indices) - correct = mol.getRadicalCount() == (mol.multiplicity - 1) - unsaturated = isUnsaturated(mol) diff --git a/rmgpy/molecule/parserTest.py b/rmgpy/molecule/parserTest.py index 17fd266ca5..3856dcff0c 100644 --- a/rmgpy/molecule/parserTest.py +++ b/rmgpy/molecule/parserTest.py @@ -30,10 +30,10 @@ import unittest +from rmgpy.molecule.atomtype import atomTypes +from rmgpy.molecule.inchi import reset_lone_pairs from rmgpy.molecule.molecule import Molecule from rmgpy.molecule.parser import * -from rmgpy.molecule.atomtype import atomTypes -from external.wip import work_in_progress class ParserTest(unittest.TestCase): From 6cf8796263dc02767fb900cbf730ecad6c239520 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 5 Oct 2017 16:59:22 -0400 Subject: [PATCH 03/57] Combine generator and parser into new translator module Also update related imports --- rmgpy/molecule/generator.pxd | 43 --- rmgpy/molecule/generator.py | 257 --------------- rmgpy/molecule/generatorTest.py | 2 +- rmgpy/molecule/inchiparsingTest.py | 2 +- rmgpy/molecule/molecule.py | 23 +- rmgpy/molecule/parserTest.py | 2 +- rmgpy/molecule/{parser.pxd => translator.pxd} | 25 +- rmgpy/molecule/{parser.py => translator.py} | 304 +++++++++++++++--- rmgpy/qm/molecule.py | 1 - setup.py | 3 +- 10 files changed, 292 insertions(+), 370 deletions(-) delete mode 100644 rmgpy/molecule/generator.pxd delete mode 100644 rmgpy/molecule/generator.py rename rmgpy/molecule/{parser.pxd => translator.pxd} (90%) rename rmgpy/molecule/{parser.py => translator.py} (56%) diff --git a/rmgpy/molecule/generator.pxd 
b/rmgpy/molecule/generator.pxd deleted file mode 100644 index fdceb8b8a0..0000000000 --- a/rmgpy/molecule/generator.pxd +++ /dev/null @@ -1,43 +0,0 @@ -############################################################################### -# # -# RMG - Reaction Mechanism Generator # -# # -# Copyright (c) 2002-2018 Prof. William H. Green (whgreen@mit.edu), # -# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # -# # -# Permission is hereby granted, free of charge, to any person obtaining a # -# copy of this software and associated documentation files (the 'Software'), # -# to deal in the Software without restriction, including without limitation # -# the rights to use, copy, modify, merge, publish, distribute, sublicense, # -# and/or sell copies of the Software, and to permit persons to whom the # -# Software is furnished to do so, subject to the following conditions: # -# # -# The above copyright notice and this permission notice shall be included in # -# all copies or substantial portions of the Software. # -# # -# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # -# DEALINGS IN THE SOFTWARE. 
# -# # -############################################################################### - -from .molecule cimport Atom, Molecule - -cpdef dict _known_smiles_molecules -cpdef dict _known_smiles_radicals - -cpdef str toInChI(Molecule mol) - -cpdef str toAugmentedInChI(Molecule mol) - -cpdef str toInChIKey(Molecule mol) - -cpdef str toAugmentedInChIKey(Molecule mol) - -cpdef str toSMARTS(Molecule mol) - -cpdef str toSMILES(Molecule mol) diff --git a/rmgpy/molecule/generator.py b/rmgpy/molecule/generator.py deleted file mode 100644 index 02083d3f0a..0000000000 --- a/rmgpy/molecule/generator.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -############################################################################### -# # -# RMG - Reaction Mechanism Generator # -# # -# Copyright (c) 2002-2018 Prof. William H. Green (whgreen@mit.edu), # -# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # -# # -# Permission is hereby granted, free of charge, to any person obtaining a # -# copy of this software and associated documentation files (the 'Software'), # -# to deal in the Software without restriction, including without limitation # -# the rights to use, copy, modify, merge, publish, distribute, sublicense, # -# and/or sell copies of the Software, and to permit persons to whom the # -# Software is furnished to do so, subject to the following conditions: # -# # -# The above copyright notice and this permission notice shall be included in # -# all copies or substantial portions of the Software. # -# # -# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # -# DEALINGS IN THE SOFTWARE. # -# # -############################################################################### - -# global imports - -import cython - -# local imports -try: - import openbabel -except: - pass -from rdkit import Chem - -from .molecule import Atom -from rmgpy.molecule.converter import toOBMol, toRDKitMol - -import rmgpy.molecule.inchi as inchiutil - -# global variables: - -#: This dictionary is used to shortcut lookups of a molecule's SMILES string from its chemical formula. -_known_smiles_molecules = { - 'N2': 'N#N', - 'CH4': 'C', - 'CH2O': 'C=O', - 'H2O': 'O', - 'C2H6': 'CC', - 'H2': '[H][H]', - 'H2O2': 'OO', - 'C3H8': 'CCC', - 'Ar': '[Ar]', - 'He': '[He]', - 'CH4O': 'CO', - 'CO2': 'O=C=O', - 'CO': '[C-]#[O+]', - 'O2': 'O=O', - 'C': '[C]', # for this to be in the "molecule" list it must be singlet with 2 lone pairs - 'H2S': 'S', - 'N2O': 'N#[N+][O-]', - 'NH3': 'N', - 'O3': '[O-][O+]=O', - 'Cl2': '[Cl][Cl]', - 'ClH': 'Cl', - 'I2': '[I][I]', - 'HI': 'I', - } - -_known_smiles_radicals = { - 'CH3': '[CH3]', - 'HO': '[OH]', - 'C2H5': 'C[CH2]', - 'O': '[O]', - 'S': '[S]', - 'N': '[N]', - 'HO2': '[O]O', - 'CH': '[CH]', - 'CH2': '[CH2]', - 'H': '[H]', - 'C': '[C]', # this, in the radical list, could be triplet or quintet. - 'O2': '[O][O]', - 'S2': '[S][S]', - 'OS': '[S][O]', - 'HS': '[SH]', - 'H2N': '[NH2]', - 'HN': '[NH]', - 'NO': '[N]=O', - 'NO2': 'N(=O)[O]', - 'Cl': '[Cl]', - 'I': '[I]', - } - -def toInChI(mol): - """ - Convert a molecular structure to an InChI string. Uses - `RDKit `_ to perform the conversion. - Perceives aromaticity. - - or - - Convert a molecular structure to an InChI string. Uses - `OpenBabel `_ to perform the conversion. 
- """ - try: - if not Chem.inchi.INCHI_AVAILABLE: - return "RDKitInstalledWithoutInChI" - rdkitmol = toRDKitMol(mol) - return Chem.inchi.MolToInchi(rdkitmol, options='-SNon') - except: - pass - - obmol = toOBMol(mol) - obConversion = openbabel.OBConversion() - obConversion.SetOutFormat('inchi') - obConversion.SetOptions('w', openbabel.OBConversion.OUTOPTIONS) - return obConversion.WriteString(obmol).strip() - - -def toAugmentedInChI(mol): - """ - This function generates the augmented InChI canonical identifier, and that allows for the differentiation - between structures with spin states and multiple unpaired electrons. - - Two additional layers are added to the InChI: - - unpaired electrons layer: the position of the unpaired electrons in the molecule - - """ - - cython.declare( - inchi=str, - ulayer=str, - aug_inchi=str, - ) - inchi = toInChI(mol) - - ulayer, player = inchiutil.create_augmented_layers(mol) - - aug_inchi = inchiutil.compose_aug_inchi(inchi, ulayer, player) - - return aug_inchi - -def toInChIKey(mol): - """ - Convert a molecular structure to an InChI Key string. Uses - `OpenBabel `_ to perform the conversion. - - or - - Convert a molecular structure to an InChI Key string. Uses - `RDKit `_ to perform the conversion. - - Removes check-sum dash (-) and character so that only - the 14 + 9 characters remain. - """ - try: - if not Chem.inchi.INCHI_AVAILABLE: - return "RDKitInstalledWithoutInChI" - inchi = toInChI(mol) - return Chem.inchi.InchiToInchiKey(inchi)[:-2] - except: - pass - - -# for atom in mol.vertices: -# if atom.isNitrogen(): - obmol = toOBMol(mol) - obConversion = openbabel.OBConversion() - obConversion.SetOutFormat('inchi') - obConversion.SetOptions('w', openbabel.OBConversion.OUTOPTIONS) - obConversion.SetOptions('K', openbabel.OBConversion.OUTOPTIONS) - return obConversion.WriteString(obmol).strip()[:-2] - -def toAugmentedInChIKey(mol): - """ - Adds additional layers to the InChIKey, - generating the "augmented" InChIKey. 
- """ - - cython.declare( - key=str, - ulayer=str - ) - - key = toInChIKey(mol) - - ulayer, player = inchiutil.create_augmented_layers(mol) - - return inchiutil.compose_aug_inchi_key(key, ulayer, player) - -def toSMARTS(mol): - """ - Convert a molecular structure to an SMARTS string. Uses - `RDKit `_ to perform the conversion. - Perceives aromaticity and removes Hydrogen atoms. - """ - rdkitmol = toRDKitMol(mol) - - return Chem.MolToSmarts(rdkitmol) - - -def toSMILES(mol): - """ - Convert a molecular structure to an SMILES string. - - If there is a Nitrogen atom present it uses - `OpenBabel `_ to perform the conversion, - and the SMILES may or may not be canonical. - - Otherwise, it uses `RDKit `_ to perform the - conversion, so it will be canonical SMILES. - While converting to an RDMolecule it will perceive aromaticity - and removes Hydrogen atoms. - """ - - # If we're going to have to check the formula anyway, - # we may as well shortcut a few small known molecules. - # Dictionary lookups are O(1) so this should be fast: - # The dictionary is defined at the top of this file. - - cython.declare( - atom=Atom, - # obmol=, - # rdkitmol=, - ) - - try: - if mol.isRadical(): - return _known_smiles_radicals[mol.getFormula()] - else: - return _known_smiles_molecules[mol.getFormula()] - except KeyError: - # It wasn't in the above list. - pass - for atom in mol.vertices: - if atom.isNitrogen(): - obmol = toOBMol(mol) - try: - SMILEwriter = openbabel.OBConversion() - SMILEwriter.SetOutFormat('smi') - SMILEwriter.SetOptions("i",SMILEwriter.OUTOPTIONS) # turn off isomer and stereochemistry information (the @ signs!) 
- except: - pass - return SMILEwriter.WriteString(obmol).strip() - - rdkitmol = toRDKitMol(mol, sanitize=False) - if not mol.isAromatic(): - return Chem.MolToSmiles(rdkitmol, kekuleSmiles=True) - return Chem.MolToSmiles(rdkitmol) - - diff --git a/rmgpy/molecule/generatorTest.py b/rmgpy/molecule/generatorTest.py index 8e8226019a..584738a648 100644 --- a/rmgpy/molecule/generatorTest.py +++ b/rmgpy/molecule/generatorTest.py @@ -35,7 +35,7 @@ from rmgpy.species import Species from .molecule import Atom, Molecule from .inchi import P_LAYER_PREFIX, U_LAYER_PREFIX, create_augmented_layers, has_unexpected_lone_pairs -from .generator import * +from .translator import * from rmgpy.molecule.converter import debugRDKitMol class RDKitTest(unittest.TestCase): diff --git a/rmgpy/molecule/inchiparsingTest.py b/rmgpy/molecule/inchiparsingTest.py index 5fec4cf23b..cfb7868d3c 100644 --- a/rmgpy/molecule/inchiparsingTest.py +++ b/rmgpy/molecule/inchiparsingTest.py @@ -38,7 +38,7 @@ from .util import retrieveElementCount from .inchi import compose_aug_inchi, P_LAYER_PREFIX, P_LAYER_SEPARATOR, U_LAYER_PREFIX, U_LAYER_SEPARATOR -from .parser import * +from .translator import fromAugmentedInChI class InChIParsingTest(unittest.TestCase): diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index f56c80228f..220445caff 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -56,8 +56,7 @@ import rmgpy.constants as constants import rmgpy.molecule.element as elements import rmgpy.molecule.converter as converter -import rmgpy.molecule.parser as parser -import rmgpy.molecule.generator as generator +import rmgpy.molecule.translator as translator import rmgpy.molecule.resonance as resonance from .kekulize import kekulize from .adjlist import Saturator @@ -1319,21 +1318,21 @@ def fromInChI(self, inchistr, backend='try-all'): """ Convert an InChI string `inchistr` to a molecular structure. 
""" - parser.fromInChI(self, inchistr, backend) + translator.fromInChI(self, inchistr, backend) return self def fromAugmentedInChI(self, aug_inchi): """ Convert an Augmented InChI string `aug_inchi` to a molecular structure. """ - parser.fromAugmentedInChI(self, aug_inchi) + translator.fromAugmentedInChI(self, aug_inchi) return self def fromSMILES(self, smilesstr, backend='try-all'): """ Convert a SMILES string `smilesstr` to a molecular structure. """ - parser.fromSMILES(self, smilesstr, backend) + translator.fromSMILES(self, smilesstr, backend) return self def fromSMARTS(self, smartsstr): @@ -1342,7 +1341,7 @@ def fromSMARTS(self, smartsstr): `RDKit `_ to perform the conversion. This Kekulizes everything, removing all aromatic atom types. """ - parser.fromSMARTS(self, smartsstr) + translator.fromSMARTS(self, smartsstr) return self def fromAdjacencyList(self, adjlist, saturateH=False): @@ -1414,7 +1413,7 @@ def toInChI(self): Convert a molecular structure to an InChI string. Uses `OpenBabel `_ to perform the conversion. """ - return generator.toInChI(self) + return translator.toInChI(self) def toAugmentedInChI(self): """ @@ -1423,7 +1422,7 @@ def toAugmentedInChI(self): Separate layer with a forward slash character. """ - return generator.toAugmentedInChI(self) + return translator.toAugmentedInChI(self) def toInChIKey(self): @@ -1439,7 +1438,7 @@ def toInChIKey(self): Removes check-sum dash (-) and character so that only the 14 + 9 characters remain. """ - return generator.toInChIKey(self) + return translator.toInChIKey(self) def toAugmentedInChIKey(self): """ @@ -1449,7 +1448,7 @@ def toAugmentedInChIKey(self): Simply append the multiplicity string, do not separate by a character like forward slash. """ - return generator.toAugmentedInChIKey(self) + return translator.toAugmentedInChIKey(self) def toSMARTS(self): @@ -1458,7 +1457,7 @@ def toSMARTS(self): `RDKit `_ to perform the conversion. Perceives aromaticity and removes Hydrogen atoms. 
""" - return generator.toSMARTS(self) + return translator.toSMARTS(self) def toSMILES(self): @@ -1475,7 +1474,7 @@ def toSMILES(self): and removes Hydrogen atoms. """ - return generator.toSMILES(self) + return translator.toSMILES(self) def toRDKitMol(self, *args, **kwargs): """ diff --git a/rmgpy/molecule/parserTest.py b/rmgpy/molecule/parserTest.py index 3856dcff0c..a19a22dd0f 100644 --- a/rmgpy/molecule/parserTest.py +++ b/rmgpy/molecule/parserTest.py @@ -33,7 +33,7 @@ from rmgpy.molecule.atomtype import atomTypes from rmgpy.molecule.inchi import reset_lone_pairs from rmgpy.molecule.molecule import Molecule -from rmgpy.molecule.parser import * +from rmgpy.molecule.translator import * class ParserTest(unittest.TestCase): diff --git a/rmgpy/molecule/parser.pxd b/rmgpy/molecule/translator.pxd similarity index 90% rename from rmgpy/molecule/parser.pxd rename to rmgpy/molecule/translator.pxd index 28e3f53593..593901741c 100644 --- a/rmgpy/molecule/parser.pxd +++ b/rmgpy/molecule/translator.pxd @@ -25,24 +25,29 @@ # # ############################################################################### -# global imports - +from .molecule cimport Atom, Molecule cimport element as elements cimport inchi as inchiutil -# no .pxd files for these: -#from .util cimport retrieveElementCount, VALENCES, ORDERS -#from .inchi cimport AugmentedInChI, compose_aug_inchi_key, compose_aug_inchi, INCHI_PREFIX, MULT_PREFIX, U_LAYER_PREFIX - -from .molecule cimport Atom, Bond, Molecule - cpdef list BACKENDS cpdef dict INSTALLED_BACKENDS cpdef dict INCHI_LOOKUPS cpdef dict SMILES_LOOKUPS +cpdef dict _known_smiles_molecules +cpdef dict _known_smiles_radicals + +cpdef str toInChI(Molecule mol) + +cpdef str toAugmentedInChI(Molecule mol) -# from functions: +cpdef str toInChIKey(Molecule mol) + +cpdef str toAugmentedInChIKey(Molecule mol) + +cpdef str toSMARTS(Molecule mol) + +cpdef str toSMILES(Molecule mol) cdef Molecule __fromSMILES(Molecule mol, str smilesstr, str backend) @@ -63,6 +68,6 @@ 
cpdef Molecule fromSMILES(Molecule mol, str smilesstr, str backend=*) cpdef Molecule fromSMARTS(Molecule mol, str smartsstr, str backend=*) cpdef Molecule fromAugmentedInChI(Molecule mol, aug_inchi) - + cdef Molecule __lookup(Molecule mol, str identifier, str type_identifier) diff --git a/rmgpy/molecule/parser.py b/rmgpy/molecule/translator.py similarity index 56% rename from rmgpy/molecule/parser.py rename to rmgpy/molecule/translator.py index 310ffc7184..3a73ceba6b 100644 --- a/rmgpy/molecule/parser.py +++ b/rmgpy/molecule/translator.py @@ -28,13 +28,15 @@ # # ############################################################################### -# global imports +""" +This module provides methods for translating to and from common molecule +representation formats, e.g. SMILES, InChI, SMARTS. +""" + import cython -import logging import itertools - -# local imports +import logging # Assume that OB is not installed by default INSTALLED_BACKENDS = { @@ -44,12 +46,12 @@ try: import openbabel INSTALLED_BACKENDS['OB'] = True -except : +except: pass - from rdkit import Chem -from rmgpy.molecule.converter import fromRDKitMol, fromOBMol +from .molecule import Atom +from rmgpy.molecule.converter import toRDKitMol, fromRDKitMol, toOBMol, fromOBMol import rmgpy.molecule.inchi as inchiutil import rmgpy.molecule.util as util @@ -57,42 +59,255 @@ # constants BACKENDS = [ - 'rdkit', - ] + 'rdkit', +] if INSTALLED_BACKENDS['OB']: - BACKENDS.insert(0,'openbabel') + BACKENDS.insert(0, 'openbabel') INCHI_LOOKUPS = { - 'H': '[H]',#RDkit was improperly handling the Hydrogen radical from InChI - 'He': '[He]', - } + 'H': '[H]', # RDkit was improperly handling the Hydrogen radical from InChI + 'He': '[He]', +} SMILES_LOOKUPS = { '[He]': # RDKit improperly handles helium and returns it in a triplet state - """ - He - multiplicity 1 - 1 He u0 p1 - """, + """ + He + multiplicity 1 + 1 He u0 p1 + """, '[Ar]': # RDKit improperly handles argon - """ - Ar - multiplicity 1 - 1 Ar u0 p4 - """, + """ + 
Ar + multiplicity 1 + 1 Ar u0 p4 + """, '[C]': # We'd return the quintuplet without this - """ - multiplicity 3 - 1 C u2 p1 c0 - """, + """ + multiplicity 3 + 1 C u2 p1 c0 + """, '[CH]': # We'd return the quartet without this - """ - multiplicity 2 - 1 C u1 p1 c0 {2,S} - 2 H u0 p0 c0 {1,S} - """, + """ + multiplicity 2 + 1 C u1 p1 c0 {2,S} + 2 H u0 p0 c0 {1,S} + """, +} + +#: This dictionary is used to shortcut lookups of a molecule's SMILES string from its chemical formula. +_known_smiles_molecules = { + 'N2': 'N#N', + 'CH4': 'C', + 'CH2O': 'C=O', + 'H2O': 'O', + 'C2H6': 'CC', + 'H2': '[H][H]', + 'H2O2': 'OO', + 'C3H8': 'CCC', + 'Ar': '[Ar]', + 'He': '[He]', + 'CH4O': 'CO', + 'CO2': 'O=C=O', + 'CO': '[C-]#[O+]', + 'O2': 'O=O', + 'C': '[C]', # for this to be in the "molecule" list it must be singlet with 2 lone pairs + 'H2S': 'S', + 'N2O': 'N#[N+][O-]', + 'NH3': 'N', + 'O3': '[O-][O+]=O', + 'Cl2': '[Cl][Cl]', + 'ClH': 'Cl', + 'I2': '[I][I]', + 'HI': 'I', +} + +_known_smiles_radicals = { + 'CH3': '[CH3]', + 'HO': '[OH]', + 'C2H5': 'C[CH2]', + 'O': '[O]', + 'S': '[S]', + 'N': '[N]', + 'HO2': '[O]O', + 'CH': '[CH]', + 'CH2': '[CH2]', + 'H': '[H]', + 'C': '[C]', # this, in the radical list, could be triplet or quintet. + 'O2': '[O][O]', + 'S2': '[S][S]', + 'OS': '[S][O]', + 'HS': '[SH]', + 'H2N': '[NH2]', + 'HN': '[NH]', + 'NO': '[N]=O', + 'NO2': 'N(=O)[O]', + 'Cl': '[Cl]', + 'I': '[I]', +} + + +def toInChI(mol): + """ + Convert a molecular structure to an InChI string. Uses + `RDKit `_ to perform the conversion. + Perceives aromaticity. + + or + + Convert a molecular structure to an InChI string. Uses + `OpenBabel `_ to perform the conversion. 
+ """ + try: + if not Chem.inchi.INCHI_AVAILABLE: + return "RDKitInstalledWithoutInChI" + rdkitmol = toRDKitMol(mol) + return Chem.inchi.MolToInchi(rdkitmol, options='-SNon') + except: + pass + + obmol = toOBMol(mol) + obConversion = openbabel.OBConversion() + obConversion.SetOutFormat('inchi') + obConversion.SetOptions('w', openbabel.OBConversion.OUTOPTIONS) + return obConversion.WriteString(obmol).strip() + + +def toAugmentedInChI(mol): + """ + This function generates the augmented InChI canonical identifier, and that allows for the differentiation + between structures with spin states and multiple unpaired electrons. + + Two additional layers are added to the InChI: + - unpaired electrons layer: the position of the unpaired electrons in the molecule + + """ + + cython.declare( + inchi=str, + ulayer=str, + aug_inchi=str, + ) + inchi = toInChI(mol) + + ulayer, player = inchiutil.create_augmented_layers(mol) + + aug_inchi = inchiutil.compose_aug_inchi(inchi, ulayer, player) + + return aug_inchi + + +def toInChIKey(mol): + """ + Convert a molecular structure to an InChI Key string. Uses + `OpenBabel `_ to perform the conversion. + + or + + Convert a molecular structure to an InChI Key string. Uses + `RDKit `_ to perform the conversion. + + Removes check-sum dash (-) and character so that only + the 14 + 9 characters remain. + """ + try: + if not Chem.inchi.INCHI_AVAILABLE: + return "RDKitInstalledWithoutInChI" + inchi = toInChI(mol) + return Chem.inchi.InchiToInchiKey(inchi)[:-2] + except: + pass + + # for atom in mol.vertices: + # if atom.isNitrogen(): + obmol = toOBMol(mol) + obConversion = openbabel.OBConversion() + obConversion.SetOutFormat('inchi') + obConversion.SetOptions('w', openbabel.OBConversion.OUTOPTIONS) + obConversion.SetOptions('K', openbabel.OBConversion.OUTOPTIONS) + return obConversion.WriteString(obmol).strip()[:-2] + + +def toAugmentedInChIKey(mol): + """ + Adds additional layers to the InChIKey, + generating the "augmented" InChIKey. 
+ """ + + cython.declare( + key=str, + ulayer=str + ) + + key = toInChIKey(mol) + + ulayer, player = inchiutil.create_augmented_layers(mol) + + return inchiutil.compose_aug_inchi_key(key, ulayer, player) + + +def toSMARTS(mol): + """ + Convert a molecular structure to an SMARTS string. Uses + `RDKit `_ to perform the conversion. + Perceives aromaticity and removes Hydrogen atoms. + """ + rdkitmol = toRDKitMol(mol) + + return Chem.MolToSmarts(rdkitmol) + + +def toSMILES(mol): + """ + Convert a molecular structure to an SMILES string. + + If there is a Nitrogen atom present it uses + `OpenBabel `_ to perform the conversion, + and the SMILES may or may not be canonical. + + Otherwise, it uses `RDKit `_ to perform the + conversion, so it will be canonical SMILES. + While converting to an RDMolecule it will perceive aromaticity + and removes Hydrogen atoms. + """ + + # If we're going to have to check the formula anyway, + # we may as well shortcut a few small known molecules. + # Dictionary lookups are O(1) so this should be fast: + # The dictionary is defined at the top of this file. + + cython.declare( + atom=Atom, + # obmol=, + # rdkitmol=, + ) + + try: + if mol.isRadical(): + return _known_smiles_radicals[mol.getFormula()] + else: + return _known_smiles_molecules[mol.getFormula()] + except KeyError: + # It wasn't in the above list. + pass + for atom in mol.vertices: + if atom.isNitrogen(): + obmol = toOBMol(mol) + try: + SMILEwriter = openbabel.OBConversion() + SMILEwriter.SetOutFormat('smi') + SMILEwriter.SetOptions("i", + SMILEwriter.OUTOPTIONS) # turn off isomer and stereochemistry information (the @ signs!) 
+ except: + pass + return SMILEwriter.WriteString(obmol).strip() + + rdkitmol = toRDKitMol(mol, sanitize=False) + if not mol.isAromatic(): + return Chem.MolToSmiles(rdkitmol, kekuleSmiles=True) + return Chem.MolToSmiles(rdkitmol) + -} def __fromSMILES(mol, smilesstr, backend): """Replace the Molecule `mol` with that given by the SMILES `smilesstr` @@ -109,18 +324,20 @@ def __fromSMILES(mol, smilesstr, backend): else: raise NotImplementedError('Unrecognized backend for SMILES parsing: {0}'.format(backend)) + def __fromInChI(mol, inchistr, backend): """Replace the Molecule `mol` with that given by the InChI `inchistr` using the backend `backend`""" if backend.lower() == 'rdkit': rdkitmol = Chem.inchi.MolFromInchi(inchistr, removeHs=False) mol = fromRDKitMol(mol, rdkitmol) - return mol + return mol elif backend.lower() == 'openbabel': return parse_openbabel(mol, inchistr, 'inchi') else: raise NotImplementedError('Unrecognized backend for InChI parsing: {0}'.format(backend)) + def __fromSMARTS(mol, smartsstr, backend): """Replace the Molecule `mol` with that given by the SMARTS `smartsstr` using the backend `backend`""" @@ -133,15 +350,16 @@ def __fromSMARTS(mol, smartsstr, backend): else: raise NotImplementedError('Unrecognized backend for SMARTS parsing: {0}'.format(backend)) + def __parse(mol, identifier, type_identifier, backend): """ Parses the identifier based on the type of identifier (inchi/smi/sma) and the backend used. - + First, look up the identifier in a dictionary to see if it can be processed this way. - If not in the dictionary, parse it through the specified backed, + If not in the dictionary, parse it through the specified backed, or try all backends. 
""" @@ -151,7 +369,7 @@ def __parse(mol, identifier, type_identifier, backend): mol.updateAtomTypes() return mol - for _backend in (BACKENDS if backend=='try-all' else [backend]): + for _backend in (BACKENDS if backend == 'try-all' else [backend]): if type_identifier == 'smi': __fromSMILES(mol, identifier, _backend) elif type_identifier == 'inchi': @@ -170,10 +388,11 @@ def __parse(mol, identifier, type_identifier, backend): logging.error("Unable to correctly parse %s with backend %s", identifier, backend) raise Exception("Couldn't parse {0}".format(identifier)) + def parse_openbabel(mol, identifier, type_identifier): """Converts the identifier to a Molecule using Openbabel.""" obConversion = openbabel.OBConversion() - obConversion.SetInAndOutFormats(type_identifier, "smi")#SetInFormat(identifier) does not exist. + obConversion.SetInAndOutFormats(type_identifier, "smi") # SetInFormat(identifier) does not exist. obmol = openbabel.OBMol() obConversion.ReadString(obmol, identifier) obmol.AddHydrogens() @@ -199,6 +418,7 @@ def isCorrectlyParsed(mol, identifier): return all(conditions) + def __lookup(mol, identifier, type_identifier): """ Looks up the identifier and parses it the way we think is best. @@ -223,7 +443,7 @@ def __lookup(mol, identifier, type_identifier): def fromInChI(mol, inchistr, backend='try-all'): """ - Convert an InChI string `inchistr` to a molecular structure. Uses + Convert an InChI string `inchistr` to a molecular structure. Uses a user-specified backend for conversion, currently supporting rdkit (default) and openbabel. """ @@ -236,7 +456,6 @@ def fromInChI(mol, inchistr, backend='try-all'): return __parse(mol, inchiutil.INCHI_PREFIX + '/' + inchistr, 'inchi', backend) - def fromAugmentedInChI(mol, aug_inchi): """ Creates a Molecule object from the augmented inchi. 
@@ -265,16 +484,17 @@ def fromAugmentedInChI(mol, aug_inchi): return mol + def fromSMILES(mol, smilesstr, backend='try-all'): """ - Convert a SMILES string `smilesstr` to a molecular structure. Uses + Convert a SMILES string `smilesstr` to a molecular structure. Uses a user-specified backend for conversion, currently supporting rdkit (default) and openbabel. """ return __parse(mol, smilesstr, 'smi', backend) -def fromSMARTS(mol, smartsstr, backend = 'rdkit'): +def fromSMARTS(mol, smartsstr, backend='rdkit'): """ Convert a SMARTS string `smartsstr` to a molecular structure. Uses `RDKit `_ to perform the conversion. diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py index e35485eae9..e79c3dc1b3 100644 --- a/rmgpy/qm/molecule.py +++ b/rmgpy/qm/molecule.py @@ -48,7 +48,6 @@ import symmetry import qmdata from qmdata import parseCCLibData -from rmgpy.molecule import parser class Geometry: """ diff --git a/setup.py b/setup.py index 4d461521e8..61a8dd271e 100644 --- a/setup.py +++ b/setup.py @@ -77,8 +77,7 @@ def getMainExtensionModules(): Extension('rmgpy.molecule.symmetry', ['rmgpy/molecule/symmetry.py'], include_dirs=['.']), Extension('rmgpy.molecule.vf2', ['rmgpy/molecule/vf2.pyx'], include_dirs=['.']), Extension('rmgpy.molecule.converter', ['rmgpy/molecule/converter.py'], include_dirs=['.']), - Extension('rmgpy.molecule.parser', ['rmgpy/molecule/parser.py'], include_dirs=['.']), - Extension('rmgpy.molecule.generator', ['rmgpy/molecule/generator.py'], include_dirs=['.']), + Extension('rmgpy.molecule.translator', ['rmgpy/molecule/translator.py'], include_dirs=['.']), Extension('rmgpy.molecule.util', ['rmgpy/molecule/util.py'], include_dirs=['.']), Extension('rmgpy.molecule.inchi', ['rmgpy/molecule/inchi.py'], include_dirs=['.']), Extension('rmgpy.molecule.resonance', ['rmgpy/molecule/resonance.py'], include_dirs=['.']), From 070e2bb8189990d0de13c8b0dc5dc0539c23eec0 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 5 Oct 2017 17:24:14 -0400 Subject: [PATCH 04/57] 
Move generator/parser unit tests to new locations In converterTest, translatorTest, and inchiTest --- rmgpy/molecule/converterTest.py | 52 ++ rmgpy/molecule/inchiTest.py | 151 ++++- rmgpy/molecule/parserTest.py | 423 -------------- .../{generatorTest.py => translatorTest.py} | 517 ++++++++++++------ 4 files changed, 564 insertions(+), 579 deletions(-) create mode 100644 rmgpy/molecule/converterTest.py delete mode 100644 rmgpy/molecule/parserTest.py rename rmgpy/molecule/{generatorTest.py => translatorTest.py} (60%) diff --git a/rmgpy/molecule/converterTest.py b/rmgpy/molecule/converterTest.py new file mode 100644 index 0000000000..7fcb2efd30 --- /dev/null +++ b/rmgpy/molecule/converterTest.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +# # +# RMG - Reaction Mechanism Generator # +# # +# Copyright (c) 2002-2018 Prof. William H. Green (whgreen@mit.edu), # +# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # +# # +# Permission is hereby granted, free of charge, to any person obtaining a # +# copy of this software and associated documentation files (the 'Software'), # +# to deal in the Software without restriction, including without limitation # +# the rights to use, copy, modify, merge, publish, distribute, sublicense, # +# and/or sell copies of the Software, and to permit persons to whom the # +# Software is furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in # +# all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # +# DEALINGS IN THE SOFTWARE. # +# # +############################################################################### + +""" +This module contains unit test for the converter module. +""" + +import unittest + +from .converter import debugRDKitMol + + +class RDKitTest(unittest.TestCase): + def testDebugger(self): + """ + Test the debugRDKitMol(rdmol) function doesn't crash + + We can't really test it in the unit testing framework, because + that already captures and redirects standard output, and that + conflicts with the function, but this checks it doesn't crash. + """ + import rdkit.Chem + import logging + rdmol = rdkit.Chem.MolFromSmiles('CCC') + message = debugRDKitMol(rdmol, level=logging.INFO) diff --git a/rmgpy/molecule/inchiTest.py b/rmgpy/molecule/inchiTest.py index d055600d73..3b77375f54 100644 --- a/rmgpy/molecule/inchiTest.py +++ b/rmgpy/molecule/inchiTest.py @@ -29,8 +29,9 @@ ############################################################################### import unittest +from external.wip import work_in_progress -from .molecule import Molecule +from .molecule import Atom, Molecule from .inchi import * @@ -182,5 +183,153 @@ def test_inchi_p_layer_zero_lp(self): inchi, u_indices, p_indices = decompose(string) self.assertEquals([(1,0)], p_indices) +class CreateULayerTest(unittest.TestCase): + def testC4H6(self): + """ + Test that 3-butene-1,2-diyl biradical is always resulting in the + same u-layer, regardless of the original order. 
+ """ + + # radical positions 3 and 4 + adjlist1 = """ +1 C u0 p0 c0 {2,D} {5,S} {6,S} +2 C u0 p0 c0 {1,D} {3,S} {7,S} +3 C u1 p0 c0 {2,S} {4,S} {8,S} +4 C u1 p0 c0 {3,S} {9,S} {10,S} +5 H u0 p0 c0 {1,S} +6 H u0 p0 c0 {1,S} +7 H u0 p0 c0 {2,S} +8 H u0 p0 c0 {3,S} +9 H u0 p0 c0 {4,S} +10 H u0 p0 c0 {4,S} + + """ + + # radical positions 1 and 2 + adjlist2 = """ +1 C u1 p0 c0 {2,S} {5,S} {6,S} +2 C u1 p0 c0 {1,S} {3,S} {7,S} +3 C u0 p0 c0 {2,S} {4,D} {8,S} +4 C u0 p0 c0 {3,D} {9,S} {10,S} +5 H u0 p0 c0 {1,S} +6 H u0 p0 c0 {1,S} +7 H u0 p0 c0 {2,S} +8 H u0 p0 c0 {3,S} +9 H u0 p0 c0 {4,S} +10 H u0 p0 c0 {4,S} + """ + + u_layers = [] + for adjlist in [adjlist1, adjlist2]: + mol = Molecule().fromAdjacencyList(adjlist) + u_layer = create_augmented_layers(mol)[0] + u_layers.append(u_layer) + + self.assertEquals(u_layers[0], u_layers[1]) + + +class ExpectedLonePairsTest(unittest.TestCase): + def test_SingletCarbon(self): + mol = Molecule(atoms=[Atom(element='C', lonePairs=1)]) + unexpected = has_unexpected_lone_pairs(mol) + self.assertTrue(unexpected) + + def test_NormalCarbon(self): + mol = Molecule(atoms=[Atom(element='C', lonePairs=0)]) + unexpected = has_unexpected_lone_pairs(mol) + self.assertFalse(unexpected) + + def test_NormalOxygen(self): + mol = Molecule(atoms=[Atom(element='O', lonePairs=2)]) + unexpected = has_unexpected_lone_pairs(mol) + self.assertFalse(unexpected) + + def test_Oxygen_3LP(self): + mol = Molecule(atoms=[Atom(element='O', lonePairs=3)]) + unexpected = has_unexpected_lone_pairs(mol) + self.assertTrue(unexpected) + + +class CreateAugmentedLayersTest(unittest.TestCase): + def test_Methane(self): + smi = 'C' + mol = Molecule().fromSMILES(smi) + ulayer, player = create_augmented_layers(mol) + self.assertTrue(not ulayer) + self.assertTrue(not player) + + def test_SingletMethylene(self): + adjlist = """ +multiplicity 1 +1 C u0 p1 c0 {2,S} {3,S} +2 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +""" + mol = Molecule().fromAdjacencyList(adjlist) + ulayer, player = 
create_augmented_layers(mol) + self.assertTrue(not ulayer) + self.assertEquals(P_LAYER_PREFIX + '1', player) + + def test_TripletMethylene(self): + adjlist = """ +multiplicity 3 +1 C u2 p0 c0 {2,S} {3,S} +2 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +""" + mol = Molecule().fromAdjacencyList(adjlist) + ulayer, player = create_augmented_layers(mol) + self.assertEquals(U_LAYER_PREFIX + '1,1', ulayer) + self.assertTrue(not player) + + @work_in_progress + def test_Nitrate(self): + """ + Test that N atom in the p-layer has correct symbol. + """ + + adjlist = """ +1 O u0 p2 c0 {4,D} +2 O u0 p3 c-1 {4,S} +3 O u0 p3 c-1 {4,S} +4 N u0 p0 c+1 {1,D} {2,S} {3,S} +""" + mol = Molecule().fromAdjacencyList(adjlist) + ulayer, player = create_augmented_layers(mol) + self.assertTrue(not ulayer) + self.assertTrue(player.contains(P_LAYER_PREFIX + '1(0)')) + + +class ResetLonePairsTest(unittest.TestCase): + + def test_Methane(self): + smi = 'C' + mol = Molecule().fromSMILES(smi) + p_indices = [] + + reset_lone_pairs(mol, p_indices) + + for at in mol.atoms: + self.assertEquals(at.lonePairs, 0) + + def test_SingletMethylene(self): + adjlist = """ +multiplicity 1 +1 C u0 p1 c0 {2,S} {3,S} +2 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +""" + mol = Molecule().fromAdjacencyList(adjlist) + p_indices = [1] + + reset_lone_pairs(mol, p_indices) + + for at in mol.atoms: + if at.symbol == 'C': + self.assertEquals(at.lonePairs, 1) + else: + self.assertEquals(at.lonePairs, 0) + + if __name__ == '__main__': unittest.main() diff --git a/rmgpy/molecule/parserTest.py b/rmgpy/molecule/parserTest.py deleted file mode 100644 index a19a22dd0f..0000000000 --- a/rmgpy/molecule/parserTest.py +++ /dev/null @@ -1,423 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -############################################################################### -# # -# RMG - Reaction Mechanism Generator # -# # -# Copyright (c) 2002-2018 Prof. William H. Green (whgreen@mit.edu), # -# Prof. Richard H. 
West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # -# # -# Permission is hereby granted, free of charge, to any person obtaining a # -# copy of this software and associated documentation files (the 'Software'), # -# to deal in the Software without restriction, including without limitation # -# the rights to use, copy, modify, merge, publish, distribute, sublicense, # -# and/or sell copies of the Software, and to permit persons to whom the # -# Software is furnished to do so, subject to the following conditions: # -# # -# The above copyright notice and this permission notice shall be included in # -# all copies or substantial portions of the Software. # -# # -# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # -# DEALINGS IN THE SOFTWARE. 
# -# # -############################################################################### - -import unittest - -from rmgpy.molecule.atomtype import atomTypes -from rmgpy.molecule.inchi import reset_lone_pairs -from rmgpy.molecule.molecule import Molecule -from rmgpy.molecule.translator import * - - -class ParserTest(unittest.TestCase): - - def setUp(self): - - self.methane = Molecule().fromAdjacencyList(""" -1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} -2 H u0 p0 c0 {1,S} -3 H u0 p0 c0 {1,S} -4 H u0 p0 c0 {1,S} -5 H u0 p0 c0 {1,S} -""") - self.methylamine = Molecule().fromAdjacencyList(""" -1 N u0 p1 c0 {2,S} {3,S} {4,S} -2 C u0 p0 c0 {1,S} {5,S} {6,S} {7,S} -3 H u0 p0 c0 {1,S} -4 H u0 p0 c0 {1,S} -5 H u0 p0 c0 {2,S} -6 H u0 p0 c0 {2,S} -7 H u0 p0 c0 {2,S} -""") - - def test_fromAugmentedInChI(self): - aug_inchi = 'InChI=1S/CH4/h1H4' - mol = fromAugmentedInChI(Molecule(), aug_inchi) - self.assertTrue(not mol.InChI == '') - self.assertTrue(mol.isIsomorphic(self.methane)) - - aug_inchi = 'InChI=1/CH4/h1H4' - mol = fromAugmentedInChI(Molecule(), aug_inchi) - self.assertTrue(not mol.InChI == '') - self.assertTrue(mol.isIsomorphic(self.methane)) - - def compare(self, adjlist, smiles): - """ - Compare result of parsing an adjacency list and a SMILES string. - - The adjacency list is presumed correct and this is to test the SMILES parser. 
- """ - mol1 = Molecule().fromAdjacencyList(adjlist) - mol2 = Molecule(SMILES=smiles) - self.assertTrue(mol1.isIsomorphic(mol2), "Parsing SMILES={!r} gave unexpected molecule\n{}".format(smiles, mol2.toAdjacencyList())) - - - def test_fromSMILES(self): - smiles = 'C' - mol = fromSMILES(Molecule(), smiles) - self.assertTrue(mol.isIsomorphic(self.methane)) - - #Test that atomtypes that rely on lone pairs for identity are typed correctly - smiles = 'CN' - mol = fromSMILES(Molecule(), smiles) - self.assertEquals(mol.atoms[1].atomType, atomTypes['N3s'] ) - - # Test N2 - adjlist = ''' - 1 N u0 p1 c0 {2,T} - 2 N u0 p1 c0 {1,T} - ''' - smiles = 'N#N' - self.compare(adjlist, smiles) - - # Test CH4 - adjlist = ''' - 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} - 2 H u0 p0 c0 {1,S} - 3 H u0 p0 c0 {1,S} - 4 H u0 p0 c0 {1,S} - 5 H u0 p0 c0 {1,S} - ''' - smiles = 'C' - self.compare(adjlist, smiles) - - - # Test H2O - adjlist = ''' - 1 O u0 p2 c0 {2,S} {3,S} - 2 H u0 p0 c0 {1,S} - 3 H u0 p0 c0 {1,S} - ''' - smiles = 'O' - self.compare(adjlist, smiles) - - - # Test C2H6 - adjlist = ''' - 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} - 2 C u0 p0 c0 {1,S} {6,S} {7,S} {8,S} - 3 H u0 p0 c0 {1,S} - 4 H u0 p0 c0 {1,S} - 5 H u0 p0 c0 {1,S} - 6 H u0 p0 c0 {2,S} - 7 H u0 p0 c0 {2,S} - 8 H u0 p0 c0 {2,S} - ''' - smiles = 'CC' - self.compare(adjlist, smiles) - - - # Test H2 - adjlist = ''' - 1 H u0 p0 c0 {2,S} - 2 H u0 p0 c0 {1,S} - ''' - smiles = '[H][H]' - self.compare(adjlist, smiles) - - - # Test H2O2 - adjlist = ''' - 1 O u0 p2 c0 {2,S} {3,S} - 2 O u0 p2 c0 {1,S} {4,S} - 3 H u0 p0 c0 {1,S} - 4 H u0 p0 c0 {2,S} - ''' - smiles = 'OO' - self.compare(adjlist, smiles) - - - # Test C3H8 - adjlist = ''' - 1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S} - 2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S} - 3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S} - 4 H u0 p0 c0 {1,S} - 5 H u0 p0 c0 {1,S} - 6 H u0 p0 c0 {1,S} - 7 H u0 p0 c0 {2,S} - 8 H u0 p0 c0 {2,S} - 9 H u0 p0 c0 {3,S} - 10 H u0 p0 c0 {3,S} - 11 H u0 p0 c0 {3,S} - ''' - smiles = 'CCC' 
- self.compare(adjlist, smiles) - - - # Test Ar - adjlist = ''' - 1 Ar u0 p4 c0 - ''' - smiles = '[Ar]' - self.compare(adjlist, smiles) - - - # Test He - adjlist = ''' - 1 He u0 p1 c0 - ''' - smiles = '[He]' - self.compare(adjlist, smiles) - - - # Test CH4O - adjlist = ''' - 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} - 2 O u0 p2 c0 {1,S} {6,S} - 3 H u0 p0 c0 {1,S} - 4 H u0 p0 c0 {1,S} - 5 H u0 p0 c0 {1,S} - 6 H u0 p0 c0 {2,S} - ''' - smiles = 'CO' - self.compare(adjlist, smiles) - - - # Test CO2 - adjlist = ''' - 1 O u0 p2 c0 {2,D} - 2 C u0 p0 c0 {1,D} {3,D} - 3 O u0 p2 c0 {2,D} - ''' - smiles = 'O=C=O' - self.compare(adjlist, smiles) - - - # Test CO - adjlist = ''' - 1 C u0 p1 c-1 {2,T} - 2 O u0 p1 c+1 {1,T} - ''' - smiles = '[C-]#[O+]' - self.compare(adjlist, smiles) - - - # Test C2H4 - adjlist = ''' - 1 C u0 p0 c0 {2,D} {3,S} {4,S} - 2 C u0 p0 c0 {1,D} {5,S} {6,S} - 3 H u0 p0 c0 {1,S} - 4 H u0 p0 c0 {1,S} - 5 H u0 p0 c0 {2,S} - 6 H u0 p0 c0 {2,S} - ''' - smiles = 'C=C' - self.compare(adjlist, smiles) - - - # Test O2 - adjlist = ''' - 1 O u0 p2 c0 {2,D} - 2 O u0 p2 c0 {1,D} - ''' - smiles = 'O=O' - self.compare(adjlist, smiles) - - - # Test CH3 - adjlist = ''' - multiplicity 2 - 1 C u1 p0 c0 {2,S} {3,S} {4,S} - 2 H u0 p0 c0 {1,S} - 3 H u0 p0 c0 {1,S} - 4 H u0 p0 c0 {1,S} - ''' - smiles = '[CH3]' - self.compare(adjlist, smiles) - - - # Test HO - adjlist = ''' - multiplicity 2 - 1 O u1 p2 c0 {2,S} - 2 H u0 p0 c0 {1,S} - ''' - smiles = '[OH]' - self.compare(adjlist, smiles) - - - # Test C2H5 - adjlist = ''' - multiplicity 2 - 1 C u0 p0 c0 {2,S} {5,S} {6,S} {7,S} - 2 C u1 p0 c0 {1,S} {3,S} {4,S} - 3 H u0 p0 c0 {2,S} - 4 H u0 p0 c0 {2,S} - 5 H u0 p0 c0 {1,S} - 6 H u0 p0 c0 {1,S} - 7 H u0 p0 c0 {1,S} - ''' - smiles = 'C[CH2]' - self.compare(adjlist, smiles) - - - # Test O - adjlist = ''' - multiplicity 3 - 1 O u2 p2 c0 - ''' - smiles = '[O]' - self.compare(adjlist, smiles) - - - # Test HO2 - adjlist = ''' - multiplicity 2 - 1 O u1 p2 c0 {2,S} - 2 O u0 p2 c0 {1,S} {3,S} - 3 H 
u0 p0 c0 {2,S} - ''' - smiles = '[O]O' - self.compare(adjlist, smiles) - - - # Test CH, methylidyne. - # Wikipedia reports: - # The ground state is a doublet radical with one unpaired electron, - # and the first two excited states are a quartet radical with three - # unpaired electrons and a doublet radical with one unpaired electron. - # With the quartet radical only 71 kJ above the ground state, a sample - # of methylidyne exists as a mixture of electronic states even at - # room temperature, giving rise to complex reactions. - # - adjlist = ''' - multiplicity 2 - 1 C u1 p1 c0 {2,S} - 2 H u0 p0 c0 {1,S} - ''' - smiles = '[CH]' - self.compare(adjlist, smiles) - - - # Test H - adjlist = ''' - multiplicity 2 - 1 H u1 p0 c0 - ''' - smiles = '[H]' - self.compare(adjlist, smiles) - - - # Test atomic C, which is triplet in ground state - adjlist = ''' - multiplicity 3 - 1 C u2 p1 c0 - ''' - smiles = '[C]' - self.compare(adjlist, smiles) - - - # Test O2 - adjlist = ''' - multiplicity 3 - 1 O u1 p2 c0 {2,S} - 2 O u1 p2 c0 {1,S} - ''' - smiles = '[O][O]' - self.compare(adjlist, smiles) - - - def test_fromInChI(self): - inchi = 'InChI=1S/CH4/h1H4' - mol = fromInChI(Molecule(), inchi) - self.assertTrue(mol.isIsomorphic(self.methane)) - #Test that atomtypes that rely on lone pairs for identity are typed correctly - inchi = "InChI=1S/CH5N/c1-2/h2H2,1H3" - mol = fromInChI(Molecule(), inchi) - self.assertEquals(mol.atoms[1].atomType, atomTypes['N3s'] ) - - #current implementation of SMARTS is broken - def test_fromSMARTS(self): - smarts = '[CH4]' - mol = fromSMARTS(Molecule(), smarts) - self.assertTrue(mol.isIsomorphic(self.methane)) - - def test_toRDKitMol(self): - """ - Test that toRDKitMol returns correct indices and atom mappings. 
- """ - bondOrderDict = {'SINGLE':1,'DOUBLE':2,'TRIPLE':3,'AROMATIC':1.5} - mol = fromSMILES(Molecule(), 'C1CCC=C1C=O') - rdkitmol, rdAtomIndices = mol.toRDKitMol(removeHs=False, returnMapping=True, sanitize=True) - for atom in mol.atoms: - # Check that all atoms are found in mapping - self.assertTrue(atom in rdAtomIndices) - # Check that all bonds are in rdkitmol with correct mapping and order - for connectedAtom, bond in atom.bonds.iteritems(): - bondType = str(rdkitmol.GetBondBetweenAtoms(rdAtomIndices[atom],rdAtomIndices[connectedAtom]).GetBondType()) - rdkitBondOrder = bondOrderDict[bondType] - self.assertEqual(bond.order, rdkitBondOrder) - - # Test for removeHs = True - rdkitmol2, rdAtomIndices2 = mol.toRDKitMol(removeHs=True, returnMapping=True, sanitize=True) - - for atom in mol.atoms: - # Check that all non-hydrogen atoms are found in mapping - if atom.symbol != 'H': - self.assertTrue(atom in rdAtomIndices) - # Check that all bonds connected to non-hydrogen have the correct mapping and order - for connectedAtom, bond in atom.bonds.iteritems(): - if connectedAtom.symbol != 'H': - bondType = str(rdkitmol.GetBondBetweenAtoms(rdAtomIndices[atom],rdAtomIndices[connectedAtom]).GetBondType()) - rdkitBondOrder = bondOrderDict[bondType] - self.assertEqual(bond.order, rdkitBondOrder) - - - -class ResetLonePairsTest(unittest.TestCase): - - def test_Methane(self): - smi = 'C' - mol = Molecule().fromSMILES(smi) - p_indices = [] - - reset_lone_pairs(mol, p_indices) - - for at in mol.atoms: - self.assertEquals(at.lonePairs, 0) - - def test_SingletMethylene(self): - adjlist = """ -multiplicity 1 -1 C u0 p1 c0 {2,S} {3,S} -2 H u0 p0 c0 {1,S} -3 H u0 p0 c0 {1,S} -""" - mol = Molecule().fromAdjacencyList(adjlist) - p_indices = [1] - - reset_lone_pairs(mol, p_indices) - - for at in mol.atoms: - if at.symbol == 'C': - self.assertEquals(at.lonePairs, 1) - else: - self.assertEquals(at.lonePairs, 0) diff --git a/rmgpy/molecule/generatorTest.py b/rmgpy/molecule/translatorTest.py 
similarity index 60% rename from rmgpy/molecule/generatorTest.py rename to rmgpy/molecule/translatorTest.py index 584738a648..53fdea61ea 100644 --- a/rmgpy/molecule/generatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -28,73 +28,19 @@ # # ############################################################################### +""" +This module contains unit test for the translator module. +""" + import re import unittest from external.wip import work_in_progress +from rmgpy.molecule.atomtype import atomTypes +from rmgpy.molecule.molecule import Molecule +from rmgpy.molecule.translator import * from rmgpy.species import Species -from .molecule import Atom, Molecule -from .inchi import P_LAYER_PREFIX, U_LAYER_PREFIX, create_augmented_layers, has_unexpected_lone_pairs -from .translator import * -from rmgpy.molecule.converter import debugRDKitMol - -class RDKitTest(unittest.TestCase): - def testDebugger(self): - """ - Test the debugRDKitMol(rdmol) function doesn't crash - - We can't really test it in the unit testing framework, because - that already captures and redirects standard output, and that - conflicts with the function, but this checks it doesn't crash. - """ - import rdkit.Chem - import logging - rdmol = rdkit.Chem.MolFromSmiles('CCC') - message = debugRDKitMol(rdmol, level=logging.INFO) -class CreateULayerTest(unittest.TestCase): - def testC4H6(self): - """ - Test that 3-butene-1,2-diyl biradical is always resulting in the - same u-layer, regardless of the original order. 
- """ - - # radical positions 3 and 4 - adjlist1 = """ -1 C u0 p0 c0 {2,D} {5,S} {6,S} -2 C u0 p0 c0 {1,D} {3,S} {7,S} -3 C u1 p0 c0 {2,S} {4,S} {8,S} -4 C u1 p0 c0 {3,S} {9,S} {10,S} -5 H u0 p0 c0 {1,S} -6 H u0 p0 c0 {1,S} -7 H u0 p0 c0 {2,S} -8 H u0 p0 c0 {3,S} -9 H u0 p0 c0 {4,S} -10 H u0 p0 c0 {4,S} - - """ - - # radical positions 1 and 2 - adjlist2 = """ -1 C u1 p0 c0 {2,S} {5,S} {6,S} -2 C u1 p0 c0 {1,S} {3,S} {7,S} -3 C u0 p0 c0 {2,S} {4,D} {8,S} -4 C u0 p0 c0 {3,D} {9,S} {10,S} -5 H u0 p0 c0 {1,S} -6 H u0 p0 c0 {1,S} -7 H u0 p0 c0 {2,S} -8 H u0 p0 c0 {3,S} -9 H u0 p0 c0 {4,S} -10 H u0 p0 c0 {4,S} - """ - - u_layers = [] - for adjlist in [adjlist1, adjlist2]: - mol = Molecule().fromAdjacencyList(adjlist) - u_layer = create_augmented_layers(mol)[0] - u_layers.append(u_layer) - - self.assertEquals(u_layers[0], u_layers[1]) class InChIGenerationTest(unittest.TestCase): def compare(self, adjlist, aug_inchi): @@ -149,7 +95,7 @@ def test_C7H8(self): aug_inchi = 'InChI=1S/C7H8/c1-7-5-3-2-4-6-7/h2-6H,1H3/u2,3' self.compare(adjlist, aug_inchi) - + def test_C8H8(self): """Looks a lot like cycloctene but with 1 double bond replaced by a biradical.""" @@ -316,10 +262,10 @@ def test_C7H9(self): 7 C 1 {4,S} {5,S} {6,S} """ - aug_inchi = 'InChI=1S/C7H9/c1-4-7(5-2)6-3/h4-6H,1-3H2/u1,4,7' + aug_inchi = 'InChI=1S/C7H9/c1-4-7(5-2)6-3/h4-6H,1-3H2/u1,2,4' self.compare(adjlist, aug_inchi) - def test_C7H9(self): + def test_C11H16(self): adjlist = """ 1 C 0 {5,D} @@ -364,25 +310,25 @@ def test_singlet_vs_closed_shell(self): closed_shell_aug_inchi = closed_shell.getAugmentedInChI() self.assertTrue(singlet_aug_inchi != closed_shell_aug_inchi) -# def test_C6H5(self): -# """Test that the u-layer of phenyl shows atom 1.""" -# adjlist = """ -# multiplicity 2 -# 1 C u0 p0 c0 {2,D} {3,S} {10,S} -# 2 C u0 p0 c0 {1,D} {5,S} {7,S} -# 3 C u0 p0 c0 {1,S} {6,D} {8,S} -# 4 C u0 p0 c0 {5,D} {6,S} {11,S} -# 5 C u0 p0 c0 {2,S} {4,D} {9,S} -# 6 C u1 p0 c0 {3,D} {4,S} -# 7 H u0 p0 c0 {2,S} -# 8 H u0 
p0 c0 {3,S} -# 9 H u0 p0 c0 {5,S} -# 10 H u0 p0 c0 {1,S} -# 11 H u0 p0 c0 {4,S} -# """ - -# aug_inchi = 'InChI=1S/C6H5/c1-2-4-6-5-3-1/h1-5H/u1' -# self.compare(adjlist, aug_inchi) + # def test_C6H5(self): + # """Test that the u-layer of phenyl shows atom 1.""" + # adjlist = """ + # multiplicity 2 + # 1 C u0 p0 c0 {2,D} {3,S} {10,S} + # 2 C u0 p0 c0 {1,D} {5,S} {7,S} + # 3 C u0 p0 c0 {1,S} {6,D} {8,S} + # 4 C u0 p0 c0 {5,D} {6,S} {11,S} + # 5 C u0 p0 c0 {2,S} {4,D} {9,S} + # 6 C u1 p0 c0 {3,D} {4,S} + # 7 H u0 p0 c0 {2,S} + # 8 H u0 p0 c0 {3,S} + # 9 H u0 p0 c0 {5,S} + # 10 H u0 p0 c0 {1,S} + # 11 H u0 p0 c0 {4,S} + # """ + + # aug_inchi = 'InChI=1S/C6H5/c1-2-4-6-5-3-1/h1-5H/u1' + # self.compare(adjlist, aug_inchi) @work_in_progress def test_C5H6_triplet_singlet(self): @@ -413,76 +359,6 @@ def test_C5H6_triplet_singlet(self): aug_inchi = 'InChI=1S/C5H6/c1-3-5-4-2/h1-3H2/u1,2/lp4,5' self.compare(adjlist, aug_inchi) -class ExpectedLonePairsTest(unittest.TestCase): - - def test_SingletCarbon(self): - mol = Molecule(atoms=[Atom(element='C', lonePairs=1)]) - unexpected = has_unexpected_lone_pairs(mol) - self.assertTrue(unexpected) - - def test_NormalCarbon(self): - mol = Molecule(atoms=[Atom(element='C', lonePairs=0)]) - unexpected = has_unexpected_lone_pairs(mol) - self.assertFalse(unexpected) - - def test_NormalOxygen(self): - mol = Molecule(atoms=[Atom(element='O', lonePairs=2)]) - unexpected = has_unexpected_lone_pairs(mol) - self.assertFalse(unexpected) - - def test_Oxygen_3LP(self): - mol = Molecule(atoms=[Atom(element='O', lonePairs=3)]) - unexpected = has_unexpected_lone_pairs(mol) - self.assertTrue(unexpected) - -class CreateAugmentedLayersTest(unittest.TestCase): - def test_Methane(self): - smi = 'C' - mol = Molecule().fromSMILES(smi) - ulayer, player = create_augmented_layers(mol) - self.assertTrue(not ulayer) - self.assertTrue(not player) - - def test_SingletMethylene(self): - adjlist = """ -multiplicity 1 -1 C u0 p1 c0 {2,S} {3,S} -2 H u0 p0 c0 {1,S} -3 H u0 
p0 c0 {1,S} -""" - mol = Molecule().fromAdjacencyList(adjlist) - ulayer, player = create_augmented_layers(mol) - self.assertTrue(not ulayer) - self.assertEquals(P_LAYER_PREFIX + '1', player) - - def test_TripletMethylene(self): - adjlist = """ -multiplicity 3 -1 C u2 p0 c0 {2,S} {3,S} -2 H u0 p0 c0 {1,S} -3 H u0 p0 c0 {1,S} -""" - mol = Molecule().fromAdjacencyList(adjlist) - ulayer, player = create_augmented_layers(mol) - self.assertEquals(U_LAYER_PREFIX + '1,1', ulayer) - self.assertTrue(not player) - - @work_in_progress - def test_Nitrate(self): - """ - Test that N atom in the p-layer has correct symbol. - """ - - adjlist = """ -1 O u0 p2 c0 {4,D} -2 O u0 p3 c-1 {4,S} -3 O u0 p3 c-1 {4,S} -4 N u0 p0 c+1 {1,D} {2,S} {3,S} -""" - mol = Molecule().fromAdjacencyList(adjlist) - ulayer, player = create_augmented_layers(mol) - self.assertTrue(not ulayer) - self.assertTrue(player.contains(P_LAYER_PREFIX + '1(0)')) class SMILESGenerationTest(unittest.TestCase): def compare(self, adjlist, smiles): @@ -766,5 +642,336 @@ def test_various(self): smiles = '[O][O]' self.compare(adjlist, smiles) -if __name__ == '__main__': - unittest.main() + +class ParsingTest(unittest.TestCase): + def setUp(self): + + self.methane = Molecule().fromAdjacencyList(""" +1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} +2 H u0 p0 c0 {1,S} +3 H u0 p0 c0 {1,S} +4 H u0 p0 c0 {1,S} +5 H u0 p0 c0 {1,S} +""") + self.methylamine = Molecule().fromAdjacencyList(""" +1 N u0 p1 c0 {2,S} {3,S} {4,S} +2 C u0 p0 c0 {1,S} {5,S} {6,S} {7,S} +3 H u0 p0 c0 {1,S} +4 H u0 p0 c0 {1,S} +5 H u0 p0 c0 {2,S} +6 H u0 p0 c0 {2,S} +7 H u0 p0 c0 {2,S} +""") + + def test_fromAugmentedInChI(self): + aug_inchi = 'InChI=1S/CH4/h1H4' + mol = fromAugmentedInChI(Molecule(), aug_inchi) + self.assertTrue(not mol.InChI == '') + self.assertTrue(mol.isIsomorphic(self.methane)) + + aug_inchi = 'InChI=1/CH4/h1H4' + mol = fromAugmentedInChI(Molecule(), aug_inchi) + self.assertTrue(not mol.InChI == '') + self.assertTrue(mol.isIsomorphic(self.methane)) + 
+ def compare(self, adjlist, smiles): + """ + Compare result of parsing an adjacency list and a SMILES string. + + The adjacency list is presumed correct and this is to test the SMILES parser. + """ + mol1 = Molecule().fromAdjacencyList(adjlist) + mol2 = Molecule(SMILES=smiles) + self.assertTrue(mol1.isIsomorphic(mol2), + "Parsing SMILES={!r} gave unexpected molecule\n{}".format(smiles, mol2.toAdjacencyList())) + + def test_fromSMILES(self): + smiles = 'C' + mol = fromSMILES(Molecule(), smiles) + self.assertTrue(mol.isIsomorphic(self.methane)) + + # Test that atomtypes that rely on lone pairs for identity are typed correctly + smiles = 'CN' + mol = fromSMILES(Molecule(), smiles) + self.assertEquals(mol.atoms[1].atomType, atomTypes['N3s']) + + # Test N2 + adjlist = ''' + 1 N u0 p1 c0 {2,T} + 2 N u0 p1 c0 {1,T} + ''' + smiles = 'N#N' + self.compare(adjlist, smiles) + + # Test CH4 + adjlist = ''' + 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} + 2 H u0 p0 c0 {1,S} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {1,S} + 5 H u0 p0 c0 {1,S} + ''' + smiles = 'C' + self.compare(adjlist, smiles) + + # Test H2O + adjlist = ''' + 1 O u0 p2 c0 {2,S} {3,S} + 2 H u0 p0 c0 {1,S} + 3 H u0 p0 c0 {1,S} + ''' + smiles = 'O' + self.compare(adjlist, smiles) + + # Test C2H6 + adjlist = ''' + 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} + 2 C u0 p0 c0 {1,S} {6,S} {7,S} {8,S} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {1,S} + 5 H u0 p0 c0 {1,S} + 6 H u0 p0 c0 {2,S} + 7 H u0 p0 c0 {2,S} + 8 H u0 p0 c0 {2,S} + ''' + smiles = 'CC' + self.compare(adjlist, smiles) + + # Test H2 + adjlist = ''' + 1 H u0 p0 c0 {2,S} + 2 H u0 p0 c0 {1,S} + ''' + smiles = '[H][H]' + self.compare(adjlist, smiles) + + # Test H2O2 + adjlist = ''' + 1 O u0 p2 c0 {2,S} {3,S} + 2 O u0 p2 c0 {1,S} {4,S} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {2,S} + ''' + smiles = 'OO' + self.compare(adjlist, smiles) + + # Test C3H8 + adjlist = ''' + 1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S} + 2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S} + 3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S} + 4 H u0 
p0 c0 {1,S} + 5 H u0 p0 c0 {1,S} + 6 H u0 p0 c0 {1,S} + 7 H u0 p0 c0 {2,S} + 8 H u0 p0 c0 {2,S} + 9 H u0 p0 c0 {3,S} + 10 H u0 p0 c0 {3,S} + 11 H u0 p0 c0 {3,S} + ''' + smiles = 'CCC' + self.compare(adjlist, smiles) + + # Test Ar + adjlist = ''' + 1 Ar u0 p4 c0 + ''' + smiles = '[Ar]' + self.compare(adjlist, smiles) + + # Test He + adjlist = ''' + 1 He u0 p1 c0 + ''' + smiles = '[He]' + self.compare(adjlist, smiles) + + # Test CH4O + adjlist = ''' + 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} + 2 O u0 p2 c0 {1,S} {6,S} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {1,S} + 5 H u0 p0 c0 {1,S} + 6 H u0 p0 c0 {2,S} + ''' + smiles = 'CO' + self.compare(adjlist, smiles) + + # Test CO2 + adjlist = ''' + 1 O u0 p2 c0 {2,D} + 2 C u0 p0 c0 {1,D} {3,D} + 3 O u0 p2 c0 {2,D} + ''' + smiles = 'O=C=O' + self.compare(adjlist, smiles) + + # Test CO + adjlist = ''' + 1 C u0 p1 c-1 {2,T} + 2 O u0 p1 c+1 {1,T} + ''' + smiles = '[C-]#[O+]' + self.compare(adjlist, smiles) + + # Test C2H4 + adjlist = ''' + 1 C u0 p0 c0 {2,D} {3,S} {4,S} + 2 C u0 p0 c0 {1,D} {5,S} {6,S} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {1,S} + 5 H u0 p0 c0 {2,S} + 6 H u0 p0 c0 {2,S} + ''' + smiles = 'C=C' + self.compare(adjlist, smiles) + + # Test O2 + adjlist = ''' + 1 O u0 p2 c0 {2,D} + 2 O u0 p2 c0 {1,D} + ''' + smiles = 'O=O' + self.compare(adjlist, smiles) + + # Test CH3 + adjlist = ''' + multiplicity 2 + 1 C u1 p0 c0 {2,S} {3,S} {4,S} + 2 H u0 p0 c0 {1,S} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {1,S} + ''' + smiles = '[CH3]' + self.compare(adjlist, smiles) + + # Test HO + adjlist = ''' + multiplicity 2 + 1 O u1 p2 c0 {2,S} + 2 H u0 p0 c0 {1,S} + ''' + smiles = '[OH]' + self.compare(adjlist, smiles) + + # Test C2H5 + adjlist = ''' + multiplicity 2 + 1 C u0 p0 c0 {2,S} {5,S} {6,S} {7,S} + 2 C u1 p0 c0 {1,S} {3,S} {4,S} + 3 H u0 p0 c0 {2,S} + 4 H u0 p0 c0 {2,S} + 5 H u0 p0 c0 {1,S} + 6 H u0 p0 c0 {1,S} + 7 H u0 p0 c0 {1,S} + ''' + smiles = 'C[CH2]' + self.compare(adjlist, smiles) + + # Test O + adjlist = ''' + multiplicity 3 + 1 O u2 p2 
c0 + ''' + smiles = '[O]' + self.compare(adjlist, smiles) + + # Test HO2 + adjlist = ''' + multiplicity 2 + 1 O u1 p2 c0 {2,S} + 2 O u0 p2 c0 {1,S} {3,S} + 3 H u0 p0 c0 {2,S} + ''' + smiles = '[O]O' + self.compare(adjlist, smiles) + + # Test CH, methylidyne. + # Wikipedia reports: + # The ground state is a doublet radical with one unpaired electron, + # and the first two excited states are a quartet radical with three + # unpaired electrons and a doublet radical with one unpaired electron. + # With the quartet radical only 71 kJ above the ground state, a sample + # of methylidyne exists as a mixture of electronic states even at + # room temperature, giving rise to complex reactions. + # + adjlist = ''' + multiplicity 2 + 1 C u1 p1 c0 {2,S} + 2 H u0 p0 c0 {1,S} + ''' + smiles = '[CH]' + self.compare(adjlist, smiles) + + # Test H + adjlist = ''' + multiplicity 2 + 1 H u1 p0 c0 + ''' + smiles = '[H]' + self.compare(adjlist, smiles) + + # Test atomic C, which is triplet in ground state + adjlist = ''' + multiplicity 3 + 1 C u2 p1 c0 + ''' + smiles = '[C]' + self.compare(adjlist, smiles) + + # Test O2 + adjlist = ''' + multiplicity 3 + 1 O u1 p2 c0 {2,S} + 2 O u1 p2 c0 {1,S} + ''' + smiles = '[O][O]' + self.compare(adjlist, smiles) + + def test_fromInChI(self): + inchi = 'InChI=1S/CH4/h1H4' + mol = fromInChI(Molecule(), inchi) + self.assertTrue(mol.isIsomorphic(self.methane)) + # Test that atomtypes that rely on lone pairs for identity are typed correctly + inchi = "InChI=1S/CH5N/c1-2/h2H2,1H3" + mol = fromInChI(Molecule(), inchi) + self.assertEquals(mol.atoms[1].atomType, atomTypes['N3s']) + + # current implementation of SMARTS is broken + def test_fromSMARTS(self): + smarts = '[CH4]' + mol = fromSMARTS(Molecule(), smarts) + self.assertTrue(mol.isIsomorphic(self.methane)) + + def test_toRDKitMol(self): + """ + Test that toRDKitMol returns correct indices and atom mappings. 
+ """ + bondOrderDict = {'SINGLE': 1, 'DOUBLE': 2, 'TRIPLE': 3, 'AROMATIC': 1.5} + mol = fromSMILES(Molecule(), 'C1CCC=C1C=O') + rdkitmol, rdAtomIndices = mol.toRDKitMol(removeHs=False, returnMapping=True, sanitize=True) + for atom in mol.atoms: + # Check that all atoms are found in mapping + self.assertTrue(atom in rdAtomIndices) + # Check that all bonds are in rdkitmol with correct mapping and order + for connectedAtom, bond in atom.bonds.iteritems(): + bondType = str( + rdkitmol.GetBondBetweenAtoms(rdAtomIndices[atom], rdAtomIndices[connectedAtom]).GetBondType()) + rdkitBondOrder = bondOrderDict[bondType] + self.assertEqual(bond.order, rdkitBondOrder) + + # Test for removeHs = True + rdkitmol2, rdAtomIndices2 = mol.toRDKitMol(removeHs=True, returnMapping=True, sanitize=True) + + for atom in mol.atoms: + # Check that all non-hydrogen atoms are found in mapping + if atom.symbol != 'H': + self.assertTrue(atom in rdAtomIndices) + # Check that all bonds connected to non-hydrogen have the correct mapping and order + for connectedAtom, bond in atom.bonds.iteritems(): + if connectedAtom.symbol != 'H': + bondType = str(rdkitmol.GetBondBetweenAtoms(rdAtomIndices[atom], + rdAtomIndices[connectedAtom]).GetBondType()) + rdkitBondOrder = bondOrderDict[bondType] + self.assertEqual(bond.order, rdkitBondOrder) From 7092e54571e6d1595adb8d3f4984db636e6bb5fc Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 5 Oct 2017 17:37:01 -0400 Subject: [PATCH 05/57] Move inchiparsingTest contents into translatorTest --- rmgpy/molecule/inchiparsingTest.py | 350 ----------------------------- rmgpy/molecule/translatorTest.py | 307 +++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 350 deletions(-) delete mode 100644 rmgpy/molecule/inchiparsingTest.py diff --git a/rmgpy/molecule/inchiparsingTest.py b/rmgpy/molecule/inchiparsingTest.py deleted file mode 100644 index cfb7868d3c..0000000000 --- a/rmgpy/molecule/inchiparsingTest.py +++ /dev/null @@ -1,350 +0,0 @@ -#!/usr/bin/env python -# 
-*- coding: utf-8 -*- - -############################################################################### -# # -# RMG - Reaction Mechanism Generator # -# # -# Copyright (c) 2002-2018 Prof. William H. Green (whgreen@mit.edu), # -# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # -# # -# Permission is hereby granted, free of charge, to any person obtaining a # -# copy of this software and associated documentation files (the 'Software'), # -# to deal in the Software without restriction, including without limitation # -# the rights to use, copy, modify, merge, publish, distribute, sublicense, # -# and/or sell copies of the Software, and to permit persons to whom the # -# Software is furnished to do so, subject to the following conditions: # -# # -# The above copyright notice and this permission notice shall be included in # -# all copies or substantial portions of the Software. # -# # -# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # -# DEALINGS IN THE SOFTWARE. 
# -# # -############################################################################### - -import re -import unittest -from external.wip import work_in_progress - -from rmgpy.species import Species -from .adjlist import ConsistencyChecker -from .molecule import Molecule -from .util import retrieveElementCount -from .inchi import compose_aug_inchi, P_LAYER_PREFIX, P_LAYER_SEPARATOR, U_LAYER_PREFIX, U_LAYER_SEPARATOR - -from .translator import fromAugmentedInChI - -class InChIParsingTest(unittest.TestCase): - - def compare(self, inchi, u_indices=None, p_indices = None): - u_layer = U_LAYER_PREFIX + U_LAYER_SEPARATOR.join(map(str, u_indices)) if u_indices else None - p_layer = P_LAYER_PREFIX + P_LAYER_SEPARATOR.join(map(str, p_indices)) if p_indices else None - - aug_inchi = compose_aug_inchi(inchi, u_layer, p_layer) - - mol = fromAugmentedInChI(Molecule(), aug_inchi) - ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) - - for at in mol.atoms: - ConsistencyChecker.check_partial_charge(at) - - spc = Species(molecule=[mol]) - spc.generate_resonance_structures() - - ignore_prefix = r"(InChI=1+)(S*)/" - aug_inchi_expected = re.split(ignore_prefix, aug_inchi)[-1] - aug_inchi_computed = re.split(ignore_prefix, spc.getAugmentedInChI())[-1] - self.assertEquals(aug_inchi_expected, aug_inchi_computed) - - return mol - - def test_Ethane_parsing(self): - inchi = 'C2H6/c1-2/h1-2H3' - self.compare(inchi) - - def test_Ethyl_parsing(self): - inchi = 'C2H5/c1-2/h1H2,2H3' - u_indices = [1] - self.compare(inchi, u_indices) - - def test_CH3_parsing(self): - inchi = 'CH3/h1H3' - u_indices = [1] - self.compare(inchi, u_indices) - - def test_H2_parsing(self): - inchi = 'H2/h1H' - self.compare(inchi) - - def test_C2H4_biradical_parsing(self): - inchi = 'C2H4/c1-2/h1-2H2' - u_indices = [1,2] - self.compare(inchi, u_indices) - - def test_C2H3_triradical_parsing(self): - inchi = 'C2H3/c1-2/h1H,2H2' - u_indices = [1,1,2] - self.compare(inchi, u_indices) - - def 
test_C3H6_biradical_parsing(self): - inchi = 'C3H6/c1-3-2/h1-3H2' - u_indices = [1,2] - self.compare(inchi, u_indices) - - def testC2H3O3(self): - adjlist = ''' - 1 C u0 p0 c0 {2,D} {6,S} {7,S} - 2 C u0 p0 c0 {1,D} {3,S} {5,S} - 3 O u1 p2 c0 {2,S} - 4 O u0 p2 c0 {5,S} {8,S} - 5 O u0 p2 c0 {2,S} {4,S} - 6 H u0 p0 c0 {1,S} - 7 H u0 p0 c0 {1,S} - 8 H u0 p0 c0 {4,S} - ''' - inchi = 'C2H3O3/c1-2(3)5-4/h4H,1H2' - u_indices = [1] - self.compare(inchi, u_indices) - - def testC2H2(self): - inchi = 'C2H2/c1-2/h1-2H' - u_indices = [1,2] - mol = self.compare(inchi, u_indices) - - def testO2(self): - inchi = 'O2/c1-2' - u_indices = [1,2] - self.compare(inchi, u_indices) - - def testTriRadicalZwitterMult4(self): - inchi = 'C6H11/c1-3-5-6-4-2/h5H,1-4,6H2' - u_indices = [1,2,5] - self.compare(inchi, u_indices) - - def testTriRadicalDoubleBondMult4(self): - inchi = 'C4H7/c1-3-4-2/h3H,1-2,4H2' - u_indices = [1,2,3] - self.compare(inchi, u_indices) - - def testTriRadical2DoubleBondMult4(self): - inchi = 'C6H9/c1-4-6(3)5-2/h1,4-6H,2H2,3H3' - u_indices = [1, 2, 5] - self.compare(inchi, u_indices) - - def testQuadriRadicalDoubleBondZwitterMult5(self): - inchi = 'C8H14/c1-4-6-7-8(3)5-2/h5-6,8H,1-2,4,7H2,3H3' - u_indices = [1, 2, 5, 6] - mol = self.compare(inchi, u_indices) - - def testQuadri2DoubleBondMult5(self): - inchi = 'C8H14/c1-5-7(3)8(4)6-2/h5-8H,1-2H2,3-4H3' - u_indices = [1, 2, 5, 6] - self.compare(inchi, u_indices) - - def testC5H6O(self): - inchi = 'C5H6O/c6-5-3-1-2-4-5/h1-3,5H,4H2' - u_indices = [2, 6] - self.compare(inchi, u_indices) - - def testC5H6O_2(self): - inchi = 'C5H6O/c1-5-3-2-4-6-5/h2-5H,1H2' - u_indices = [1,3] - self.compare(inchi, u_indices) - - def testC5H6O_3(self): - inchi = 'C5H6O/c1-5-3-2-4-6-5/h2-5H,1H2' - u_indices = [1,2,3,4] - self.compare(inchi, u_indices) - - @work_in_progress - def testCO(self): - inchi = 'CO/c1-2' - p_indices = [1,2] - mol = self.compare(inchi, [], p_indices) - - assert mol.atoms[1].lonePairs == 1 # Oxygen - - assert 
mol.atoms[0].charge == -1 - assert mol.atoms[1].charge == +1 - - def testTripletMethylene(self): - inchi = 'CH2/h1H2' - - u_indices = [1,1] - self.compare(inchi, u_indices) - - def testSingletMethylene(self): - inchi = 'CH2/h1H2' - - p_indices = [1] - self.compare(inchi, u_indices=[], p_indices=p_indices) - - - def testC4H6O(self): - inchi = 'C4H6O/c1-2-3-4-5/h2H,3H2,1H3' - u_indices = [2,4] - mol = self.compare(inchi, u_indices) - for at in mol.atoms: - if at.isOxygen(): - self.assertTrue(at.lonePairs == 2) - - def testC6H6(self): - inchi = 'C6H6/c1-3-5-6-4-2/h1,6H,2,5H2' - u_indices = [1, 3] - mol = self.compare(inchi, u_indices) - - def testC4H6O_2(self): - inchi = 'C4H6O/c1-2-3-4-5/h2,4H,1,3H2' - u_indices = [4, 5] - mol = self.compare(inchi, u_indices) - - def test_CO_triplet(self): - - adjlist = """ - multiplicity 3 - 1 C u2 p0 c0 {2,D} - 2 O u0 p2 c0 {1,D} - - """ - spc = Species(molecule=[Molecule().fromAdjacencyList(adjlist)]) - aug_inchi = spc.getAugmentedInChI() - - self.assertEqual(Species(molecule=[Molecule().fromAugmentedInChI(aug_inchi)]).isIsomorphic(spc), True) - - def test_CCCO_triplet(self): - - adjlist = """ - multiplicity 3 -1 C u0 p0 c0 {2,D} {5,S} {6,S} -2 C u0 p0 c0 {1,D} {3,S} {7,S} -3 C u1 p0 c0 {2,S} {4,S} {8,S} -4 O u1 p2 c0 {3,S} -5 H u0 p0 c0 {1,S} -6 H u0 p0 c0 {1,S} -7 H u0 p0 c0 {2,S} -8 H u0 p0 c0 {3,S} - """ - mol = Molecule().fromAdjacencyList(adjlist) - - spc = Species(molecule=[mol]) - spc.generate_resonance_structures() - aug_inchi = spc.getAugmentedInChI() - - self.assertEqual(Species(molecule=[Molecule().fromAugmentedInChI(aug_inchi)]).isIsomorphic(spc), True) - - def testC3H4(self): - inchi = 'C3H4/c1-3-2/h1,3H,2H2' - u_indices = [1, 1] - mol = self.compare(inchi, u_indices) - - def test_C6H8O2(self): - inchi = 'C6H8O2/c1-3-5(7)6(8)4-2/h3-6H,1-2H2' - u_indices = [7,8] - self.compare(inchi, u_indices) - - def test_C3H3O3(self): - inchi = 'C3H3O3/c1-2-5-3-6-4/h1-3H' - u_indices = [1,3,4] - self.compare(inchi, u_indices) - - 
def test_CH2O2(self): - inchi = 'CH2O2/c2-1-3/h1H,(H,2,3)' - u_indices = [1,2] - self.compare(inchi, u_indices) - - def test_C2H2O3(self): - inchi = 'C2H2O3/c1-5-2(3)4/h1H2' - u_indices = [1,3] - self.compare(inchi, u_indices) - - def test_C3H4O4(self): - inchi = 'C3H4O4/c4-3(5)1-2-7-6/h1-3,6H' - u_indices = [4,5] - self.compare(inchi, u_indices) - - def test_C6H6O4(self): - inchi = 'InChI=1S/C6H6O4/c1-2-4-9-6(7)3-5-10-8/h2-3H,1,5H2' - u_indices = [1,3,4,8] - self.compare(inchi, u_indices) - - def test_C3H2O3(self): - - inchi = 'InChI=1S/C3H2O3/c1-2-3(4)6-5/h1H2' - u_indices = [2,5] - - self.compare(inchi, u_indices) - - def test_C6H6O6(self): - inchi = 'C6H6O6/c7-6(2-5-12-9)10-3-1-4-11-8/h1,7H,4-5H2' - u_indices = [2,3,8,9] - self.compare(inchi, u_indices) - - def test_C3H2(self): - inchi = 'C3H2/c1-3-2/h1-2H' - u_indices = [1,1] - self.compare(inchi, u_indices) - - def test_C3H4(self): - inchi = 'InChI=1S/C3H4/c1-3-2/h1,3H,2H2' - u_indices = [1,1] - self.compare(inchi, u_indices) - - def test_C6H8(self): - inchi = 'InChI=1S/C6H8/c1-3-5-6-4-2/h1,4H,2,5-6H2' - u_indices = [1,1,3,3] - self.compare(inchi, u_indices) - - def test_C6H10(self): - inchi = 'InChI=1S/C6H10/c1-3-5-6-4-2/h3-4H,1-2,5-6H2' - u_indices = [1,3] - self.compare(inchi, u_indices) - - def test_ammonia(self): - inchi = 'InChI=1S/H3N/h1H3' - self.compare(inchi) - - @work_in_progress - def test_ammonium(self): - """ - has same inchi as ammonia but gets a proton layer: /p+1 - """ - inchi = 'InChI=1S/H3N/h1H3/p+1' - self.compare(inchi) - - def test_H2S(self): - inchi = 'InChI=1S/H2S/h1H2' - self.compare(inchi) - - def test_pyridine(self): - inchi = 'InChI=1S/C5H5N/c1-2-4-6-5-3-1/h1-5H' - self.compare(inchi) - - def test_pyrimidine(self): - inchi = 'InChI=1S/C4H4N2/c1-2-5-4-6-3-1/h1-4H' - self.compare(inchi) - - @work_in_progress - def test_nitrate(self): - """ - - Mobile H spread over oxygen 2, 3, 4 - - Negative charge (3 lone pairs) spread out over oxygen 2, 3, 4 - - Nitrogen 1 positively charged - - 
""" - inchi = 'InChI=1S/HNO3/c2-1(3)4/h(H,2,3,4)' - p_indices = [-1, 3, 3, 3]#??? - mol = self.compare(inchi, [], p_indices) - - def test_NO(self): - inchi = 'InChI=1S/NO/c1-2' - u_indices = [1] - mol = self.compare(inchi, u_indices) - -if __name__ == '__main__': - unittest.main() diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index 53fdea61ea..1c26836ce4 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -36,7 +36,9 @@ import unittest from external.wip import work_in_progress +from rmgpy.molecule.adjlist import ConsistencyChecker from rmgpy.molecule.atomtype import atomTypes +from rmgpy.molecule.inchi import compose_aug_inchi, P_LAYER_PREFIX, P_LAYER_SEPARATOR, U_LAYER_PREFIX, U_LAYER_SEPARATOR from rmgpy.molecule.molecule import Molecule from rmgpy.molecule.translator import * from rmgpy.species import Species @@ -975,3 +977,308 @@ def test_toRDKitMol(self): rdAtomIndices[connectedAtom]).GetBondType()) rdkitBondOrder = bondOrderDict[bondType] self.assertEqual(bond.order, rdkitBondOrder) + + +class InChIParsingTest(unittest.TestCase): + def compare(self, inchi, u_indices=None, p_indices=None): + u_layer = U_LAYER_PREFIX + U_LAYER_SEPARATOR.join(map(str, u_indices)) if u_indices else None + p_layer = P_LAYER_PREFIX + P_LAYER_SEPARATOR.join(map(str, p_indices)) if p_indices else None + + aug_inchi = compose_aug_inchi(inchi, u_layer, p_layer) + + mol = fromAugmentedInChI(Molecule(), aug_inchi) + ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) + + for at in mol.atoms: + ConsistencyChecker.check_partial_charge(at) + + spc = Species(molecule=[mol]) + spc.generate_resonance_structures() + + ignore_prefix = r"(InChI=1+)(S*)/" + aug_inchi_expected = re.split(ignore_prefix, aug_inchi)[-1] + aug_inchi_computed = re.split(ignore_prefix, spc.getAugmentedInChI())[-1] + self.assertEquals(aug_inchi_expected, aug_inchi_computed) + + return mol + + def test_Ethane_parsing(self): + 
inchi = 'C2H6/c1-2/h1-2H3' + self.compare(inchi) + + def test_Ethyl_parsing(self): + inchi = 'C2H5/c1-2/h1H2,2H3' + u_indices = [1] + self.compare(inchi, u_indices) + + def test_CH3_parsing(self): + inchi = 'CH3/h1H3' + u_indices = [1] + self.compare(inchi, u_indices) + + def test_H2_parsing(self): + inchi = 'H2/h1H' + self.compare(inchi) + + def test_C2H4_biradical_parsing(self): + inchi = 'C2H4/c1-2/h1-2H2' + u_indices = [1, 2] + self.compare(inchi, u_indices) + + def test_C2H3_triradical_parsing(self): + inchi = 'C2H3/c1-2/h1H,2H2' + u_indices = [1, 1, 2] + self.compare(inchi, u_indices) + + def test_C3H6_biradical_parsing(self): + inchi = 'C3H6/c1-3-2/h1-3H2' + u_indices = [1, 2] + self.compare(inchi, u_indices) + + def testC2H3O3(self): + adjlist = ''' + 1 C u0 p0 c0 {2,D} {6,S} {7,S} + 2 C u0 p0 c0 {1,D} {3,S} {5,S} + 3 O u1 p2 c0 {2,S} + 4 O u0 p2 c0 {5,S} {8,S} + 5 O u0 p2 c0 {2,S} {4,S} + 6 H u0 p0 c0 {1,S} + 7 H u0 p0 c0 {1,S} + 8 H u0 p0 c0 {4,S} + ''' + inchi = 'C2H3O3/c1-2(3)5-4/h4H,1H2' + u_indices = [1] + self.compare(inchi, u_indices) + + def testC2H2(self): + inchi = 'C2H2/c1-2/h1-2H' + u_indices = [1, 2] + mol = self.compare(inchi, u_indices) + + def testO2(self): + inchi = 'O2/c1-2' + u_indices = [1, 2] + self.compare(inchi, u_indices) + + def testTriRadicalZwitterMult4(self): + inchi = 'C6H11/c1-3-5-6-4-2/h5H,1-4,6H2' + u_indices = [1, 2, 5] + self.compare(inchi, u_indices) + + def testTriRadicalDoubleBondMult4(self): + inchi = 'C4H7/c1-3-4-2/h3H,1-2,4H2' + u_indices = [1, 2, 3] + self.compare(inchi, u_indices) + + def testTriRadical2DoubleBondMult4(self): + inchi = 'C6H9/c1-4-6(3)5-2/h1,4-6H,2H2,3H3' + u_indices = [1, 2, 5] + self.compare(inchi, u_indices) + + def testQuadriRadicalDoubleBondZwitterMult5(self): + inchi = 'C8H14/c1-4-6-7-8(3)5-2/h5-6,8H,1-2,4,7H2,3H3' + u_indices = [1, 2, 5, 6] + mol = self.compare(inchi, u_indices) + + def testQuadri2DoubleBondMult5(self): + inchi = 'C8H14/c1-5-7(3)8(4)6-2/h5-8H,1-2H2,3-4H3' + u_indices = [1, 2, 
5, 6] + self.compare(inchi, u_indices) + + def testC5H6O(self): + inchi = 'C5H6O/c6-5-3-1-2-4-5/h1-3,5H,4H2' + u_indices = [2, 6] + self.compare(inchi, u_indices) + + def testC5H6O_2(self): + inchi = 'C5H6O/c1-5-3-2-4-6-5/h2-5H,1H2' + u_indices = [1, 3] + self.compare(inchi, u_indices) + + def testC5H6O_3(self): + inchi = 'C5H6O/c1-5-3-2-4-6-5/h2-5H,1H2' + u_indices = [1, 2, 3, 4] + self.compare(inchi, u_indices) + + @work_in_progress + def testCO(self): + inchi = 'CO/c1-2' + p_indices = [1, 2] + mol = self.compare(inchi, [], p_indices) + + assert mol.atoms[1].lonePairs == 1 # Oxygen + + assert mol.atoms[0].charge == -1 + assert mol.atoms[1].charge == +1 + + def testTripletMethylene(self): + inchi = 'CH2/h1H2' + + u_indices = [1, 1] + self.compare(inchi, u_indices) + + def testSingletMethylene(self): + inchi = 'CH2/h1H2' + + p_indices = [1] + self.compare(inchi, u_indices=[], p_indices=p_indices) + + def testC4H6O(self): + inchi = 'C4H6O/c1-2-3-4-5/h2H,3H2,1H3' + u_indices = [2, 4] + mol = self.compare(inchi, u_indices) + for at in mol.atoms: + if at.isOxygen(): + self.assertTrue(at.lonePairs == 2) + + def testC6H6(self): + inchi = 'C6H6/c1-3-5-6-4-2/h1,6H,2,5H2' + u_indices = [1, 3] + mol = self.compare(inchi, u_indices) + + def testC4H6O_2(self): + inchi = 'C4H6O/c1-2-3-4-5/h2,4H,1,3H2' + u_indices = [4, 5] + mol = self.compare(inchi, u_indices) + + def test_CO_triplet(self): + + adjlist = """ + multiplicity 3 + 1 C u2 p0 c0 {2,D} + 2 O u0 p2 c0 {1,D} + + """ + spc = Species(molecule=[Molecule().fromAdjacencyList(adjlist)]) + aug_inchi = spc.getAugmentedInChI() + + self.assertEqual(Species(molecule=[Molecule().fromAugmentedInChI(aug_inchi)]).isIsomorphic(spc), True) + + def test_CCCO_triplet(self): + + adjlist = """ + multiplicity 3 +1 C u0 p0 c0 {2,D} {5,S} {6,S} +2 C u0 p0 c0 {1,D} {3,S} {7,S} +3 C u1 p0 c0 {2,S} {4,S} {8,S} +4 O u1 p2 c0 {3,S} +5 H u0 p0 c0 {1,S} +6 H u0 p0 c0 {1,S} +7 H u0 p0 c0 {2,S} +8 H u0 p0 c0 {3,S} + """ + mol = 
Molecule().fromAdjacencyList(adjlist) + + spc = Species(molecule=[mol]) + spc.generate_resonance_structures() + aug_inchi = spc.getAugmentedInChI() + + self.assertEqual(Species(molecule=[Molecule().fromAugmentedInChI(aug_inchi)]).isIsomorphic(spc), True) + + def testC3H4(self): + inchi = 'C3H4/c1-3-2/h1,3H,2H2' + u_indices = [1, 1] + mol = self.compare(inchi, u_indices) + + def test_C6H8O2(self): + inchi = 'C6H8O2/c1-3-5(7)6(8)4-2/h3-6H,1-2H2' + u_indices = [7, 8] + self.compare(inchi, u_indices) + + def test_C3H3O3(self): + inchi = 'C3H3O3/c1-2-5-3-6-4/h1-3H' + u_indices = [1, 3, 4] + self.compare(inchi, u_indices) + + def test_CH2O2(self): + inchi = 'CH2O2/c2-1-3/h1H,(H,2,3)' + u_indices = [1, 2] + self.compare(inchi, u_indices) + + def test_C2H2O3(self): + inchi = 'C2H2O3/c1-5-2(3)4/h1H2' + u_indices = [1, 3] + self.compare(inchi, u_indices) + + def test_C3H4O4(self): + inchi = 'C3H4O4/c4-3(5)1-2-7-6/h1-3,6H' + u_indices = [4, 5] + self.compare(inchi, u_indices) + + def test_C6H6O4(self): + inchi = 'InChI=1S/C6H6O4/c1-2-4-9-6(7)3-5-10-8/h2-3H,1,5H2' + u_indices = [1, 3, 4, 8] + self.compare(inchi, u_indices) + + def test_C3H2O3(self): + + inchi = 'InChI=1S/C3H2O3/c1-2-3(4)6-5/h1H2' + u_indices = [2, 5] + + self.compare(inchi, u_indices) + + def test_C6H6O6(self): + inchi = 'C6H6O6/c7-6(2-5-12-9)10-3-1-4-11-8/h1,7H,4-5H2' + u_indices = [2, 3, 8, 9] + self.compare(inchi, u_indices) + + def test_C3H2(self): + inchi = 'C3H2/c1-3-2/h1-2H' + u_indices = [1, 1] + self.compare(inchi, u_indices) + + def test_C3H4(self): + inchi = 'InChI=1S/C3H4/c1-3-2/h1,3H,2H2' + u_indices = [1, 1] + self.compare(inchi, u_indices) + + def test_C6H8(self): + inchi = 'InChI=1S/C6H8/c1-3-5-6-4-2/h1,4H,2,5-6H2' + u_indices = [1, 1, 3, 3] + self.compare(inchi, u_indices) + + def test_C6H10(self): + inchi = 'InChI=1S/C6H10/c1-3-5-6-4-2/h3-4H,1-2,5-6H2' + u_indices = [1, 3] + self.compare(inchi, u_indices) + + def test_ammonia(self): + inchi = 'InChI=1S/H3N/h1H3' + self.compare(inchi) + + 
@work_in_progress + def test_ammonium(self): + """ + has same inchi as ammonia but gets a proton layer: /p+1 + """ + inchi = 'InChI=1S/H3N/h1H3/p+1' + self.compare(inchi) + + def test_H2S(self): + inchi = 'InChI=1S/H2S/h1H2' + self.compare(inchi) + + def test_pyridine(self): + inchi = 'InChI=1S/C5H5N/c1-2-4-6-5-3-1/h1-5H' + self.compare(inchi) + + def test_pyrimidine(self): + inchi = 'InChI=1S/C4H4N2/c1-2-5-4-6-3-1/h1-4H' + self.compare(inchi) + + @work_in_progress + def test_nitrate(self): + """ + - Mobile H spread over oxygen 2, 3, 4 + - Negative charge (3 lone pairs) spread out over oxygen 2, 3, 4 + - Nitrogen 1 positively charged + + """ + inchi = 'InChI=1S/HNO3/c2-1(3)4/h(H,2,3,4)' + p_indices = [-1, 3, 3, 3] # ??? + mol = self.compare(inchi, [], p_indices) + + def test_NO(self): + inchi = 'InChI=1S/NO/c1-2' + u_indices = [1] + mol = self.compare(inchi, u_indices) From 142ac1ad8e8f0f4d3300f8787f5511a0139ca5b5 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Fri, 6 Oct 2017 16:31:43 -0400 Subject: [PATCH 06/57] Automatically format inchi.py using pycharm No functional changes --- rmgpy/molecule/inchi.py | 143 ++++++++++++++++++++-------------------- 1 file changed, 71 insertions(+), 72 deletions(-) diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index 922a30cd19..eb0c5f7e93 100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -28,28 +28,26 @@ # # ############################################################################### -import cython -import re import itertools +import re +import cython from rdkit import Chem +import rmgpy.molecule.element as elements +import rmgpy.molecule.pathfinder as pathfinder +import rmgpy.molecule.resonance as resonance from rmgpy.exceptions import InchiException - -# search for (*) PARENTHESES from rmgpy.molecule.adjlist import ConsistencyChecker -from rmgpy.molecule.molecule import Atom, Bond, Molecule from rmgpy.molecule.converter import toRDKitMol +from rmgpy.molecule.molecule import Atom, Bond, 
Molecule from rmgpy.molecule.util import agglomerate, partition, generate_combo, swap -import rmgpy.molecule.resonance as resonance -import rmgpy.molecule.element as elements -import rmgpy.molecule.pathfinder as pathfinder -PARENTHESES = re.compile( r'\((.[^\(\)]*)\)') +# search for (*) PARENTHESES +PARENTHESES = re.compile(r'\((.[^\(\)]*)\)') INCHI_PREFIX = 'InChI=1' - """ The prefix with the information on the distribution of unpaired electrons across the atoms. @@ -67,7 +65,6 @@ """The separator that separates the indices of the atoms that bear unpaired electrons.""" U_LAYER_SEPARATOR = ',' - """ The prefix with the information on the distribution of the atoms with an unexpected number of lone pairs. @@ -89,6 +86,7 @@ ulayer_pattern = re.compile(U_LAYER_PREFIX + r'(.*)') player_pattern = re.compile(P_LAYER_PREFIX + r'(.*)') + def decompose(string): """ Converts an augmented inchi into @@ -104,10 +102,10 @@ def decompose(string): """ cython.declare( - inchi=str, - u_indices=list, - p_indices=list, - ) + inchi=str, + u_indices=list, + p_indices=list, + ) if U_LAYER_PREFIX in string: inchi = string.split(U_LAYER_PREFIX)[0] @@ -127,12 +125,13 @@ def decompose(string): for index in dummy: if '(0)' in str(index): index = int(str(index).split('(0)')[0]) - p_indices.append((index,0)) + p_indices.append((index, 0)) else: p_indices.append(int(index)) return inchi, u_indices, p_indices + def ignore_prefix(string): """ Splits off the 'InChI=1S' or 'InChI=1' layer of an InChI @@ -144,6 +143,7 @@ def ignore_prefix(string): return re.split(r"(InChI=1+)(S*)/", string)[-1] + def compose_aug_inchi(inchi, ulayer=None, player=None): """ Composes an augmented InChI by concatenating the different pieces @@ -152,17 +152,18 @@ def compose_aug_inchi(inchi, ulayer=None, player=None): InChI=1S/XXXX.../c.../h.../ux,x,/... 
""" cython.declare( - temp=str, - ) + temp=str, + ) aug_inchi = INCHI_PREFIX + '/' if not INCHI_PREFIX in inchi else '' aug_inchi += inchi - + for layer in filter(None, [ulayer, player]): aug_inchi += layer return aug_inchi + def compose_aug_inchi_key(inchi_key, ulayer=None, player=None): """ Composes an augmented InChI Key by concatenating the different pieces @@ -176,9 +177,10 @@ def compose_aug_inchi_key(inchi_key, ulayer=None, player=None): aug_inchi_key = inchi_key for layer in filter(None, [ulayer, player]): - aug_inchi_key += '-' + layer[1:]#cut off the '/' + aug_inchi_key += '-' + layer[1:] # cut off the '/' + + return aug_inchi_key - return aug_inchi_key def parse_H_layer(inchi): """ @@ -200,14 +202,13 @@ def parse_H_layer(inchi): """ cython.declare( - pieces=list, - h_layer=str, - piece=str, - couples=list, - match=str, - mobile_h_atoms=list, - ) - + pieces=list, + h_layer=str, + piece=str, + couples=list, + match=str, + mobile_h_atoms=list, + ) pieces = inchi.split('/') h_layer = None @@ -215,7 +216,7 @@ def parse_H_layer(inchi): if piece.startswith('h'): h_layer = piece break - else: + else: raise Exception('Could not find the hydrogen layer in the inchi: {}'.format(inchi)) couples = [] @@ -225,6 +226,7 @@ def parse_H_layer(inchi): return couples + def parse_E_layer(auxinfo): """ Converts the layer with equivalence information (E-layer) @@ -249,19 +251,19 @@ def parse_E_layer(auxinfo): """ cython.declare( - pieces=list, - e_layer=str, - piece=str, - equivalent_atoms=list, - atomtuple=str, - indices=list, - ) + pieces=list, + e_layer=str, + piece=str, + equivalent_atoms=list, + atomtuple=str, + indices=list, + ) pieces = auxinfo.split('/') e_layer = None for piece in pieces: if piece.startswith('E'): - e_layer = piece[2:]#cut off /E: + e_layer = piece[2:] # cut off /E: break else: return [] @@ -273,7 +275,7 @@ def parse_E_layer(auxinfo): return equivalent_atoms - + def parse_N_layer(auxinfo): """ Parses the layer with atom ordering information (N-layer) 
@@ -289,24 +291,23 @@ def parse_N_layer(auxinfo): /N:4,3,2,1 The original number of an atom with identification number n is given as the - n-th member of this list for a component; the lists are separated with “;”. + n-th member of this list for a component; the lists are separated with ";". Raises an exception when the N-layer could not be found. """ - cython.declare( - pieces=list, - atom_numbers=str, - piece=str, - indices=list, - ) + pieces=list, + atom_numbers=str, + piece=str, + indices=list, + ) pieces = auxinfo.split('/') atom_numbers = None for piece in pieces: if piece.startswith('N'): - atom_numbers = piece[2:]#cut off N: + atom_numbers = piece[2:] # cut off N: break else: raise Exception('Could not find the N-layer in the auxiliary info: {}'.format(auxinfo)) @@ -335,27 +336,26 @@ def create_U_layer(mol, auxinfo): """ cython.declare( - minmol=Molecule, - #rdkitmol=, - u_layer=list, - i=int, - at=Atom, - equivalent_atoms=list, - ) + minmol=Molecule, + # rdkitmol=, + u_layer=list, + i=int, + at=Atom, + equivalent_atoms=list, + ) if mol.getRadicalCount() == 0: return None elif mol.getFormula() == 'H': return U_LAYER_PREFIX + '1' - # find the resonance isomer with the lowest u index: minmol = generate_minimum_resonance_isomer(mol) # create preliminary u-layer: u_layer = [] for i, at in enumerate(minmol.atoms): - u_layer.extend([i+1] * at.radicalElectrons) + u_layer.extend([i + 1] * at.radicalElectrons) # extract equivalent atom pairs from E-layer of auxiliary info: equivalent_atoms = parse_E_layer(auxinfo) @@ -376,7 +376,7 @@ def is_valid_combo(combo, mol, distances): new_distances=list, orig_dist=dict, new_dist=dict, - ) + ) # compute shortest path between atoms agglomerates = agglomerate(combo) @@ -423,7 +423,7 @@ def find_lowest_u_layer(mol, u_layer, equivalent_atoms): orig_distances=list, selected_group=list, combo=list, - ) + ) if not equivalent_atoms: return u_layer @@ -438,7 +438,6 @@ def find_lowest_u_layer(mol, u_layer, equivalent_atoms): 
grouped_electrons.remove(group) corresponding_E_layers.remove(e_layer) - combos = generate_combo(grouped_electrons, corresponding_E_layers) # compute original distance: orig_agglomerates = agglomerate(grouped_electrons) @@ -477,16 +476,15 @@ def generate_minimum_resonance_isomer(mol): cand=Molecule, metric_sel=list, metric_cand=list, - ) - + ) candidates = resonance.generate_isomorphic_resonance_structures(mol) sel = candidates[0] metric_sel = get_unpaired_electrons(sel) for cand in candidates[1:]: - metric_cand = get_unpaired_electrons(cand) - if metric_cand < metric_sel: + metric_cand = get_unpaired_electrons(cand) + if metric_cand < metric_sel: sel = cand metric_sel = metric_cand @@ -503,7 +501,7 @@ def get_unpaired_electrons(mol): locations=list, index=int, at=Atom, - ) + ) locations = [] for index, at in enumerate(mol.atoms): if at.radicalElectrons >= 1: @@ -524,7 +522,7 @@ def compute_agglomerate_distance(agglomerates, mol): distances=list, agglomerate=list, dist=dict, - ) + ) distances = [] for agglomerate in agglomerates: @@ -586,14 +584,14 @@ def create_augmented_layers(mol): [molcopy.removeAtom(h) for h in hydrogens] rdkitmol = toRDKitMol(molcopy) - _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon')# suppress stereo warnings + _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon') # suppress stereo warnings # extract the atom numbers from N-layer of auxiliary info: atom_indices = parse_N_layer(auxinfo) atom_indices = [atom_indices.index(i + 1) for i, atom in enumerate(molcopy.atoms)] # sort the atoms based on the order of the atom indices - molcopy.atoms = [x for (y,x) in sorted(zip(atom_indices, molcopy.atoms), key=lambda pair: pair[0])] + molcopy.atoms = [x for (y, x) in sorted(zip(atom_indices, molcopy.atoms), key=lambda pair: pair[0])] ulayer = create_U_layer(molcopy, auxinfo) @@ -636,7 +634,7 @@ def create_P_layer(mol, auxinfo): if at.lonePairs == 0: p_layer.append('{}{}'.format(i, '(0)')) else: - p_layer.extend([i+1] * 
at.lonePairs) + p_layer.extend([i + 1] * at.lonePairs) # extract equivalent atom pairs from E-layer of auxiliary info: equivalent_atoms = parse_E_layer(auxinfo) @@ -1194,15 +1192,17 @@ def fix_unsaturated_bond(mol, indices, aug_inchi): class InChI(str): """InChI is a type of string in which the InChI=1 prefix is ignored.""" + def __new__(self, inchi): + if not INCHI_PREFIX in inchi: + raise InchiException('Not a valid InChI: {}'.format(inchi)) - if not INCHI_PREFIX in inchi: - raise InchiException('Not a valid InChI: {}'.format(inchi)) + return str.__new__(self, ignore_prefix(inchi)) - return str.__new__(self, ignore_prefix(inchi)) class AugmentedInChI(InChI): """AugmentedInChI is an InChI with inchi, and unpaired electron attributes.""" + def __init__(self, aug_inchi): super(AugmentedInChI, self).__init__() inchi, u_indices, p_indices = decompose(str(self)) @@ -1212,4 +1212,3 @@ def __init__(self, aug_inchi): # default to None self.u_indices = u_indices or None self.p_indices = p_indices or None - From 524b135da1cc80780883a9b2b046dbfe420ee598 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Fri, 6 Oct 2017 16:52:45 -0400 Subject: [PATCH 07/57] Remove unused methods in inchi.py check_bond_order_oxygen was replaced by is_valid subfunction of convert_3_atom_2_bond_path find_mobile_h_system was never used --- rmgpy/molecule/inchi.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index eb0c5f7e93..b5cec7dc58 100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -761,40 +761,6 @@ def fixCharge(mol, u_indices): fix_adjacent_charges(mol) -def check_bond_order_oxygen(mol): - """Check if total bond order of oxygen atoms is smaller than 4.""" - from rmgpy.molecule.util import ORDERS - - for at in mol.atoms: - if at.number == 8: - order = sum([ORDERS[b.order] for _, b in at.bonds.iteritems()]) - not_correct = order >= 4 - if not_correct: - return False - - return True - - -def 
find_mobile_h_system(mol, all_mobile_h_atoms_couples, test_indices): - """ - - """ - dummy = test_indices[:] - - for mobile_h_atom_couple in all_mobile_h_atoms_couples: - for test_index in test_indices: - if test_index in mobile_h_atom_couple: - original_atom = test_index - dummy.remove(test_index) - mobile_h_atom_couple.remove(test_index) - new_partner = mobile_h_atom_couple[0] - central = dummy[0] - return mol.atoms[central - 1], mol.atoms[original_atom - 1], mol.atoms[new_partner - 1] - - raise Exception('We should always have found the mobile-H system. All mobile H couples: {}, test indices: {}' - .format(all_mobile_h_atoms_couples, test_indices)) - - def fix_adjacent_charges(mol): """ Searches for pairs of charged atoms. From c1b0d01f5f6f4d2f6213ff8a3dd2ddfde2603037 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Fri, 6 Oct 2017 18:19:35 -0400 Subject: [PATCH 08/57] Reorganize methods related to fixing molecules created from augInChI Rename fix to fix_molecule for clarity Make all subfunctions called by fix_molecule private Reorder methods based on calling hierarchy Update cython declarations No functional changes --- rmgpy/molecule/inchi.pxd | 34 ++- rmgpy/molecule/inchi.py | 475 ++++++++++++++++++----------------- rmgpy/molecule/inchiTest.py | 5 +- rmgpy/molecule/translator.py | 2 +- 4 files changed, 270 insertions(+), 246 deletions(-) diff --git a/rmgpy/molecule/inchi.pxd b/rmgpy/molecule/inchi.pxd index 228c79cad2..8a5665946f 100644 --- a/rmgpy/molecule/inchi.pxd +++ b/rmgpy/molecule/inchi.pxd @@ -55,16 +55,36 @@ cpdef list compute_agglomerate_distance(list agglomerates, Molecule mol) cpdef str create_P_layer(Molecule mol, str auxinfo) -cpdef reset_lone_pairs(Molecule mol, list p_indices) +cpdef _fix_triplet_to_singlet(Molecule mol, list p_indices) -cdef Molecule fix_unsaturated_bond_to_biradical(Molecule mol, str inchi, list u_indices) +cpdef _convert_charge_to_unpaired_electron(Molecule mol, list u_indices) -cpdef bint isUnsaturated(Molecule mol) +cpdef 
_convert_4_atom_3_bond_path(Atom start) -cpdef check(Molecule mol, aug_inchi) +cpdef _convert_3_atom_2_bond_path(Atom start, Molecule mol) -cpdef fix_oxygen_unsaturated_bond(Molecule mol, list u_indices) +cpdef _convert_delocalized_charge_to_unpaired_electron(Molecule mol, list u_indices) -cpdef fixCharge(Molecule mol, list u_indices) +cpdef _fix_adjacent_charges(Molecule mol) -cpdef fix_triplet_to_singlet(Molecule mol, list p_indices) +cpdef _fix_charge(Molecule mol, list u_indices) + +cpdef _reset_lone_pairs(Molecule mol, list p_indices) + +cpdef _fix_oxygen_unsaturated_bond(Molecule mol, list u_indices) + +cpdef bint _is_unsaturated(Molecule mol) + +cpdef bint _convert_unsaturated_bond_to_triplet(Bond bond) + +cpdef bint _fix_mobile_h(Molecule mol, str inchi, int u1, int u2) + +cpdef bint _fix_butadiene_path(Atom start, Atom end) + +cpdef Molecule _fix_unsaturated_bond_to_biradical(Molecule mol, str inchi, list u_indices) + +cpdef _fix_unsaturated_bond(Molecule mol, list u_indices, aug_inchi) + +cpdef _check_molecule(Molecule mol, aug_inchi) + +cpdef fix_molecule(Molecule mol, aug_inchi) diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index b5cec7dc58..2e2a5c07af 100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -657,127 +657,28 @@ def find_lowest_p_layer(minmol, p_layer, equivalent_atoms): """ return minmol +################################################################## +# Methods for fixing molecules generated from an augmented InChI # +################################################################## -def check(mol, aug_inchi): +def _fix_triplet_to_singlet(mol, p_indices): """ - Check if the molecular structure is correct. - - Checks whether the multiplicity contained in the augmented inchi, - corresponds to the number of unpaired electrons + 1 found in the molecule. - - Checks whether the valence of each atom is compatible with the bond order, - number of unpaired electrons, lone pairs and charge. 
- - """ - cython.declare(inchi=str, - at=Atom - ) - - ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) - inchi, u_indices, p_indices = decompose(str(aug_inchi)) - assert(mol.getRadicalCount() == len(u_indices)) - - for at in mol.atoms: - ConsistencyChecker.check_partial_charge(at) - - -def fix_oxygen_unsaturated_bond(mol, u_indices): - """ - Searches for a radical or a charged oxygen atom connected to - a closed-shell carbon via an unsatured bond. - - Decrements the unsatured bond, - transfers the unpaired electron from O to C or - converts the charge from O to an unpaired electron on C, - increases the lone pair count of O to 2. - - Only do this once per molecule. - """ - - for at in mol.atoms: - if at.isOxygen() and at.radicalElectrons == 1 and at.lonePairs == 1: - bonds = mol.getBonds(at) - oxygen = at - for atom2, bond in bonds.iteritems(): - if bond.isTriple(): - bond.decrementOrder() - oxygen.radicalElectrons -= 1 - atom2.radicalElectrons += 1 - oxygen.lonePairs += 1 - return - elif at.isOxygen() and at.charge == 1 and at.lonePairs == 1: - bonds = mol.getBonds(at) - oxygen = at - - start = oxygen - # search for 3-atom-2-bond [X=X-X] paths - paths = pathfinder.find_allyl_end_with_charge(start) - for path in paths: - end = path[-1] - start.charge += 1 if start.charge < 0 else -1 - end.charge += 1 if end.charge < 0 else -1 - start.lonePairs += 1 - # filter bonds from path and convert bond orders: - bonds = path[1::2] # odd elements - for bond in bonds[::2]: # even bonds - assert isinstance(bond, Bond) - bond.decrementOrder() - for bond in bonds[1::2]: # odd bonds - assert isinstance(bond, Bond) - bond.incrementOrder() - return - else: - for atom2, bond in bonds.iteritems(): - if not bond.isSingle() and atom2.charge == 0: - oxygen.charge -= 1 - if (mol.atoms.index(atom2) + 1) in u_indices: - bond.decrementOrder() - atom2.radicalElectrons += 1 - u_indices.remove(mol.atoms.index(atom2) + 1) - oxygen.lonePairs += 1 - return - + Iterates 
over the atoms and checks whether atoms bearing two unpaired electrons are + also present in the p_indices list. -def fixCharge(mol, u_indices): - """ - Tries to fix a number of structural features in the molecule related to charge, - based on the information from the parameter list of atom indices with unpaired electrons. + If so, convert to the two unpaired electrons into a lone pair, and remove that atom + index from the p_indices list. """ - if not u_indices: - return - - is_charged = sum([abs(at.charge) for at in mol.atoms]) != 0 - is_correct = mol.getRadicalCount() == (mol.multiplicity - 1) - if mol.multiplicity < 3 or is_correct or not is_charged: - return - - # converting charges to unpaired electrons for atoms in the u-layer - convert_charge_to_unpaired_electron(mol, u_indices) - - # convert neighboring atoms (or delocalized paths) to unpaired electrons - convert_delocalized_charge_to_unpaired_electron(mol, u_indices) - - fix_adjacent_charges(mol) - - -def fix_adjacent_charges(mol): - """ - Searches for pairs of charged atoms. - Neutralizes one unit of charge on each atom, - and increments the bond order of the bond in between - the atoms. - """ for at in mol.atoms: - if at.charge != 0: - for neigh, bond in at.bonds.iteritems(): - if neigh.charge != 0: - bond.incrementOrder() - at.charge += 1 if at.charge < 0 else -1 - neigh.charge += 1 if neigh.charge < 0 else -1 + index = mol.atoms.index(at) + 1 + if mol.getRadicalCount() == 2 and index in p_indices: + at.lonePairs += 1 + at.radicalElectrons -= 2 + p_indices.remove(index) -def convert_charge_to_unpaired_electron(mol, u_indices): +def _convert_charge_to_unpaired_electron(mol, u_indices): """ Iterates over the atoms foundin the parameter list and converts a unit of charge on atoms into an unpaired electron. 
@@ -792,29 +693,7 @@ def convert_charge_to_unpaired_electron(mol, u_indices): u_indices.remove(at_index) -def convert_delocalized_charge_to_unpaired_electron(mol, u_indices): - """ - Iterates over the atom indices of the parameter list and searches - a charged atom that is connected to that atom via some kind of - delocalization path. - - """ - u_indices_copy = u_indices[:] - for index in u_indices_copy: - start = mol.atoms[index - 1] - - found = convert_4_atom_3_bond_path(start) - if found: - u_indices.remove(index) - continue - - found = convert_3_atom_2_bond_path(start, mol) - if found: - u_indices.remove(index) - continue - - -def convert_4_atom_3_bond_path(start): +def _convert_4_atom_3_bond_path(start): """ Searches for 4-atom-3-bond [X=X-X=X+] paths starting from the parameter atom. If a path is found, the starting atom receives an unpaired electron while @@ -843,7 +722,7 @@ def convert_4_atom_3_bond_path(start): return False -def convert_3_atom_2_bond_path(start, mol): +def _convert_3_atom_2_bond_path(start, mol): """ Searches for 3-atom-2-bond [X=X-X+] paths paths starting from the parameter atom. If a correct path is found, the starting atom receives an unpaired electron while @@ -913,81 +792,174 @@ def is_valid(mol): return False -def fix(mol, aug_inchi): +def _convert_delocalized_charge_to_unpaired_electron(mol, u_indices): """ - Fixes a number of structural features of the erroneous Molecule - parsed by the backends, based on multiplicity and unpaired electron information - stored in the augmented inchi. + Iterates over the atom indices of the parameter list and searches + a charged atom that is connected to that atom via some kind of + delocalization path. 
+ """ + u_indices_copy = u_indices[:] + for index in u_indices_copy: + start = mol.atoms[index - 1] - u_indices = aug_inchi.u_indices[:] if aug_inchi.u_indices else [] - p_indices = aug_inchi.p_indices[:] if aug_inchi.p_indices else [] + found = _convert_4_atom_3_bond_path(start) + if found: + u_indices.remove(index) + continue - # ignore atoms that bear already unpaired electrons: - for i in set(u_indices[:]): - atom = mol.atoms[i - 1] - [u_indices.remove(i) for _ in range(atom.radicalElectrons)] + found = _convert_3_atom_2_bond_path(start, mol) + if found: + u_indices.remove(index) + continue - # ignore atoms that bear already lone pairs: - for i in set(p_indices[:]): - atom = mol.atoms[i - 1] - [p_indices.remove(i) for _ in range(atom.lonePairs)] - fix_triplet_to_singlet(mol, p_indices) +def _fix_adjacent_charges(mol): + """ + Searches for pairs of charged atoms. + Neutralizes one unit of charge on each atom, + and increments the bond order of the bond in between + the atoms. + """ + for at in mol.atoms: + if at.charge != 0: + for neigh, bond in at.bonds.iteritems(): + if neigh.charge != 0: + bond.incrementOrder() + at.charge += 1 if at.charge < 0 else -1 + neigh.charge += 1 if neigh.charge < 0 else -1 + + +def _fix_charge(mol, u_indices): + """ + Tries to fix a number of structural features in the molecule related to charge, + based on the information from the parameter list of atom indices with unpaired electrons. 
+ """ - fixCharge(mol, u_indices) + if not u_indices: + return - reset_lone_pairs(mol, p_indices) + is_charged = sum([abs(at.charge) for at in mol.atoms]) != 0 + is_correct = mol.getRadicalCount() == (mol.multiplicity - 1) + if mol.multiplicity < 3 or is_correct or not is_charged: + return - fix_oxygen_unsaturated_bond(mol, u_indices) + # converting charges to unpaired electrons for atoms in the u-layer + _convert_charge_to_unpaired_electron(mol, u_indices) - fix_unsaturated_bond(mol, u_indices, aug_inchi) + # convert neighboring atoms (or delocalized paths) to unpaired electrons + _convert_delocalized_charge_to_unpaired_electron(mol, u_indices) - check(mol, aug_inchi) + _fix_adjacent_charges(mol) -def fix_triplet_to_singlet(mol, p_indices): +def _reset_lone_pairs(mol, p_indices): """ - Iterates over the atoms and checks whether atoms bearing two unpaired electrons are - also present in the p_indices list. + Iterates over the atoms of the molecule and + resets the atom's lone pair count to the value stored in the p_indices list, + or to the default value. - If so, convert to the two unpaired electrons into a lone pair, and remove that atom - index from the p_indices list. + """ + for at in mol.atoms: + index = mol.atoms.index(at) + 1 # 1-based index + count = p_indices.count(index) + if count != 0: + at.lonePairs = count + else: + order = at.getBondOrdersForAtom() + at.lonePairs = (elements.PeriodicSystem.valence_electrons[ + at.symbol] - order - at.radicalElectrons - at.charge) / 2 + + +def _fix_oxygen_unsaturated_bond(mol, u_indices): + """ + Searches for a radical or a charged oxygen atom connected to + a closed-shell carbon via an unsatured bond. + + Decrements the unsatured bond, + transfers the unpaired electron from O to C or + converts the charge from O to an unpaired electron on C, + increases the lone pair count of O to 2. + + Only do this once per molecule. 
""" for at in mol.atoms: - index = mol.atoms.index(at) + 1 - if mol.getRadicalCount() == 2 and index in p_indices: - at.lonePairs += 1 - at.radicalElectrons -= 2 - p_indices.remove(index) + if at.isOxygen() and at.radicalElectrons == 1 and at.lonePairs == 1: + bonds = mol.getBonds(at) + oxygen = at + for atom2, bond in bonds.iteritems(): + if bond.isTriple(): + bond.decrementOrder() + oxygen.radicalElectrons -= 1 + atom2.radicalElectrons += 1 + oxygen.lonePairs += 1 + return + elif at.isOxygen() and at.charge == 1 and at.lonePairs == 1: + bonds = mol.getBonds(at) + oxygen = at + start = oxygen + # search for 3-atom-2-bond [X=X-X] paths + paths = pathfinder.find_allyl_end_with_charge(start) + for path in paths: + end = path[-1] + start.charge += 1 if start.charge < 0 else -1 + end.charge += 1 if end.charge < 0 else -1 + start.lonePairs += 1 + # filter bonds from path and convert bond orders: + bonds = path[1::2] # odd elements + for bond in bonds[::2]: # even bonds + assert isinstance(bond, Bond) + bond.decrementOrder() + for bond in bonds[1::2]: # odd bonds + assert isinstance(bond, Bond) + bond.incrementOrder() + return + else: + for atom2, bond in bonds.iteritems(): + if not bond.isSingle() and atom2.charge == 0: + oxygen.charge -= 1 + if (mol.atoms.index(atom2) + 1) in u_indices: + bond.decrementOrder() + atom2.radicalElectrons += 1 + u_indices.remove(mol.atoms.index(atom2) + 1) + oxygen.lonePairs += 1 + return -def fix_butadiene_path(start, end): + +def _is_unsaturated(mol): """ - Searches for a 1,3-butadiene path between the start and end atom. - Adds an unpaired electron to start and end atom, and "inverts" the bonds - in between them. + Does the molecule have a bond that's not single? + Eg. 
a bond that is double or triple or benzene """ - path = pathfinder.find_butadiene(start, end) - if path is not None: - start.radicalElectrons += 1 - end.radicalElectrons += 1 - # filter bonds from path and convert bond orders: - bonds = path[1::2] # odd elements - for bond in bonds[::2]: # even bonds - assert isinstance(bond, Bond) - bond.decrementOrder() - for bond in bonds[1::2]: # odd bonds - assert isinstance(bond, Bond) - bond.incrementOrder() + cython.declare(atom1=Atom, + atom2=Atom, + bonds=dict, + bond=Bond) + for atom1 in mol.atoms: + bonds = mol.getBonds(atom1) + for atom2, bond in bonds.iteritems(): + if not bond.isSingle(): + return True + + return False - return True +def _convert_unsaturated_bond_to_triplet(bond): + """ + Decrements the bond if it is unsatured, and adds an unpaired + electron to each of the atoms connected by the bond. + """ + if not bond.isSingle(): + for at in (bond.atom1, bond.atom2): + at.radicalElectrons += 1 + bond.decrementOrder() + return True return False -def fix_mobile_h(mol, inchi, u1, u2): +def _fix_mobile_h(mol, inchi, u1, u2): """ Identifies a system of atoms bearing unpaired electrons and mobile hydrogens @@ -1038,38 +1010,31 @@ def fix_mobile_h(mol, inchi, u1, u2): return False -def convert_unsaturated_bond_to_triplet(bond): +def _fix_butadiene_path(start, end): """ - Decrements the bond if it is unsatured, and adds an unpaired - electron to each of the atoms connected by the bond. + Searches for a 1,3-butadiene path between the start and end atom. + Adds an unpaired electron to start and end atom, and "inverts" the bonds + in between them. 
""" - if not bond.isSingle(): - for at in (bond.atom1, bond.atom2): - at.radicalElectrons += 1 - bond.decrementOrder() - return True - return False - + path = pathfinder.find_butadiene(start, end) + if path is not None: + start.radicalElectrons += 1 + end.radicalElectrons += 1 + # filter bonds from path and convert bond orders: + bonds = path[1::2] # odd elements + for bond in bonds[::2]: # even bonds + assert isinstance(bond, Bond) + bond.decrementOrder() + for bond in bonds[1::2]: # odd bonds + assert isinstance(bond, Bond) + bond.incrementOrder() -def reset_lone_pairs(mol, p_indices): - """ - Iterates over the atoms of the molecule and - resets the atom's lone pair count to the value stored in the p_indices list, - or to the default value. + return True - """ - for at in mol.atoms: - index = mol.atoms.index(at) + 1 # 1-based index - count = p_indices.count(index) - if count != 0: - at.lonePairs = count - else: - order = at.getBondOrdersForAtom() - at.lonePairs = (elements.PeriodicSystem.valence_electrons[ - at.symbol] - order - at.radicalElectrons - at.charge) / 2 + return False -def fix_unsaturated_bond_to_biradical(mol, inchi, u_indices): +def _fix_unsaturated_bond_to_biradical(mol, inchi, u_indices): """ Convert an unsaturated bond (double, triple) into a bond with a lower bond order (single, double), and give an unpaired electron @@ -1088,16 +1053,16 @@ def fix_unsaturated_bond_to_biradical(mol, inchi, u_indices): atom2 = mol.atoms[u2 - 1] # convert to 0-based index for atoms in molecule if mol.hasBond(atom1, atom2): b = mol.getBond(atom1, atom2) - isFixed = convert_unsaturated_bond_to_triplet(b) + isFixed = _convert_unsaturated_bond_to_triplet(b) if isFixed: break else: - isFixed = fix_mobile_h(mol, inchi, u1, u2) + isFixed = _fix_mobile_h(mol, inchi, u1, u2) if isFixed: break else: - isFixed = fix_butadiene_path(atom1, atom2) + isFixed = _fix_butadiene_path(atom1, atom2) if isFixed: break @@ -1113,25 +1078,7 @@ def fix_unsaturated_bond_to_biradical(mol, 
inchi, u_indices): ) -def isUnsaturated(mol): - """ - Does the molecule have a bond that's not single? - Eg. a bond that is double or triple or benzene - """ - cython.declare(atom1=Atom, - atom2=Atom, - bonds=dict, - bond=Bond) - for atom1 in mol.atoms: - bonds = mol.getBonds(atom1) - for atom2, bond in bonds.iteritems(): - if not bond.isSingle(): - return True - - return False - - -def fix_unsaturated_bond(mol, indices, aug_inchi): +def _fix_unsaturated_bond(mol, indices, aug_inchi): """ Adds unpaired electrons to the molecule by converting unsaturated bonds into triplets. @@ -1148,12 +1095,68 @@ def fix_unsaturated_bond(mol, indices, aug_inchi): raise Exception('Cannot correct {} based on {} by converting unsaturated bonds into unpaired electrons...' \ .format(mol.toAdjacencyList(), aug_inchi)) - unsaturated = isUnsaturated(mol) + unsaturated = _is_unsaturated(mol) while not correct and unsaturated and len(indices) > 1: - mol = fix_unsaturated_bond_to_biradical(mol, aug_inchi.inchi, indices) + mol = _fix_unsaturated_bond_to_biradical(mol, aug_inchi.inchi, indices) correct = mol.getRadicalCount() == (mol.multiplicity - 1) - unsaturated = isUnsaturated(mol) + unsaturated = _is_unsaturated(mol) + + +def _check_molecule(mol, aug_inchi): + """ + Check if the molecular structure is correct. + + Checks whether the multiplicity contained in the augmented inchi, + corresponds to the number of unpaired electrons + 1 found in the molecule. + + Checks whether the valence of each atom is compatible with the bond order, + number of unpaired electrons, lone pairs and charge. 
+ + """ + cython.declare(inchi=str, + at=Atom + ) + + ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) + inchi, u_indices, p_indices = decompose(str(aug_inchi)) + assert(mol.getRadicalCount() == len(u_indices)) + + for at in mol.atoms: + ConsistencyChecker.check_partial_charge(at) + + +def fix_molecule(mol, aug_inchi): + """ + Fixes a number of structural features of the erroneous Molecule + parsed by the backends, based on multiplicity and unpaired electron information + stored in the augmented inchi. + """ + + u_indices = aug_inchi.u_indices[:] if aug_inchi.u_indices else [] + p_indices = aug_inchi.p_indices[:] if aug_inchi.p_indices else [] + + # ignore atoms that bear already unpaired electrons: + for i in set(u_indices[:]): + atom = mol.atoms[i - 1] + [u_indices.remove(i) for _ in range(atom.radicalElectrons)] + + # ignore atoms that bear already lone pairs: + for i in set(p_indices[:]): + atom = mol.atoms[i - 1] + [p_indices.remove(i) for _ in range(atom.lonePairs)] + + _fix_triplet_to_singlet(mol, p_indices) + + _fix_charge(mol, u_indices) + + _reset_lone_pairs(mol, p_indices) + + _fix_oxygen_unsaturated_bond(mol, u_indices) + + _fix_unsaturated_bond(mol, u_indices, aug_inchi) + + _check_molecule(mol, aug_inchi) class InChI(str): diff --git a/rmgpy/molecule/inchiTest.py b/rmgpy/molecule/inchiTest.py index 3b77375f54..01b65b364e 100644 --- a/rmgpy/molecule/inchiTest.py +++ b/rmgpy/molecule/inchiTest.py @@ -34,6 +34,7 @@ from .molecule import Atom, Molecule from .inchi import * +from .inchi import _reset_lone_pairs class InChITest(unittest.TestCase): @@ -307,7 +308,7 @@ def test_Methane(self): mol = Molecule().fromSMILES(smi) p_indices = [] - reset_lone_pairs(mol, p_indices) + _reset_lone_pairs(mol, p_indices) for at in mol.atoms: self.assertEquals(at.lonePairs, 0) @@ -322,7 +323,7 @@ def test_SingletMethylene(self): mol = Molecule().fromAdjacencyList(adjlist) p_indices = [1] - reset_lone_pairs(mol, p_indices) + 
_reset_lone_pairs(mol, p_indices) for at in mol.atoms: if at.symbol == 'C': diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 3a73ceba6b..9060eb46a4 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -478,7 +478,7 @@ def fromAugmentedInChI(mol, aug_inchi): mol.multiplicity = len(aug_inchi.u_indices) + 1 if aug_inchi.u_indices else 1 - inchiutil.fix(mol, aug_inchi) + inchiutil.fix_molecule(mol, aug_inchi) mol.updateAtomTypes() From 2f720164adf525db699094bc53571be5518306b2 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 19 Oct 2017 16:18:15 -0400 Subject: [PATCH 09/57] Reorganize methods related parsing and generating InChI layers Rename decompose to decompose_aug_inchi Rename ignore_prefix to remove_inchi_prefix Make internal methods private Reorder methods based on calling hierarchy Update cython declarations in augmenting pxd No functional changes --- rmgpy/molecule/inchi.pxd | 31 ++-- rmgpy/molecule/inchi.py | 356 ++++++++++++++++++------------------ rmgpy/molecule/inchiTest.py | 35 ++-- 3 files changed, 216 insertions(+), 206 deletions(-) diff --git a/rmgpy/molecule/inchi.pxd b/rmgpy/molecule/inchi.pxd index 8a5665946f..b68b73b590 100644 --- a/rmgpy/molecule/inchi.pxd +++ b/rmgpy/molecule/inchi.pxd @@ -27,33 +27,40 @@ from .molecule cimport Atom, Bond, Molecule -cpdef tuple decompose(string) +cpdef tuple decompose_aug_inchi(str string) -cpdef str ignore_prefix(str string) +cpdef str remove_inchi_prefix(str string) cpdef str compose_aug_inchi(str inchi, str ulayer=*, str player=*) cpdef str compose_aug_inchi_key(str inchi_key, str ulayer=*, str player=*) -cpdef list parse_H_layer(str inchi) +cpdef list _parse_H_layer(str inchi) -cpdef list parse_E_layer(str auxinfo) +cpdef list _parse_E_layer(str auxinfo) -cpdef list parse_N_layer(str auxinfo) +cpdef list _parse_N_layer(str auxinfo) -cpdef str create_U_layer(Molecule mol, str auxinfo) +cpdef bint _has_unexpected_lone_pairs(Molecule mol) -cpdef bint 
is_valid_combo(list combo, Molecule mol, list distances) +cpdef list _get_unpaired_electrons(Molecule mol) -cpdef list find_lowest_u_layer(Molecule mol, list u_layer, list equivalent_atoms) +cpdef Molecule _generate_minimum_resonance_isomer(Molecule mol) -cpdef Molecule generate_minimum_resonance_isomer(Molecule mol) +cpdef list _compute_agglomerate_distance(list agglomerates, Molecule mol) -cpdef list get_unpaired_electrons(Molecule mol) +cpdef bint _is_valid_combo(list combo, Molecule mol, list distances) -cpdef list compute_agglomerate_distance(list agglomerates, Molecule mol) +cpdef list _find_lowest_u_layer(Molecule mol, list u_layer, list equivalent_atoms) + +cpdef str _create_U_layer(Molecule mol, str auxinfo) + +cpdef Molecule _find_lowest_p_layer(Molecule minmol, list p_layer, list equivalent_atoms) + +cpdef str _create_P_layer(Molecule mol, str auxinfo) + +cpdef tuple create_augmented_layers(Molecule mol) -cpdef str create_P_layer(Molecule mol, str auxinfo) cpdef _fix_triplet_to_singlet(Molecule mol, list p_indices) diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index 2e2a5c07af..5aeaf9b863 100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -87,7 +87,7 @@ player_pattern = re.compile(P_LAYER_PREFIX + r'(.*)') -def decompose(string): +def decompose_aug_inchi(string): """ Converts an augmented inchi into - an inchi, @@ -99,7 +99,6 @@ def decompose(string): bear any lone pairs. The "x(0)" will be parsed into a tuple (x, 0). - """ cython.declare( inchi=str, @@ -132,7 +131,7 @@ def decompose(string): return inchi, u_indices, p_indices -def ignore_prefix(string): +def remove_inchi_prefix(string): """ Splits off the 'InChI=1S' or 'InChI=1' layer of an InChI and returns the last part. 
@@ -182,7 +181,11 @@ def compose_aug_inchi_key(inchi_key, ulayer=None, player=None): return aug_inchi_key -def parse_H_layer(inchi): +########################################################## +# Methods for parsing layers in InChI and auxiliary info # +########################################################## + +def _parse_H_layer(inchi): """ Converts the Mobile H layer of an inchi string into a list of atom indices couples that carry a mobile hydrogen. @@ -227,7 +230,7 @@ def parse_H_layer(inchi): return couples -def parse_E_layer(auxinfo): +def _parse_E_layer(auxinfo): """ Converts the layer with equivalence information (E-layer) on atoms into a list of lists of equivalent atom indices. @@ -276,9 +279,9 @@ def parse_E_layer(auxinfo): return equivalent_atoms -def parse_N_layer(auxinfo): +def _parse_N_layer(auxinfo): """ - Parses the layer with atom ordering information (N-layer) + Parses the layer with atom ordering information (N-layer) and returns a list of atom indices that reflect how the atoms of the original molecule should be ordered according to the InChI algorithm. @@ -287,7 +290,7 @@ def parse_N_layer(auxinfo): Auxiliary info of SMILES OCCC (InChI=1S/C3H8O/c1-2-3-4/h4H,2-3H2,1H3): AuxInfo=1/0/N:4,3,2,1/rA:4OCCC/rB:s1;s2;s3;/rC:;;;; - N-layer: + N-layer: /N:4,3,2,1 The original number of an atom with identification number n is given as the @@ -317,56 +320,108 @@ def parse_N_layer(auxinfo): return indices -def create_U_layer(mol, auxinfo): +########################################### +# Methods for generating augmented layers # +########################################### + +def _has_unexpected_lone_pairs(mol): """ - Creates a string with the positions of the atoms that bear unpaired electrons. The string - can be used to complement the InChI with an additional layer that allows for the differentiation - between structures with multiple unpaired electrons. 
+ Iterates over the atoms of the Molecule and returns whether + at least one atom bears an unexpected number of lone pairs. - The string is composed of a prefix ('u') followed by the positions of each of the unpaired electrons, - sorted in numerical order. + E.g. + carbon with > 0 lone pairs + nitrogen with > 1 lone pairs + oxygen with > 2 lone pairs - Example: - - methyl radical ([CH3]) : u1 - - triplet methylene biradical ([CH2]) : u1,1 - - ethane-1,2-diyl biradical ([CH2][CH2]): u1,2 + The expected number of lone pairs of an element is equal to + """ - When the molecule does not bear any unpaired electrons, None is returned. + for at in mol.atoms: + try: + exp = elements.PeriodicSystem.lone_pairs[at.symbol] + except KeyError: + raise Exception("Unrecognized element: {}".format(at.symbol)) + else: + if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: return True + + return False + +def _get_unpaired_electrons(mol): + """ + Returns a sorted list of the indices of the atoms that bear one or more + unpaired electrons. """ cython.declare( - minmol=Molecule, - # rdkitmol=, - u_layer=list, - i=int, + locations=list, + index=int, at=Atom, - equivalent_atoms=list, ) + locations = [] + for index, at in enumerate(mol.atoms): + if at.radicalElectrons >= 1: + locations.append(index) - if mol.getRadicalCount() == 0: - return None - elif mol.getFormula() == 'H': - return U_LAYER_PREFIX + '1' + return sorted(locations) - # find the resonance isomer with the lowest u index: - minmol = generate_minimum_resonance_isomer(mol) - # create preliminary u-layer: - u_layer = [] - for i, at in enumerate(minmol.atoms): - u_layer.extend([i + 1] * at.radicalElectrons) +def _generate_minimum_resonance_isomer(mol): + """ + Select the resonance isomer that is isomorphic to the parameter isomer, with the lowest unpaired + electrons descriptor. 
- # extract equivalent atom pairs from E-layer of auxiliary info: - equivalent_atoms = parse_E_layer(auxinfo) - if equivalent_atoms: - # select lowest u-layer: - u_layer = find_lowest_u_layer(minmol, u_layer, equivalent_atoms) + First, we generate all isomorphic resonance isomers. + Next, we return the candidate with the lowest unpaired electrons metric. - return (U_LAYER_PREFIX + ','.join(map(str, u_layer))) + The metric is a sorted list with indices of the atoms that bear an unpaired electron + """ + cython.declare( + candidates=list, + sel=Molecule, + cand=Molecule, + metric_sel=list, + metric_cand=list, + ) + + candidates = resonance.generate_isomorphic_resonance_structures(mol) + + sel = candidates[0] + metric_sel = _get_unpaired_electrons(sel) + for cand in candidates[1:]: + metric_cand = _get_unpaired_electrons(cand) + if metric_cand < metric_sel: + sel = cand + metric_sel = metric_cand + + return sel + + +def _compute_agglomerate_distance(agglomerates, mol): + """ + Iterates over a list of lists containing atom indices. + For each list the distances between the atoms is computed. + A list of distances is returned. -def is_valid_combo(combo, mol, distances): + """ + + cython.declare( + distances=list, + agglomerate=list, + dist=dict, + ) + + distances = [] + for agglomerate in agglomerates: + dist = pathfinder.compute_atom_distance(agglomerate, mol) + distances.append(dist) + + return distances + + +def _is_valid_combo(combo, mol, distances): """ Check if the combination of atom indices refers to atoms that are adjacent in the molecule. 
@@ -380,7 +435,7 @@ def is_valid_combo(combo, mol, distances): # compute shortest path between atoms agglomerates = agglomerate(combo) - new_distances = compute_agglomerate_distance(agglomerates, mol) + new_distances = _compute_agglomerate_distance(agglomerates, mol) # combo is valid if the distance is equal to the parameter distance @@ -394,7 +449,7 @@ def is_valid_combo(combo, mol, distances): return True -def find_lowest_u_layer(mol, u_layer, equivalent_atoms): +def _find_lowest_u_layer(mol, u_layer, equivalent_atoms): """ Searches for the "minimum" combination of indices of atoms that bear unpaired electrons. @@ -441,14 +496,14 @@ def find_lowest_u_layer(mol, u_layer, equivalent_atoms): combos = generate_combo(grouped_electrons, corresponding_E_layers) # compute original distance: orig_agglomerates = agglomerate(grouped_electrons) - orig_distances = compute_agglomerate_distance(orig_agglomerates, mol) + orig_distances = _compute_agglomerate_distance(orig_agglomerates, mol) # deflate the list of lists to be able to numerically compare them selected_group = sorted(itertools.chain.from_iterable(grouped_electrons)) # see if any of the combos is valid and results in a lower numerical combination than the original for combo in combos: - if is_valid_combo(combo, mol, orig_distances): + if _is_valid_combo(combo, mol, orig_distances): combo = sorted(itertools.chain.from_iterable(combo)) if combo < selected_group: selected_group = combo @@ -459,106 +514,111 @@ def find_lowest_u_layer(mol, u_layer, equivalent_atoms): return sorted(new_u_layer) -def generate_minimum_resonance_isomer(mol): - """ - Select the resonance isomer that is isomorphic to the parameter isomer, with the lowest unpaired - electrons descriptor. - - First, we generate all isomorphic resonance isomers. - Next, we return the candidate with the lowest unpaired electrons metric. 
- - The metric is a sorted list with indices of the atoms that bear an unpaired electron +def _create_U_layer(mol, auxinfo): """ + Creates a string with the positions of the atoms that bear unpaired electrons. The string + can be used to complement the InChI with an additional layer that allows for the differentiation + between structures with multiple unpaired electrons. - cython.declare( - candidates=list, - sel=Molecule, - cand=Molecule, - metric_sel=list, - metric_cand=list, - ) - - candidates = resonance.generate_isomorphic_resonance_structures(mol) - - sel = candidates[0] - metric_sel = get_unpaired_electrons(sel) - for cand in candidates[1:]: - metric_cand = get_unpaired_electrons(cand) - if metric_cand < metric_sel: - sel = cand - metric_sel = metric_cand + The string is composed of a prefix ('u') followed by the positions of each of the unpaired electrons, + sorted in numerical order. - return sel + Example: + - methyl radical ([CH3]) : u1 + - triplet methylene biradical ([CH2]) : u1,1 + - ethane-1,2-diyl biradical ([CH2][CH2]): u1,2 + When the molecule does not bear any unpaired electrons, None is returned. -def get_unpaired_electrons(mol): - """ - Returns a sorted list of the indices of the atoms that bear one or more - unpaired electrons. """ cython.declare( - locations=list, - index=int, + minmol=Molecule, + # rdkitmol=, + u_layer=list, + i=int, at=Atom, + equivalent_atoms=list, ) - locations = [] - for index, at in enumerate(mol.atoms): - if at.radicalElectrons >= 1: - locations.append(index) - return sorted(locations) + if mol.getRadicalCount() == 0: + return None + elif mol.getFormula() == 'H': + return U_LAYER_PREFIX + '1' + # find the resonance isomer with the lowest u index: + minmol = _generate_minimum_resonance_isomer(mol) -def compute_agglomerate_distance(agglomerates, mol): - """ - Iterates over a list of lists containing atom indices. - For each list the distances between the atoms is computed. - A list of distances is returned. 
+ # create preliminary u-layer: + u_layer = [] + for i, at in enumerate(minmol.atoms): + u_layer.extend([i + 1] * at.radicalElectrons) - """ + # extract equivalent atom pairs from E-layer of auxiliary info: + equivalent_atoms = _parse_E_layer(auxinfo) + if equivalent_atoms: + # select lowest u-layer: + u_layer = _find_lowest_u_layer(minmol, u_layer, equivalent_atoms) - cython.declare( - distances=list, - agglomerate=list, - dist=dict, - ) + return (U_LAYER_PREFIX + ','.join(map(str, u_layer))) - distances = [] - for agglomerate in agglomerates: - dist = pathfinder.compute_atom_distance(agglomerate, mol) - distances.append(dist) - return distances +def _find_lowest_p_layer(minmol, p_layer, equivalent_atoms): + """ + Permute the equivalent atoms and return the combination with the + lowest p-layer. + + TODO: The presence of unpaired electrons complicates stuff. + """ + return minmol -def has_unexpected_lone_pairs(mol): +def _create_P_layer(mol, auxinfo): """ - Iterates over the atoms of the Molecule and returns whether - at least one atom bears an unexpected number of lone pairs. + Creates a string with the positions of the atoms that bear an unexpected number of lone pairs. The string + can be used to complement the InChI with an additional layer that allows for the differentiation + between structures with lone pairs. - E.g. - carbon with > 0 lone pairs - nitrogen with > 1 lone pairs - oxygen with > 2 lone pairs + The string is composed of a prefix ('P_LAYER_PREFIX') followed by the positions of each of the atoms with an + unexpected number of lone pairs, sorted in numerical order. - The expected number of lone pairs of an element is equal to + Example: + - singlet methylene biradical ([CH2]) : 'P_LAYER_PREFIX'1 + + When the molecule does not bear any atoms with an unexpected number of lone pairs, + None is returned. 
""" + # TODO: find the resonance isomer with the lowest p index: + minmol = mol - for at in mol.atoms: + # create preliminary p-layer: + p_layer = [] + for i, at in enumerate(mol.atoms): try: exp = elements.PeriodicSystem.lone_pairs[at.symbol] except KeyError: raise Exception("Unrecognized element: {}".format(at.symbol)) else: - if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: return True + if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: + if at.lonePairs == 0: + p_layer.append('{}{}'.format(i, '(0)')) + else: + p_layer.extend([i + 1] * at.lonePairs) - return False + # extract equivalent atom pairs from E-layer of auxiliary info: + equivalent_atoms = _parse_E_layer(auxinfo) + if equivalent_atoms: + # select lowest u-layer: + u_layer = _find_lowest_p_layer(minmol, p_layer, equivalent_atoms) + + if p_layer: + return (P_LAYER_PREFIX + P_LAYER_SEPARATOR.join(map(str, p_layer))) + else: + return None def create_augmented_layers(mol): """ - The indices in the string refer to the atom indices in the molecule, according to the atom order obtained by sorting the atoms using the InChI canonicalization algorithm. @@ -570,10 +630,9 @@ def create_augmented_layers(mol): to the order in the InChI. In case, the molecule contains atoms that cannot be distinguished with the InChI algorithm ('equivalent atoms'), the position of the unpaired electrons is changed as to ensure the atoms with the lowest indices are used to compose the string. 
- """ - if mol.getRadicalCount() == 0 and not has_unexpected_lone_pairs(mol): + if mol.getRadicalCount() == 0 and not _has_unexpected_lone_pairs(mol): return None, None elif mol.getFormula() == 'H': return U_LAYER_PREFIX + '1', None @@ -587,76 +646,19 @@ def create_augmented_layers(mol): _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon') # suppress stereo warnings # extract the atom numbers from N-layer of auxiliary info: - atom_indices = parse_N_layer(auxinfo) + atom_indices = _parse_N_layer(auxinfo) atom_indices = [atom_indices.index(i + 1) for i, atom in enumerate(molcopy.atoms)] # sort the atoms based on the order of the atom indices molcopy.atoms = [x for (y, x) in sorted(zip(atom_indices, molcopy.atoms), key=lambda pair: pair[0])] - ulayer = create_U_layer(molcopy, auxinfo) + ulayer = _create_U_layer(molcopy, auxinfo) - player = create_P_layer(molcopy, auxinfo) + player = _create_P_layer(molcopy, auxinfo) return ulayer, player -def create_P_layer(mol, auxinfo): - """ - - Creates a string with the positions of the atoms that bear an unexpected number of lone pairs. The string - can be used to complement the InChI with an additional layer that allows for the differentiation - between structures with lone pairs. - - The string is composed of a prefix ('P_LAYER_PREFIX') followed by the positions of each of the atoms with an - unexpected number of lone pairs, sorted in numerical order. - - Example: - - singlet methylene biradical ([CH2]) : 'P_LAYER_PREFIX'1 - - When the molecule does not bear any atoms with an unexpected number of lone pairs, - None is returned. 
- - - """ - - # TODO: find the resonance isomer with the lowest p index: - minmol = mol - - # create preliminary p-layer: - p_layer = [] - for i, at in enumerate(mol.atoms): - try: - exp = elements.PeriodicSystem.lone_pairs[at.symbol] - except KeyError: - raise Exception("Unrecognized element: {}".format(at.symbol)) - else: - if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: - if at.lonePairs == 0: - p_layer.append('{}{}'.format(i, '(0)')) - else: - p_layer.extend([i + 1] * at.lonePairs) - - # extract equivalent atom pairs from E-layer of auxiliary info: - equivalent_atoms = parse_E_layer(auxinfo) - if equivalent_atoms: - # select lowest u-layer: - u_layer = find_lowest_p_layer(minmol, p_layer, equivalent_atoms) - - if p_layer: - return (P_LAYER_PREFIX + P_LAYER_SEPARATOR.join(map(str, p_layer))) - else: - return None - - -def find_lowest_p_layer(minmol, p_layer, equivalent_atoms): - """ - Permute the equivalent atoms and return the combination with the - lowest p-layer. - - TODO: The presence of unpaired electrons complicates stuff. - """ - return minmol - ################################################################## # Methods for fixing molecules generated from an augmented InChI # ################################################################## @@ -978,7 +980,7 @@ def _fix_mobile_h(mol, inchi, u1, u2): and the bond between them will decrease in order. 
""" - mobile_hydrogens = parse_H_layer(inchi) + mobile_hydrogens = _parse_H_layer(inchi) if mobile_hydrogens: # WIP: only consider the first system of mobile hydrogens: @@ -1119,7 +1121,7 @@ def _check_molecule(mol, aug_inchi): ) ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) - inchi, u_indices, p_indices = decompose(str(aug_inchi)) + inchi, u_indices, p_indices = decompose_aug_inchi(str(aug_inchi)) assert(mol.getRadicalCount() == len(u_indices)) for at in mol.atoms: @@ -1166,7 +1168,7 @@ def __new__(self, inchi): if not INCHI_PREFIX in inchi: raise InchiException('Not a valid InChI: {}'.format(inchi)) - return str.__new__(self, ignore_prefix(inchi)) + return str.__new__(self, remove_inchi_prefix(inchi)) class AugmentedInChI(InChI): @@ -1174,7 +1176,7 @@ class AugmentedInChI(InChI): def __init__(self, aug_inchi): super(AugmentedInChI, self).__init__() - inchi, u_indices, p_indices = decompose(str(self)) + inchi, u_indices, p_indices = decompose_aug_inchi(str(self)) self.inchi = str(inchi) diff --git a/rmgpy/molecule/inchiTest.py b/rmgpy/molecule/inchiTest.py index 01b65b364e..8d2cc3a03b 100644 --- a/rmgpy/molecule/inchiTest.py +++ b/rmgpy/molecule/inchiTest.py @@ -34,7 +34,8 @@ from .molecule import Atom, Molecule from .inchi import * -from .inchi import _reset_lone_pairs +from .inchi import _has_unexpected_lone_pairs, _parse_E_layer, _parse_H_layer, _parse_N_layer, _reset_lone_pairs + class InChITest(unittest.TestCase): @@ -96,10 +97,10 @@ class IgnorePrefixTest(unittest.TestCase): def test_ignore(self): string = 'InChI=1S/foo' - self.assertTrue( ignore_prefix(string) == 'foo') + self.assertTrue(remove_inchi_prefix(string) == 'foo') with self.assertRaises(InchiException): - ignore_prefix('foo') + remove_inchi_prefix('foo') class ComposeTest(unittest.TestCase): @@ -116,7 +117,7 @@ def test_OCO(self): smi = 'O=C-O' inchi = Molecule().fromSMILES(smi).toInChI() - mobile_hs = parse_H_layer(inchi) + mobile_hs = _parse_H_layer(inchi) 
expected = [[2,3]] self.assertTrue(mobile_hs == expected) @@ -125,20 +126,20 @@ def test_no_equivalence_layer(self): """Test that the absence of an E-layer results in an empty list.""" auxinfo = "AuxInfo=1/0/N:1/rA:1C/rB:/rC:;" - e_layer = parse_E_layer(auxinfo) + e_layer = _parse_E_layer(auxinfo) self.assertFalse(e_layer) def test_C8H22(self): auxinfo = "AuxInfo=1/0/N:1,8,4,6,2,7,3,5/E:(1,2)(3,4)(5,6)(7,8)/rA:8C.2C.2CCCCCC/rB:s1;s2;s3;s3;s5;s5;d7;/rC:;;;;;;;;" - e_layer = parse_E_layer(auxinfo) + e_layer = _parse_E_layer(auxinfo) expected = [[1, 2], [3, 4], [5, 6], [7, 8]] self.assertTrue(e_layer == expected) def test_C7H17(self): auxinfo = "AuxInfo=1/0/N:3,5,7,2,4,6,1/E:(1,2,3)(4,5,6)/rA:7CCCCCCC/rB:s1;d2;s1;d4;s1;d6;/rC:;;;;;;;" - e_layer = parse_E_layer(auxinfo) + e_layer = _parse_E_layer(auxinfo) expected = [[1, 2, 3], [4, 5, 6]] self.assertTrue(e_layer == expected) @@ -146,7 +147,7 @@ def test_C7H17(self): class ParseNLayerTest(unittest.TestCase): def test_OCCC(self): auxinfo = "AuxInfo=1/0/N:4,3,2,1/rA:4OCCC/rB:s1;s2;s3;/rC:;;;;" - n_layer = parse_N_layer(auxinfo) + n_layer = _parse_N_layer(auxinfo) expected = [4,3,2,1] self.assertTrue(n_layer == expected) @@ -155,23 +156,23 @@ class DecomposeTest(unittest.TestCase): def test_inchi(self): string = 'InChI=1S/XXXX/cXXX/hXXX' - inchi, u_indices, p_indices = decompose(string) + inchi, u_indices, p_indices = decompose_aug_inchi(string) self.assertEquals([], u_indices) def test_inchi_u_layer(self): string = 'InChI=1S/XXXX/cXXX/hXXX/u1,2' - inchi, u_indices, p_indices = decompose(string) + inchi, u_indices, p_indices = decompose_aug_inchi(string) self.assertEquals([1,2], u_indices) def test_inchi_p_layer(self): string = 'InChI=1S/XXXX/cXXX/hXXX/lp1,2' - inchi, u_indices, p_indices = decompose(string) + inchi, u_indices, p_indices = decompose_aug_inchi(string) self.assertEquals([1,2], p_indices) def test_inchi_u_layer_p_layer(self): string = 'InChI=1S/XXXX/cXXX/hXXX/u1,2/lp3,4' - inchi, u_indices, p_indices = 
decompose(string) + inchi, u_indices, p_indices = decompose_aug_inchi(string) self.assertEquals([1,2], u_indices) self.assertEquals([3,4], p_indices) @@ -181,7 +182,7 @@ def test_inchi_p_layer_zero_lp(self): pairs can be read correctly. """ string = 'InChI=1S/XXXX/cXXX/hXXX/lp1(0)' - inchi, u_indices, p_indices = decompose(string) + inchi, u_indices, p_indices = decompose_aug_inchi(string) self.assertEquals([(1,0)], p_indices) class CreateULayerTest(unittest.TestCase): @@ -232,22 +233,22 @@ def testC4H6(self): class ExpectedLonePairsTest(unittest.TestCase): def test_SingletCarbon(self): mol = Molecule(atoms=[Atom(element='C', lonePairs=1)]) - unexpected = has_unexpected_lone_pairs(mol) + unexpected = _has_unexpected_lone_pairs(mol) self.assertTrue(unexpected) def test_NormalCarbon(self): mol = Molecule(atoms=[Atom(element='C', lonePairs=0)]) - unexpected = has_unexpected_lone_pairs(mol) + unexpected = _has_unexpected_lone_pairs(mol) self.assertFalse(unexpected) def test_NormalOxygen(self): mol = Molecule(atoms=[Atom(element='O', lonePairs=2)]) - unexpected = has_unexpected_lone_pairs(mol) + unexpected = _has_unexpected_lone_pairs(mol) self.assertFalse(unexpected) def test_Oxygen_3LP(self): mol = Molecule(atoms=[Atom(element='O', lonePairs=3)]) - unexpected = has_unexpected_lone_pairs(mol) + unexpected = _has_unexpected_lone_pairs(mol) self.assertTrue(unexpected) From 27bbf87f6ebefa58bd5c5424b3c2c1371ae5105b Mon Sep 17 00:00:00 2001 From: Max Liu Date: Fri, 20 Oct 2017 15:39:28 -0400 Subject: [PATCH 10/57] Standardize importing of OpenBabel Assume RDKit is installed and check for OpenBabel --- rmgpy/molecule/converter.py | 16 ++++++++++++++-- rmgpy/molecule/molecule.py | 7 ++----- rmgpy/molecule/translator.pxd | 1 - rmgpy/molecule/translator.py | 23 +++++++---------------- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py index 338beff8fb..c55d32d407 100644 --- a/rmgpy/molecule/converter.py +++ 
b/rmgpy/molecule/converter.py @@ -35,9 +35,17 @@ import sys import cython -import openbabel +# Assume that rdkit is installed from rdkit import Chem - +# Test if openbabel is installed +try: + import openbabel +except ImportError: + OB_INSTALLED = False +else: + OB_INSTALLED = True + +from rmgpy.exceptions import DependencyError from rmgpy.molecule import element as elements from rmgpy.molecule.molecule import Atom, Bond @@ -191,6 +199,8 @@ def toOBMol(mol, returnMapping=False): Convert a molecular structure to an OpenBabel OBMol object. Uses `OpenBabel `_ to perform the conversion. """ + if not OB_INSTALLED: + raise DependencyError('OpenBabel is not installed. Please install or use RDKit.') # Sort the atoms to ensure consistent output mol.sortAtoms() @@ -229,6 +239,8 @@ def fromOBMol(mol, obmol): # cython.declare(i=cython.int) # cython.declare(radicalElectrons=cython.int, charge=cython.int, lonePairs=cython.int) # cython.declare(atom=Atom, atom1=Atom, atom2=Atom, bond=Bond) + if not OB_INSTALLED: + raise DependencyError('OpenBabel is not installed. 
Please install or use RDKit.') mol.vertices = [] diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 220445caff..01cfd03711 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -45,10 +45,6 @@ import itertools from copy import deepcopy -try: - import openbabel -except: - pass from .graph import Vertex, Edge, Graph, getVertexConnectivityValue import rmgpy.molecule.group as gr from rmgpy.molecule.pathfinder import find_shortest_path @@ -60,6 +56,7 @@ import rmgpy.molecule.resonance as resonance from .kekulize import kekulize from .adjlist import Saturator +from rmgpy.exceptions import DependencyError ################################################################################ @@ -1884,7 +1881,7 @@ def getAromaticRings(self, rings=None): logging.info('Trying to use OpenBabel to check aromaticity.') try: obmol, obAtomIds = converter.toOBMol(self, returnMapping=True) - except ImportError: + except DependencyError: logging.warning('Unable to check aromaticity by converting for OB Mol.') return [], [] else: diff --git a/rmgpy/molecule/translator.pxd b/rmgpy/molecule/translator.pxd index 593901741c..f9b1d89dd1 100644 --- a/rmgpy/molecule/translator.pxd +++ b/rmgpy/molecule/translator.pxd @@ -30,7 +30,6 @@ cimport element as elements cimport inchi as inchiutil cpdef list BACKENDS -cpdef dict INSTALLED_BACKENDS cpdef dict INCHI_LOOKUPS cpdef dict SMILES_LOOKUPS diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 9060eb46a4..4642af4d0c 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -38,17 +38,15 @@ import itertools import logging -# Assume that OB is not installed by default -INSTALLED_BACKENDS = { - 'OB': False, -} - +# Assume that rdkit is installed +from rdkit import Chem +# Test if openbabel is installed try: import openbabel - INSTALLED_BACKENDS['OB'] = True -except: - pass -from rdkit import Chem +except ImportError: + BACKENDS = ['rdkit'] +else: + BACKENDS = 
['openbabel', 'rdkit'] from .molecule import Atom from rmgpy.molecule.converter import toRDKitMol, fromRDKitMol, toOBMol, fromOBMol @@ -58,13 +56,6 @@ # constants -BACKENDS = [ - 'rdkit', -] - -if INSTALLED_BACKENDS['OB']: - BACKENDS.insert(0, 'openbabel') - INCHI_LOOKUPS = { 'H': '[H]', # RDkit was improperly handling the Hydrogen radical from InChI 'He': '[He]', From 6ec62b8a948bd8aed0378e5c2a546ecdb40f0bc1 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Fri, 20 Oct 2017 15:48:42 -0400 Subject: [PATCH 11/57] Rename two constants in translator.py --- rmgpy/molecule/translator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 4642af4d0c..6f426c7cec 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -87,7 +87,7 @@ } #: This dictionary is used to shortcut lookups of a molecule's SMILES string from its chemical formula. -_known_smiles_molecules = { +MOLECULE_LOOKUPS = { 'N2': 'N#N', 'CH4': 'C', 'CH2O': 'C=O', @@ -113,7 +113,7 @@ 'HI': 'I', } -_known_smiles_radicals = { +RADICAL_LOOKUPS = { 'CH3': '[CH3]', 'HO': '[OH]', 'C2H5': 'C[CH2]', @@ -275,9 +275,9 @@ def toSMILES(mol): try: if mol.isRadical(): - return _known_smiles_radicals[mol.getFormula()] + return RADICAL_LOOKUPS[mol.getFormula()] else: - return _known_smiles_molecules[mol.getFormula()] + return MOLECULE_LOOKUPS[mol.getFormula()] except KeyError: # It wasn't in the above list. 
pass From 543bed65f0f2258f75070632d56f6d75bbb416d6 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Fri, 20 Oct 2017 19:47:56 -0400 Subject: [PATCH 12/57] Refactor translator module Keep existing to/from methods as is Rename __parse function to _read for interpreting identifiers Create _write function for generating identifiers Create backend specific functions for reading and writing to replace identifier specific functions Note: For InChI Key, the last letter is no longer removed, since doing so defeats the purpose of having the key be an identifier --- rmgpy/molecule/molecule.py | 3 - rmgpy/molecule/moleculeTest.py | 4 +- rmgpy/molecule/translator.pxd | 35 ++- rmgpy/molecule/translator.py | 429 ++++++++++++++++--------------- rmgpy/molecule/translatorTest.py | 4 - 5 files changed, 242 insertions(+), 233 deletions(-) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 01cfd03711..0fbd9311d2 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -1431,9 +1431,6 @@ def toInChIKey(self): Convert a molecular structure to an InChI Key string. Uses `RDKit `_ to perform the conversion. - - Removes check-sum dash (-) and character so that only - the 14 + 9 characters remain. 
""" return translator.toInChIKey(self) diff --git a/rmgpy/molecule/moleculeTest.py b/rmgpy/molecule/moleculeTest.py index c34dd845b6..4da1d36ae2 100644 --- a/rmgpy/molecule/moleculeTest.py +++ b/rmgpy/molecule/moleculeTest.py @@ -1240,7 +1240,7 @@ def testInChIKey(self): """ molecule = Molecule().fromInChI('InChI=1S/C7H12/c1-2-7-4-3-6(1)5-7/h6-7H,1-5H2') key = molecule.toInChIKey() - self.assertEqual(key, 'UMRZSTCPUPJPOJ-UHFFFAOYSA') + self.assertEqual(key, 'UMRZSTCPUPJPOJ-UHFFFAOYSA-N') def testAugmentedInChI(self): """ @@ -1262,7 +1262,7 @@ def testAugmentedInChIKey(self): 2 C u1 p0 c0 {1,S} """, saturateH=True) - self.assertEqual(mol.toAugmentedInChIKey(), 'VGGSQFUCUMXWEO-UHFFFAOYSA-u1,2') + self.assertEqual(mol.toAugmentedInChIKey(), 'VGGSQFUCUMXWEO-UHFFFAOYSA-N-u1,2') def testLinearMethane(self): """ diff --git a/rmgpy/molecule/translator.pxd b/rmgpy/molecule/translator.pxd index f9b1d89dd1..b4b579b5d9 100644 --- a/rmgpy/molecule/translator.pxd +++ b/rmgpy/molecule/translator.pxd @@ -33,40 +33,37 @@ cpdef list BACKENDS cpdef dict INCHI_LOOKUPS cpdef dict SMILES_LOOKUPS -cpdef dict _known_smiles_molecules -cpdef dict _known_smiles_radicals +cpdef dict MOLECULE_LOOKUPS +cpdef dict RADICAL_LOOKUPS -cpdef str toInChI(Molecule mol) +cpdef str toInChI(Molecule mol, backend=?) cpdef str toAugmentedInChI(Molecule mol) -cpdef str toInChIKey(Molecule mol) +cpdef str toInChIKey(Molecule mol, backend=?) cpdef str toAugmentedInChIKey(Molecule mol) -cpdef str toSMARTS(Molecule mol) +cpdef str toSMARTS(Molecule mol, backend=?) -cpdef str toSMILES(Molecule mol) +cpdef str toSMILES(Molecule mol, backend=?) -cdef Molecule __fromSMILES(Molecule mol, str smilesstr, str backend) +cpdef Molecule fromInChI(Molecule mol, str inchistr, backend=?) -cdef Molecule __fromInChI(Molecule mol, str inchistr, str backend) +cpdef Molecule fromSMILES(Molecule mol, str smilesstr, str backend=?) 
-cdef Molecule __fromSMARTS(Molecule mol, str identifier, str backend) +cpdef Molecule fromSMARTS(Molecule mol, str smartsstr, str backend=?) -cdef Molecule __parse(Molecule mol, str identifier, str type_identifier, str backend) - -cpdef Molecule parse_openbabel(Molecule mol, str identifier, str type_identifier) - -cpdef isCorrectlyParsed(Molecule mol, str identifier) +cpdef Molecule fromAugmentedInChI(Molecule mol, aug_inchi) -cpdef Molecule fromInChI(Molecule mol, str inchistr, backend=*) +cpdef object _rdkit_translator(object input_object, str identifier_type, Molecule mol=?) -cpdef Molecule fromSMILES(Molecule mol, str smilesstr, str backend=*) +cpdef object _openbabel_translator(object input_object, str identifier_type, Molecule mol=?) -cpdef Molecule fromSMARTS(Molecule mol, str smartsstr, str backend=*) +cdef Molecule _lookup(Molecule mol, str identifier, str identifier_type) -cpdef Molecule fromAugmentedInChI(Molecule mol, aug_inchi) +cpdef _is_correctly_parsed(Molecule mol, str identifier) -cdef Molecule __lookup(Molecule mol, str identifier, str type_identifier) +cdef Molecule _read(Molecule mol, str identifier, str identifier_type, str backend) +cdef str _write(Molecule mol, str identifier_type, str backend) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 6f426c7cec..d059452808 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -48,7 +48,8 @@ else: BACKENDS = ['openbabel', 'rdkit'] -from .molecule import Atom +from rmgpy.exceptions import DependencyError +from .molecule import Atom, Molecule from rmgpy.molecule.converter import toRDKitMol, fromRDKitMol, toOBMol, fromOBMol import rmgpy.molecule.inchi as inchiutil @@ -138,7 +139,7 @@ } -def toInChI(mol): +def toInChI(mol, backend='try-all'): """ Convert a molecular structure to an InChI string. Uses `RDKit `_ to perform the conversion. @@ -149,19 +150,7 @@ def toInChI(mol): Convert a molecular structure to an InChI string. 
Uses `OpenBabel `_ to perform the conversion. """ - try: - if not Chem.inchi.INCHI_AVAILABLE: - return "RDKitInstalledWithoutInChI" - rdkitmol = toRDKitMol(mol) - return Chem.inchi.MolToInchi(rdkitmol, options='-SNon') - except: - pass - - obmol = toOBMol(mol) - obConversion = openbabel.OBConversion() - obConversion.SetOutFormat('inchi') - obConversion.SetOptions('w', openbabel.OBConversion.OUTOPTIONS) - return obConversion.WriteString(obmol).strip() + return _write(mol, 'inchi', backend) def toAugmentedInChI(mol): @@ -171,7 +160,6 @@ def toAugmentedInChI(mol): Two additional layers are added to the InChI: - unpaired electrons layer: the position of the unpaired electrons in the molecule - """ cython.declare( @@ -188,7 +176,7 @@ def toAugmentedInChI(mol): return aug_inchi -def toInChIKey(mol): +def toInChIKey(mol, backend='try-all'): """ Convert a molecular structure to an InChI Key string. Uses `OpenBabel `_ to perform the conversion. @@ -197,26 +185,8 @@ def toInChIKey(mol): Convert a molecular structure to an InChI Key string. Uses `RDKit `_ to perform the conversion. - - Removes check-sum dash (-) and character so that only - the 14 + 9 characters remain. """ - try: - if not Chem.inchi.INCHI_AVAILABLE: - return "RDKitInstalledWithoutInChI" - inchi = toInChI(mol) - return Chem.inchi.InchiToInchiKey(inchi)[:-2] - except: - pass - - # for atom in mol.vertices: - # if atom.isNitrogen(): - obmol = toOBMol(mol) - obConversion = openbabel.OBConversion() - obConversion.SetOutFormat('inchi') - obConversion.SetOptions('w', openbabel.OBConversion.OUTOPTIONS) - obConversion.SetOptions('K', openbabel.OBConversion.OUTOPTIONS) - return obConversion.WriteString(obmol).strip()[:-2] + return _write(mol, 'inchikey', backend) def toAugmentedInChIKey(mol): @@ -237,18 +207,16 @@ def toAugmentedInChIKey(mol): return inchiutil.compose_aug_inchi_key(key, ulayer, player) -def toSMARTS(mol): +def toSMARTS(mol, backend='rdkit'): """ Convert a molecular structure to an SMARTS string. 
Uses `RDKit `_ to perform the conversion. Perceives aromaticity and removes Hydrogen atoms. """ - rdkitmol = toRDKitMol(mol) + return _write(mol, 'sma', backend) - return Chem.MolToSmarts(rdkitmol) - -def toSMILES(mol): +def toSMILES(mol, backend='default'): """ Convert a molecular structure to an SMILES string. @@ -261,156 +229,194 @@ def toSMILES(mol): While converting to an RDMolecule it will perceive aromaticity and removes Hydrogen atoms. """ - # If we're going to have to check the formula anyway, # we may as well shortcut a few small known molecules. - # Dictionary lookups are O(1) so this should be fast: + # Dictionary lookups are O(1) so this should be fast. # The dictionary is defined at the top of this file. - - cython.declare( - atom=Atom, - # obmol=, - # rdkitmol=, - ) - try: if mol.isRadical(): - return RADICAL_LOOKUPS[mol.getFormula()] + output = RADICAL_LOOKUPS[mol.getFormula()] else: - return MOLECULE_LOOKUPS[mol.getFormula()] + output = MOLECULE_LOOKUPS[mol.getFormula()] except KeyError: - # It wasn't in the above list. - pass - for atom in mol.vertices: - if atom.isNitrogen(): - obmol = toOBMol(mol) - try: - SMILEwriter = openbabel.OBConversion() - SMILEwriter.SetOutFormat('smi') - SMILEwriter.SetOptions("i", - SMILEwriter.OUTOPTIONS) # turn off isomer and stereochemistry information (the @ signs!) 
- except: - pass - return SMILEwriter.WriteString(obmol).strip() - - rdkitmol = toRDKitMol(mol, sanitize=False) - if not mol.isAromatic(): - return Chem.MolToSmiles(rdkitmol, kekuleSmiles=True) - return Chem.MolToSmiles(rdkitmol) - - - -def __fromSMILES(mol, smilesstr, backend): - """Replace the Molecule `mol` with that given by the SMILES `smilesstr` - using the backend `backend`""" - if backend.lower() == 'rdkit': - rdkitmol = Chem.MolFromSmiles(smilesstr) - if rdkitmol is None: - raise ValueError("Could not interpret the SMILES string {0!r}".format(smilesstr)) - fromRDKitMol(mol, rdkitmol) - return mol - elif backend.lower() == 'openbabel': - parse_openbabel(mol, smilesstr, 'smi') - return mol - else: - raise NotImplementedError('Unrecognized backend for SMILES parsing: {0}'.format(backend)) - - -def __fromInChI(mol, inchistr, backend): - """Replace the Molecule `mol` with that given by the InChI `inchistr` - using the backend `backend`""" - if backend.lower() == 'rdkit': - rdkitmol = Chem.inchi.MolFromInchi(inchistr, removeHs=False) - mol = fromRDKitMol(mol, rdkitmol) - return mol - elif backend.lower() == 'openbabel': - return parse_openbabel(mol, inchistr, 'inchi') + if backend == 'default': + for atom in mol.atoms: + if atom.isNitrogen(): + return _write(mol, 'smi', backend='openbabel') + return _write(mol, 'smi', backend='rdkit') + else: + return _write(mol, 'smi', backend=backend) else: - raise NotImplementedError('Unrecognized backend for InChI parsing: {0}'.format(backend)) + return output -def __fromSMARTS(mol, smartsstr, backend): - """Replace the Molecule `mol` with that given by the SMARTS `smartsstr` - using the backend `backend`""" - if backend.lower() == 'rdkit': - rdkitmol = Chem.MolFromSmarts(smartsstr) - if rdkitmol is None: - raise ValueError("Could not interpret the SMARTS string {0!r}".format(smartsstr)) - fromRDKitMol(mol, rdkitmol) - return mol +def fromInChI(mol, inchistr, backend='try-all'): + """ + Convert an InChI string `inchistr` to 
a molecular structure. Uses + a user-specified backend for conversion, currently supporting + rdkit (default) and openbabel. + """ + mol.InChI = inchistr + + if inchiutil.INCHI_PREFIX in inchistr: + return _read(mol, inchistr, 'inchi', backend) else: - raise NotImplementedError('Unrecognized backend for SMARTS parsing: {0}'.format(backend)) + return _read(mol, inchiutil.INCHI_PREFIX + '/' + inchistr, 'inchi', backend) -def __parse(mol, identifier, type_identifier, backend): +def fromAugmentedInChI(mol, aug_inchi): """ - Parses the identifier based on the type of identifier (inchi/smi/sma) - and the backend used. + Creates a Molecule object from the augmented inchi. - First, look up the identifier in a dictionary to see if it can be processed - this way. + First, the inchi is converted into a Molecule using + the backend parsers. - If not in the dictionary, parse it through the specified backed, - or try all backends. + Next, the multiplicity and unpaired electron information + is used to fix a number of parsing errors made by the backends. + + Finally, the atom types of the corrected molecule are perceived. 
+ Returns a Molecule object """ - if __lookup(mol, identifier, type_identifier) is not None: - if isCorrectlyParsed(mol, identifier): - mol.updateAtomTypes() - return mol + if not isinstance(aug_inchi, inchiutil.AugmentedInChI): + aug_inchi = inchiutil.AugmentedInChI(aug_inchi) - for _backend in (BACKENDS if backend == 'try-all' else [backend]): - if type_identifier == 'smi': - __fromSMILES(mol, identifier, _backend) - elif type_identifier == 'inchi': - __fromInChI(mol, identifier, _backend) - elif type_identifier == 'sma': - __fromSMARTS(mol, identifier, _backend) - else: - raise NotImplementedError("Unknown identifier type {0}".format(type_identifier)) + mol = fromInChI(mol, aug_inchi.inchi) + + mol.multiplicity = len(aug_inchi.u_indices) + 1 if aug_inchi.u_indices else 1 + + inchiutil.fix_molecule(mol, aug_inchi) + + mol.updateAtomTypes() - if isCorrectlyParsed(mol, identifier): - mol.updateAtomTypes() - return mol - else: - logging.debug('Backend %s is not able to parse identifier %s', _backend, identifier) - - logging.error("Unable to correctly parse %s with backend %s", identifier, backend) - raise Exception("Couldn't parse {0}".format(identifier)) - - -def parse_openbabel(mol, identifier, type_identifier): - """Converts the identifier to a Molecule using Openbabel.""" - obConversion = openbabel.OBConversion() - obConversion.SetInAndOutFormats(type_identifier, "smi") # SetInFormat(identifier) does not exist. - obmol = openbabel.OBMol() - obConversion.ReadString(obmol, identifier) - obmol.AddHydrogens() - obmol.AssignSpinMultiplicity(True) - fromOBMol(mol, obmol) - # mol.updateAtomTypes() return mol -def isCorrectlyParsed(mol, identifier): - """Check if molecule object has been correctly parsed.""" - conditions = [] +def fromSMARTS(mol, smartsstr, backend='rdkit'): + """ + Convert a SMARTS string `smartsstr` to a molecular structure. Uses + `RDKit `_ to perform the conversion. + This Kekulizes everything, removing all aromatic atom types. 
+ """ + return _read(mol, smartsstr, 'sma', backend) - if mol.atoms: - conditions.append(True) + +def fromSMILES(mol, smilesstr, backend='try-all'): + """ + Convert a SMILES string `smilesstr` to a molecular structure. Uses + a user-specified backend for conversion, currently supporting + rdkit (default) and openbabel. + """ + return _read(mol, smilesstr, 'smi', backend) + + +def _rdkit_translator(input_object, identifier_type, mol=None): + """ + Converts between formats using RDKit. If input is a :class:`Molecule`, + the identifier_type is used to determine the output type. If the input is + a `str`, then the identifier_type is used to identify the input, and the + desired output is assumed to be a :class:`Molecule` object. + + Args: + input_object: either molecule or string identifier + identifier_type: format of string identifier + 'inchi' -> InChI + 'inchikey' -> InChI Key + 'sma' -> SMARTS + 'smi' -> SMILES + mol: molecule object for output (optional) + """ + if identifier_type == 'inchi' and not Chem.inchi.INCHI_AVAILABLE: + raise DependencyError("RDKit installed without InChI. 
Please reinstall to read and write InChI strings.") + + if isinstance(input_object, str): + # We are converting from a string identifier to a molecule + if identifier_type == 'inchi': + rdkitmol = Chem.inchi.MolFromInchi(input_object, removeHs=False) + elif identifier_type == 'sma': + rdkitmol = Chem.MolFromSmarts(input_object) + elif identifier_type == 'smi': + rdkitmol = Chem.MolFromSmiles(input_object) + else: + raise ValueError('Identifier type {0} is not supported for reading using RDKit.'.format(identifier_type)) + if rdkitmol is None: + raise ValueError("Could not interpret the identifier {0!r}".format(input_object)) + output = fromRDKitMol(mol, rdkitmol) + elif isinstance(input_object, Molecule): + # We are converting from a molecule to a string identifier + rdkitmol = toRDKitMol(input_object, sanitize=False) + if identifier_type == 'inchi': + output = Chem.inchi.MolToInchi(rdkitmol, options='-SNon') + elif identifier_type == 'inchikey': + inchi = toInChI(mol) + output = Chem.inchi.InchiToInchiKey(inchi) + elif identifier_type == 'sma': + output = Chem.MolToSmarts(rdkitmol) + elif identifier_type == 'smi': + if input_object.isAromatic(): + output = Chem.MolToSmiles(rdkitmol) + else: + output = Chem.MolToSmiles(rdkitmol, kekuleSmiles=True) + else: + raise ValueError('Identifier type {0} is not supported for writing using RDKit.'.format(identifier_type)) else: - conditions.append(False) + raise ValueError('Unexpected input format. Should be a Molecule or a string.') - if 'InChI' in identifier: - inchi_elementcount = util.retrieveElementCount(identifier) - mol_elementcount = util.retrieveElementCount(mol) - conditions.append(inchi_elementcount == mol_elementcount) + return output - return all(conditions) + +def _openbabel_translator(input_object, identifier_type, mol=None): + """ + Converts between formats using OpenBabel. If input is a :class:`Molecule`, + the identifier_type is used to determine the output type. 
If the input is + a `str`, then the identifier_type is used to identify the input, and the + desired output is assumed to be a :class:`Molecule` object. + + Args: + input_object: either molecule or string identifier + identifier_type: format of string identifier + 'inchi' -> InChI + 'inchikey' -> InChI Key + 'smi' -> SMILES + mol: molecule object for output (optional) + """ + ob_conversion = openbabel.OBConversion() + + if isinstance(input_object, str): + # We are converting from a string identifier to a Molecule + ob_conversion.SetInFormat(identifier_type) + obmol = openbabel.OBMol() + ob_conversion.ReadString(obmol, input_object) + obmol.AddHydrogens() + obmol.AssignSpinMultiplicity(True) + if mol is None: + mol = Molecule() + output = fromOBMol(mol, obmol) + elif isinstance(input_object, Molecule): + # We are converting from a Molecule to a string identifier + if identifier_type == 'inchi': + ob_conversion.SetOutFormat('inchi') + ob_conversion.AddOption('w') + elif identifier_type == 'inchikey': + ob_conversion.SetOutFormat('inchi') + ob_conversion.AddOption('w') + ob_conversion.AddOption('K') + elif identifier_type == 'smi': + ob_conversion.SetOutFormat('smi') + # turn off isomer and stereochemistry information + ob_conversion.AddOption('i') + else: + raise ValueError('Unexpected identifier type {0}.'.format(identifier_type)) + obmol = toOBMol(input_object) + output = ob_conversion.WriteString(obmol).strip() + else: + raise ValueError('Unexpected input format. Should be a Molecule or a string.') + + return output -def __lookup(mol, identifier, type_identifier): +def _lookup(mol, identifier, identifier_type): """ Looks up the identifier and parses it the way we think is best. @@ -418,13 +424,13 @@ def __lookup(mol, identifier, type_identifier): For troublesome smiles, we look up the adj list, and parse the adj list. 
""" - if type_identifier.lower() == 'inchi': + if identifier_type.lower() == 'inchi': try: smi = INCHI_LOOKUPS[identifier.split('/', 1)[1]] return mol.fromSMILES(smi) except KeyError: return None - elif type_identifier.lower() == 'smi': + elif identifier_type.lower() == 'smi': try: adjList = SMILES_LOOKUPS[identifier] return mol.fromAdjacencyList(adjList) @@ -432,66 +438,79 @@ def __lookup(mol, identifier, type_identifier): return None -def fromInChI(mol, inchistr, backend='try-all'): - """ - Convert an InChI string `inchistr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting - rdkit (default) and openbabel. - """ - - mol.InChI = inchistr +def _is_correctly_parsed(mol, identifier): + """Check if molecule object has been correctly parsed.""" + conditions = [] - if inchiutil.INCHI_PREFIX in inchistr: - return __parse(mol, inchistr, 'inchi', backend) + if mol.atoms: + conditions.append(True) else: - return __parse(mol, inchiutil.INCHI_PREFIX + '/' + inchistr, 'inchi', backend) - - -def fromAugmentedInChI(mol, aug_inchi): - """ - Creates a Molecule object from the augmented inchi. + conditions.append(False) - First, the inchi is converted into a Molecule using - the backend parsers. + if 'InChI' in identifier: + inchi_elementcount = util.retrieveElementCount(identifier) + mol_elementcount = util.retrieveElementCount(mol) + conditions.append(inchi_elementcount == mol_elementcount) - Next, the multiplicity and unpaired electron information - is used to fix a number of parsing errors made by the backends. + return all(conditions) - Finally, the atom types of the corrected molecule are perceived. - Returns a Molecule object +def _read(mol, identifier, identifier_type, backend): """ + Parses the identifier based on the type of identifier (inchi/smi/sma) + and the backend used. 
- if not isinstance(aug_inchi, inchiutil.AugmentedInChI): - aug_inchi = inchiutil.AugmentedInChI(aug_inchi) + First, look up the identifier in a dictionary to see if it can be processed + this way. - mol = fromInChI(mol, aug_inchi.inchi) + If not in the dictionary, parse it through the specified backed, + or try all backends. + """ - mol.multiplicity = len(aug_inchi.u_indices) + 1 if aug_inchi.u_indices else 1 + if _lookup(mol, identifier, identifier_type) is not None: + if _is_correctly_parsed(mol, identifier): + mol.updateAtomTypes() + return mol - inchiutil.fix_molecule(mol, aug_inchi) + for backend in (BACKENDS if backend == 'try-all' else [backend]): + if backend == 'rdkit': + mol = _rdkit_translator(identifier, identifier_type, mol) + elif backend == 'openbabel': + mol = _openbabel_translator(identifier, identifier_type, mol) + else: + raise NotImplementedError("Unrecognized backend {0}".format(backend)) - mol.updateAtomTypes() + if _is_correctly_parsed(mol, identifier): + mol.updateAtomTypes() + return mol + else: + logging.debug('Backend %s is not able to parse identifier %s', backend, identifier) - return mol + raise ValueError("Unable to correctly parse {0} with backend {1}.".format(identifier, backend)) -def fromSMILES(mol, smilesstr, backend='try-all'): - """ - Convert a SMILES string `smilesstr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting - rdkit (default) and openbabel. +def _write(mol, identifier_type, backend): """ - return __parse(mol, smilesstr, 'smi', backend) + Converts the input molecule to the specified identifier type. + Uses backends as specified by the `backend` argument. -def fromSMARTS(mol, smartsstr, backend='rdkit'): - """ - Convert a SMARTS string `smartsstr` to a molecular structure. Uses - `RDKit `_ to perform the conversion. - This Kekulizes everything, removing all aromatic atom types. + Returns a string identifier of the requested type. 
""" + for backend in (BACKENDS if backend == 'try-all' else [backend]): + if backend == 'rdkit': + try: + output = _rdkit_translator(mol, identifier_type) + except ValueError: + continue + elif backend == 'openbabel': + try: + output = _openbabel_translator(mol, identifier_type) + except ValueError: + continue + else: + raise NotImplementedError("Unrecognized backend {0}".format(backend)) - return __parse(mol, smartsstr, 'sma', backend) - + return output + raise ValueError("Unable to generate identifier type {0} with backend {1}.".format(identifier_type, backend)) diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index 1c26836ce4..da29caa154 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -332,15 +332,11 @@ def test_singlet_vs_closed_shell(self): # aug_inchi = 'InChI=1S/C6H5/c1-2-4-6-5-3-1/h1-5H/u1' # self.compare(adjlist, aug_inchi) - @work_in_progress def test_C5H6_triplet_singlet(self): """ n-C5 chain with 2 unpaired electrons at the terminal carbon atoms, and 2 carbon atoms with each a lone pair, next to a terminal carbon atom. - - InChI generation currently generates: - "InChI=1S/C5H10/c1-3-5-4-2/h1-5H2/u1,2/lp4,5" """ adjlist = """ From 7f374c0d8496294e41cc8896b4e8ef2921cd68a9 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Mon, 22 Jan 2018 17:03:23 -0500 Subject: [PATCH 13/57] Combine InChI generation methods with aug_level argument Simplifies code structure Also add another augmentation level with just the multiplicity --- rmgpy/molecule/molecule.py | 4 +- rmgpy/molecule/translator.pxd | 8 +--- rmgpy/molecule/translator.py | 90 ++++++++++++++++++----------------- 3 files changed, 51 insertions(+), 51 deletions(-) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 0fbd9311d2..7af06a5e1c 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -1419,7 +1419,7 @@ def toAugmentedInChI(self): Separate layer with a forward slash character. 
""" - return translator.toAugmentedInChI(self) + return translator.toInChI(self, aug_level=2) def toInChIKey(self): @@ -1442,7 +1442,7 @@ def toAugmentedInChIKey(self): Simply append the multiplicity string, do not separate by a character like forward slash. """ - return translator.toAugmentedInChIKey(self) + return translator.toInChIKey(self, aug_level=2) def toSMARTS(self): diff --git a/rmgpy/molecule/translator.pxd b/rmgpy/molecule/translator.pxd index b4b579b5d9..c887e1f661 100644 --- a/rmgpy/molecule/translator.pxd +++ b/rmgpy/molecule/translator.pxd @@ -36,13 +36,9 @@ cpdef dict SMILES_LOOKUPS cpdef dict MOLECULE_LOOKUPS cpdef dict RADICAL_LOOKUPS -cpdef str toInChI(Molecule mol, backend=?) +cpdef str toInChI(Molecule mol, str backend=?, int aug_level=?) -cpdef str toAugmentedInChI(Molecule mol) - -cpdef str toInChIKey(Molecule mol, backend=?) - -cpdef str toAugmentedInChIKey(Molecule mol) +cpdef str toInChIKey(Molecule mol, str backend=?, int aug_level=?) cpdef str toSMARTS(Molecule mol, backend=?) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index d059452808..7bd19bfd1e 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -139,72 +139,76 @@ } -def toInChI(mol, backend='try-all'): +def toInChI(mol, backend='try-all', aug_level=0): """ - Convert a molecular structure to an InChI string. Uses - `RDKit `_ to perform the conversion. - Perceives aromaticity. + Convert a molecular structure to an InChI string. + For aug_level=0, generates the canonical InChI. + For aug_level=1, appends the molecule multiplicity. + For aug_level=2, appends positions of unpaired and paired electrons. - or + Uses RDKit or OpenBabel for conversion. - Convert a molecular structure to an InChI string. Uses - `OpenBabel `_ to perform the conversion. 
+ Args: + backend choice of backend, 'try-all', 'rdkit', or 'openbabel' + aug_level level of augmentation, 0, 1, or 2 """ - return _write(mol, 'inchi', backend) + cython.declare(inchi=str, ulayer=str, player=str, mlayer=str) + if aug_level == 0: + return _write(mol, 'inchi', backend) -def toAugmentedInChI(mol): - """ - This function generates the augmented InChI canonical identifier, and that allows for the differentiation - between structures with spin states and multiple unpaired electrons. + elif aug_level == 1: + inchi = toInChI(mol, backend=backend) - Two additional layers are added to the InChI: - - unpaired electrons layer: the position of the unpaired electrons in the molecule - """ + mlayer = '/mult{0}'.format(mol.multiplicity) if mol.multiplicity != 0 else '' - cython.declare( - inchi=str, - ulayer=str, - aug_inchi=str, - ) - inchi = toInChI(mol) + return inchi + mlayer - ulayer, player = inchiutil.create_augmented_layers(mol) + elif aug_level == 2: + inchi = toInChI(mol, backend=backend) - aug_inchi = inchiutil.compose_aug_inchi(inchi, ulayer, player) + ulayer, player = inchiutil.create_augmented_layers(mol) - return aug_inchi + return inchiutil.compose_aug_inchi(inchi, ulayer, player) + + else: + raise ValueError("Implemented values for aug_level are 0, 1, or 2.") -def toInChIKey(mol, backend='try-all'): +def toInChIKey(mol, backend='try-all', aug_level=0): """ - Convert a molecular structure to an InChI Key string. Uses - `OpenBabel `_ to perform the conversion. + Convert a molecular structure to an InChI Key string. + For aug_level=0, generates the canonical InChI. + For aug_level=1, appends the molecule multiplicity. + For aug_level=2, appends positions of unpaired and paired electrons. - or + Uses RDKit or OpenBabel for conversion. - Convert a molecular structure to an InChI Key string. Uses - `RDKit `_ to perform the conversion. 
+ Args: + backend choice of backend, 'try-all', 'rdkit', or 'openbabel' + aug_level level of augmentation, 0, 1, or 2 """ - return _write(mol, 'inchikey', backend) + cython.declare(key=str, ulayer=str, player=str, mlayer=str) + if aug_level == 0: + return _write(mol, 'inchikey', backend) -def toAugmentedInChIKey(mol): - """ - Adds additional layers to the InChIKey, - generating the "augmented" InChIKey. - """ + elif aug_level == 1: + key = toInChIKey(mol, backend=backend) - cython.declare( - key=str, - ulayer=str - ) + mlayer = '-mult{0}'.format(mol.multiplicity) if mol.multiplicity != 0 else '' - key = toInChIKey(mol) + return key + mlayer - ulayer, player = inchiutil.create_augmented_layers(mol) + elif aug_level == 2: + key = toInChIKey(mol, backend=backend) - return inchiutil.compose_aug_inchi_key(key, ulayer, player) + ulayer, player = inchiutil.create_augmented_layers(mol) + + return inchiutil.compose_aug_inchi_key(key, ulayer, player) + + else: + raise ValueError("Implemented values for aug_level are 0, 1, or 2.") def toSMARTS(mol, backend='rdkit'): From 5bd165b7688c4a3e5ef6032dc15874ff71745ad5 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 24 Jan 2018 13:46:17 -0500 Subject: [PATCH 14/57] Add some error checking in translator._read Raise error if an InChI key is provided Raise error if identifier is an InChI but identifier type is not --- rmgpy/molecule/translator.py | 5 +++++ rmgpy/molecule/translatorTest.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 7bd19bfd1e..71566558d7 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -470,6 +470,11 @@ def _read(mol, identifier, identifier_type, backend): If not in the dictionary, parse it through the specified backed, or try all backends. 
""" + # Check for potential mistakes in input arguments + if 'InChIKey' in identifier: + raise ValueError('InChIKey is a write-only format and cannot be parsed.') + elif 'InChI' in identifier and identifier_type != 'inchi': + raise ValueError('Improper identifier type "{0}". The provided identifier appears to be an InChI.'.format(identifier_type)) if _lookup(mol, identifier, identifier_type) is not None: if _is_correctly_parsed(mol, identifier): diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index da29caa154..c489735c75 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -32,6 +32,7 @@ This module contains unit test for the translator module. """ +import mock import re import unittest from external.wip import work_in_progress @@ -974,6 +975,20 @@ def test_toRDKitMol(self): rdkitBondOrder = bondOrderDict[bondType] self.assertEqual(bond.order, rdkitBondOrder) + def test_incorrect_identifier_type(self): + """Test that the appropriate error is raised for identifier/type mismatch.""" + with self.assertRaises(ValueError) as cm: + Molecule().fromSMILES('InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H') + + self.assertTrue('Improper identifier type' in cm.exception.message) + + def test_read_inchikey_error(self): + """Test that the correct error is raised when reading an InChIKey""" + with self.assertRaises(ValueError) as cm: + Molecule().fromInChI('InChIKey=UHOVQNZJYSORNB-UHFFFAOYSA-N') + + self.assertTrue('InChIKey is a write-only format' in cm.exception.message) + class InChIParsingTest(unittest.TestCase): def compare(self, inchi, u_indices=None, p_indices=None): From 8dd8d37ad50f5ee48241004fd15ed1ce2b3ea472 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Tue, 27 Feb 2018 18:20:01 -0500 Subject: [PATCH 15/57] Fix RDKitMol conversion to avoid losing lone pairs Main change is to update lone pairs first when converting from RDKitMol Added line to set formal charge when converting to RDKitMol to be safe Add unit test for 
lone pairs Removed unused line --- rmgpy/molecule/converter.py | 4 +++- rmgpy/molecule/converterTest.py | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py index c55d32d407..454538ac02 100644 --- a/rmgpy/molecule/converter.py +++ b/rmgpy/molecule/converter.py @@ -69,6 +69,7 @@ def toRDKitMol(mol, removeHs=True, returnMapping=False, sanitize=True): for index, atom in enumerate(mol.vertices): rdAtom = Chem.rdchem.Atom(atom.element.symbol) rdAtom.SetNumRadicalElectrons(atom.radicalElectrons) + rdAtom.SetFormalCharge(atom.charge) if atom.element.symbol == 'C' and atom.lonePairs == 1 and mol.multiplicity == 1: rdAtom.SetNumRadicalElectrons(2) rdkitmol.AddAtom(rdAtom) if removeHs and atom.symbol == 'H': @@ -140,7 +141,6 @@ def fromRDKitMol(mol, rdkitmol): # Add bonds by iterating again through atoms for j in xrange(0, i): - rdkitatom2 = rdkitmol.GetAtomWithIdx(j + 1) rdkitbond = rdkitmol.GetBondBetweenAtoms(i, j) if rdkitbond is not None: order = 0 @@ -155,6 +155,8 @@ def fromRDKitMol(mol, rdkitmol): bond = Bond(mol.vertices[i], mol.vertices[j], order) mol.addBond(bond) + # We need to update lone pairs first because the charge was set by RDKit + mol.updateLonePairs() # Set atom types and connectivity values mol.update() diff --git a/rmgpy/molecule/converterTest.py b/rmgpy/molecule/converterTest.py index 7fcb2efd30..1e348a959f 100644 --- a/rmgpy/molecule/converterTest.py +++ b/rmgpy/molecule/converterTest.py @@ -34,10 +34,13 @@ import unittest -from .converter import debugRDKitMol +from rmgpy.exceptions import AtomTypeError +from rmgpy.molecule.converter import debugRDKitMol, toRDKitMol, fromRDKitMol +from rmgpy.molecule.molecule import Molecule class RDKitTest(unittest.TestCase): + def testDebugger(self): """ Test the debugRDKitMol(rdmol) function doesn't crash @@ -50,3 +53,21 @@ def testDebugger(self): import logging rdmol = rdkit.Chem.MolFromSmiles('CCC') message = 
debugRDKitMol(rdmol, level=logging.INFO) + + def test_lone_pair_retention(self): + """Test that we don't lose any lone pairs on round trip RDKit conversion.""" + mol = Molecule().fromAdjacencyList( +""" +1 C u0 p0 c0 {2,D} {3,S} {4,S} +2 O u0 p2 c0 {1,D} +3 H u0 p0 c0 {1,S} +4 H u0 p0 c0 {1,S} +""") + rdmol = toRDKitMol(mol) + + try: + mol2 = fromRDKitMol(Molecule(), rdmol) + except AtomTypeError as e: + self.fail('Could not convert from RDKitMol: ' + e.message) + else: + self.assertTrue(mol.isIsomorphic(mol2)) From 8abbee9a9856bfd3cbac9d74da8e3d6a856cbefc Mon Sep 17 00:00:00 2001 From: Max Liu Date: Tue, 27 Feb 2018 20:39:32 -0500 Subject: [PATCH 16/57] Restore Molecule.sortAtoms() method and related methods Significant effort was spent getting this right in #456 For some reason, these changes were removed in 11e76f6bb5 Mark one resonance unit test as work in progress These changes affected aromaticity perception for that molecule This shows that the previous behavior was actually a bug Will be addressed by future changes to aromaticity perception --- rmgpy/molecule/molecule.py | 22 +++++++++++++++++----- rmgpy/molecule/resonanceTest.py | 1 + 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 7af06a5e1c..d71103a523 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -208,10 +208,10 @@ def equivalent(self, other): return True def getDescriptor(self): - return (self.getAtomConnectivityValue(), self.number) + return self.number, self.getAtomConnectivityValue(), self.radicalElectrons, self.lonePairs, self.charge def getAtomConnectivityValue(self): - return -1*self.connectivity + return getVertexConnectivityValue(self) def isSpecificCaseOf(self, other): """ @@ -846,8 +846,20 @@ def sortAtoms(self): """ Sort the atoms in the graph. This can make certain operations, e.g. the isomorphism functions, much more efficient. 
+ + This function orders atoms using several attributes in atom.getDescriptor(). + Currently it sorts by placing heaviest atoms first and hydrogen atoms last. + Placing hydrogens last during sorting ensures that functions with hydrogen + removal work properly. """ - return self.sortVertices() + cython.declare(vertex=Vertex, a=Atom, index=int) + for vertex in self.vertices: + if vertex.sortingLabel < 0: + self.updateConnectivityValues() + break + self.atoms.sort(key=lambda a: a.getDescriptor(), reverse=True) + for index, vertex in enumerate(self.vertices): + vertex.sortingLabel = index def update(self): """ @@ -861,7 +873,7 @@ def update(self): self.updateAtomTypes() self.updateMultiplicity() - self.sortVertices() + self.sortAtoms() def getFormula(self): """ @@ -1795,7 +1807,7 @@ def saturate_radicals(self): # this is necessary, because saturating with H shouldn't be # changing atom types, but it doesn't hurt anything and is not # very expensive, so will do it anyway) - self.sortVertices() + self.sortAtoms() self.updateAtomTypes() self.multiplicity = 1 diff --git a/rmgpy/molecule/resonanceTest.py b/rmgpy/molecule/resonanceTest.py index 8c8c133b8e..9929dd113d 100644 --- a/rmgpy/molecule/resonanceTest.py +++ b/rmgpy/molecule/resonanceTest.py @@ -381,6 +381,7 @@ def testAromaticResonanceStructures(self): self.assertTrue(result1[0].isIsomorphic(result2[0])) self.assertTrue(result1[0].isIsomorphic(result3[0])) + @work_in_progress def testBridgedAromatic(self): """Test that we can handle bridged aromatics. 
From d53a3ccb0b39d78aee2bf263120f3dd920b46d04 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Tue, 27 Feb 2018 20:45:55 -0500 Subject: [PATCH 17/57] Fix errors and style issues flagged by Codacy Also move two unit tests to converterTest.py Fix one of those unit tests so that it works properly --- rmgpy/molecule/converter.py | 2 - rmgpy/molecule/converterTest.py | 56 ++++++++++++++++++++++++- rmgpy/molecule/inchi.pxd | 2 +- rmgpy/molecule/inchi.py | 27 ++++++------ rmgpy/molecule/inchiTest.py | 10 ++--- rmgpy/molecule/moleculeTest.py | 28 ------------- rmgpy/molecule/translator.py | 3 +- rmgpy/molecule/translatorTest.py | 70 ++++++-------------------------- 8 files changed, 86 insertions(+), 112 deletions(-) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py index 454538ac02..aaf87dd88b 100644 --- a/rmgpy/molecule/converter.py +++ b/rmgpy/molecule/converter.py @@ -252,8 +252,6 @@ def fromOBMol(mol, obmol): # iterate through atoms in obmol for obatom in openbabel.OBMolAtomIter(obmol): - idx = obatom.GetIdx()#openbabel idx starts at 1! 
- # Use atomic number as key for element number = obatom.GetAtomicNum() element = elements.getElement(number) diff --git a/rmgpy/molecule/converterTest.py b/rmgpy/molecule/converterTest.py index 1e348a959f..ac701ed1ac 100644 --- a/rmgpy/molecule/converterTest.py +++ b/rmgpy/molecule/converterTest.py @@ -42,8 +42,7 @@ class RDKitTest(unittest.TestCase): def testDebugger(self): - """ - Test the debugRDKitMol(rdmol) function doesn't crash + """Test the debugRDKitMol(rdmol) function doesn't crash We can't really test it in the unit testing framework, because that already captures and redirects standard output, and that @@ -53,6 +52,7 @@ def testDebugger(self): import logging rdmol = rdkit.Chem.MolFromSmiles('CCC') message = debugRDKitMol(rdmol, level=logging.INFO) + self.assertIsNotNone(message) def test_lone_pair_retention(self): """Test that we don't lose any lone pairs on round trip RDKit conversion.""" @@ -71,3 +71,55 @@ def test_lone_pair_retention(self): self.fail('Could not convert from RDKitMol: ' + e.message) else: self.assertTrue(mol.isIsomorphic(mol2)) + + def test_atom_mapping_1(self): + """Test that toRDKitMol returns correct indices and atom mappings.""" + bondOrderDict = {'SINGLE': 1, 'DOUBLE': 2, 'TRIPLE': 3, 'AROMATIC': 1.5} + mol = Molecule().fromSMILES('C1CCC=C1C=O') + rdkitmol, rdAtomIndices = toRDKitMol(mol, removeHs=False, returnMapping=True) + for atom in mol.atoms: + # Check that all atoms are found in mapping + self.assertTrue(atom in rdAtomIndices) + # Check that all bonds are in rdkitmol with correct mapping and order + for connectedAtom, bond in atom.bonds.iteritems(): + bondType = str( + rdkitmol.GetBondBetweenAtoms(rdAtomIndices[atom], rdAtomIndices[connectedAtom]).GetBondType()) + rdkitBondOrder = bondOrderDict[bondType] + self.assertEqual(bond.order, rdkitBondOrder) + + # Test for removeHs = True + rdkitmol2, rdAtomIndices2 = toRDKitMol(mol, removeHs=True, returnMapping=True) + for atom in mol.atoms: + # Check that all non-hydrogen atoms 
are found in mapping + if atom.symbol != 'H': + self.assertTrue(atom in rdAtomIndices2) + # Check that all bonds connected to non-hydrogen have the correct mapping and order + for connectedAtom, bond in atom.bonds.iteritems(): + if connectedAtom.symbol != 'H': + bondType = str(rdkitmol2.GetBondBetweenAtoms(rdAtomIndices2[atom], + rdAtomIndices2[connectedAtom]).GetBondType()) + rdkitBondOrder = bondOrderDict[bondType] + self.assertEqual(bond.order, rdkitBondOrder) + + def test_atom_mapping_2(self): + """Test that toRDKitMol returns correct indices and atom mappings when hydrogens are removed.""" + adjlist = """ +1 H u0 p0 c0 {2,S} +2 C u0 p0 c0 {1,S} {3,S} {4,S} {5,S} +3 H u0 p0 c0 {2,S} +4 H u0 p0 c0 {2,S} +5 O u0 p2 c0 {2,S} {6,S} +6 H u0 p0 c0 {5,S} + """ + + mol = Molecule().fromAdjacencyList(adjlist) + rdkitmol, rdAtomIndices = toRDKitMol(mol, removeHs=True, returnMapping=True) + + heavy_atoms = [at for at in mol.atoms if at.number != 1] + for at1 in heavy_atoms: + for at2 in heavy_atoms: + if mol.hasBond(at1, at2): + try: + rdkitmol.GetBondBetweenAtoms(rdAtomIndices[at1], rdAtomIndices[at2]) + except RuntimeError: + self.fail("RDKit failed in finding the bond in the original atom!") diff --git a/rmgpy/molecule/inchi.pxd b/rmgpy/molecule/inchi.pxd index b68b73b590..56170bd87f 100644 --- a/rmgpy/molecule/inchi.pxd +++ b/rmgpy/molecule/inchi.pxd @@ -55,7 +55,7 @@ cpdef list _find_lowest_u_layer(Molecule mol, list u_layer, list equivalent_atom cpdef str _create_U_layer(Molecule mol, str auxinfo) -cpdef Molecule _find_lowest_p_layer(Molecule minmol, list p_layer, list equivalent_atoms) +cpdef list _find_lowest_p_layer(Molecule minmol, list p_layer, list equivalent_atoms) cpdef str _create_P_layer(Molecule mol, str auxinfo) diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index 5aeaf9b863..d1261cc374 100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -343,7 +343,7 @@ def _has_unexpected_lone_pairs(mol): except KeyError: raise 
Exception("Unrecognized element: {}".format(at.symbol)) else: - if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: return True + if at.lonePairs != exp: return True return False @@ -570,7 +570,7 @@ def _find_lowest_p_layer(minmol, p_layer, equivalent_atoms): TODO: The presence of unpaired electrons complicates stuff. """ - return minmol + return p_layer def _create_P_layer(mol, auxinfo): @@ -599,7 +599,7 @@ def _create_P_layer(mol, auxinfo): except KeyError: raise Exception("Unrecognized element: {}".format(at.symbol)) else: - if at.lonePairs != elements.PeriodicSystem.lone_pairs[at.symbol]: + if at.lonePairs != exp: if at.lonePairs == 0: p_layer.append('{}{}'.format(i, '(0)')) else: @@ -609,7 +609,7 @@ def _create_P_layer(mol, auxinfo): equivalent_atoms = _parse_E_layer(auxinfo) if equivalent_atoms: # select lowest u-layer: - u_layer = _find_lowest_p_layer(minmol, p_layer, equivalent_atoms) + p_layer = _find_lowest_p_layer(minmol, p_layer, equivalent_atoms) if p_layer: return (P_LAYER_PREFIX + P_LAYER_SEPARATOR.join(map(str, p_layer))) @@ -640,7 +640,8 @@ def create_augmented_layers(mol): molcopy = mol.copy(deep=True) hydrogens = filter(lambda at: at.number == 1, molcopy.atoms) - [molcopy.removeAtom(h) for h in hydrogens] + for h in hydrogens: + molcopy.removeAtom(h) rdkitmol = toRDKitMol(molcopy) _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon') # suppress stereo warnings @@ -752,8 +753,6 @@ def is_valid(mol): return True - index = mol.atoms.index(start) + 1 - paths = pathfinder.find_allyl_end_with_charge(start) for path in paths: @@ -917,7 +916,7 @@ def _fix_oxygen_unsaturated_bond(mol, u_indices): for bond in bonds[1::2]: # odd bonds assert isinstance(bond, Bond) bond.incrementOrder() - return + break else: for atom2, bond in bonds.iteritems(): if not bond.isSingle() and atom2.charge == 0: @@ -927,7 +926,6 @@ def _fix_oxygen_unsaturated_bond(mol, u_indices): atom2.radicalElectrons += 1 u_indices.remove(mol.atoms.index(atom2) + 1) 
oxygen.lonePairs += 1 - return def _is_unsaturated(mol): @@ -939,9 +937,8 @@ def _is_unsaturated(mol): atom2=Atom, bonds=dict, bond=Bond) - for atom1 in mol.atoms: - bonds = mol.getBonds(atom1) - for atom2, bond in bonds.iteritems(): + for atom in mol.atoms: + for bond in atom.bonds.itervalues(): if not bond.isSingle(): return True @@ -1121,7 +1118,7 @@ def _check_molecule(mol, aug_inchi): ) ConsistencyChecker.check_multiplicity(mol.getRadicalCount(), mol.multiplicity) - inchi, u_indices, p_indices = decompose_aug_inchi(str(aug_inchi)) + _, u_indices, _ = decompose_aug_inchi(str(aug_inchi)) assert(mol.getRadicalCount() == len(u_indices)) for at in mol.atoms: @@ -1141,12 +1138,12 @@ def fix_molecule(mol, aug_inchi): # ignore atoms that bear already unpaired electrons: for i in set(u_indices[:]): atom = mol.atoms[i - 1] - [u_indices.remove(i) for _ in range(atom.radicalElectrons)] + for _ in range(atom.radicalElectrons): u_indices.remove(i) # ignore atoms that bear already lone pairs: for i in set(p_indices[:]): atom = mol.atoms[i - 1] - [p_indices.remove(i) for _ in range(atom.lonePairs)] + for _ in range(atom.lonePairs): p_indices.remove(i) _fix_triplet_to_singlet(mol, p_indices) diff --git a/rmgpy/molecule/inchiTest.py b/rmgpy/molecule/inchiTest.py index 8d2cc3a03b..124d4820b7 100644 --- a/rmgpy/molecule/inchiTest.py +++ b/rmgpy/molecule/inchiTest.py @@ -156,23 +156,23 @@ class DecomposeTest(unittest.TestCase): def test_inchi(self): string = 'InChI=1S/XXXX/cXXX/hXXX' - inchi, u_indices, p_indices = decompose_aug_inchi(string) + _, u_indices, _ = decompose_aug_inchi(string) self.assertEquals([], u_indices) def test_inchi_u_layer(self): string = 'InChI=1S/XXXX/cXXX/hXXX/u1,2' - inchi, u_indices, p_indices = decompose_aug_inchi(string) + _, u_indices, _ = decompose_aug_inchi(string) self.assertEquals([1,2], u_indices) def test_inchi_p_layer(self): string = 'InChI=1S/XXXX/cXXX/hXXX/lp1,2' - inchi, u_indices, p_indices = decompose_aug_inchi(string) + _, _, p_indices = 
decompose_aug_inchi(string) self.assertEquals([1,2], p_indices) def test_inchi_u_layer_p_layer(self): string = 'InChI=1S/XXXX/cXXX/hXXX/u1,2/lp3,4' - inchi, u_indices, p_indices = decompose_aug_inchi(string) + _, u_indices, p_indices = decompose_aug_inchi(string) self.assertEquals([1,2], u_indices) self.assertEquals([3,4], p_indices) @@ -182,7 +182,7 @@ def test_inchi_p_layer_zero_lp(self): pairs can be read correctly. """ string = 'InChI=1S/XXXX/cXXX/hXXX/lp1(0)' - inchi, u_indices, p_indices = decompose_aug_inchi(string) + _, _, p_indices = decompose_aug_inchi(string) self.assertEquals([(1,0)], p_indices) class CreateULayerTest(unittest.TestCase): diff --git a/rmgpy/molecule/moleculeTest.py b/rmgpy/molecule/moleculeTest.py index 4da1d36ae2..d9ac0964a2 100644 --- a/rmgpy/molecule/moleculeTest.py +++ b/rmgpy/molecule/moleculeTest.py @@ -1527,34 +1527,6 @@ def testMalformedAugmentedInChI_Wrong_Indices(self): with self.assertRaises(Exception): mol = Molecule().fromAugmentedInChI(malform_aug_inchi) - def testRDKitMolAtomMapping(self): - """ - Test that the atom mapping returned by toRDKitMol contains the correct - atom indices of the atoms of the molecule when hydrogens are removed. 
- """ - from rmgpy.molecule.converter import toRDKitMol - - adjlist = ''' -1 H u0 p0 c0 {2,S} -2 C u0 p0 c0 {1,S} {3,S} {4,S} {5,S} -3 H u0 p0 c0 {2,S} -4 H u0 p0 c0 {2,S} -5 O u0 p2 c0 {2,S} {6,S} -6 H u0 p0 c0 {5,S} - ''' - - mol = Molecule().fromAdjacencyList(adjlist) - rdkitmol, rdAtomIndices = toRDKitMol(mol, removeHs=True, returnMapping=True) - - heavy_atoms = [at for at in mol.atoms if at.number != 1] - for at1 in heavy_atoms: - for at2 in heavy_atoms: - if mol.hasBond(at1, at2): - try: - rdkitmol.GetBondBetweenAtoms(rdAtomIndices[at1],rdAtomIndices[at2]) - except RuntimeError: - self.fail("RDKit failed in finding the bond in the original atom!") - def testUpdateLonePairs(self): adjlist = """ 1 Si u0 p1 c0 {2,S} {3,S} diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 71566558d7..0edaca70db 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -35,7 +35,6 @@ import cython -import itertools import logging # Assume that rdkit is installed @@ -49,7 +48,7 @@ BACKENDS = ['openbabel', 'rdkit'] from rmgpy.exceptions import DependencyError -from .molecule import Atom, Molecule +from .molecule import Molecule from rmgpy.molecule.converter import toRDKitMol, fromRDKitMol, toOBMol, fromOBMol import rmgpy.molecule.inchi as inchiutil diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index c489735c75..55196faf3a 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -32,7 +32,6 @@ This module contains unit test for the translator module. """ -import mock import re import unittest from external.wip import work_in_progress @@ -75,9 +74,9 @@ def test_C5H5(self): def test_C7H8(self): - """Looks a lot like toluene but with 1 double bond replaced by a biradical.""" + """Looks a lot like toluene but with 1 double bond replaced by a biradical. 
- """unpaired electrons on tertiary carbon, and on carbon in para position.""" + unpaired electrons on tertiary carbon, and on carbon in para position.""" adjlist = """ 1 C u1 p0 c0 {2,S} {3,S} {4,S} 2 C u0 p0 c0 {1,S} {7,D} {10,S} @@ -138,7 +137,6 @@ def test_benzyne(self): 9 H u0 p0 c0 {5,S} 10 H u0 p0 c0 {6,S} """ - benzatetraene = 'InChI=1S/C6H4/c1-2-4-6-5-3-1/h1-4H' aug_inchi = 'InChI=1S/C6H4/c1-2-4-6-5-3-1/h1-4H' self.compare(adjlist, aug_inchi) @@ -355,7 +353,7 @@ def test_C5H6_triplet_singlet(self): 11 H u0 p0 c0 {5,S} """ - aug_inchi = 'InChI=1S/C5H6/c1-3-5-4-2/h1-3H2/u1,2/lp4,5' + aug_inchi = 'InChI=1S/C5H6/c1-3-5-4-2/h1-3H2/u1,2/lp3,5' self.compare(adjlist, aug_inchi) @@ -943,38 +941,6 @@ def test_fromSMARTS(self): mol = fromSMARTS(Molecule(), smarts) self.assertTrue(mol.isIsomorphic(self.methane)) - def test_toRDKitMol(self): - """ - Test that toRDKitMol returns correct indices and atom mappings. - """ - bondOrderDict = {'SINGLE': 1, 'DOUBLE': 2, 'TRIPLE': 3, 'AROMATIC': 1.5} - mol = fromSMILES(Molecule(), 'C1CCC=C1C=O') - rdkitmol, rdAtomIndices = mol.toRDKitMol(removeHs=False, returnMapping=True, sanitize=True) - for atom in mol.atoms: - # Check that all atoms are found in mapping - self.assertTrue(atom in rdAtomIndices) - # Check that all bonds are in rdkitmol with correct mapping and order - for connectedAtom, bond in atom.bonds.iteritems(): - bondType = str( - rdkitmol.GetBondBetweenAtoms(rdAtomIndices[atom], rdAtomIndices[connectedAtom]).GetBondType()) - rdkitBondOrder = bondOrderDict[bondType] - self.assertEqual(bond.order, rdkitBondOrder) - - # Test for removeHs = True - rdkitmol2, rdAtomIndices2 = mol.toRDKitMol(removeHs=True, returnMapping=True, sanitize=True) - - for atom in mol.atoms: - # Check that all non-hydrogen atoms are found in mapping - if atom.symbol != 'H': - self.assertTrue(atom in rdAtomIndices) - # Check that all bonds connected to non-hydrogen have the correct mapping and order - for connectedAtom, bond in 
atom.bonds.iteritems(): - if connectedAtom.symbol != 'H': - bondType = str(rdkitmol.GetBondBetweenAtoms(rdAtomIndices[atom], - rdAtomIndices[connectedAtom]).GetBondType()) - rdkitBondOrder = bondOrderDict[bondType] - self.assertEqual(bond.order, rdkitBondOrder) - def test_incorrect_identifier_type(self): """Test that the appropriate error is raised for identifier/type mismatch.""" with self.assertRaises(ValueError) as cm: @@ -1047,16 +1013,6 @@ def test_C3H6_biradical_parsing(self): self.compare(inchi, u_indices) def testC2H3O3(self): - adjlist = ''' - 1 C u0 p0 c0 {2,D} {6,S} {7,S} - 2 C u0 p0 c0 {1,D} {3,S} {5,S} - 3 O u1 p2 c0 {2,S} - 4 O u0 p2 c0 {5,S} {8,S} - 5 O u0 p2 c0 {2,S} {4,S} - 6 H u0 p0 c0 {1,S} - 7 H u0 p0 c0 {1,S} - 8 H u0 p0 c0 {4,S} - ''' inchi = 'C2H3O3/c1-2(3)5-4/h4H,1H2' u_indices = [1] self.compare(inchi, u_indices) @@ -1064,7 +1020,7 @@ def testC2H3O3(self): def testC2H2(self): inchi = 'C2H2/c1-2/h1-2H' u_indices = [1, 2] - mol = self.compare(inchi, u_indices) + self.compare(inchi, u_indices) def testO2(self): inchi = 'O2/c1-2' @@ -1089,7 +1045,7 @@ def testTriRadical2DoubleBondMult4(self): def testQuadriRadicalDoubleBondZwitterMult5(self): inchi = 'C8H14/c1-4-6-7-8(3)5-2/h5-6,8H,1-2,4,7H2,3H3' u_indices = [1, 2, 5, 6] - mol = self.compare(inchi, u_indices) + self.compare(inchi, u_indices) def testQuadri2DoubleBondMult5(self): inchi = 'C8H14/c1-5-7(3)8(4)6-2/h5-8H,1-2H2,3-4H3' @@ -1117,10 +1073,10 @@ def testCO(self): p_indices = [1, 2] mol = self.compare(inchi, [], p_indices) - assert mol.atoms[1].lonePairs == 1 # Oxygen + self.assertEqual(mol.atoms[1].lonePairs, 1) # Oxygen - assert mol.atoms[0].charge == -1 - assert mol.atoms[1].charge == +1 + self.assertEqual(mol.atoms[0].charge, -1) + self.assertEqual(mol.atoms[1].charge, 1) def testTripletMethylene(self): inchi = 'CH2/h1H2' @@ -1145,12 +1101,12 @@ def testC4H6O(self): def testC6H6(self): inchi = 'C6H6/c1-3-5-6-4-2/h1,6H,2,5H2' u_indices = [1, 3] - mol = self.compare(inchi, u_indices) + 
self.compare(inchi, u_indices) def testC4H6O_2(self): inchi = 'C4H6O/c1-2-3-4-5/h2,4H,1,3H2' u_indices = [4, 5] - mol = self.compare(inchi, u_indices) + self.compare(inchi, u_indices) def test_CO_triplet(self): @@ -1189,7 +1145,7 @@ def test_CCCO_triplet(self): def testC3H4(self): inchi = 'C3H4/c1-3-2/h1,3H,2H2' u_indices = [1, 1] - mol = self.compare(inchi, u_indices) + self.compare(inchi, u_indices) def test_C6H8O2(self): inchi = 'C6H8O2/c1-3-5(7)6(8)4-2/h3-6H,1-2H2' @@ -1287,9 +1243,9 @@ def test_nitrate(self): """ inchi = 'InChI=1S/HNO3/c2-1(3)4/h(H,2,3,4)' p_indices = [-1, 3, 3, 3] # ??? - mol = self.compare(inchi, [], p_indices) + self.compare(inchi, [], p_indices) def test_NO(self): inchi = 'InChI=1S/NO/c1-2' u_indices = [1] - mol = self.compare(inchi, u_indices) + self.compare(inchi, u_indices) From a06d840b3215fd904bd5431f068fb6e17513eca4 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Mon, 5 Mar 2018 12:25:21 -0500 Subject: [PATCH 18/57] Have OpenBabel output canonical SMILES --- rmgpy/molecule/translator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 0edaca70db..9fb679f9a7 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -406,7 +406,7 @@ def _openbabel_translator(input_object, identifier_type, mol=None): ob_conversion.AddOption('w') ob_conversion.AddOption('K') elif identifier_type == 'smi': - ob_conversion.SetOutFormat('smi') + ob_conversion.SetOutFormat('can') # turn off isomer and stereochemistry information ob_conversion.AddOption('i') else: From 46025bf7e134a814187b678e8555062c6a20ed74 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 7 Mar 2018 12:33:08 -0500 Subject: [PATCH 19/57] Add 'rdkit-first' option for translator backends Also add function to generate backend list to reduce code duplication --- rmgpy/molecule/translator.pxd | 2 ++ rmgpy/molecule/translator.py | 41 +++++++++++++++++++++++++---------- 2 files changed, 32 
insertions(+), 11 deletions(-) diff --git a/rmgpy/molecule/translator.pxd b/rmgpy/molecule/translator.pxd index c887e1f661..dea50445d0 100644 --- a/rmgpy/molecule/translator.pxd +++ b/rmgpy/molecule/translator.pxd @@ -63,3 +63,5 @@ cpdef _is_correctly_parsed(Molecule mol, str identifier) cdef Molecule _read(Molecule mol, str identifier, str identifier_type, str backend) cdef str _write(Molecule mol, str identifier_type, str backend) + +cdef _get_backend_list(str backend) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 9fb679f9a7..4ab2024c13 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -138,7 +138,7 @@ } -def toInChI(mol, backend='try-all', aug_level=0): +def toInChI(mol, backend='rdkit-first', aug_level=0): """ Convert a molecular structure to an InChI string. For aug_level=0, generates the canonical InChI. @@ -174,7 +174,7 @@ def toInChI(mol, backend='try-all', aug_level=0): raise ValueError("Implemented values for aug_level are 0, 1, or 2.") -def toInChIKey(mol, backend='try-all', aug_level=0): +def toInChIKey(mol, backend='rdkit-first', aug_level=0): """ Convert a molecular structure to an InChI Key string. For aug_level=0, generates the canonical InChI. 
@@ -480,19 +480,19 @@ def _read(mol, identifier, identifier_type, backend): mol.updateAtomTypes() return mol - for backend in (BACKENDS if backend == 'try-all' else [backend]): - if backend == 'rdkit': + for option in _get_backend_list(backend): + if option == 'rdkit': mol = _rdkit_translator(identifier, identifier_type, mol) - elif backend == 'openbabel': + elif option == 'openbabel': mol = _openbabel_translator(identifier, identifier_type, mol) else: - raise NotImplementedError("Unrecognized backend {0}".format(backend)) + raise NotImplementedError("Unrecognized backend {0}".format(option)) if _is_correctly_parsed(mol, identifier): mol.updateAtomTypes() return mol else: - logging.debug('Backend %s is not able to parse identifier %s', backend, identifier) + logging.debug('Backend %s is not able to parse identifier %s', option, identifier) raise ValueError("Unable to correctly parse {0} with backend {1}.".format(identifier, backend)) @@ -505,20 +505,39 @@ def _write(mol, identifier_type, backend): Returns a string identifier of the requested type. """ - for backend in (BACKENDS if backend == 'try-all' else [backend]): - if backend == 'rdkit': + for option in _get_backend_list(backend): + if option == 'rdkit': try: output = _rdkit_translator(mol, identifier_type) except ValueError: continue - elif backend == 'openbabel': + elif option == 'openbabel': try: output = _openbabel_translator(mol, identifier_type) except ValueError: continue else: - raise NotImplementedError("Unrecognized backend {0}".format(backend)) + raise NotImplementedError("Unrecognized backend {0}".format(option)) return output raise ValueError("Unable to generate identifier type {0} with backend {1}.".format(identifier_type, backend)) + + +def _get_backend_list(backend): + """ + Returns the appropriate list or iterator of backends given the provided keyword. + """ + if not isinstance(backend, str): + raise ValueError("The backend argument should be a string. 
" + "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'") + backend = backend.strip().lower() + if backend == 'try-all': + return BACKENDS + elif backend == 'rdkit-first': + return reversed(BACKENDS) + elif backend in ['rdkit', 'openbabel']: + return [backend] + else: + raise ValueError("Unrecognized value for backend argument. " + "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'") From 2520d7424d69347ef9dd1bde4519bfd638649b5f Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 7 Mar 2018 14:43:53 -0500 Subject: [PATCH 20/57] Add output checking when generating identifiers Rename _is_correctly_parsed to _check_output Use _check_output when generating identifiers to ensure that empty identifiers are not kept and that InChI element counts are correct --- rmgpy/molecule/translator.pxd | 2 +- rmgpy/molecule/translator.py | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/rmgpy/molecule/translator.pxd b/rmgpy/molecule/translator.pxd index dea50445d0..498189c6af 100644 --- a/rmgpy/molecule/translator.pxd +++ b/rmgpy/molecule/translator.pxd @@ -58,7 +58,7 @@ cpdef object _openbabel_translator(object input_object, str identifier_type, Mol cdef Molecule _lookup(Molecule mol, str identifier, str identifier_type) -cpdef _is_correctly_parsed(Molecule mol, str identifier) +cpdef _check_output(Molecule mol, str identifier) cdef Molecule _read(Molecule mol, str identifier, str identifier_type, str backend) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 4ab2024c13..c04125f00f 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -441,16 +441,17 @@ def _lookup(mol, identifier, identifier_type): return None -def _is_correctly_parsed(mol, identifier): +def _check_output(mol, identifier): """Check if molecule object has been correctly parsed.""" conditions = [] - if mol.atoms: - conditions.append(True) - else: - conditions.append(False) + # 
Check that the molecule has atoms + conditions.append(bool(mol.atoms)) + # Check that the identifier is not blank + conditions.append(bool(identifier.strip())) - if 'InChI' in identifier: + # Check that the InChI element count matches the molecule + if 'InChI=1' in identifier: inchi_elementcount = util.retrieveElementCount(identifier) mol_elementcount = util.retrieveElementCount(mol) conditions.append(inchi_elementcount == mol_elementcount) @@ -476,7 +477,7 @@ def _read(mol, identifier, identifier_type, backend): raise ValueError('Improper identifier type "{0}". The provided identifier appears to be an InChI.'.format(identifier_type)) if _lookup(mol, identifier, identifier_type) is not None: - if _is_correctly_parsed(mol, identifier): + if _check_output(mol, identifier): mol.updateAtomTypes() return mol @@ -488,11 +489,11 @@ def _read(mol, identifier, identifier_type, backend): else: raise NotImplementedError("Unrecognized backend {0}".format(option)) - if _is_correctly_parsed(mol, identifier): + if _check_output(mol, identifier): mol.updateAtomTypes() return mol else: - logging.debug('Backend %s is not able to parse identifier %s', option, identifier) + logging.debug('Backend {0} is not able to parse identifier {1}'.format(option, identifier)) raise ValueError("Unable to correctly parse {0} with backend {1}.".format(identifier, backend)) @@ -519,7 +520,11 @@ def _write(mol, identifier_type, backend): else: raise NotImplementedError("Unrecognized backend {0}".format(option)) - return output + if _check_output(mol, output): + return output + else: + logging.debug('Backend {0} is not able to generate {1} for this molecule:\n' + '{2}'.format(option, identifier_type, mol.toAdjacencyList())) raise ValueError("Unable to generate identifier type {0} with backend {1}.".format(identifier_type, backend)) From 10f7dad7630bdd8eb9ca3ec9e72de15e47701c8a Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 7 Mar 2018 14:52:14 -0500 Subject: [PATCH 21/57] Only prevent RDKit from 
sanitizing for SMILES generation Originally implemented in 83bb9d9ae0 to allow differentiation of Kekulized and Aromatic SMILES strings. --- rmgpy/molecule/translator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index c04125f00f..c32eabbadf 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -348,7 +348,10 @@ def _rdkit_translator(input_object, identifier_type, mol=None): output = fromRDKitMol(mol, rdkitmol) elif isinstance(input_object, Molecule): # We are converting from a molecule to a string identifier - rdkitmol = toRDKitMol(input_object, sanitize=False) + if identifier_type == 'smi': + rdkitmol = toRDKitMol(input_object, sanitize=False) + else: + rdkitmol = toRDKitMol(input_object, sanitize=True) if identifier_type == 'inchi': output = Chem.inchi.MolToInchi(rdkitmol, options='-SNon') elif identifier_type == 'inchikey': From ed212e1069a692256c102f3eb9174ef5bba57732 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 7 Mar 2018 15:02:17 -0500 Subject: [PATCH 22/57] Refactor imports in converter and translator modules Attempt to eliminate cyclic imports Use `mm` to refer to rmgpy.molecule.molecule --- rmgpy/molecule/converter.pxd | 10 +++++----- rmgpy/molecule/converter.py | 23 ++++++++++++----------- rmgpy/molecule/translator.pxd | 33 ++++++++++++++++----------------- rmgpy/molecule/translator.py | 14 +++++++------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/rmgpy/molecule/converter.pxd b/rmgpy/molecule/converter.pxd index 9a28f2a877..976fb9d7bc 100644 --- a/rmgpy/molecule/converter.pxd +++ b/rmgpy/molecule/converter.pxd @@ -25,13 +25,13 @@ # # ############################################################################### -from .molecule cimport Atom, Bond, Molecule +cimport rmgpy.molecule.molecule as mm -cpdef toRDKitMol(Molecule mol, bint removeHs=*, bint returnMapping=*, bint sanitize=*) +cpdef toRDKitMol(mm.Molecule mol, 
bint removeHs=*, bint returnMapping=*, bint sanitize=*) -cpdef Molecule fromRDKitMol(Molecule mol, object rdkitmol) +cpdef mm.Molecule fromRDKitMol(mm.Molecule mol, object rdkitmol) -cpdef toOBMol(Molecule mol, bint returnMapping=*) +cpdef toOBMol(mm.Molecule mol, bint returnMapping=*) -cpdef Molecule fromOBMol(Molecule mol, object obmol) +cpdef mm.Molecule fromOBMol(mm.Molecule mol, object obmol) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py index aaf87dd88b..e5af35ea88 100644 --- a/rmgpy/molecule/converter.py +++ b/rmgpy/molecule/converter.py @@ -45,9 +45,10 @@ else: OB_INSTALLED = True +import rmgpy.molecule.element as elements +import rmgpy.molecule.molecule as mm + from rmgpy.exceptions import DependencyError -from rmgpy.molecule import element as elements -from rmgpy.molecule.molecule import Atom, Bond def toRDKitMol(mol, removeHs=True, returnMapping=False, sanitize=True): @@ -112,10 +113,10 @@ def fromRDKitMol(mol, rdkitmol): lonePairs=cython.int, number=cython.int, order=cython.float, - atom=Atom, - atom1=Atom, - atom2=Atom, - bond=Bond) + atom=mm.Atom, + atom1=mm.Atom, + atom2=mm.Atom, + bond=mm.Bond) mol.vertices = [] @@ -136,7 +137,7 @@ def fromRDKitMol(mol, rdkitmol): charge = rdkitatom.GetFormalCharge() radicalElectrons = rdkitatom.GetNumRadicalElectrons() - atom = Atom(element, radicalElectrons, charge, '', 0) + atom = mm.Atom(element, radicalElectrons, charge, '', 0) mol.vertices.append(atom) # Add bonds by iterating again through atoms @@ -152,7 +153,7 @@ def fromRDKitMol(mol, rdkitmol): elif rdbondtype.name == 'TRIPLE': order = 3 elif rdbondtype.name == 'AROMATIC': order = 1.5 - bond = Bond(mol.vertices[i], mol.vertices[j], order) + bond = mm.Bond(mol.vertices[i], mol.vertices[j], order) mol.addBond(bond) # We need to update lone pairs first because the charge was set by RDKit @@ -240,7 +241,7 @@ def fromOBMol(mol, obmol): # Below are the declared variables for cythonizing the module # cython.declare(i=cython.int) # 
cython.declare(radicalElectrons=cython.int, charge=cython.int, lonePairs=cython.int) - # cython.declare(atom=Atom, atom1=Atom, atom2=Atom, bond=Bond) + # cython.declare(atom=mm.Atom, atom1=mm.Atom, atom2=mm.Atom, bond=mm.Bond) if not OB_INSTALLED: raise DependencyError('OpenBabel is not installed. Please install or use RDKit.') @@ -260,7 +261,7 @@ def fromOBMol(mol, obmol): obatom_multiplicity = obatom.GetSpinMultiplicity() radicalElectrons = obatom_multiplicity - 1 if obatom_multiplicity != 0 else 0 - atom = Atom(element, radicalElectrons, charge, '', 0) + atom = mm.Atom(element, radicalElectrons, charge, '', 0) mol.vertices.append(atom) # iterate through bonds in obmol @@ -270,7 +271,7 @@ def fromOBMol(mol, obmol): if oborder not in [1,2,3] and obbond.IsAromatic() : oborder = 1.5 - bond = Bond(mol.vertices[obbond.GetBeginAtomIdx() - 1], mol.vertices[obbond.GetEndAtomIdx() - 1], oborder)#python array indices start at 0 + bond = mm.Bond(mol.vertices[obbond.GetBeginAtomIdx() - 1], mol.vertices[obbond.GetEndAtomIdx() - 1], oborder)#python array indices start at 0 mol.addBond(bond) diff --git a/rmgpy/molecule/translator.pxd b/rmgpy/molecule/translator.pxd index 498189c6af..0d6c48416d 100644 --- a/rmgpy/molecule/translator.pxd +++ b/rmgpy/molecule/translator.pxd @@ -25,9 +25,8 @@ # # ############################################################################### -from .molecule cimport Atom, Molecule -cimport element as elements -cimport inchi as inchiutil +cimport rmgpy.molecule.molecule as mm + cpdef list BACKENDS cpdef dict INCHI_LOOKUPS @@ -36,32 +35,32 @@ cpdef dict SMILES_LOOKUPS cpdef dict MOLECULE_LOOKUPS cpdef dict RADICAL_LOOKUPS -cpdef str toInChI(Molecule mol, str backend=?, int aug_level=?) +cpdef str toInChI(mm.Molecule mol, str backend=?, int aug_level=?) -cpdef str toInChIKey(Molecule mol, str backend=?, int aug_level=?) +cpdef str toInChIKey(mm.Molecule mol, str backend=?, int aug_level=?) -cpdef str toSMARTS(Molecule mol, backend=?) 
+cpdef str toSMARTS(mm.Molecule mol, backend=?) -cpdef str toSMILES(Molecule mol, backend=?) +cpdef str toSMILES(mm.Molecule mol, backend=?) -cpdef Molecule fromInChI(Molecule mol, str inchistr, backend=?) +cpdef mm.Molecule fromInChI(mm.Molecule mol, str inchistr, backend=?) -cpdef Molecule fromSMILES(Molecule mol, str smilesstr, str backend=?) +cpdef mm.Molecule fromSMILES(mm.Molecule mol, str smilesstr, str backend=?) -cpdef Molecule fromSMARTS(Molecule mol, str smartsstr, str backend=?) +cpdef mm.Molecule fromSMARTS(mm.Molecule mol, str smartsstr, str backend=?) -cpdef Molecule fromAugmentedInChI(Molecule mol, aug_inchi) +cpdef mm.Molecule fromAugmentedInChI(mm.Molecule mol, aug_inchi) -cpdef object _rdkit_translator(object input_object, str identifier_type, Molecule mol=?) +cpdef object _rdkit_translator(object input_object, str identifier_type, mm.Molecule mol=?) -cpdef object _openbabel_translator(object input_object, str identifier_type, Molecule mol=?) +cpdef object _openbabel_translator(object input_object, str identifier_type, mm.Molecule mol=?) 
-cdef Molecule _lookup(Molecule mol, str identifier, str identifier_type) +cdef mm.Molecule _lookup(mm.Molecule mol, str identifier, str identifier_type) -cpdef _check_output(Molecule mol, str identifier) +cpdef _check_output(mm.Molecule mol, str identifier) -cdef Molecule _read(Molecule mol, str identifier, str identifier_type, str backend) +cdef mm.Molecule _read(mm.Molecule mol, str identifier, str identifier_type, str backend) -cdef str _write(Molecule mol, str identifier_type, str backend) +cdef str _write(mm.Molecule mol, str identifier_type, str backend) cdef _get_backend_list(str backend) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index c32eabbadf..ea5de1db8d 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -47,13 +47,13 @@ else: BACKENDS = ['openbabel', 'rdkit'] -from rmgpy.exceptions import DependencyError -from .molecule import Molecule -from rmgpy.molecule.converter import toRDKitMol, fromRDKitMol, toOBMol, fromOBMol - import rmgpy.molecule.inchi as inchiutil +import rmgpy.molecule.molecule as mm import rmgpy.molecule.util as util +from rmgpy.exceptions import DependencyError +from rmgpy.molecule.converter import toRDKitMol, fromRDKitMol, toOBMol, fromOBMol + # constants INCHI_LOOKUPS = { @@ -346,7 +346,7 @@ def _rdkit_translator(input_object, identifier_type, mol=None): if rdkitmol is None: raise ValueError("Could not interpret the identifier {0!r}".format(input_object)) output = fromRDKitMol(mol, rdkitmol) - elif isinstance(input_object, Molecule): + elif isinstance(input_object, mm.Molecule): # We are converting from a molecule to a string identifier if identifier_type == 'smi': rdkitmol = toRDKitMol(input_object, sanitize=False) @@ -397,9 +397,9 @@ def _openbabel_translator(input_object, identifier_type, mol=None): obmol.AddHydrogens() obmol.AssignSpinMultiplicity(True) if mol is None: - mol = Molecule() + mol = mm.Molecule() output = fromOBMol(mol, obmol) - elif isinstance(input_object, 
Molecule): + elif isinstance(input_object, mm.Molecule): # We are converting from a Molecule to a string identifier if identifier_type == 'inchi': ob_conversion.SetOutFormat('inchi') From 5ffa86210dfba6ad4fa9fa7f0dc9adac5db25380 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 7 Mar 2018 15:44:22 -0500 Subject: [PATCH 23/57] Add two unit tests for SMILES and InChI generation --- rmgpy/molecule/translatorTest.py | 105 +++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index 55196faf3a..db918870a7 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -356,6 +356,46 @@ def test_C5H6_triplet_singlet(self): aug_inchi = 'InChI=1S/C5H6/c1-3-5-4-2/h1-3H2/u1,2/lp3,5' self.compare(adjlist, aug_inchi) + def test_aromatic_resonance_structures(self): + """Test that different resonance structures give identical InChIs.""" + mol = Molecule().fromAdjacencyList(""" +multiplicity 2 +1 C u0 p0 c0 {2,D} {14,S} {18,S} +2 C u0 p0 c0 {1,D} {3,S} {19,S} +3 C u0 p0 c0 {2,S} {4,D} {20,S} +4 C u0 p0 c0 {3,D} {5,S} {13,S} +5 C u0 p0 c0 {4,S} {6,S} {14,D} +6 C u0 p0 c0 {5,S} {7,D} {21,S} +7 C u0 p0 c0 {6,D} {8,S} {22,S} +8 C u0 p0 c0 {7,S} {9,D} {13,S} +9 C u0 p0 c0 {8,D} {10,S} {23,S} +10 C u0 p0 c0 {9,S} {11,D} {24,S} +11 C u0 p0 c0 {10,D} {12,S} {25,S} +12 C u0 p0 c0 {11,S} {13,D} {26,S} +13 C u0 p0 c0 {4,S} {8,S} {12,D} +14 C u0 p0 c0 {1,S} {5,D} {15,S} +15 C u1 p0 c0 {14,S} {16,S} {17,S} +16 H u0 p0 c0 {15,S} +17 H u0 p0 c0 {15,S} +18 H u0 p0 c0 {1,S} +19 H u0 p0 c0 {2,S} +20 H u0 p0 c0 {3,S} +21 H u0 p0 c0 {6,S} +22 H u0 p0 c0 {7,S} +23 H u0 p0 c0 {9,S} +24 H u0 p0 c0 {10,S} +25 H u0 p0 c0 {11,S} +26 H u0 p0 c0 {12,S} +""") + res = mol.generate_resonance_structures() + + inchi_list = [struct.toInChI() for struct in res] + + expected_inchi = 'InChI=1S/C15H11/c1-11-5-4-8-15-13(11)10-9-12-6-2-3-7-14(12)15/h2-10H,1H2' + + for inchi in inchi_list: + 
self.assertEqual(inchi, expected_inchi) + class SMILESGenerationTest(unittest.TestCase): def compare(self, adjlist, smiles): @@ -639,6 +679,71 @@ def test_various(self): smiles = '[O][O]' self.compare(adjlist, smiles) + def test_aromatics(self): + """Test that different aromatics representations returns different SMILES.""" + mol1 = Molecule().fromAdjacencyList(""" +1 O u0 p2 c0 {6,S} {9,S} +2 C u0 p0 c0 {3,D} {5,S} {11,S} +3 C u0 p0 c0 {2,D} {4,S} {12,S} +4 C u0 p0 c0 {3,S} {6,D} {13,S} +5 C u0 p0 c0 {2,S} {7,D} {10,S} +6 C u0 p0 c0 {1,S} {4,D} {7,S} +7 C u0 p0 c0 {5,D} {6,S} {8,S} +8 C u0 p0 c0 {7,S} {14,S} {15,S} {16,S} +9 H u0 p0 c0 {1,S} +10 H u0 p0 c0 {5,S} +11 H u0 p0 c0 {2,S} +12 H u0 p0 c0 {3,S} +13 H u0 p0 c0 {4,S} +14 H u0 p0 c0 {8,S} +15 H u0 p0 c0 {8,S} +16 H u0 p0 c0 {8,S} +""") + mol2 = Molecule().fromAdjacencyList(""" +1 O u0 p2 c0 {6,S} {9,S} +2 C u0 p0 c0 {3,S} {5,D} {11,S} +3 C u0 p0 c0 {2,S} {4,D} {12,S} +4 C u0 p0 c0 {3,D} {6,S} {13,S} +5 C u0 p0 c0 {2,D} {7,S} {10,S} +6 C u0 p0 c0 {1,S} {4,S} {7,D} +7 C u0 p0 c0 {5,S} {6,D} {8,S} +8 C u0 p0 c0 {7,S} {14,S} {15,S} {16,S} +9 H u0 p0 c0 {1,S} +10 H u0 p0 c0 {5,S} +11 H u0 p0 c0 {2,S} +12 H u0 p0 c0 {3,S} +13 H u0 p0 c0 {4,S} +14 H u0 p0 c0 {8,S} +15 H u0 p0 c0 {8,S} +16 H u0 p0 c0 {8,S} +""") + mol3 = Molecule().fromAdjacencyList(""" +1 O u0 p2 c0 {6,S} {9,S} +2 C u0 p0 c0 {3,B} {5,B} {11,S} +3 C u0 p0 c0 {2,B} {4,B} {12,S} +4 C u0 p0 c0 {3,B} {6,B} {13,S} +5 C u0 p0 c0 {2,B} {7,B} {10,S} +6 C u0 p0 c0 {1,S} {4,B} {7,B} +7 C u0 p0 c0 {5,B} {6,B} {8,S} +8 C u0 p0 c0 {7,S} {14,S} {15,S} {16,S} +9 H u0 p0 c0 {1,S} +10 H u0 p0 c0 {5,S} +11 H u0 p0 c0 {2,S} +12 H u0 p0 c0 {3,S} +13 H u0 p0 c0 {4,S} +14 H u0 p0 c0 {8,S} +15 H u0 p0 c0 {8,S} +16 H u0 p0 c0 {8,S} +""") + + smiles1 = mol1.toSMILES() + smiles2 = mol2.toSMILES() + smiles3 = mol3.toSMILES() + + self.assertNotEqual(smiles1, smiles2) + self.assertNotEqual(smiles2, smiles3) + self.assertNotEqual(smiles1, smiles3) + class 
ParsingTest(unittest.TestCase): def setUp(self): From 1ab1513b57fe9382021ab497c847c56e3b83437e Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 7 Mar 2018 15:47:44 -0500 Subject: [PATCH 24/57] Update TestMolecule.smilesTest with canonical SMILES --- rmgpy/molecule/moleculeTest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/molecule/moleculeTest.py b/rmgpy/molecule/moleculeTest.py index d9ac0964a2..142096cd02 100644 --- a/rmgpy/molecule/moleculeTest.py +++ b/rmgpy/molecule/moleculeTest.py @@ -1151,7 +1151,7 @@ def testSMILES(self): test_strings = ['[C-]#[O+]', '[C]', '[CH]', 'OO', '[H][H]', '[H]', '[He]', '[O]', 'O', '[CH3]', 'C', '[OH]', 'CCC', 'CC', 'N#N', '[O]O', 'C[CH2]', '[Ar]', 'CCCC', - 'O=C=O', 'N#[C]', + 'O=C=O', '[C]#N', ] for s in test_strings: molecule = Molecule(SMILES=s) From 13ceacb2a809dd5abf7e05639f05978ec0bbb1ab Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 15 Mar 2018 12:46:44 -0400 Subject: [PATCH 25/57] Add get_element_count methods to Molecule and Group --- rmgpy/molecule/group.pxd | 2 ++ rmgpy/molecule/group.py | 32 ++++++++++++++++++++++++++++++++ rmgpy/molecule/molecule.pxd | 2 ++ rmgpy/molecule/molecule.py | 16 ++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/rmgpy/molecule/group.pxd b/rmgpy/molecule/group.pxd index 88567ee57c..9a5f38f625 100644 --- a/rmgpy/molecule/group.pxd +++ b/rmgpy/molecule/group.pxd @@ -153,6 +153,8 @@ cdef class Group(Graph): cpdef dict getLabeledAtoms(self) + cpdef dict get_element_count(self) + cpdef fromAdjacencyList(self, str adjlist) cpdef toAdjacencyList(self, str label=?) diff --git a/rmgpy/molecule/group.py b/rmgpy/molecule/group.py index 107765aa5d..dc518548b3 100644 --- a/rmgpy/molecule/group.py +++ b/rmgpy/molecule/group.py @@ -1090,6 +1090,38 @@ def getLabeledAtoms(self): labeled[atom.label] = atom return labeled + def get_element_count(self): + """ + Returns the element count for the molecule as a dictionary. 
+ Wildcards are not counted as any particular element. + """ + from rmgpy.molecule.atomtype import allElements + + element_count = {} + for atom in self.atoms: + same = True + match = None + for atomtype in atom.atomType: + if match is None: + # This is the first type in the list, so check all elements + for element in allElements: + if atomtype.isSpecificCaseOf(atomTypes[element]): + match = element + break + else: + # We've already matched one atomtype, now confirm that the rest are the same + if not atomtype.isSpecificCaseOf(atomTypes[match]): + same = False + break + # If match is None, then the group is not a specific case of any element + if match is not None and same: + if match in element_count: + element_count[match] += 1 + else: + element_count[match] = 1 + + return element_count + def fromAdjacencyList(self, adjlist): """ Convert a string adjacency list `adjlist` to a molecular structure. diff --git a/rmgpy/molecule/molecule.pxd b/rmgpy/molecule/molecule.pxd index f9bf3a6506..4bb9baa513 100644 --- a/rmgpy/molecule/molecule.pxd +++ b/rmgpy/molecule/molecule.pxd @@ -172,6 +172,8 @@ cdef class Molecule(Graph): cpdef dict getLabeledAtoms(self) + cpdef dict get_element_count(self) + cpdef bint isIsomorphic(self, Graph other, dict initialMap=?) except -2 cpdef list findIsomorphism(self, Graph other, dict initialMap=?) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index d71103a523..75a9b7af3f 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -1138,6 +1138,22 @@ def getLabeledAtoms(self): labeled[atom.label] = atom return labeled + def get_element_count(self): + """ + Returns the element count for the molecule as a dictionary. 
+ """ + element_count = {} + for atom in self.atoms: + symbol = atom.element.symbol + isotope = atom.element.isotope + key = symbol if isotope == -1 else (symbol, isotope) + if key in element_count: + element_count[key] += 1 + else: + element_count[key] = 1 + + return element_count + def isIsomorphic(self, other, initialMap=None): """ Returns :data:`True` if two graphs are isomorphic and :data:`False` From e3d11c951123ea36f71ea6b133f610145fcd5c08 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 15 Mar 2018 13:14:18 -0400 Subject: [PATCH 26/57] Add unit tests for get_element_count --- rmgpy/molecule/groupTest.py | 12 ++++++++++++ rmgpy/molecule/moleculeTest.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/rmgpy/molecule/groupTest.py b/rmgpy/molecule/groupTest.py index eb5a976149..afae1c2774 100644 --- a/rmgpy/molecule/groupTest.py +++ b/rmgpy/molecule/groupTest.py @@ -1375,6 +1375,18 @@ def testMergeGroups(self): mergedGroup = backbone2.mergeGroups(end2) self.assertTrue(mergedGroup.isIdentical(desiredMerge2)) + def test_get_element_count(self): + """Test that we can count elements properly.""" + group = Group().fromAdjacencyList(""" +1 R!H u0 {2,S} +2 [Cs,Cd,Ct,Cb] u0 {1,S} {3,S} +3 [Cs,Cd,Ct,Cb,O2s,S2s] u0 {2,S} {4,S} +4 N1s u0 {3,S} +""") + expected = {'C': 1, 'N': 1} + result = group.get_element_count() + self.assertEqual(expected, result) + ################################################################################ if __name__ == '__main__': diff --git a/rmgpy/molecule/moleculeTest.py b/rmgpy/molecule/moleculeTest.py index 142096cd02..bd7efcc5e1 100644 --- a/rmgpy/molecule/moleculeTest.py +++ b/rmgpy/molecule/moleculeTest.py @@ -2206,6 +2206,23 @@ def testSaturateUnfilledValence(self): test.update() self.assertTrue(expected.isIsomorphic(test)) + def test_get_element_count(self): + """Test that we can count elements properly.""" + mol1 = Molecule(SMILES='c1ccccc1') + expected1 = {'C': 6, 'H': 6} + result1 = mol1.get_element_count() + 
self.assertEqual(expected1, result1) + + mol2 = Molecule(SMILES='CS(C)(=O)=O') + expected2 = {'C': 2, 'H': 6, 'O': 2, 'S': 1} + result2 = mol2.get_element_count() + self.assertEqual(expected2, result2) + + mol3 = Molecule(SMILES='CCN') + expected3 = {'C': 2, 'H': 7, 'N': 1} + result3 = mol3.get_element_count() + self.assertEqual(expected3, result3) + ################################################################################ From ea7c6b5fcfcd418e4c15b4649070a1378c26ec29 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 15 Mar 2018 13:18:56 -0400 Subject: [PATCH 27/57] Use get_element_count in util.retrieveElementCount --- rmgpy/molecule/util.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/rmgpy/molecule/util.py b/rmgpy/molecule/util.py index d85eedeb8d..7da222e410 100644 --- a/rmgpy/molecule/util.py +++ b/rmgpy/molecule/util.py @@ -52,16 +52,7 @@ def retrieveElementCount(obj): return element_count elif isinstance(obj, Molecule): - for atom in obj.atoms: - symbol = atom.element.symbol - isotope = atom.element.isotope - key = symbol if isotope == -1 else (symbol, isotope) - if key in element_count: - updated_count = element_count[key] + 1 - element_count[key] = updated_count - else: - element_count[key] = 1 - return element_count + return obj.get_element_count() else: raise Exception From 153da2169663ffe57cebd7091e387c34f67070b2 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 15 Mar 2018 13:40:34 -0400 Subject: [PATCH 28/57] Use get_element_count for subgraph isomorphism More general implementation to eliminate hardcoded elements --- rmgpy/molecule/group.pxd | 8 +---- rmgpy/molecule/group.py | 53 ++++------------------------ rmgpy/molecule/molecule.py | 72 ++++++++++++++------------------------ 3 files changed, 34 insertions(+), 99 deletions(-) diff --git a/rmgpy/molecule/group.pxd b/rmgpy/molecule/group.pxd index 9a5f38f625..3bade96941 100644 --- a/rmgpy/molecule/group.pxd +++ b/rmgpy/molecule/group.pxd @@ -114,13 +114,7 @@
cdef class Group(Graph): # These read-only attribues act as a "fingerprint" for accelerating # subgraph isomorphism checks - cdef public short carbonCount - cdef public short nitrogenCount - cdef public short oxygenCount - cdef public short sulfurCount - cdef public short chlorineCount - cdef public short iodineCount - cdef public short siliconCount + cdef public dict elementCount cdef public short radicalCount cpdef addAtom(self, GroupAtom atom) diff --git a/rmgpy/molecule/group.py b/rmgpy/molecule/group.py index dc518548b3..ba8107283e 100644 --- a/rmgpy/molecule/group.py +++ b/rmgpy/molecule/group.py @@ -869,6 +869,8 @@ def __init__(self, atoms=None, props=None, multiplicity=None): Graph.__init__(self, atoms) self.props = props or {} self.multiplicity = multiplicity or [] + self.elementCount = {} + self.radicalCount = -1 self.update() def __reduce__(self): @@ -1148,54 +1150,11 @@ def updateFingerprint(self): Update the molecular fingerprint used to accelerate the subgraph isomorphism checks. 
""" - cython.declare(atom=GroupAtom, atomType=AtomType) - cython.declare(carbon=AtomType, nitrogen=AtomType, oxygen=AtomType, sulfur=AtomType, chlorine=AtomType, - iodine=AtomType, silicon=AtomType) - cython.declare(isCarbon=cython.bint, isNitrogen=cython.bint, isOxygen=cython.bint, isSulfur=cython.bint, - isChlorine=cython.bint, isIodine=cython.bint, isSilicon=cython.bint, radical=cython.int) - - carbon = atomTypes['C'] - nitrogen = atomTypes['N'] - oxygen = atomTypes['O'] - sulfur = atomTypes['S'] - chlorine = atomTypes['Cl'] - iodine = atomTypes['I'] - silicon = atomTypes['Si'] - - self.carbonCount = 0 - self.nitrogenCount = 0 - self.oxygenCount = 0 - self.sulfurCount = 0 - self.chlorineCount = 0 - self.iodineCount = 0 - self.siliconCount = 0 - self.radicalCount = 0 + cython.declare(atom=GroupAtom) + + self.elementCount = self.get_element_count() + self.radicalCount = 0 for atom in self.vertices: - if len(atom.atomType) == 1: - atomType = atom.atomType[0] - isCarbon = atomType.equivalent(carbon) - isNitrogen = atomType.equivalent(nitrogen) - isOxygen = atomType.equivalent(oxygen) - isSulfur = atomType.equivalent(sulfur) - isChlorine = atomType.equivalent(chlorine) - isIodine = atomType.equivalent(iodine) - isSilicon = atomType.equivalent(silicon) - sum_is_atom = isCarbon + isNitrogen + isOxygen + isSulfur + isChlorine + isIodine + isSilicon - if sum_is_atom == 1: - if isCarbon: - self.carbonCount += 1 - elif isNitrogen: - self.nitrogenCount += 1 - elif isOxygen: - self.oxygenCount += 1 - elif isSulfur: - self.sulfurCount += 1 - elif isChlorine: - self.chlorineCount += 1 - elif isIodine: - self.iodineCount += 1 - elif isSilicon: - self.siliconCount += 1 if len(atom.radicalElectrons) >= 1: self.radicalCount += atom.radicalElectrons[0] diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 75a9b7af3f..8fafd00d8f 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -1222,33 +1222,23 @@ def isSubgraphIsomorphic(self, other, 
initialMap=None): if not isinstance(other, gr.Group): raise TypeError('Got a {0} object for parameter "other", when a Molecule object is required.'.format(other.__class__)) group = other - - # Count the number of carbons, oxygens, and radicals in the molecule - carbonCount = 0; nitrogenCount = 0; oxygenCount = 0; sulfurCount = 0; radicalCount = 0 - for atom in self.vertices: - if atom.element.symbol == 'C': - carbonCount += 1 - elif atom.element.symbol == 'N': - nitrogenCount += 1 - elif atom.element.symbol == 'O': - oxygenCount += 1 - elif atom.element.symbol == 'S': - sulfurCount += 1 - radicalCount += atom.radicalElectrons - - + + # Check multiplicity if group.multiplicity: if self.multiplicity not in group.multiplicity: return False - # If the molecule has fewer of any of these things than the functional - # group does, then we know the subgraph isomorphism fails without - # needing to perform the full isomorphism check - if (radicalCount < group.radicalCount or - carbonCount < group.carbonCount or - nitrogenCount < group.nitrogenCount or - oxygenCount < group.oxygenCount or - sulfurCount < group.sulfurCount): + + # Compare radical counts + if self.getRadicalCount() < group.radicalCount: return False + # Compare element counts + element_count = self.get_element_count() + for element, count in group.elementCount.iteritems(): + if element not in element_count: + return False + elif element_count[element] < count: + return False + # Do the isomorphism comparison result = Graph.isSubgraphIsomorphic(self, other, initialMap) return result @@ -1272,31 +1262,23 @@ def findSubgraphIsomorphisms(self, other, initialMap=None): if not isinstance(other, gr.Group): raise TypeError('Got a {0} object for parameter "other", when a Group object is required.'.format(other.__class__)) group = other - # Count the number of carbons, oxygens, and radicals in the molecule - carbonCount = 0; nitrogenCount = 0; oxygenCount = 0; sulfurCount = 0; radicalCount = 0 - for atom in 
self.vertices: - if atom.element.symbol == 'C': - carbonCount += 1 - elif atom.element.symbol == 'N': - nitrogenCount += 1 - elif atom.element.symbol == 'O': - oxygenCount += 1 - elif atom.element.symbol == 'S': - sulfurCount += 1 - radicalCount += atom.radicalElectrons - - + + # Check multiplicity if group.multiplicity: if self.multiplicity not in group.multiplicity: return [] - # If the molecule has fewer of any of these things than the functional - # group does, then we know the subgraph isomorphism fails without - # needing to perform the full isomorphism check - if (radicalCount < group.radicalCount or - carbonCount < group.carbonCount or - nitrogenCount < group.nitrogenCount or - oxygenCount < group.oxygenCount or - sulfurCount < group.sulfurCount): + + # Compare radical counts + if self.getRadicalCount() < group.radicalCount: return [] + + # Compare element counts + element_count = self.get_element_count() + for element, count in group.elementCount.iteritems(): + if element not in element_count: + return [] + elif element_count[element] < count: + return [] + # Do the isomorphism comparison result = Graph.findSubgraphIsomorphisms(self, other, initialMap) return result From 6f02e88278b5332135542bf274821faafc394e64 Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Tue, 20 Mar 2018 16:12:58 -0400 Subject: [PATCH 29/57] Remove inadequately specified model chemistries In order to use model chemistries correctly, it is necessary to know the basis set that was used for the calculations of the atomic reference energies. Therefore, remove model chemistries where the basis set was not specified. Also remove unnecessary comments.
--- documentation/source/users/cantherm/input.rst | 3 --- rmgpy/cantherm/statmech.py | 11 +---------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/documentation/source/users/cantherm/input.rst b/documentation/source/users/cantherm/input.rst index 563ca17f4e..eb78945d19 100644 --- a/documentation/source/users/cantherm/input.rst +++ b/documentation/source/users/cantherm/input.rst @@ -67,9 +67,6 @@ Model Chemistry AEC BC SOC ``'B-CCSD(T)-F12/cc-pVnZ-F12'``, *n = D,T,Q* v v ``'B-CCSD(T)-F12/cc-pCVnZ-F12'``, *n = D,T,Q* v v ``'B-CCSD(T)-F12/aug-cc-pVnZ-F12'``, *n = D,T,Q* v v -``'DFT_G03_b3lyp'`` v v v -``'DFT_ks_b3lyp'`` v -``'DFT_uks_b3lyp'`` v ``'G03_PBEPBE_6-311++g_d_p'`` v v ``'MP2_rmp2_pVnZ'``, *n = D,T,Q* v v ``'FCI/cc-pVnZ'``, *n = D,T,Q* v v diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index 1881b799a9..023823a2bd 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ -574,7 +574,6 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds): # We are assuming that SOC is included in the Bond Energy Corrections elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': -# atomEnergies = {'H':-0.499811124128, 'N':-54.526406291655, 'O':-74.995458316117, 'C':-37.788203485235} atomEnergies = {'H':-0.499811124128, 'N':-54.526406291655, 'O':-74.995458316117, 'C':-37.788203485235, 'S':-397.663040369707} elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': atomEnergies = {'H':-0.499946213243, 'N':-54.53000909621, 'O':-75.004127673424, 'C':-37.789862146471, 'S':-397.675447487865} @@ -615,13 +614,6 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds): elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVQZ': atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.528189769291 + SOC['N'], 'O':-75.001879610563+ SOC['O'], 'C':-37.788165047059+ SOC['C']} - elif modelChemistry == 'DFT_G03_b3lyp': - atomEnergies = {'H':-0.502256981529 + SOC['H'], 'N':-54.6007233648 + SOC['N'], 'O':-75.0898777574+ SOC['O'], 
'C':-37.8572666349+ SOC['C']} - elif modelChemistry == 'DFT_ks_b3lyp': - atomEnergies = {'H':-0.49785866 + SOC['H'], 'N':-54.45608798 + SOC['N'], 'O':-74.93566254+ SOC['O'], 'C':-37.76119132+ SOC['C']} - elif modelChemistry == 'DFT_uks_b3lyp': - atomEnergies = {'H':-0.49785866 + SOC['H'], 'N':-54.45729113 + SOC['N'], 'O':-74.93566254+ SOC['O'], 'C':-37.76119132+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVDZ': atomEnergies = {'H':-0.49927840 + SOC['H'], 'N':-54.46141996 + SOC['N'], 'O':-74.89408254+ SOC['O'], 'C':-37.73792713+ SOC['C']} elif modelChemistry == 'MP2_rmp2_pVTZ': @@ -639,7 +631,6 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds): atomEnergies = {'H':-0.499812273282 + SOC['H'], 'N':-54.5289567564 + SOC['N'], 'O':-75.0033596764+ SOC['O'], 'C':-37.7937388736+ SOC['C']} elif modelChemistry == 'FCI/cc-pVDZ': -# atomEnergies = {'C':-37.760717371923} atomEnergies = {'C':-37.789527+ SOC['C']} elif modelChemistry == 'FCI/cc-pVTZ': atomEnergies = {'C':-37.781266669684+ SOC['C']} @@ -710,7 +701,7 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds): 'N-H': -0.42, 'N=O': 1.11, 'N-N': -1.87, 'N=N': -1.58,'N-O': 0.35, #Table 2: Ashcraft R (2007) J. Phys. Chem. 
B; DOI: 10.1021/jp073539t 'N#N': -2.0, 'O=O': -0.2, 'H-H': 1.1, # Unknown source } - elif modelChemistry in ['B3LYP/cbsb7', 'B3LYP/6-311G(2d,d,p)', 'DFT_G03_b3lyp','B3LYP/6-311+G(3df,2p)','b3lyp/6-31G**']: + elif modelChemistry in ['B3LYP/cbsb7', 'B3LYP/6-311G(2d,d,p)', 'B3LYP/6-311+G(3df,2p)', 'b3lyp/6-31G**']: bondEnergies = { 'C-H': 0.25, 'C-C': -1.89, 'C=C': -0.40, 'C#C': -1.50, 'O-H': -1.09, 'C-O': -1.18, 'C=O': -0.01, 'N-H': 1.36, 'C-N': -0.44, 'C#N': 0.22, 'C-S': -2.35, 'O=S': -5.19, 'S-H': -0.52, } From b2b333e54c87bcd63264c4f983bd01d9fcd61bc5 Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Tue, 20 Mar 2018 17:02:45 -0400 Subject: [PATCH 30/57] Automatically infer atoms from quantum chemistry output --- documentation/source/users/cantherm/input.rst | 22 +++-------- .../source/users/cantherm/input_pdep.rst | 24 +++--------- .../23dimethylpropoxy/dimetpropoxy.py | 9 +---- .../23dimethylpropoxy/dimetpropoxy_betasci.py | 6 --- .../cantherm/reactions/CH3OH+HCO/ch3oh.py | 6 --- examples/cantherm/reactions/CH3OH+HCO/hco.py | 6 --- examples/cantherm/reactions/CH3OH+HCO/ts.py | 6 --- examples/cantherm/reactions/H+C2H4=C2H5/TS.py | 5 --- examples/cantherm/species/Benzyl/benzyl.py | 5 --- examples/cantherm/species/C2H4/ethene.py | 5 --- examples/cantherm/species/C2H5/ethyl.py | 5 --- examples/cantherm/species/C2H6/C2H6.py | 5 --- examples/cantherm/species/H/H.py | 4 -- .../species/Toulene/toluene_FreeRotor.py | 5 --- .../species/Toulene/toluene_HinderedRotor.py | 5 --- rmgpy/cantherm/statmech.py | 38 +++++++++++++------ 16 files changed, 37 insertions(+), 119 deletions(-) diff --git a/documentation/source/users/cantherm/input.rst b/documentation/source/users/cantherm/input.rst index eb78945d19..67aca03dd5 100644 --- a/documentation/source/users/cantherm/input.rst +++ b/documentation/source/users/cantherm/input.rst @@ -39,7 +39,7 @@ which accepts a string describing the model chemistry. 
CanTherm uses this information to adjust the computed energies to the usual gas-phase reference states by applying atom, bond and spin-orbit coupling energy corrections. This is particularly important for ``thermo()`` calculations (see below). Note that the user must specify under the -``species()`` function the type and number of atoms and bonds for CanTherm to apply these corrections. +``species()`` function the type and number of bonds for CanTherm to apply these corrections. The example below specifies CBS-QB3 as the model chemistry:: modelChemistry("CBS-QB3") @@ -114,7 +114,6 @@ The species input file accepts the following parameters: ======================= =========================== ==================================== Parameter Required? Description ======================= =========================== ==================================== -``atoms`` yes Type and number of atoms in the species ``bonds`` optional Type and number of bonds in the species ``linear`` yes ``True`` if the molecule is linear, ``False`` if not ``externalSymmetry`` yes The external symmetry number for rotation @@ -128,12 +127,11 @@ Parameter Required? Description ``rotors`` optional A list of :class:`HinderedRotor()` and/or :class:`FreeRotor()` objects describing the hindered/free rotors ======================= =========================== ==================================== -The ``atom`` and ``bond`` parameters are used to apply atomization energy corrections (AEC), bond corrections (BC), and spin orbit corrections (SOC) for a given ``modelChemistry()`` (see `Model Chemistry`_). +The types and number of atoms in the species are automatically inferred from the quantum chemistry output and are used +to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry()`` +(see `Model Chemistry`_). -Allowed atom symbols for the ``atoms`` parameter are -``'C'``, ``'N'``, ``'O'``, ``'S'``, ``'P'``, and ``'H'``. 
For example, for formaldehyde we would write:: - - atoms = {'C': 1, 'O': 1, H': 2} +The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry()``. Allowed bond types for the ``bonds`` parameter are, e.g., ``'C-H'``, ``'C-C'``, ``'C=C'``, ``'N-O'``, ``'C=S'``, ``'O=O'``, ``'C#N'``... @@ -190,11 +188,6 @@ the ``species()`` function in the input file should look like the following exam and the species input file (``C2H6.py`` in the example above) should look like the following:: - atoms = { - 'C': 2, - 'H': 6, - } - bonds = { 'C-C': 1, 'C-H': 6, @@ -300,11 +293,6 @@ in between these two extremes. To summarize, the species input file with hindered/free rotors should look like the following example (different options for specifying the same ``rotors`` entry are commented out):: - atoms = { - 'C': 2, - 'H': 6, - } - bonds = { 'C-C': 1, 'C-H': 6, diff --git a/documentation/source/users/cantherm/input_pdep.rst b/documentation/source/users/cantherm/input_pdep.rst index 30069164db..5c22dd68f7 100644 --- a/documentation/source/users/cantherm/input_pdep.rst +++ b/documentation/source/users/cantherm/input_pdep.rst @@ -51,7 +51,7 @@ which accepts a string describing the model chemistry. CanTherm uses this information to adjust the computed energies to the usual gas-phase reference states by applying atom, bond and spin-orbit coupling energy corrections. This is particularly important for ``thermo()`` calculations (see below). Note that the user must specify under the -``species()`` function the type and number of atoms and bonds for CanTherm to apply these corrections. +``species()`` function the type and number of bonds for CanTherm to apply these corrections. The example below specifies CBS-QB3 as the model chemistry:: modelChemistry("CBS-QB3") @@ -164,7 +164,6 @@ The species input file accepts the following parameters: ======================= =========================== ==================================== Parameter Required? 
Description ======================= =========================== ==================================== -``atoms`` yes Type and number of atoms in the species ``bonds`` optional Type and number of bonds in the species ``linear`` yes ``True`` if the molecule is linear, ``False`` if not ``externalSymmetry`` yes The external symmetry number for rotation @@ -178,12 +177,11 @@ Parameter Required? Description ``rotors`` optional A list of :class:`HinderedRotor()` and/or :class:`FreeRotor()` objects describing the hindered/free rotors ======================= =========================== ==================================== -The ``atom`` and ``bond`` parameters are used to apply atomization energy corrections (AEC), bond corrections (BC), and spin orbit corrections (SOC) for a given ``modelChemistry()`` (see `Model Chemistry `_). +The types and number of atoms in the species are automatically inferred from the quantum chemistry output and are used +to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry()`` +(see `Model Chemistry`_). -Allowed atom symbols for the ``atoms`` parameter are -``'C'``, ``'N'``, ``'O'``, ``'S'``, ``'P'``, and ``'H'``. For example, for acetylperoxy radical we would write:: - - atoms = {'C': 2, 'O': 3, H': 3} +The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry()``. Allowed bond types for the ``bonds`` parameter are, e.g., ``'C-H'``, ``'C-C'``, ``'C=C'``, ``'N-O'``, ``'C=S'``, ``'O=O'``, ``'C#N'``... @@ -236,12 +234,6 @@ For example:: In summary, in order to specify the molecular properties of a species by parsing the output of quantum chemistry calculations, without any hindered/free rotors, the species input file should look like the following (using acetylperoxy as an example):: - atoms = { - 'C': 2, - 'O': 3, - H': 3, - } - bonds = { 'C-C': 1, 'C=O': 1, @@ -350,12 +342,6 @@ in between these two extremes. 
To summarize, the species input file with hindered/free rotors should look like the following example (different options for specifying the same ``rotors`` entry are commented out):: - atoms = { - 'C': 2, - 'O': 3, - 'H': 3, - } - bonds = { 'C-C': 1, 'C=O': 1, diff --git a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py index b39b2a0370..184663854f 100644 --- a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py +++ b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py @@ -1,13 +1,6 @@ - -#!/usr/bin/env python +#!/usr/bin/env python # encoding: utf-8 -atoms = { - 'C': 5, - 'H': 11, - 'O': 1 -} - bonds = {} linear = False diff --git a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py index 8b21fb458a..478f428c70 100644 --- a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py +++ b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py @@ -1,12 +1,6 @@ #!/usr/bin/env python # encoding: utf-8 -atoms = { - 'C': 5, - 'H': 11, - 'O': 1 -} - bonds = {} linear = False diff --git a/examples/cantherm/reactions/CH3OH+HCO/ch3oh.py b/examples/cantherm/reactions/CH3OH+HCO/ch3oh.py index 089e7f17a8..3b7efd7379 100644 --- a/examples/cantherm/reactions/CH3OH+HCO/ch3oh.py +++ b/examples/cantherm/reactions/CH3OH+HCO/ch3oh.py @@ -1,12 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 1, - 'H': 4, - 'O': 1, -} - bonds = { 'C-O': 1, 'C-H': 3, diff --git a/examples/cantherm/reactions/CH3OH+HCO/hco.py b/examples/cantherm/reactions/CH3OH+HCO/hco.py index 9a10cb957f..35a2647813 100644 --- a/examples/cantherm/reactions/CH3OH+HCO/hco.py +++ b/examples/cantherm/reactions/CH3OH+HCO/hco.py @@ -1,12 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 1, - 'H': 1, - 'O': 1, -} - bonds = { 'C-O': 1, 'C-H': 1, diff --git 
a/examples/cantherm/reactions/CH3OH+HCO/ts.py b/examples/cantherm/reactions/CH3OH+HCO/ts.py index 2c9afa8dc3..fc5c40b3c2 100644 --- a/examples/cantherm/reactions/CH3OH+HCO/ts.py +++ b/examples/cantherm/reactions/CH3OH+HCO/ts.py @@ -1,12 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 2, - 'H': 5, - 'O': 2, -} - bonds = {} linear = False diff --git a/examples/cantherm/reactions/H+C2H4=C2H5/TS.py b/examples/cantherm/reactions/H+C2H4=C2H5/TS.py index 0bcfe1f07b..ec0030ed1b 100644 --- a/examples/cantherm/reactions/H+C2H4=C2H5/TS.py +++ b/examples/cantherm/reactions/H+C2H4=C2H5/TS.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 2, - 'H': 5, -} - bonds = {} linear = False diff --git a/examples/cantherm/species/Benzyl/benzyl.py b/examples/cantherm/species/Benzyl/benzyl.py index 1d45763d19..f2b2d2087d 100755 --- a/examples/cantherm/species/Benzyl/benzyl.py +++ b/examples/cantherm/species/Benzyl/benzyl.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 7, - 'H': 7, -} - bonds = { 'C=C': 3, 'C-C': 4, diff --git a/examples/cantherm/species/C2H4/ethene.py b/examples/cantherm/species/C2H4/ethene.py index f0fd6bb923..ea47ab91a9 100644 --- a/examples/cantherm/species/C2H4/ethene.py +++ b/examples/cantherm/species/C2H4/ethene.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 2, - 'H': 4, -} - bonds = { 'C=C': 1, 'C-H': 4, diff --git a/examples/cantherm/species/C2H5/ethyl.py b/examples/cantherm/species/C2H5/ethyl.py index 5f5d180e13..29d0fc1322 100644 --- a/examples/cantherm/species/C2H5/ethyl.py +++ b/examples/cantherm/species/C2H5/ethyl.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 2, - 'H': 5, -} - bonds = { 'C-C': 1, 'C-H': 5, diff --git a/examples/cantherm/species/C2H6/C2H6.py b/examples/cantherm/species/C2H6/C2H6.py index 3543a7259c..618b24b9de 100644 --- a/examples/cantherm/species/C2H6/C2H6.py +++ 
b/examples/cantherm/species/C2H6/C2H6.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # encoding: utf-8 -atoms = { - 'C': 2, - 'H': 6, -} - bonds = { 'C-C': 1, 'C-H': 6, diff --git a/examples/cantherm/species/H/H.py b/examples/cantherm/species/H/H.py index 7d0928ab6e..f2540fff88 100644 --- a/examples/cantherm/species/H/H.py +++ b/examples/cantherm/species/H/H.py @@ -1,10 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'H': 1, -} - bonds = {} linear = False diff --git a/examples/cantherm/species/Toulene/toluene_FreeRotor.py b/examples/cantherm/species/Toulene/toluene_FreeRotor.py index 2e22f2d49c..ac2498886a 100755 --- a/examples/cantherm/species/Toulene/toluene_FreeRotor.py +++ b/examples/cantherm/species/Toulene/toluene_FreeRotor.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 7, - 'H': 8, -} - bonds = { 'C-C': 4, 'C-H': 8, diff --git a/examples/cantherm/species/Toulene/toluene_HinderedRotor.py b/examples/cantherm/species/Toulene/toluene_HinderedRotor.py index f07dcf47e7..09dd8c1975 100755 --- a/examples/cantherm/species/Toulene/toluene_HinderedRotor.py +++ b/examples/cantherm/species/Toulene/toluene_HinderedRotor.py @@ -1,11 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -atoms = { - 'C': 7, - 'H': 8, -} - bonds = { 'C-C': 4, 'C-H': 8, diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index 023823a2bd..21d7c6fd13 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ -39,6 +39,8 @@ import numpy import logging +from rdkit.Chem import GetPeriodicTable + import rmgpy.constants as constants from rmgpy.cantherm.output import prettify @@ -55,6 +57,14 @@ from rmgpy.statmech.conformer import Conformer from rmgpy.exceptions import InputError +# These are the atoms we currently have enthalpies of formation for +atom_num_dict = {1: 'H', + 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', + 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl'} + +# Use the 
RDKit periodic table so we can write symbols for not implemented elements +_rdkit_periodic_table = GetPeriodicTable() + ################################################################################ class ScanLog: @@ -215,15 +225,6 @@ def load(self): logging.error('The species file {0} was invalid:'.format(path)) raise - try: - atoms = local_context['atoms'] - except KeyError: - raise InputError('Required attribute "atoms" not found in species file {0!r}.'.format(path)) - else: - if isinstance(self.species, Species): - # Save atoms for use in writing thermo output - self.species.props['elementCounts'] = atoms - try: bonds = local_context['bonds'] except KeyError: @@ -318,6 +319,21 @@ def load(self): logging.debug(' Reading optimized geometry...') coordinates, number, mass = geomLog.loadGeometry() + # Infer atoms from geometry + atoms = {} + for atom_num in number: + try: + symbol = atom_num_dict[atom_num] + except KeyError: + raise Exception( + 'Element {} is not yet supported.'.format(_rdkit_periodic_table.GetElementSymbol(atom_num)) + ) + atoms[symbol] = atoms.get(symbol, 0) + 1 + + # Save atoms for use in writing thermo output + if isinstance(self.species, Species): + self.species.props['elementCounts'] = atoms + conformer.coordinates = (coordinates,"angstroms") conformer.number = number conformer.mass = (mass,"amu") @@ -449,8 +465,6 @@ def save(self, outputFile): logging.info('Saving statistical mechanics parameters for {0}...'.format(self.species.label)) f = open(outputFile, 'a') - - numbers = {1: 'H', 6: 'C', 7: 'N', 8: 'O', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl'} conformer = self.species.conformer @@ -462,7 +476,7 @@ def save(self, outputFile): x = coordinates[i,0] y = coordinates[i,1] z = coordinates[i,2] - f.write('# {0} {1:9.4f} {2:9.4f} {3:9.4f}\n'.format(numbers[number[i]], x, y, z)) + f.write('# {0} {1:9.4f} {2:9.4f} {3:9.4f}\n'.format(atom_num_dict[number[i]], x, y, z)) string = 'conformer(label={0!r}, E0={1!r}, modes={2!r}, spinMultiplicity={3:d}, 
opticalIsomers={4:d}'.format( self.species.label, From 427bafcb73bc231505e05a28f8e93db065e7f4a9 Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Tue, 20 Mar 2018 17:46:18 -0400 Subject: [PATCH 31/57] Allow user-defined atomic energies --- documentation/source/users/cantherm/input.rst | 102 +++++---- .../source/users/cantherm/input_pdep.rst | 37 ++- rmgpy/cantherm/input.py | 2 + rmgpy/cantherm/statmech.py | 213 +++++++++--------- 4 files changed, 197 insertions(+), 157 deletions(-) diff --git a/documentation/source/users/cantherm/input.rst b/documentation/source/users/cantherm/input.rst index 67aca03dd5..151f000f84 100644 --- a/documentation/source/users/cantherm/input.rst +++ b/documentation/source/users/cantherm/input.rst @@ -13,12 +13,13 @@ Each section is made up of one or more function calls, where parameters are specified as text strings, numbers, or objects. Text strings must be wrapped in either single or double quotes. -The following is a list of all the functions of a CanTherm input file for thermodynamics and high-pressure limit kinetics computations: +The following is a list of all the components of a CanTherm input file for thermodynamics and high-pressure limit kinetics computations: =========================== ========================================================= -Function Description +Component Description =========================== ========================================================= ``modelChemistry`` Level of theory from quantum chemical calculations +``atomEnergies`` Dictionary of atomic energies at ``modelChemistry`` level ``frequencyScaleFactor`` A factor by which to scale all frequencies ``useHinderedRotors`` ``True`` if hindered rotors are used, ``False`` if not ``useBondCorrections`` ``True`` if bond corrections are used, ``False`` if not @@ -33,8 +34,8 @@ Function Description Model Chemistry =============== -The first item in the input file should be a ``modelChemistry()`` function, -which accepts a string describing the model 
chemistry. +The first item in the input file should be a ``modelChemistry`` assignment +with a string describing the model chemistry. CanTherm uses this information to adjust the computed energies to the usual gas-phase reference states by applying atom, bond and spin-orbit coupling energy corrections. This is particularly @@ -42,49 +43,66 @@ important for ``thermo()`` calculations (see below). Note that the user must spe ``species()`` function the type and number of bonds for CanTherm to apply these corrections. The example below specifies CBS-QB3 as the model chemistry:: - modelChemistry("CBS-QB3") - -Currently, atomization energy corrections (AEC), bond corrections (BC), and spin orbit correction (SOC) are available for the following model chemistries: - -================================================ ===== ==== ==== -Model Chemistry AEC BC SOC -================================================ ===== ==== ==== -``'CBS-QB3'`` v v v -``'G3'`` v v -``'M08SO/MG3S*'`` v v -``'M06-2X/cc-pVTZ'`` v v -``'Klip_1'`` v v -``'Klip_2'`` *uses QCI(tz,qz) values* v v -``'Klip_3'`` *uses QCI(dz,qz) values* v v -``'Klip_2_cc'`` *uses CCSD(T)(tz,qz) values* v v -``'CCSD-F12/cc-pVDZ-F12'`` v v -``'CCSD(T)-F12/cc-pVDZ-F12_H-TZ'`` v v -``'CCSD(T)-F12/cc-pVDZ-F12_H-QZ'`` v v -``'CCSD(T)-F12/cc-pVnZ-F12'``, *n = D,T,Q* v v v -``'CCSD(T)-F12/cc-pVDZ-F12_noscale'`` v v -``'CCSD(T)-F12/cc-pCVnZ-F12'``, *n = D,T,Q* v v -``'CCSD(T)-F12/aug-cc-pVnZ-F12'``, *n = D,T,Q* v v -``'B-CCSD(T)-F12/cc-pVnZ-F12'``, *n = D,T,Q* v v -``'B-CCSD(T)-F12/cc-pCVnZ-F12'``, *n = D,T,Q* v v -``'B-CCSD(T)-F12/aug-cc-pVnZ-F12'``, *n = D,T,Q* v v -``'G03_PBEPBE_6-311++g_d_p'`` v v -``'MP2_rmp2_pVnZ'``, *n = D,T,Q* v v -``'FCI/cc-pVnZ'``, *n = D,T,Q* v v -``'BMK/cbsb7'`` v v v -``'BMK/6-311G(2d,d,p)'`` v v v + modelChemistry = "CBS-QB3" + +Alternatively, the atomic energies at the ``modelChemistry`` level of theory can be directly +specified in the input file by providing a dictionary of these energies in the 
following format:: + + atomEnergies = { + 'H': -0.499818, + 'C': -37.78552, + 'N': -54.520543, + 'O': -74.987979, + 'S': -397.658253, + } + +The table below shows which model chemistries have atomization energy corrections (AEC), bond +corrections (BC), and spin orbit corrections (SOC). It also lists which atom types are available +for a given model chemistry. + +================================================ ===== ==== ==== ==================== +Model Chemistry AEC BC SOC Supported Elements +================================================ ===== ==== ==== ==================== +``'CBS-QB3'`` v v v H, C, N, O, P, S +``'G3'`` v v H, C, N, O, P, S +``'M08SO/MG3S*'`` v v H, C, N, O, P, S +``'M06-2X/cc-pVTZ'`` v v H, C, N, O, P, S +``'Klip_1'`` v v H, C, N, O +``'Klip_2'`` *uses QCI(tz,qz) values* v v H, C, N, O +``'Klip_3'`` *uses QCI(dz,qz) values* v v H, C, N, O +``'Klip_2_cc'`` *uses CCSD(T)(tz,qz) values* v v H, C, O +``'CCSD-F12/cc-pVDZ-F12'`` v v H, C, N, O +``'CCSD(T)-F12/cc-pVDZ-F12_H-TZ'`` v v H, C, N, O +``'CCSD(T)-F12/cc-pVDZ-F12_H-QZ'`` v v H, C, N, O +``'CCSD(T)-F12/cc-pVnZ-F12'``, *n = D,T,Q* v v v H, C, N, O, S +``'CCSD(T)-F12/cc-pVDZ-F12_noscale'`` v v H, C, N, O +``'CCSD(T)-F12/cc-pCVnZ-F12'``, *n = D,T,Q* v v H, C, N, O +``'CCSD(T)-F12/aug-cc-pVnZ'``, *n = D,T,Q* v v H, C, N, O +``'B-CCSD(T)-F12/cc-pVnZ-F12'``, *n = D,T,Q* v v H, C, N, O, S +``'B-CCSD(T)-F12/cc-pCVnZ-F12'``, *n = D,T,Q* v v H, C, N, O +``'B-CCSD(T)-F12/aug-cc-pVnZ'``, *n = D,T,Q* v v H, C, N, O +``'G03_PBEPBE_6-311++g_d_p'`` v v H, C, N, O +``'MP2_rmp2_pVnZ'``, *n = D,T,Q* v v H, C, N, O +``'FCI/cc-pVnZ'``, *n = D,T,Q* v v C +``'BMK/cbsb7'`` v v v H, C, N, O, P, S +``'BMK/6-311G(2d,d,p)'`` v v v H, C, N, O, P, S ``'B3LYP/6-311+G(3df,2p)'`` v -``'B3LYP/6-31G**'`` v v -================================================ ===== ==== ==== +``'B3LYP/6-31G**'`` v v H, C, O, S +================================================ ===== ==== ==== ==================== Notes: -- In ``'M08SO/MG3S*'``
the grid size used in the [QChem] electronic structure calculation utilizes 75 radial points and 434 angular points. ``'DFT_G03_b3lyp'`` is a B3LYP calculation with a moderately large basis set. +- In ``'M08SO/MG3S*'`` the grid size used in the [QChem] electronic structure calculation utilizes 75 radial points and 434 angular points. - Refer to paper by Goldsmith et al. (*Goldsmith, C. F.; Magoon, G. R.; Green, W. H., Database of Small Molecule Thermochemistry for Combustion. J. Phys. Chem. A 2012, 116, 9033-9057*) for definition of ``'Klip_2'`` (*QCI(tz,qz)*) and ``'Klip_3'`` (*QCI(dz,qz)*). +If a model chemistry other than the ones in the above table is used, then the user should supply +the corresponding atomic energies (using ``atomEnergies``) to get meaningful results. Bond +corrections would not be applied in this case. + Frequency Scale Factor ====================== -Frequency scale factors are empirically fit to experiment for different ``modelChemistry()``. Refer to NIST website for values (http://cccbdb.nist.gov/vibscalejust.asp). +Frequency scale factors are empirically fit to experiment for different ``modelChemistry``. Refer to NIST website for values (http://cccbdb.nist.gov/vibscalejust.asp). For CBS-QB3, which is not included in the link above, ``frequencyScaleFactor = 0.99`` according to Montgomery et al. (*J. Chem. Phys. 1999, 110, 2822–2827*). Species @@ -128,10 +146,10 @@ Parameter Required? Description ======================= =========================== ==================================== The types and number of atoms in the species are automatically inferred from the quantum chemistry output and are used -to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry()`` +to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry`` (see `Model Chemistry`_). 
-The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry()``. +The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry``. Allowed bond types for the ``bonds`` parameter are, e.g., ``'C-H'``, ``'C-C'``, ``'C=C'``, ``'N-O'``, ``'C=S'``, ``'O=O'``, ``'C#N'``... @@ -155,7 +173,7 @@ For ethane, we would write:: opticalIsomers = 1 -The ``energy`` parameter is a dictionary with entries for different ``modelChemistry()``. The entries can consist of either +The ``energy`` parameter is a dictionary with entries for different ``modelChemistry``. The entries can consist of either floating point numbers corresponding to the 0 K atomization energy in Hartree (without zero-point energy correction), or they can specify the path to a quantum chemistry calculation output file that contains the species's energy. For example:: @@ -165,7 +183,7 @@ they can specify the path to a quantum chemistry calculation output file that co } In this example, the ``CBS-QB3`` energy is obtained from a Gaussian log file, while the ``Klip_2`` energy is specified directly. -The energy used will depend on what ``modelChemistry()`` was specified in the input file. CanTherm can parse the energy from +The energy used will depend on what ``modelChemistry`` was specified in the input file. CanTherm can parse the energy from a ``GaussianLog``, ``MolproLog`` or ``QchemLog``. The input to the remaining parameters, ``geometry``, ``frequencies`` and ``rotors``, will depend on if hindered/free rotors are included. diff --git a/documentation/source/users/cantherm/input_pdep.rst b/documentation/source/users/cantherm/input_pdep.rst index 5c22dd68f7..146f193a0b 100644 --- a/documentation/source/users/cantherm/input_pdep.rst +++ b/documentation/source/users/cantherm/input_pdep.rst @@ -19,12 +19,13 @@ Each section is made up of one or more function calls, where parameters are specified as text strings, numbers, or objects. 
Text strings must be wrapped in either single or double quotes. -The following is a list of all the functions of a CanTherm input file for pressure-dependent calculations: +The following is a list of all the components of a CanTherm input file for pressure-dependent calculations: =========================== ================================================================ -Function Description +Component Description =========================== ================================================================ ``modelChemistry`` Level of theory from quantum chemical calculations +``atomEnergies`` Dictionary of atomic energies at ``modelChemistry`` level ``frequencyScaleFactor`` A factor by which to scale all frequencies ``useHinderedRotors`` ``True`` if hindered rotors are used, ``False`` if not ``useBondCorrections`` ``True`` if bond corrections are used, ``False`` if not @@ -45,8 +46,8 @@ Important differences are mentioned in the sections below. Model Chemistry =============== -The first item in the input file should be a ``modelChemistry()`` function, -which accepts a string describing the model chemistry. +The first item in the input file should be a ``modelChemistry`` assignment +with a string describing the model chemistry. CanTherm uses this information to adjust the computed energies to the usual gas-phase reference states by applying atom, bond and spin-orbit coupling energy corrections. This is particularly @@ -54,15 +55,27 @@ important for ``thermo()`` calculations (see below). Note that the user must spe ``species()`` function the type and number of bonds for CanTherm to apply these corrections. 
The example below specifies CBS-QB3 as the model chemistry:: - modelChemistry("CBS-QB3") + modelChemistry = "CBS-QB3" -Currently, atomization energy corrections (AEC), bond corrections (BC), and spin orbit correction (SOC) are available for -model chemistries as described under `High-Pressure Limit: Model Chemistry `_ +Alternatively, the atomic energies at the ``modelChemistry`` level of theory can be directly +specified in the input file by providing a dictionary of these energies in the following format:: + + atomEnergies = { + 'H': -0.499818, + 'C': -37.78552, + 'N': -54.520543, + 'O': -74.987979, + 'S': -397.658253, + } + +The availability of atomization energy corrections (AEC), bond corrections (BC), and spin orbit +corrections (SOC), as well as the atom types supported by each model chemistry, is described +under `High-Pressure Limit: Model Chemistry `_ Frequency Scale Factor ====================== -Frequency scale factors are empirically fit to experiment for different ``modelChemistry()``. Refer to NIST website for values (http://cccbdb.nist.gov/vibscalejust.asp). +Frequency scale factors are empirically fit to experiment for different ``modelChemistry``. Refer to NIST website for values (http://cccbdb.nist.gov/vibscalejust.asp). For CBS-QB3, which is not included in the link above, ``frequencyScaleFactor = 0.99`` according to Montgomery et al. (*J. Chem. Phys. 1999, 110, 2822–2827*). Species Parameters @@ -178,10 +191,10 @@ Parameter Required? Description ======================= =========================== ==================================== The types and number of atoms in the species are automatically inferred from the quantum chemistry output and are used -to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry()`` +to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry`` (see `Model Chemistry`_).
-The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry()``. +The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry``. Allowed bond types for the ``bonds`` parameter are, e.g., ``'C-H'``, ``'C-C'``, ``'C=C'``, ``'N-O'``, ``'C=S'``, ``'O=O'``, ``'C#N'``... @@ -205,7 +218,7 @@ For acetylperoxy radical, we would write:: opticalIsomers = 1 -The ``energy`` parameter is a dictionary with entries for different ``modelChemistry()``. The entries can consist of either +The ``energy`` parameter is a dictionary with entries for different ``modelChemistry``. The entries can consist of either floating point numbers corresponding to the 0 K atomization energy in Hartree (without zero-point energy correction), or they can specify the path to a quantum chemistry calculation output file that contains the species's energy. For example:: @@ -215,7 +228,7 @@ they can specify the path to a quantum chemistry calculation output file that co } In this example, the ``CBS-QB3`` energy is obtained from a Gaussian log file, while the ``Klip_2`` energy is specified directly. -The energy used will depend on what ``modelChemistry()`` was specified in the input file. CanTherm can parse the energy from +The energy used will depend on what ``modelChemistry`` was specified in the input file. CanTherm can parse the energy from a ``GaussianLog``, ``MolproLog`` or ``QchemLog``. The input to the remaining parameters, ``geometry``, ``frequencies`` and ``rotors``, will depend on if hindered/free rotors are included. 
diff --git a/rmgpy/cantherm/input.py b/rmgpy/cantherm/input.py index 3f06a729fd..8f9f05708d 100644 --- a/rmgpy/cantherm/input.py +++ b/rmgpy/cantherm/input.py @@ -389,6 +389,7 @@ def loadInputFile(path): frequencyScaleFactor = local_context.get('frequencyScaleFactor', 1.0) useHinderedRotors = local_context.get('useHinderedRotors', True) useBondCorrections = local_context.get('useBondCorrections', False) + atomEnergies = local_context.get('atomEnergies', None) directory = os.path.dirname(path) @@ -399,5 +400,6 @@ def loadInputFile(path): job.frequencyScaleFactor = frequencyScaleFactor job.includeHinderedRotors = useHinderedRotors job.applyBondEnergyCorrections = useBondCorrections + job.atomEnergies = atomEnergies return jobList diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index 21d7c6fd13..b47d5f299b 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ -175,6 +175,7 @@ def __init__(self, species, path): self.frequencyScaleFactor = 1.0 self.includeHinderedRotors = True self.applyBondEnergyCorrections = True + self.atomEnergies = None def execute(self, outputFile=None, plot=False): """ @@ -344,7 +345,11 @@ def load(self): E0 = energyLog.loadEnergy(self.frequencyScaleFactor) else: E0 = E0 * constants.E_h * constants.Na # Hartree/particle to J/mol - E0 = applyEnergyCorrections(E0, self.modelChemistry, atoms, bonds if self.applyBondEnergyCorrections else {}) + E0 = applyEnergyCorrections(E0, + self.modelChemistry, + atoms, + bonds if self.applyBondEnergyCorrections else {}, + atomEnergies=self.atomEnergies) ZPE = statmechLog.loadZeroPointEnergy() * self.frequencyScaleFactor # The E0_withZPE at this stage contains the ZPE @@ -534,7 +539,7 @@ def plotHinderedRotor(self, angle, Vlist, cosineRotor, fourierRotor, rotor, roto ################################################################################ -def applyEnergyCorrections(E0, modelChemistry, atoms, bonds): +def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, 
atomEnergies=None): """ Given an energy `E0` in J/mol as read from the output of a quantum chemistry calculation at a given `modelChemistry`, adjust the energy such that it @@ -557,108 +562,110 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds): # Step 1: Reference all energies to a model chemistry-independent basis # by subtracting out that model chemistry's atomic energies - # Note: If your model chemistry does not include spin orbit coupling, you should add the corrections to the energies here - if modelChemistry == 'CBS-QB3': - atomEnergies = {'H':-0.499818 + SOC['H'], 'N':-54.520543 + SOC['N'], 'O':-74.987624+ SOC['O'], 'C':-37.785385+ SOC['C'], 'P':-340.817186+ SOC['P'], 'S': -397.657360+ SOC['S']} - elif modelChemistry == 'M06-2X/cc-pVTZ': - atomEnergies = {'H':-0.498135 + SOC['H'], 'N':-54.586780 + SOC['N'], 'O':-75.064242+ SOC['O'], 'C':-37.842468+ SOC['C'], 'P':-341.246985+ SOC['P'], 'S': -398.101240+ SOC['S']} - elif modelChemistry == 'G3': - atomEnergies = {'H':-0.5010030, 'N':-54.564343, 'O':-75.030991, 'C':-37.827717, 'P':-341.116432, 'S': -397.961110} - elif modelChemistry == 'M08SO/MG3S*': # * indicates that the grid size used in the [QChem] electronic - #structure calculation utilized 75 radial points and 434 angular points - #(i.e,, this is specified in the $rem section of the [qchem] input file as: XC_GRID 000075000434) - atomEnergies = {'H':-0.5017321350 + SOC['H'], 'N':-54.5574039365 + SOC['N'], 'O':-75.0382931348+ SOC['O'], 'C':-37.8245648740+ SOC['C'], 'P':-341.2444299005+ SOC['P'], 'S':-398.0940312227+ SOC['S'] } - elif modelChemistry == 'Klip_1': - atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53383153 + SOC['N'], 'O':-75.00935474+ SOC['O'], 'C':-37.79266591+ SOC['C']} - elif modelChemistry == 'Klip_2': - #Klip QCI(tz,qz) - atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53169400 + SOC['N'], 'O':-75.00714902+ SOC['O'], 'C':-37.79060419+ SOC['C']} - elif modelChemistry == 'Klip_3': - #Klip QCI(dz,tz) - atomEnergies = 
{'H':-0.50005578 + SOC['H'], 'N':-54.53128140 + SOC['N'], 'O':-75.00356581+ SOC['O'], 'C':-37.79025175+ SOC['C']} - - elif modelChemistry == 'Klip_2_cc': - #Klip CCSD(T)(tz,qz) - atomEnergies = {'H':-0.50003976 + SOC['H'], 'O':-75.00681155+ SOC['O'], 'C':-37.79029443+ SOC['C']} - - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-TZ': - atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-QZ': - atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} - - # We are assuming that SOC is included in the Bond Energy Corrections - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': - atomEnergies = {'H':-0.499811124128, 'N':-54.526406291655, 'O':-74.995458316117, 'C':-37.788203485235, 'S':-397.663040369707} - elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': - atomEnergies = {'H':-0.499946213243, 'N':-54.53000909621, 'O':-75.004127673424, 'C':-37.789862146471, 'S':-397.675447487865} - elif modelChemistry == 'CCSD(T)-F12/cc-pVQZ-F12': - atomEnergies = {'H':-0.499994558325, 'N':-54.530515226371, 'O':-75.005600062003, 'C':-37.789961656228, 'S':-397.676719774973} - elif modelChemistry == 'CCSD(T)-F12/cc-pCVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.582137180344 + SOC['N'], 'O':-75.053045547421 + SOC['O'], 'C':-37.840869118707+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pCVTZ-F12': - atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.588545831900 + SOC['N'], 'O':-75.065995072347 + SOC['O'], 'C':-37.844662139972+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pCVQZ-F12': - atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.589137594139+ SOC['N'], 'O':-75.067412234737+ SOC['O'], 'C':-37.844893820561+ SOC['C']} - - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVDZ': - atomEnergies = {'H':-0.499459066131 + 
SOC['H'], 'N':-54.524279516472 + SOC['N'], 'O':-74.992097308083+ SOC['O'], 'C':-37.786694171716+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVTZ': - atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.527419359906 + SOC['N'], 'O':-75.000001429806+ SOC['O'], 'C':-37.788504810868+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVQZ': - atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.529569719016 + SOC['N'], 'O':-75.004026586610+ SOC['O'], 'C':-37.789387892348+ SOC['C']} - - - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.523269942190 + SOC['N'], 'O':-74.990725918500 + SOC['O'], 'C':-37.785409916465 + SOC['C'], 'S': -397.658155086033 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVTZ-F12': - atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.528135889213 + SOC['N'], 'O':-75.001094055506 + SOC['O'], 'C':-37.788233578503 + SOC['C'], 'S':-397.671745425929 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVQZ-F12': - atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.529425753163 + SOC['N'], 'O':-75.003820485005 + SOC['O'], 'C':-37.789006506290 + SOC['C'], 'S':-397.674145126931 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.578602780288 + SOC['N'], 'O':-75.048064317367+ SOC['O'], 'C':-37.837592033417+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVTZ-F12': - atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.586402551258 + SOC['N'], 'O':-75.062767632757+ SOC['O'], 'C':-37.842729156944+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVQZ-F12': - atomEnergies = {'H':-0.49999456 + SOC['H'], 'N':-54.587781507581 + SOC['N'], 'O':-75.065397706471+ SOC['O'], 'C':-37.843634971592+ SOC['C']} - - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVDZ': - atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.520475581942 + SOC['N'], 'O':-74.986992215049+ SOC['O'], 
'C':-37.783294495799+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVTZ': - atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.524927371700 + SOC['N'], 'O':-74.996328829705+ SOC['O'], 'C':-37.786320700792+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVQZ': - atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.528189769291 + SOC['N'], 'O':-75.001879610563+ SOC['O'], 'C':-37.788165047059+ SOC['C']} - - elif modelChemistry == 'MP2_rmp2_pVDZ': - atomEnergies = {'H':-0.49927840 + SOC['H'], 'N':-54.46141996 + SOC['N'], 'O':-74.89408254+ SOC['O'], 'C':-37.73792713+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVTZ': - atomEnergies = {'H':-0.49980981 + SOC['H'], 'N':-54.49615972 + SOC['N'], 'O':-74.95506980+ SOC['O'], 'C':-37.75833104+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVQZ': - atomEnergies = {'H':-0.49994557 + SOC['H'], 'N':-54.50715868 + SOC['N'], 'O':-74.97515364+ SOC['O'], 'C':-37.76533215+ SOC['C']} - - elif modelChemistry == 'CCSD-F12/cc-pVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.524325513811 + SOC['N'], 'O':-74.992326577897+ SOC['O'], 'C':-37.786213495943+ SOC['C']} - - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_noscale': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.526026290887 + SOC['N'], 'O':-74.994751897699+ SOC['O'], 'C':-37.787881871511+ SOC['C']} - - elif modelChemistry == 'G03_PBEPBE_6-311++g_d_p': - atomEnergies = {'H':-0.499812273282 + SOC['H'], 'N':-54.5289567564 + SOC['N'], 'O':-75.0033596764+ SOC['O'], 'C':-37.7937388736+ SOC['C']} - - elif modelChemistry == 'FCI/cc-pVDZ': - atomEnergies = {'C':-37.789527+ SOC['C']} - elif modelChemistry == 'FCI/cc-pVTZ': - atomEnergies = {'C':-37.781266669684+ SOC['C']} - elif modelChemistry == 'FCI/cc-pVQZ': - atomEnergies = {'C':-37.787052110598+ SOC['C']} - - elif modelChemistry in ['BMK/cbsb7', 'BMK/6-311G(2d,d,p)']: - atomEnergies = {'H':-0.498618853119+ SOC['H'], 'N':-54.5697851544+ SOC['N'], 'O':-75.0515210278+ SOC['O'], 
'C':-37.8287310027+ SOC['C'], 'P':-341.167615941+ SOC['P'], 'S': -398.001619915+ SOC['S']} - elif modelChemistry == 'b3lyp/6-31G**': - atomEnergies = {'H':-0.500426155, 'C':-37.850331697831, 'O':-75.0535872748806, 'S':-398.100820107242} + if atomEnergies is None: + # Note: If your model chemistry does not include spin orbit coupling, you should add the corrections to the energies here + if modelChemistry == 'CBS-QB3': + atomEnergies = {'H':-0.499818 + SOC['H'], 'N':-54.520543 + SOC['N'], 'O':-74.987624+ SOC['O'], 'C':-37.785385+ SOC['C'], 'P':-340.817186+ SOC['P'], 'S': -397.657360+ SOC['S']} + elif modelChemistry == 'M06-2X/cc-pVTZ': + atomEnergies = {'H':-0.498135 + SOC['H'], 'N':-54.586780 + SOC['N'], 'O':-75.064242+ SOC['O'], 'C':-37.842468+ SOC['C'], 'P':-341.246985+ SOC['P'], 'S': -398.101240+ SOC['S']} + elif modelChemistry == 'G3': + atomEnergies = {'H':-0.5010030, 'N':-54.564343, 'O':-75.030991, 'C':-37.827717, 'P':-341.116432, 'S': -397.961110} + elif modelChemistry == 'M08SO/MG3S*': # * indicates that the grid size used in the [QChem] electronic + #structure calculation utilized 75 radial points and 434 angular points + #(i.e,, this is specified in the $rem section of the [qchem] input file as: XC_GRID 000075000434) + atomEnergies = {'H':-0.5017321350 + SOC['H'], 'N':-54.5574039365 + SOC['N'], 'O':-75.0382931348+ SOC['O'], 'C':-37.8245648740+ SOC['C'], 'P':-341.2444299005+ SOC['P'], 'S':-398.0940312227+ SOC['S'] } + elif modelChemistry == 'Klip_1': + atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53383153 + SOC['N'], 'O':-75.00935474+ SOC['O'], 'C':-37.79266591+ SOC['C']} + elif modelChemistry == 'Klip_2': + #Klip QCI(tz,qz) + atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53169400 + SOC['N'], 'O':-75.00714902+ SOC['O'], 'C':-37.79060419+ SOC['C']} + elif modelChemistry == 'Klip_3': + #Klip QCI(dz,tz) + atomEnergies = {'H':-0.50005578 + SOC['H'], 'N':-54.53128140 + SOC['N'], 'O':-75.00356581+ SOC['O'], 'C':-37.79025175+ SOC['C']} + + elif 
modelChemistry == 'Klip_2_cc': + #Klip CCSD(T)(tz,qz) + atomEnergies = {'H':-0.50003976 + SOC['H'], 'O':-75.00681155+ SOC['O'], 'C':-37.79029443+ SOC['C']} + + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-TZ': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-QZ': + atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} + + # We are assuming that SOC is included in the Bond Energy Corrections + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': + atomEnergies = {'H':-0.499811124128, 'N':-54.526406291655, 'O':-74.995458316117, 'C':-37.788203485235, 'S':-397.663040369707} + elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': + atomEnergies = {'H':-0.499946213243, 'N':-54.53000909621, 'O':-75.004127673424, 'C':-37.789862146471, 'S':-397.675447487865} + elif modelChemistry == 'CCSD(T)-F12/cc-pVQZ-F12': + atomEnergies = {'H':-0.499994558325, 'N':-54.530515226371, 'O':-75.005600062003, 'C':-37.789961656228, 'S':-397.676719774973} + elif modelChemistry == 'CCSD(T)-F12/cc-pCVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.582137180344 + SOC['N'], 'O':-75.053045547421 + SOC['O'], 'C':-37.840869118707+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/cc-pCVTZ-F12': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.588545831900 + SOC['N'], 'O':-75.065995072347 + SOC['O'], 'C':-37.844662139972+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/cc-pCVQZ-F12': + atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.589137594139+ SOC['N'], 'O':-75.067412234737+ SOC['O'], 'C':-37.844893820561+ SOC['C']} + + elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVDZ': + atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.524279516472 + SOC['N'], 'O':-74.992097308083+ SOC['O'], 'C':-37.786694171716+ SOC['C']} + elif 
modelChemistry == 'CCSD(T)-F12/aug-cc-pVTZ': + atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.527419359906 + SOC['N'], 'O':-75.000001429806+ SOC['O'], 'C':-37.788504810868+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVQZ': + atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.529569719016 + SOC['N'], 'O':-75.004026586610+ SOC['O'], 'C':-37.789387892348+ SOC['C']} + + + elif modelChemistry == 'B-CCSD(T)-F12/cc-pVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.523269942190 + SOC['N'], 'O':-74.990725918500 + SOC['O'], 'C':-37.785409916465 + SOC['C'], 'S': -397.658155086033 + SOC['S']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pVTZ-F12': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.528135889213 + SOC['N'], 'O':-75.001094055506 + SOC['O'], 'C':-37.788233578503 + SOC['C'], 'S':-397.671745425929 + SOC['S']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pVQZ-F12': + atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.529425753163 + SOC['N'], 'O':-75.003820485005 + SOC['O'], 'C':-37.789006506290 + SOC['C'], 'S':-397.674145126931 + SOC['S']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.578602780288 + SOC['N'], 'O':-75.048064317367+ SOC['O'], 'C':-37.837592033417+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVTZ-F12': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.586402551258 + SOC['N'], 'O':-75.062767632757+ SOC['O'], 'C':-37.842729156944+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVQZ-F12': + atomEnergies = {'H':-0.49999456 + SOC['H'], 'N':-54.587781507581 + SOC['N'], 'O':-75.065397706471+ SOC['O'], 'C':-37.843634971592+ SOC['C']} + + elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVDZ': + atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.520475581942 + SOC['N'], 'O':-74.986992215049+ SOC['O'], 'C':-37.783294495799+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVTZ': + atomEnergies = 
{'H':-0.499844820798 + SOC['H'], 'N':-54.524927371700 + SOC['N'], 'O':-74.996328829705+ SOC['O'], 'C':-37.786320700792+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVQZ': + atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.528189769291 + SOC['N'], 'O':-75.001879610563+ SOC['O'], 'C':-37.788165047059+ SOC['C']} + + elif modelChemistry == 'MP2_rmp2_pVDZ': + atomEnergies = {'H':-0.49927840 + SOC['H'], 'N':-54.46141996 + SOC['N'], 'O':-74.89408254+ SOC['O'], 'C':-37.73792713+ SOC['C']} + elif modelChemistry == 'MP2_rmp2_pVTZ': + atomEnergies = {'H':-0.49980981 + SOC['H'], 'N':-54.49615972 + SOC['N'], 'O':-74.95506980+ SOC['O'], 'C':-37.75833104+ SOC['C']} + elif modelChemistry == 'MP2_rmp2_pVQZ': + atomEnergies = {'H':-0.49994557 + SOC['H'], 'N':-54.50715868 + SOC['N'], 'O':-74.97515364+ SOC['O'], 'C':-37.76533215+ SOC['C']} + + elif modelChemistry == 'CCSD-F12/cc-pVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.524325513811 + SOC['N'], 'O':-74.992326577897+ SOC['O'], 'C':-37.786213495943+ SOC['C']} + + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_noscale': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.526026290887 + SOC['N'], 'O':-74.994751897699+ SOC['O'], 'C':-37.787881871511+ SOC['C']} + + elif modelChemistry == 'G03_PBEPBE_6-311++g_d_p': + atomEnergies = {'H':-0.499812273282 + SOC['H'], 'N':-54.5289567564 + SOC['N'], 'O':-75.0033596764+ SOC['O'], 'C':-37.7937388736+ SOC['C']} + + elif modelChemistry == 'FCI/cc-pVDZ': + atomEnergies = {'C':-37.789527+ SOC['C']} + elif modelChemistry == 'FCI/cc-pVTZ': + atomEnergies = {'C':-37.781266669684+ SOC['C']} + elif modelChemistry == 'FCI/cc-pVQZ': + atomEnergies = {'C':-37.787052110598+ SOC['C']} + + elif modelChemistry in ['BMK/cbsb7', 'BMK/6-311G(2d,d,p)']: + atomEnergies = {'H':-0.498618853119+ SOC['H'], 'N':-54.5697851544+ SOC['N'], 'O':-75.0515210278+ SOC['O'], 'C':-37.8287310027+ SOC['C'], 'P':-341.167615941+ SOC['P'], 'S': -398.001619915+ SOC['S']} + elif 
modelChemistry == 'b3lyp/6-31G**': + atomEnergies = {'H':-0.500426155, 'C':-37.850331697831, 'O':-75.0535872748806, 'S':-398.100820107242} + + else: + logging.warning('Unknown model chemistry "{0}"; not applying energy corrections.'.format(modelChemistry)) + return E0 - else: - logging.warning('Unknown model chemistry "{0}"; not applying energy corrections.'.format(modelChemistry)) - return E0 for symbol, count in atoms.items(): if symbol in atomEnergies: E0 -= count * atomEnergies[symbol] * 4.35974394e-18 * constants.Na else: From dfb623b41097c4ab25582def6de8e39abee12bce Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Wed, 21 Mar 2018 11:37:29 -0400 Subject: [PATCH 32/57] Add enthalpies of formation for more atoms --- rmgpy/cantherm/statmech.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index b47d5f299b..79985cb9bc 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ -676,10 +676,14 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, atomEnergies=None): # See Gaussian thermo whitepaper at http://www.gaussian.com/g_whitepap/thermo.htm) # Note: these values are relatively old and some improvement may be possible by using newer values, particularly for carbon # However, care should be taken to ensure that they are compatible with the BAC values (if BACs are used) - atomHf = {'H': 51.63 , 'N': 112.53 ,'O': 58.99 ,'C': 169.98, 'S': 65.66 } + atomHf = {'H': 51.63, + 'Li': 37.69, 'Be': 76.48, 'B': 136.2, 'C': 169.98, 'N': 112.53, 'O': 58.99, 'F': 18.47, + 'Na': 25.69, 'Mg': 34.87, 'Al': 78.23, 'Si': 106.6, 'P': 75.42, 'S': 65.66, 'Cl': 28.59} # Thermal contribution to enthalpy Hss(298 K) - Hss(0 K) reported by Gaussian thermo whitepaper # This will be subtracted from the corresponding value in atomHf to produce an enthalpy used in calculating the enthalpy of formation at 298 K - atomThermal = {'H': 1.01 , 'N': 1.04, 'O': 1.04 ,'C': 0.25, 'S': 1.05 
} + atomThermal = {'H': 1.01, + 'Li': 1.1, 'Be': 0.46, 'B': 0.29, 'C': 0.25, 'N': 1.04, 'O': 1.04, 'F': 1.05, + 'Na': 1.54, 'Mg': 1.19, 'Al': 1.08, 'Si': 0.76, 'P': 1.28, 'S': 1.05, 'Cl': 1.1} # Total energy correction used to reach gas-phase reference state # Note: Spin orbit coupling no longer included in these energies, since some model chemistries include it automatically atomEnergies = {} From 994c573875cd21c369f88c81767165fb24788c44 Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Wed, 21 Mar 2018 11:46:51 -0400 Subject: [PATCH 33/57] Allow bond labels to be entered in reverse --- documentation/source/users/cantherm/input.rst | 2 +- documentation/source/users/cantherm/input_pdep.rst | 2 +- rmgpy/cantherm/statmech.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/documentation/source/users/cantherm/input.rst b/documentation/source/users/cantherm/input.rst index 151f000f84..429a9f6f37 100644 --- a/documentation/source/users/cantherm/input.rst +++ b/documentation/source/users/cantherm/input.rst @@ -155,7 +155,7 @@ Allowed bond types for the ``bonds`` parameter are, e.g., ``'C-H'``, ``'C-C'``, ``'O=S=O'`` is also allowed. -The order of elements in the bond correction label is important and generally follows the order specified under "allowed atom symbols" above, i.e., ``'C=N'`` is correct while ``'N=C'`` is incorrect. Use ``-``/``=``/``#`` to denote a single/double/triple bond, respectively. For example, for formaldehyde we would write:: +The order of elements in the bond correction label is not important. Use ``-``/``=``/``#`` to denote a single/double/triple bond, respectively. 
For example, for formaldehyde we would write:: bonds = {'C=O': 1, 'C-H': 2} diff --git a/documentation/source/users/cantherm/input_pdep.rst b/documentation/source/users/cantherm/input_pdep.rst index 146f193a0b..4e961be875 100644 --- a/documentation/source/users/cantherm/input_pdep.rst +++ b/documentation/source/users/cantherm/input_pdep.rst @@ -200,7 +200,7 @@ Allowed bond types for the ``bonds`` parameter are, e.g., ``'C-H'``, ``'C-C'``, ``'O=S=O'`` is also allowed. -The order of elements in for the bond correction is important and generally follows the order specified under "allowed atom symbols" above, i.e., ``'C=N'`` is correct while ``'N=C'`` is incorrect. Use ``-``/``=``/``#`` to denote a single/double/triple bond, respectively. For example, for acetylperoxy radical we would write:: +The order of elements in the bond correction is not important. Use ``-``/``=``/``#`` to denote a single/double/triple bond, respectively. For example, for acetylperoxy radical we would write:: bonds = {'C-C': 1, 'C=O': 1, 'C-O': 1, 'O-O': 1, 'C-H': 3} diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index 79985cb9bc..3de660b4a8 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ -735,6 +735,7 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, atomEnergies=None): for symbol, count in bonds.items(): if symbol in bondEnergies: E0 += count * bondEnergies[symbol] * 4184. + elif symbol[::-1] in bondEnergies: E0 += count * bondEnergies[symbol[::-1]] * 4184. else: logging.warning('Ignored unknown bond type {0!r}.'.format(symbol)) From bfc8057fbf8b6dcd2bfafcaaa38f3e0318373af7 Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Thu, 22 Mar 2018 11:23:11 -0400 Subject: [PATCH 34/57] Add option to turn off atom corrections If only running kinetics jobs, then atom corrections do not affect the results.
Starting with this commit, atom corrections are turned on by default, but Cantherm will raise an error if elements are not found in the atomEnergies dictionary. The user can then turn off atom corrections if they know what they are doing (i.e., only running kinetics jobs). --- documentation/source/users/cantherm/input.rst | 12 +- .../source/users/cantherm/input_pdep.rst | 6 +- rmgpy/cantherm/input.py | 2 + rmgpy/cantherm/statmech.py | 380 +++++++++--------- 4 files changed, 213 insertions(+), 187 deletions(-) diff --git a/documentation/source/users/cantherm/input.rst b/documentation/source/users/cantherm/input.rst index 429a9f6f37..80bbdb3a29 100644 --- a/documentation/source/users/cantherm/input.rst +++ b/documentation/source/users/cantherm/input.rst @@ -22,6 +22,7 @@ Component Description ``atomEnergies`` Dictionary of atomic energies at ``modelChemistry`` level ``frequencyScaleFactor`` A factor by which to scale all frequencies ``useHinderedRotors`` ``True`` if hindered rotors are used, ``False`` if not +``useAtomCorrections`` ``True`` if atom corrections are used, ``False`` if not ``useBondCorrections`` ``True`` if bond corrections are used, ``False`` if not ``species`` Contains parameters for non-transition states ``transitionState`` Contains parameters for transition state(s) @@ -57,7 +58,7 @@ specified in the input file by providing a dictionary of these energies in the f } The table below shows which model chemistries have atomization energy corrections (AEC), bond -corrections (BC), and spin orbit corrections (SOC). It also lists which atoms types are available +corrections (BC), and spin orbit corrections (SOC). It also lists which elements are available for a given model chemistry. ================================================ ===== ==== ==== ==================== @@ -99,6 +100,12 @@ If a model chemistry other than the ones in the above table is used, then the us the corresponding atomic energies (using ``atomEnergies``) to get meaningful results.
Bond corrections would not be applied in this case. +If a model chemistry or atomic energies are not available, then a kinetics job can still be run by +setting ``useAtomCorrections`` to ``False``, in which case Cantherm will not raise an error for +unknown elements. The user should be aware that the resulting energies and thermodynamic quantities +in the output file will not be meaningful, but kinetics and equilibrium constants will still be +correct. + Frequency Scale Factor ====================== @@ -147,7 +154,8 @@ Parameter Required? Description The types and number of atoms in the species are automatically inferred from the quantum chemistry output and are used to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry`` -(see `Model Chemistry`_). +(see `Model Chemistry`_). If not interested in accurate thermodynamics (e.g., if only using ``kinetics()``), then +atom corrections can be turned off by setting ``useAtomCorrections`` to ``False``. The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry``. 
diff --git a/documentation/source/users/cantherm/input_pdep.rst b/documentation/source/users/cantherm/input_pdep.rst index 4e961be875..1927a5eab6 100644 --- a/documentation/source/users/cantherm/input_pdep.rst +++ b/documentation/source/users/cantherm/input_pdep.rst @@ -28,6 +28,7 @@ Component Description ``atomEnergies`` Dictionary of atomic energies at ``modelChemistry`` level ``frequencyScaleFactor`` A factor by which to scale all frequencies ``useHinderedRotors`` ``True`` if hindered rotors are used, ``False`` if not +``useAtomCorrections`` ``True`` if atom corrections are used, ``False`` if not ``useBondCorrections`` ``True`` if bond corrections are used, ``False`` if not ``species`` Contains parameters for non-transition states ``transitionState`` Contains parameters for transition state(s) @@ -69,7 +70,7 @@ specified in the input file by providing a dictionary of these energies in the f } Whether or not atomization energy corrections (AEC), bond corrections (BC), and spin orbit -corrections (SOC); and which atom types are available for a given model chemistry is described +corrections (SOC); and which elements are available for a given model chemistry is described under `High-Pressure Limit: Model Chemistry `_ Frequency Scale Factor @@ -192,7 +193,8 @@ Parameter Required? Description The types and number of atoms in the species are automatically inferred from the quantum chemistry output and are used to apply atomization energy corrections (AEC) and spin orbit corrections (SOC) for a given ``modelChemistry`` -(see `Model Chemistry`_). +(see `Model Chemistry`_). If not interested in accurate thermodynamics (e.g., if only using ``kinetics()``), then +atom corrections can be turned off by setting ``useAtomCorrections`` to ``False``. The ``bond`` parameter is used to apply bond corrections (BC) for a given ``modelChemistry``. 
diff --git a/rmgpy/cantherm/input.py b/rmgpy/cantherm/input.py index 8f9f05708d..58b537ffc3 100644 --- a/rmgpy/cantherm/input.py +++ b/rmgpy/cantherm/input.py @@ -388,6 +388,7 @@ def loadInputFile(path): logging.warning('No frequency scale factor specified in input file; assuming a value of unity.') frequencyScaleFactor = local_context.get('frequencyScaleFactor', 1.0) useHinderedRotors = local_context.get('useHinderedRotors', True) + useAtomCorrections = local_context.get('useAtomCorrections', True) useBondCorrections = local_context.get('useBondCorrections', False) atomEnergies = local_context.get('atomEnergies', None) @@ -399,6 +400,7 @@ def loadInputFile(path): job.modelChemistry = modelChemistry job.frequencyScaleFactor = frequencyScaleFactor job.includeHinderedRotors = useHinderedRotors + job.applyAtomEnergyCorrections = useAtomCorrections job.applyBondEnergyCorrections = useBondCorrections job.atomEnergies = atomEnergies diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index 3de660b4a8..fc13ca5bc2 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ -174,6 +174,7 @@ def __init__(self, species, path): self.modelChemistry = '' self.frequencyScaleFactor = 1.0 self.includeHinderedRotors = True + self.applyAtomEnergyCorrections = True self.applyBondEnergyCorrections = True self.atomEnergies = None @@ -345,11 +346,15 @@ def load(self): E0 = energyLog.loadEnergy(self.frequencyScaleFactor) else: E0 = E0 * constants.E_h * constants.Na # Hartree/particle to J/mol + if not self.applyAtomEnergyCorrections: + logging.warning('Atom corrections are not being used. 
Do not trust energies and thermo.') E0 = applyEnergyCorrections(E0, self.modelChemistry, atoms, - bonds if self.applyBondEnergyCorrections else {}, - atomEnergies=self.atomEnergies) + bonds, + atomEnergies=self.atomEnergies, + applyAtomEnergyCorrections=self.applyAtomEnergyCorrections, + applyBondEnergyCorrections=self.applyBondEnergyCorrections) ZPE = statmechLog.loadZeroPointEnergy() * self.frequencyScaleFactor # The E0_withZPE at this stage contains the ZPE @@ -539,7 +544,8 @@ def plotHinderedRotor(self, angle, Vlist, cosineRotor, fourierRotor, rotor, roto ################################################################################ -def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, atomEnergies=None): +def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, + atomEnergies=None, applyAtomEnergyCorrections=True, applyBondEnergyCorrections=False): """ Given an energy `E0` in J/mol as read from the output of a quantum chemistry calculation at a given `modelChemistry`, adjust the energy such that it @@ -554,190 +560,198 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, atomEnergies=None): `bonds` is a dictionary associating bond types with the number of that bond in the molecule. 
""" - - # Spin orbit correction (SOC) in Hartrees - # Values taken from ref 22 of http://dx.doi.org/10.1063/1.477794 and converted to hartrees - # Values in millihartree are also available (with fewer significant figures) from table VII of http://dx.doi.org/10.1063/1.473182 - SOC = {'H':0.0, 'N':0.0, 'O': -0.000355, 'C': -0.000135, 'S': -0.000893, 'P': 0.0} - - # Step 1: Reference all energies to a model chemistry-independent basis - # by subtracting out that model chemistry's atomic energies - if atomEnergies is None: - # Note: If your model chemistry does not include spin orbit coupling, you should add the corrections to the energies here - if modelChemistry == 'CBS-QB3': - atomEnergies = {'H':-0.499818 + SOC['H'], 'N':-54.520543 + SOC['N'], 'O':-74.987624+ SOC['O'], 'C':-37.785385+ SOC['C'], 'P':-340.817186+ SOC['P'], 'S': -397.657360+ SOC['S']} - elif modelChemistry == 'M06-2X/cc-pVTZ': - atomEnergies = {'H':-0.498135 + SOC['H'], 'N':-54.586780 + SOC['N'], 'O':-75.064242+ SOC['O'], 'C':-37.842468+ SOC['C'], 'P':-341.246985+ SOC['P'], 'S': -398.101240+ SOC['S']} - elif modelChemistry == 'G3': - atomEnergies = {'H':-0.5010030, 'N':-54.564343, 'O':-75.030991, 'C':-37.827717, 'P':-341.116432, 'S': -397.961110} - elif modelChemistry == 'M08SO/MG3S*': # * indicates that the grid size used in the [QChem] electronic - #structure calculation utilized 75 radial points and 434 angular points - #(i.e,, this is specified in the $rem section of the [qchem] input file as: XC_GRID 000075000434) - atomEnergies = {'H':-0.5017321350 + SOC['H'], 'N':-54.5574039365 + SOC['N'], 'O':-75.0382931348+ SOC['O'], 'C':-37.8245648740+ SOC['C'], 'P':-341.2444299005+ SOC['P'], 'S':-398.0940312227+ SOC['S'] } - elif modelChemistry == 'Klip_1': - atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53383153 + SOC['N'], 'O':-75.00935474+ SOC['O'], 'C':-37.79266591+ SOC['C']} - elif modelChemistry == 'Klip_2': - #Klip QCI(tz,qz) - atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53169400 + 
SOC['N'], 'O':-75.00714902+ SOC['O'], 'C':-37.79060419+ SOC['C']} - elif modelChemistry == 'Klip_3': - #Klip QCI(dz,tz) - atomEnergies = {'H':-0.50005578 + SOC['H'], 'N':-54.53128140 + SOC['N'], 'O':-75.00356581+ SOC['O'], 'C':-37.79025175+ SOC['C']} - - elif modelChemistry == 'Klip_2_cc': - #Klip CCSD(T)(tz,qz) - atomEnergies = {'H':-0.50003976 + SOC['H'], 'O':-75.00681155+ SOC['O'], 'C':-37.79029443+ SOC['C']} - - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-TZ': - atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-QZ': - atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} - - # We are assuming that SOC is included in the Bond Energy Corrections - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': - atomEnergies = {'H':-0.499811124128, 'N':-54.526406291655, 'O':-74.995458316117, 'C':-37.788203485235, 'S':-397.663040369707} + if applyAtomEnergyCorrections: + # Spin orbit correction (SOC) in Hartrees + # Values taken from ref 22 of http://dx.doi.org/10.1063/1.477794 and converted to hartrees + # Values in millihartree are also available (with fewer significant figures) from table VII of http://dx.doi.org/10.1063/1.473182 + SOC = {'H':0.0, 'N':0.0, 'O': -0.000355, 'C': -0.000135, 'S': -0.000893, 'P': 0.0} + + # Step 1: Reference all energies to a model chemistry-independent basis + # by subtracting out that model chemistry's atomic energies + if atomEnergies is None: + # Note: If your model chemistry does not include spin orbit coupling, you should add the corrections to the energies here + if modelChemistry == 'CBS-QB3': + atomEnergies = {'H':-0.499818 + SOC['H'], 'N':-54.520543 + SOC['N'], 'O':-74.987624+ SOC['O'], 'C':-37.785385+ SOC['C'], 'P':-340.817186+ SOC['P'], 'S': -397.657360+ SOC['S']} + elif modelChemistry == 
'M06-2X/cc-pVTZ': + atomEnergies = {'H':-0.498135 + SOC['H'], 'N':-54.586780 + SOC['N'], 'O':-75.064242+ SOC['O'], 'C':-37.842468+ SOC['C'], 'P':-341.246985+ SOC['P'], 'S': -398.101240+ SOC['S']} + elif modelChemistry == 'G3': + atomEnergies = {'H':-0.5010030, 'N':-54.564343, 'O':-75.030991, 'C':-37.827717, 'P':-341.116432, 'S': -397.961110} + elif modelChemistry == 'M08SO/MG3S*': # * indicates that the grid size used in the [QChem] electronic + #structure calculation utilized 75 radial points and 434 angular points + #(i.e,, this is specified in the $rem section of the [qchem] input file as: XC_GRID 000075000434) + atomEnergies = {'H':-0.5017321350 + SOC['H'], 'N':-54.5574039365 + SOC['N'], 'O':-75.0382931348+ SOC['O'], 'C':-37.8245648740+ SOC['C'], 'P':-341.2444299005+ SOC['P'], 'S':-398.0940312227+ SOC['S'] } + elif modelChemistry == 'Klip_1': + atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53383153 + SOC['N'], 'O':-75.00935474+ SOC['O'], 'C':-37.79266591+ SOC['C']} + elif modelChemistry == 'Klip_2': + #Klip QCI(tz,qz) + atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53169400 + SOC['N'], 'O':-75.00714902+ SOC['O'], 'C':-37.79060419+ SOC['C']} + elif modelChemistry == 'Klip_3': + #Klip QCI(dz,tz) + atomEnergies = {'H':-0.50005578 + SOC['H'], 'N':-54.53128140 + SOC['N'], 'O':-75.00356581+ SOC['O'], 'C':-37.79025175+ SOC['C']} + + elif modelChemistry == 'Klip_2_cc': + #Klip CCSD(T)(tz,qz) + atomEnergies = {'H':-0.50003976 + SOC['H'], 'O':-75.00681155+ SOC['O'], 'C':-37.79029443+ SOC['C']} + + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-TZ': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-QZ': + atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} + + # We are assuming that SOC is included in the Bond Energy 
Corrections + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': + atomEnergies = {'H':-0.499811124128, 'N':-54.526406291655, 'O':-74.995458316117, 'C':-37.788203485235, 'S':-397.663040369707} + elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': + atomEnergies = {'H':-0.499946213243, 'N':-54.53000909621, 'O':-75.004127673424, 'C':-37.789862146471, 'S':-397.675447487865} + elif modelChemistry == 'CCSD(T)-F12/cc-pVQZ-F12': + atomEnergies = {'H':-0.499994558325, 'N':-54.530515226371, 'O':-75.005600062003, 'C':-37.789961656228, 'S':-397.676719774973} + elif modelChemistry == 'CCSD(T)-F12/cc-pCVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.582137180344 + SOC['N'], 'O':-75.053045547421 + SOC['O'], 'C':-37.840869118707+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/cc-pCVTZ-F12': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.588545831900 + SOC['N'], 'O':-75.065995072347 + SOC['O'], 'C':-37.844662139972+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/cc-pCVQZ-F12': + atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.589137594139+ SOC['N'], 'O':-75.067412234737+ SOC['O'], 'C':-37.844893820561+ SOC['C']} + + elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVDZ': + atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.524279516472 + SOC['N'], 'O':-74.992097308083+ SOC['O'], 'C':-37.786694171716+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVTZ': + atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.527419359906 + SOC['N'], 'O':-75.000001429806+ SOC['O'], 'C':-37.788504810868+ SOC['C']} + elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVQZ': + atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.529569719016 + SOC['N'], 'O':-75.004026586610+ SOC['O'], 'C':-37.789387892348+ SOC['C']} + + + elif modelChemistry == 'B-CCSD(T)-F12/cc-pVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.523269942190 + SOC['N'], 'O':-74.990725918500 + SOC['O'], 'C':-37.785409916465 + SOC['C'], 'S': -397.658155086033 + SOC['S']} + elif 
modelChemistry == 'B-CCSD(T)-F12/cc-pVTZ-F12': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.528135889213 + SOC['N'], 'O':-75.001094055506 + SOC['O'], 'C':-37.788233578503 + SOC['C'], 'S':-397.671745425929 + SOC['S']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pVQZ-F12': + atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.529425753163 + SOC['N'], 'O':-75.003820485005 + SOC['O'], 'C':-37.789006506290 + SOC['C'], 'S':-397.674145126931 + SOC['S']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.578602780288 + SOC['N'], 'O':-75.048064317367+ SOC['O'], 'C':-37.837592033417+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVTZ-F12': + atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.586402551258 + SOC['N'], 'O':-75.062767632757+ SOC['O'], 'C':-37.842729156944+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVQZ-F12': + atomEnergies = {'H':-0.49999456 + SOC['H'], 'N':-54.587781507581 + SOC['N'], 'O':-75.065397706471+ SOC['O'], 'C':-37.843634971592+ SOC['C']} + + elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVDZ': + atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.520475581942 + SOC['N'], 'O':-74.986992215049+ SOC['O'], 'C':-37.783294495799+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVTZ': + atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.524927371700 + SOC['N'], 'O':-74.996328829705+ SOC['O'], 'C':-37.786320700792+ SOC['C']} + elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVQZ': + atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.528189769291 + SOC['N'], 'O':-75.001879610563+ SOC['O'], 'C':-37.788165047059+ SOC['C']} + + elif modelChemistry == 'MP2_rmp2_pVDZ': + atomEnergies = {'H':-0.49927840 + SOC['H'], 'N':-54.46141996 + SOC['N'], 'O':-74.89408254+ SOC['O'], 'C':-37.73792713+ SOC['C']} + elif modelChemistry == 'MP2_rmp2_pVTZ': + atomEnergies = {'H':-0.49980981 + SOC['H'], 'N':-54.49615972 + SOC['N'], 'O':-74.95506980+ SOC['O'], 
'C':-37.75833104+ SOC['C']} + elif modelChemistry == 'MP2_rmp2_pVQZ': + atomEnergies = {'H':-0.49994557 + SOC['H'], 'N':-54.50715868 + SOC['N'], 'O':-74.97515364+ SOC['O'], 'C':-37.76533215+ SOC['C']} + + elif modelChemistry == 'CCSD-F12/cc-pVDZ-F12': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.524325513811 + SOC['N'], 'O':-74.992326577897+ SOC['O'], 'C':-37.786213495943+ SOC['C']} + + elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_noscale': + atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.526026290887 + SOC['N'], 'O':-74.994751897699+ SOC['O'], 'C':-37.787881871511+ SOC['C']} + + elif modelChemistry == 'G03_PBEPBE_6-311++g_d_p': + atomEnergies = {'H':-0.499812273282 + SOC['H'], 'N':-54.5289567564 + SOC['N'], 'O':-75.0033596764+ SOC['O'], 'C':-37.7937388736+ SOC['C']} + + elif modelChemistry == 'FCI/cc-pVDZ': + atomEnergies = {'C':-37.789527+ SOC['C']} + elif modelChemistry == 'FCI/cc-pVTZ': + atomEnergies = {'C':-37.781266669684+ SOC['C']} + elif modelChemistry == 'FCI/cc-pVQZ': + atomEnergies = {'C':-37.787052110598+ SOC['C']} + + elif modelChemistry in ['BMK/cbsb7', 'BMK/6-311G(2d,d,p)']: + atomEnergies = {'H':-0.498618853119+ SOC['H'], 'N':-54.5697851544+ SOC['N'], 'O':-75.0515210278+ SOC['O'], 'C':-37.8287310027+ SOC['C'], 'P':-341.167615941+ SOC['P'], 'S': -398.001619915+ SOC['S']} + elif modelChemistry == 'b3lyp/6-31G**': + atomEnergies = {'H':-0.500426155, 'C':-37.850331697831, 'O':-75.0535872748806, 'S':-398.100820107242} + + else: + raise Exception('Unknown model chemistry "{}".'.format(modelChemistry)) + + for symbol, count in atoms.items(): + if symbol in atomEnergies: + E0 -= count * atomEnergies[symbol] * 4.35974394e-18 * constants.Na + else: + raise Exception( + 'Unknown element "{}". 
Turn off atom corrections if only running a kinetics jobs ' + 'or supply a dictionary of atom energies.'.format(symbol) + ) + + # Step 2: Atom energy corrections to reach gas-phase reference state + # Experimental enthalpy of formation at 0 K + # See Gaussian thermo whitepaper at http://www.gaussian.com/g_whitepap/thermo.htm) + # Note: these values are relatively old and some improvement may be possible by using newer values, particularly for carbon + # However, care should be taken to ensure that they are compatible with the BAC values (if BACs are used) + # The enthalpies listed here should correspond to the allowed elements in atom_num_dict + atomHf = {'H': 51.63, + 'Li': 37.69, 'Be': 76.48, 'B': 136.2, 'C': 169.98, 'N': 112.53, 'O': 58.99, 'F': 18.47, + 'Na': 25.69, 'Mg': 34.87, 'Al': 78.23, 'Si': 106.6, 'P': 75.42, 'S': 65.66, 'Cl': 28.59} + # Thermal contribution to enthalpy Hss(298 K) - Hss(0 K) reported by Gaussian thermo whitepaper + # This will be subtracted from the corresponding value in atomHf to produce an enthalpy used in calculating the enthalpy of formation at 298 K + atomThermal = {'H': 1.01, + 'Li': 1.1, 'Be': 0.46, 'B': 0.29, 'C': 0.25, 'N': 1.04, 'O': 1.04, 'F': 1.05, + 'Na': 1.54, 'Mg': 1.19, 'Al': 1.08, 'Si': 0.76, 'P': 1.28, 'S': 1.05, 'Cl': 1.1} + # Total energy correction used to reach gas-phase reference state + # Note: Spin orbit coupling no longer included in these energies, since some model chemistries include it automatically + atomEnthalpyCorrections = {element: atomHf[element] - atomThermal[element] for element in atomHf} + for symbol, count in atoms.items(): + if symbol in atomEnthalpyCorrections: + E0 += count * atomEnthalpyCorrections[symbol] * 4184. 
+ else: + raise Exception('Element "{}" is not supported.'.format(symbol)) + + if applyBondEnergyCorrections: + # Step 3: Bond energy corrections + #The order of elements in the bond correction label is important and should follow the order specified below: + #'C', 'N', 'O', 'S', 'P', and 'H' + #Use ``-``/``=``/``#`` to denote a single/double/triple bond, respectively. + # For example, ``'C=N'`` is correct while ``'N=C'`` is incorrect + bondEnergies = {} + # 'S-H', 'C-S', 'C=S', 'S-S', 'O-S', 'O=S', 'O=S=O' taken from http://hdl.handle.net/1721.1/98155 (both for + # 'CCSD(T)-F12/cc-pVDZ-F12' and 'CCSD(T)-F12/cc-pVTZ-F12') + if modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': + bondEnergies = { 'C-H': -0.46, 'C-C': -0.68, 'C=C': -1.90, 'C#C': -3.13, + 'O-H': -0.51, 'C-O': -0.23, 'C=O': -0.69, 'O-O': -0.02, 'C-N': -0.67, + 'C=N': -1.46, 'C#N': -2.79, 'N-O': 0.74, 'N_O': -0.23, 'N=O': -0.51, + 'N-H': -0.69, 'N-N': -0.47, 'N=N': -1.54, 'N#N': -2.05, 'S-H': 0.87, + 'C-S': 0.42, 'C=S': 0.51, 'S-S': 0.86, 'O-S': 0.23, 'O=S': -0.53, + 'O=S=O': 1.95, } elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': - atomEnergies = {'H':-0.499946213243, 'N':-54.53000909621, 'O':-75.004127673424, 'C':-37.789862146471, 'S':-397.675447487865} + bondEnergies = { 'C-H': -0.09, 'C-C': -0.27, 'C=C': -1.03, 'C#C': -1.79, + 'O-H': -0.06, 'C-O': 0.14, 'C=O': -0.19, 'O-O': 0.16, 'C-N': -0.18, + 'C=N': -0.41, 'C#N': -1.41, 'N-O': 0.87, 'N_O': -0.09, 'N=O': -0.23, + 'N-H': -0.01, 'N-N': -0.21, 'N=N': -0.44, 'N#N': -0.76, 'S-H': 0.52, + 'C-S': 0.13, 'C=S': -0.12, 'S-S': 0.30, 'O-S': 0.15, 'O=S': -2.61, + 'O=S=O': 0.27, } elif modelChemistry == 'CCSD(T)-F12/cc-pVQZ-F12': - atomEnergies = {'H':-0.499994558325, 'N':-54.530515226371, 'O':-75.005600062003, 'C':-37.789961656228, 'S':-397.676719774973} - elif modelChemistry == 'CCSD(T)-F12/cc-pCVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.582137180344 + SOC['N'], 'O':-75.053045547421 + SOC['O'], 'C':-37.840869118707+ SOC['C']} - elif 
modelChemistry == 'CCSD(T)-F12/cc-pCVTZ-F12': - atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.588545831900 + SOC['N'], 'O':-75.065995072347 + SOC['O'], 'C':-37.844662139972+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pCVQZ-F12': - atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.589137594139+ SOC['N'], 'O':-75.067412234737+ SOC['O'], 'C':-37.844893820561+ SOC['C']} - - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVDZ': - atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.524279516472 + SOC['N'], 'O':-74.992097308083+ SOC['O'], 'C':-37.786694171716+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVTZ': - atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.527419359906 + SOC['N'], 'O':-75.000001429806+ SOC['O'], 'C':-37.788504810868+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVQZ': - atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.529569719016 + SOC['N'], 'O':-75.004026586610+ SOC['O'], 'C':-37.789387892348+ SOC['C']} - - - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.523269942190 + SOC['N'], 'O':-74.990725918500 + SOC['O'], 'C':-37.785409916465 + SOC['C'], 'S': -397.658155086033 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVTZ-F12': - atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.528135889213 + SOC['N'], 'O':-75.001094055506 + SOC['O'], 'C':-37.788233578503 + SOC['C'], 'S':-397.671745425929 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVQZ-F12': - atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.529425753163 + SOC['N'], 'O':-75.003820485005 + SOC['O'], 'C':-37.789006506290 + SOC['C'], 'S':-397.674145126931 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.578602780288 + SOC['N'], 'O':-75.048064317367+ SOC['O'], 'C':-37.837592033417+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVTZ-F12': - atomEnergies = 
{'H':-0.499946213243 + SOC['H'], 'N':-54.586402551258 + SOC['N'], 'O':-75.062767632757+ SOC['O'], 'C':-37.842729156944+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVQZ-F12': - atomEnergies = {'H':-0.49999456 + SOC['H'], 'N':-54.587781507581 + SOC['N'], 'O':-75.065397706471+ SOC['O'], 'C':-37.843634971592+ SOC['C']} - - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVDZ': - atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.520475581942 + SOC['N'], 'O':-74.986992215049+ SOC['O'], 'C':-37.783294495799+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVTZ': - atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.524927371700 + SOC['N'], 'O':-74.996328829705+ SOC['O'], 'C':-37.786320700792+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVQZ': - atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.528189769291 + SOC['N'], 'O':-75.001879610563+ SOC['O'], 'C':-37.788165047059+ SOC['C']} - - elif modelChemistry == 'MP2_rmp2_pVDZ': - atomEnergies = {'H':-0.49927840 + SOC['H'], 'N':-54.46141996 + SOC['N'], 'O':-74.89408254+ SOC['O'], 'C':-37.73792713+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVTZ': - atomEnergies = {'H':-0.49980981 + SOC['H'], 'N':-54.49615972 + SOC['N'], 'O':-74.95506980+ SOC['O'], 'C':-37.75833104+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVQZ': - atomEnergies = {'H':-0.49994557 + SOC['H'], 'N':-54.50715868 + SOC['N'], 'O':-74.97515364+ SOC['O'], 'C':-37.76533215+ SOC['C']} - - elif modelChemistry == 'CCSD-F12/cc-pVDZ-F12': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.524325513811 + SOC['N'], 'O':-74.992326577897+ SOC['O'], 'C':-37.786213495943+ SOC['C']} - - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_noscale': - atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.526026290887 + SOC['N'], 'O':-74.994751897699+ SOC['O'], 'C':-37.787881871511+ SOC['C']} - - elif modelChemistry == 'G03_PBEPBE_6-311++g_d_p': - atomEnergies = {'H':-0.499812273282 + SOC['H'], 'N':-54.5289567564 + SOC['N'], 
'O':-75.0033596764+ SOC['O'], 'C':-37.7937388736+ SOC['C']} - - elif modelChemistry == 'FCI/cc-pVDZ': - atomEnergies = {'C':-37.789527+ SOC['C']} - elif modelChemistry == 'FCI/cc-pVTZ': - atomEnergies = {'C':-37.781266669684+ SOC['C']} - elif modelChemistry == 'FCI/cc-pVQZ': - atomEnergies = {'C':-37.787052110598+ SOC['C']} - - elif modelChemistry in ['BMK/cbsb7', 'BMK/6-311G(2d,d,p)']: - atomEnergies = {'H':-0.498618853119+ SOC['H'], 'N':-54.5697851544+ SOC['N'], 'O':-75.0515210278+ SOC['O'], 'C':-37.8287310027+ SOC['C'], 'P':-341.167615941+ SOC['P'], 'S': -398.001619915+ SOC['S']} - elif modelChemistry == 'b3lyp/6-31G**': - atomEnergies = {'H':-0.500426155, 'C':-37.850331697831, 'O':-75.0535872748806, 'S':-398.100820107242} - + bondEnergies = { 'C-H': -0.08, 'C-C': -0.26, 'C=C': -1.01, 'C#C': -1.66, + 'O-H': 0.07, 'C-O': 0.25, 'C=O': -0.03, 'O-O': 0.26, 'C-N': -0.20, + 'C=N': -0.30, 'C#N': -1.33, 'N-O': 1.01, 'N_O': -0.03, 'N=O': -0.26, + 'N-H': 0.06, 'N-N': -0.23, 'N=N': -0.37, 'N#N': -0.64,} + elif modelChemistry == 'CBS-QB3': + bondEnergies = { + 'C-C': -0.495,'C-H': -0.045,'C=C': -0.825,'C-O': 0.378,'C=O': 0.743,'O-H': -0.423, #Table2: Paraskevas, PD (2013). Chemistry-A European J., DOI: 10.1002/chem.201301381 + 'C#C': -0.64, 'C#N': -0.89, 'C-S': 0.43, 'O=S': -0.78,'S-H': 0.0, 'C-N': -0.13, 'C-Cl': 1.29, 'C-F': 0.55, # Table IX: Petersson GA (1998) J. of Chemical Physics, DOI: 10.1063/1.477794 + 'N-H': -0.42, 'N=O': 1.11, 'N-N': -1.87, 'N=N': -1.58,'N-O': 0.35, #Table 2: Ashcraft R (2007) J. Phys. Chem. 
B; DOI: 10.1021/jp073539t + 'N#N': -2.0, 'O=O': -0.2, 'H-H': 1.1, # Unknown source + } + elif modelChemistry in ['B3LYP/cbsb7', 'B3LYP/6-311G(2d,d,p)', 'B3LYP/6-311+G(3df,2p)', 'b3lyp/6-31G**']: + bondEnergies = { 'C-H': 0.25, 'C-C': -1.89, 'C=C': -0.40, 'C#C': -1.50, + 'O-H': -1.09, 'C-O': -1.18, 'C=O': -0.01, 'N-H': 1.36, 'C-N': -0.44, + 'C#N': 0.22, 'C-S': -2.35, 'O=S': -5.19, 'S-H': -0.52, } else: - logging.warning('Unknown model chemistry "{0}"; not applying energy corrections.'.format(modelChemistry)) - return E0 + logging.warning('No bond energy correction found for model chemistry: {0}'.format(modelChemistry)) - for symbol, count in atoms.items(): - if symbol in atomEnergies: E0 -= count * atomEnergies[symbol] * 4.35974394e-18 * constants.Na - else: - logging.warning('Ignored unknown atom type "{0}".'.format(symbol)) - - # Step 2: Atom energy corrections to reach gas-phase reference state - # Experimental enthalpy of formation at 0 K - # See Gaussian thermo whitepaper at http://www.gaussian.com/g_whitepap/thermo.htm) - # Note: these values are relatively old and some improvement may be possible by using newer values, particularly for carbon - # However, care should be taken to ensure that they are compatible with the BAC values (if BACs are used) - atomHf = {'H': 51.63, - 'Li': 37.69, 'Be': 76.48, 'B': 136.2, 'C': 169.98, 'N': 112.53, 'O': 58.99, 'F': 18.47, - 'Na': 25.69, 'Mg': 34.87, 'Al': 78.23, 'Si': 106.6, 'P': 75.42, 'S': 65.66, 'Cl': 28.59} - # Thermal contribution to enthalpy Hss(298 K) - Hss(0 K) reported by Gaussian thermo whitepaper - # This will be subtracted from the corresponding value in atomHf to produce an enthalpy used in calculating the enthalpy of formation at 298 K - atomThermal = {'H': 1.01, - 'Li': 1.1, 'Be': 0.46, 'B': 0.29, 'C': 0.25, 'N': 1.04, 'O': 1.04, 'F': 1.05, - 'Na': 1.54, 'Mg': 1.19, 'Al': 1.08, 'Si': 0.76, 'P': 1.28, 'S': 1.05, 'Cl': 1.1} - # Total energy correction used to reach gas-phase reference state - # Note: Spin 
orbit coupling no longer included in these energies, since some model chemistries include it automatically - atomEnergies = {} - for element in atomHf: - atomEnergies[element] = atomHf[element] - atomThermal[element] - for symbol, count in atoms.items(): - if symbol in atomEnergies: E0 += count * atomEnergies[symbol] * 4184. - - # Step 3: Bond energy corrections - #The order of elements in the bond correction label is important and should follow the order specified below: - #'C', 'N', 'O', 'S', 'P', and 'H' - #Use ``-``/``=``/``#`` to denote a single/double/triple bond, respectively. - # For example, ``'C=N'`` is correct while ``'N=C'`` is incorrect - bondEnergies = {} - # 'S-H', 'C-S', 'C=S', 'S-S', 'O-S', 'O=S', 'O=S=O' taken from http://hdl.handle.net/1721.1/98155 (both for - # 'CCSD(T)-F12/cc-pVDZ-F12' and 'CCSD(T)-F12/cc-pVTZ-F12') - if modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': - bondEnergies = { 'C-H': -0.46, 'C-C': -0.68, 'C=C': -1.90, 'C#C': -3.13, - 'O-H': -0.51, 'C-O': -0.23, 'C=O': -0.69, 'O-O': -0.02, 'C-N': -0.67, - 'C=N': -1.46, 'C#N': -2.79, 'N-O': 0.74, 'N_O': -0.23, 'N=O': -0.51, - 'N-H': -0.69, 'N-N': -0.47, 'N=N': -1.54, 'N#N': -2.05, 'S-H': 0.87, - 'C-S': 0.42, 'C=S': 0.51, 'S-S': 0.86, 'O-S': 0.23, 'O=S': -0.53, - 'O=S=O': 1.95, } - elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': - bondEnergies = { 'C-H': -0.09, 'C-C': -0.27, 'C=C': -1.03, 'C#C': -1.79, - 'O-H': -0.06, 'C-O': 0.14, 'C=O': -0.19, 'O-O': 0.16, 'C-N': -0.18, - 'C=N': -0.41, 'C#N': -1.41, 'N-O': 0.87, 'N_O': -0.09, 'N=O': -0.23, - 'N-H': -0.01, 'N-N': -0.21, 'N=N': -0.44, 'N#N': -0.76, 'S-H': 0.52, - 'C-S': 0.13, 'C=S': -0.12, 'S-S': 0.30, 'O-S': 0.15, 'O=S': -2.61, - 'O=S=O': 0.27, } - elif modelChemistry == 'CCSD(T)-F12/cc-pVQZ-F12': - bondEnergies = { 'C-H': -0.08, 'C-C': -0.26, 'C=C': -1.01, 'C#C': -1.66, - 'O-H': 0.07, 'C-O': 0.25, 'C=O': -0.03, 'O-O': 0.26, 'C-N': -0.20, - 'C=N': -0.30, 'C#N': -1.33, 'N-O': 1.01, 'N_O': -0.03, 'N=O': -0.26, - 'N-H': 0.06, 'N-N': -0.23, 
'N=N': -0.37, 'N#N': -0.64,} - elif modelChemistry == 'CBS-QB3': - bondEnergies = { - 'C-C': -0.495,'C-H': -0.045,'C=C': -0.825,'C-O': 0.378,'C=O': 0.743,'O-H': -0.423, #Table2: Paraskevas, PD (2013). Chemistry-A European J., DOI: 10.1002/chem.201301381 - 'C#C': -0.64, 'C#N': -0.89, 'C-S': 0.43, 'O=S': -0.78,'S-H': 0.0, 'C-N': -0.13, 'C-Cl': 1.29, 'C-F': 0.55, # Table IX: Petersson GA (1998) J. of Chemical Physics, DOI: 10.1063/1.477794 - 'N-H': -0.42, 'N=O': 1.11, 'N-N': -1.87, 'N=N': -1.58,'N-O': 0.35, #Table 2: Ashcraft R (2007) J. Phys. Chem. B; DOI: 10.1021/jp073539t - 'N#N': -2.0, 'O=O': -0.2, 'H-H': 1.1, # Unknown source - } - elif modelChemistry in ['B3LYP/cbsb7', 'B3LYP/6-311G(2d,d,p)', 'B3LYP/6-311+G(3df,2p)', 'b3lyp/6-31G**']: - bondEnergies = { 'C-H': 0.25, 'C-C': -1.89, 'C=C': -0.40, 'C#C': -1.50, - 'O-H': -1.09, 'C-O': -1.18, 'C=O': -0.01, 'N-H': 1.36, 'C-N': -0.44, - 'C#N': 0.22, 'C-S': -2.35, 'O=S': -5.19, 'S-H': -0.52, } - else: - logging.warning('No bond energy correction found for model chemistry: {0}'.format(modelChemistry)) - - for symbol, count in bonds.items(): - if symbol in bondEnergies: E0 += count * bondEnergies[symbol] * 4184. - elif symbol[::-1] in bondEnergies: E0 += count * bondEnergies[symbol[::-1]] * 4184. - else: - logging.warning('Ignored unknown bond type {0!r}.'.format(symbol)) + for symbol, count in bonds.items(): + if symbol in bondEnergies: + E0 += count * bondEnergies[symbol] * 4184. + elif symbol[::-1] in bondEnergies: + E0 += count * bondEnergies[symbol[::-1]] * 4184. 
+ else: + logging.warning('Ignored unknown bond type {0!r}.'.format(symbol)) return E0 From b307d5dc429f36e35427fabcf01f707997ed2379 Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Thu, 22 Mar 2018 19:21:21 -0400 Subject: [PATCH 35/57] Change model chemistries to lower-case and make user input insensitive to case --- rmgpy/cantherm/commonTest.py | 2 +- rmgpy/cantherm/input.py | 2 +- rmgpy/cantherm/statmech.py | 90 ++++++++++++++++++------------------ 3 files changed, 48 insertions(+), 46 deletions(-) diff --git a/rmgpy/cantherm/commonTest.py b/rmgpy/cantherm/commonTest.py index 3528c6f8aa..5bc817baab 100644 --- a/rmgpy/cantherm/commonTest.py +++ b/rmgpy/cantherm/commonTest.py @@ -207,7 +207,7 @@ class testCanthermInput(unittest.TestCase): def setUp(self): """Preparation for all unit tests in this class.""" self.directory = os.path.join(os.path.dirname(os.path.dirname(rmgpy.__file__)), 'examples', 'cantherm') - self.modelChemistry = "CBS-QB3" + self.modelChemistry = "cbs-qb3" self.frequencyScaleFactor = 0.99 self.useHinderedRotors = False self.useBondCorrections = True diff --git a/rmgpy/cantherm/input.py b/rmgpy/cantherm/input.py index 58b537ffc3..73b3381b2f 100644 --- a/rmgpy/cantherm/input.py +++ b/rmgpy/cantherm/input.py @@ -397,7 +397,7 @@ def loadInputFile(path): for job in jobList: if isinstance(job, StatMechJob): job.path = os.path.join(directory, job.path) - job.modelChemistry = modelChemistry + job.modelChemistry = modelChemistry.lower() job.frequencyScaleFactor = frequencyScaleFactor job.includeHinderedRotors = useHinderedRotors job.applyAtomEnergyCorrections = useAtomCorrections diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index fc13ca5bc2..6456f7f75f 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ -257,6 +257,7 @@ def load(self): except KeyError: raise InputError('Required attribute "energy" not found in species file {0!r}.'.format(path)) if isinstance(energy, dict): + energy = {k.lower(): v for k, 
v in energy.items()} # Make model chemistries lower-case try: energy = energy[self.modelChemistry] except KeyError: @@ -568,104 +569,105 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, # Step 1: Reference all energies to a model chemistry-independent basis # by subtracting out that model chemistry's atomic energies + # All model chemistries here should be lower-case because the user input is changed to lower-case if atomEnergies is None: # Note: If your model chemistry does not include spin orbit coupling, you should add the corrections to the energies here - if modelChemistry == 'CBS-QB3': + if modelChemistry == 'cbs-qb3': atomEnergies = {'H':-0.499818 + SOC['H'], 'N':-54.520543 + SOC['N'], 'O':-74.987624+ SOC['O'], 'C':-37.785385+ SOC['C'], 'P':-340.817186+ SOC['P'], 'S': -397.657360+ SOC['S']} - elif modelChemistry == 'M06-2X/cc-pVTZ': + elif modelChemistry == 'm06-2x/cc-pvtz': atomEnergies = {'H':-0.498135 + SOC['H'], 'N':-54.586780 + SOC['N'], 'O':-75.064242+ SOC['O'], 'C':-37.842468+ SOC['C'], 'P':-341.246985+ SOC['P'], 'S': -398.101240+ SOC['S']} - elif modelChemistry == 'G3': + elif modelChemistry == 'g3': atomEnergies = {'H':-0.5010030, 'N':-54.564343, 'O':-75.030991, 'C':-37.827717, 'P':-341.116432, 'S': -397.961110} - elif modelChemistry == 'M08SO/MG3S*': # * indicates that the grid size used in the [QChem] electronic + elif modelChemistry == 'm08so/mg3s*': # * indicates that the grid size used in the [QChem] electronic #structure calculation utilized 75 radial points and 434 angular points #(i.e,, this is specified in the $rem section of the [qchem] input file as: XC_GRID 000075000434) atomEnergies = {'H':-0.5017321350 + SOC['H'], 'N':-54.5574039365 + SOC['N'], 'O':-75.0382931348+ SOC['O'], 'C':-37.8245648740+ SOC['C'], 'P':-341.2444299005+ SOC['P'], 'S':-398.0940312227+ SOC['S'] } - elif modelChemistry == 'Klip_1': + elif modelChemistry == 'klip_1': atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53383153 + SOC['N'], 
'O':-75.00935474+ SOC['O'], 'C':-37.79266591+ SOC['C']} - elif modelChemistry == 'Klip_2': + elif modelChemistry == 'klip_2': #Klip QCI(tz,qz) atomEnergies = {'H':-0.50003976 + SOC['H'], 'N':-54.53169400 + SOC['N'], 'O':-75.00714902+ SOC['O'], 'C':-37.79060419+ SOC['C']} - elif modelChemistry == 'Klip_3': + elif modelChemistry == 'klip_3': #Klip QCI(dz,tz) atomEnergies = {'H':-0.50005578 + SOC['H'], 'N':-54.53128140 + SOC['N'], 'O':-75.00356581+ SOC['O'], 'C':-37.79025175+ SOC['C']} - elif modelChemistry == 'Klip_2_cc': + elif modelChemistry == 'klip_2_cc': #Klip CCSD(T)(tz,qz) atomEnergies = {'H':-0.50003976 + SOC['H'], 'O':-75.00681155+ SOC['O'], 'C':-37.79029443+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-TZ': + elif modelChemistry == 'ccsd(t)-f12/cc-pvdz-f12_h-tz': atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_H-QZ': + elif modelChemistry == 'ccsd(t)-f12/cc-pvdz-f12_h-qz': atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.526406291655 + SOC['N'], 'O':-74.995458316117+ SOC['O'], 'C':-37.788203485235+ SOC['C']} # We are assuming that SOC is included in the Bond Energy Corrections - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pvdz-f12': atomEnergies = {'H':-0.499811124128, 'N':-54.526406291655, 'O':-74.995458316117, 'C':-37.788203485235, 'S':-397.663040369707} - elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pvtz-f12': atomEnergies = {'H':-0.499946213243, 'N':-54.53000909621, 'O':-75.004127673424, 'C':-37.789862146471, 'S':-397.675447487865} - elif modelChemistry == 'CCSD(T)-F12/cc-pVQZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pvqz-f12': atomEnergies = {'H':-0.499994558325, 'N':-54.530515226371, 'O':-75.005600062003, 'C':-37.789961656228, 'S':-397.676719774973} - elif modelChemistry == 
'CCSD(T)-F12/cc-pCVDZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pcvdz-f12': atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.582137180344 + SOC['N'], 'O':-75.053045547421 + SOC['O'], 'C':-37.840869118707+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pCVTZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pcvtz-f12': atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.588545831900 + SOC['N'], 'O':-75.065995072347 + SOC['O'], 'C':-37.844662139972+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pCVQZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pcvqz-f12': atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.589137594139+ SOC['N'], 'O':-75.067412234737+ SOC['O'], 'C':-37.844893820561+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVDZ': + elif modelChemistry == 'ccsd(t)-f12/aug-cc-pvdz': atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.524279516472 + SOC['N'], 'O':-74.992097308083+ SOC['O'], 'C':-37.786694171716+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVTZ': + elif modelChemistry == 'ccsd(t)-f12/aug-cc-pvtz': atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.527419359906 + SOC['N'], 'O':-75.000001429806+ SOC['O'], 'C':-37.788504810868+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/aug-cc-pVQZ': + elif modelChemistry == 'ccsd(t)-f12/aug-cc-pvqz': atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.529569719016 + SOC['N'], 'O':-75.004026586610+ SOC['O'], 'C':-37.789387892348+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVDZ-F12': + elif modelChemistry == 'b-ccsd(t)-f12/cc-pvdz-f12': atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.523269942190 + SOC['N'], 'O':-74.990725918500 + SOC['O'], 'C':-37.785409916465 + SOC['C'], 'S': -397.658155086033 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVTZ-F12': + elif modelChemistry == 'b-ccsd(t)-f12/cc-pvtz-f12': atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.528135889213 + SOC['N'], 'O':-75.001094055506 + SOC['O'], 
'C':-37.788233578503 + SOC['C'], 'S':-397.671745425929 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pVQZ-F12': + elif modelChemistry == 'b-ccsd(t)-f12/cc-pvqz-f12': atomEnergies = {'H':-0.499994558325 + SOC['H'], 'N':-54.529425753163 + SOC['N'], 'O':-75.003820485005 + SOC['O'], 'C':-37.789006506290 + SOC['C'], 'S':-397.674145126931 + SOC['S']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVDZ-F12': + elif modelChemistry == 'b-ccsd(t)-f12/cc-pcvdz-f12': atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.578602780288 + SOC['N'], 'O':-75.048064317367+ SOC['O'], 'C':-37.837592033417+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVTZ-F12': + elif modelChemistry == 'b-ccsd(t)-f12/cc-pcvtz-f12': atomEnergies = {'H':-0.499946213243 + SOC['H'], 'N':-54.586402551258 + SOC['N'], 'O':-75.062767632757+ SOC['O'], 'C':-37.842729156944+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/cc-pCVQZ-F12': + elif modelChemistry == 'b-ccsd(t)-f12/cc-pcvqz-f12': atomEnergies = {'H':-0.49999456 + SOC['H'], 'N':-54.587781507581 + SOC['N'], 'O':-75.065397706471+ SOC['O'], 'C':-37.843634971592+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVDZ': + elif modelChemistry == 'b-ccsd(t)-f12/aug-cc-pvdz': atomEnergies = {'H':-0.499459066131 + SOC['H'], 'N':-54.520475581942 + SOC['N'], 'O':-74.986992215049+ SOC['O'], 'C':-37.783294495799+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVTZ': + elif modelChemistry == 'b-ccsd(t)-f12/aug-cc-pvtz': atomEnergies = {'H':-0.499844820798 + SOC['H'], 'N':-54.524927371700 + SOC['N'], 'O':-74.996328829705+ SOC['O'], 'C':-37.786320700792+ SOC['C']} - elif modelChemistry == 'B-CCSD(T)-F12/aug-cc-pVQZ': + elif modelChemistry == 'b-ccsd(t)-f12/aug-cc-pvqz': atomEnergies = {'H':-0.499949526073 + SOC['H'], 'N':-54.528189769291 + SOC['N'], 'O':-75.001879610563+ SOC['O'], 'C':-37.788165047059+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVDZ': + elif modelChemistry == 'mp2_rmp2_pvdz': atomEnergies = {'H':-0.49927840 + 
SOC['H'], 'N':-54.46141996 + SOC['N'], 'O':-74.89408254+ SOC['O'], 'C':-37.73792713+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVTZ': + elif modelChemistry == 'mp2_rmp2_pvtz': atomEnergies = {'H':-0.49980981 + SOC['H'], 'N':-54.49615972 + SOC['N'], 'O':-74.95506980+ SOC['O'], 'C':-37.75833104+ SOC['C']} - elif modelChemistry == 'MP2_rmp2_pVQZ': + elif modelChemistry == 'mp2_rmp2_pvqz': atomEnergies = {'H':-0.49994557 + SOC['H'], 'N':-54.50715868 + SOC['N'], 'O':-74.97515364+ SOC['O'], 'C':-37.76533215+ SOC['C']} - elif modelChemistry == 'CCSD-F12/cc-pVDZ-F12': + elif modelChemistry == 'ccsd-f12/cc-pvdz-f12': atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.524325513811 + SOC['N'], 'O':-74.992326577897+ SOC['O'], 'C':-37.786213495943+ SOC['C']} - elif modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12_noscale': + elif modelChemistry == 'ccsd(t)-f12/cc-pvdz-f12_noscale': atomEnergies = {'H':-0.499811124128 + SOC['H'], 'N':-54.526026290887 + SOC['N'], 'O':-74.994751897699+ SOC['O'], 'C':-37.787881871511+ SOC['C']} - elif modelChemistry == 'G03_PBEPBE_6-311++g_d_p': + elif modelChemistry == 'g03_pbepbe_6-311++g_d_p': atomEnergies = {'H':-0.499812273282 + SOC['H'], 'N':-54.5289567564 + SOC['N'], 'O':-75.0033596764+ SOC['O'], 'C':-37.7937388736+ SOC['C']} - elif modelChemistry == 'FCI/cc-pVDZ': + elif modelChemistry == 'fci/cc-pvdz': atomEnergies = {'C':-37.789527+ SOC['C']} - elif modelChemistry == 'FCI/cc-pVTZ': + elif modelChemistry == 'fci/cc-pvtz': atomEnergies = {'C':-37.781266669684+ SOC['C']} - elif modelChemistry == 'FCI/cc-pVQZ': + elif modelChemistry == 'fci/cc-pvqz': atomEnergies = {'C':-37.787052110598+ SOC['C']} - elif modelChemistry in ['BMK/cbsb7', 'BMK/6-311G(2d,d,p)']: + elif modelChemistry in ['bmk/cbsb7', 'bmk/6-311g(2d,d,p)']: atomEnergies = {'H':-0.498618853119+ SOC['H'], 'N':-54.5697851544+ SOC['N'], 'O':-75.0515210278+ SOC['O'], 'C':-37.8287310027+ SOC['C'], 'P':-341.167615941+ SOC['P'], 'S': -398.001619915+ SOC['S']} - elif modelChemistry == 
'b3lyp/6-31G**': + elif modelChemistry == 'b3lyp/6-31g**': atomEnergies = {'H':-0.500426155, 'C':-37.850331697831, 'O':-75.0535872748806, 'S':-398.100820107242} else: @@ -712,33 +714,33 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, bondEnergies = {} # 'S-H', 'C-S', 'C=S', 'S-S', 'O-S', 'O=S', 'O=S=O' taken from http://hdl.handle.net/1721.1/98155 (both for # 'CCSD(T)-F12/cc-pVDZ-F12' and 'CCSD(T)-F12/cc-pVTZ-F12') - if modelChemistry == 'CCSD(T)-F12/cc-pVDZ-F12': + if modelChemistry == 'ccsd(t)-f12/cc-pvdz-f12': bondEnergies = { 'C-H': -0.46, 'C-C': -0.68, 'C=C': -1.90, 'C#C': -3.13, 'O-H': -0.51, 'C-O': -0.23, 'C=O': -0.69, 'O-O': -0.02, 'C-N': -0.67, 'C=N': -1.46, 'C#N': -2.79, 'N-O': 0.74, 'N_O': -0.23, 'N=O': -0.51, 'N-H': -0.69, 'N-N': -0.47, 'N=N': -1.54, 'N#N': -2.05, 'S-H': 0.87, 'C-S': 0.42, 'C=S': 0.51, 'S-S': 0.86, 'O-S': 0.23, 'O=S': -0.53, 'O=S=O': 1.95, } - elif modelChemistry == 'CCSD(T)-F12/cc-pVTZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pvtz-f12': bondEnergies = { 'C-H': -0.09, 'C-C': -0.27, 'C=C': -1.03, 'C#C': -1.79, 'O-H': -0.06, 'C-O': 0.14, 'C=O': -0.19, 'O-O': 0.16, 'C-N': -0.18, 'C=N': -0.41, 'C#N': -1.41, 'N-O': 0.87, 'N_O': -0.09, 'N=O': -0.23, 'N-H': -0.01, 'N-N': -0.21, 'N=N': -0.44, 'N#N': -0.76, 'S-H': 0.52, 'C-S': 0.13, 'C=S': -0.12, 'S-S': 0.30, 'O-S': 0.15, 'O=S': -2.61, 'O=S=O': 0.27, } - elif modelChemistry == 'CCSD(T)-F12/cc-pVQZ-F12': + elif modelChemistry == 'ccsd(t)-f12/cc-pvqz-f12': bondEnergies = { 'C-H': -0.08, 'C-C': -0.26, 'C=C': -1.01, 'C#C': -1.66, 'O-H': 0.07, 'C-O': 0.25, 'C=O': -0.03, 'O-O': 0.26, 'C-N': -0.20, 'C=N': -0.30, 'C#N': -1.33, 'N-O': 1.01, 'N_O': -0.03, 'N=O': -0.26, 'N-H': 0.06, 'N-N': -0.23, 'N=N': -0.37, 'N#N': -0.64,} - elif modelChemistry == 'CBS-QB3': + elif modelChemistry == 'cbs-qb3': bondEnergies = { 'C-C': -0.495,'C-H': -0.045,'C=C': -0.825,'C-O': 0.378,'C=O': 0.743,'O-H': -0.423, #Table2: Paraskevas, PD (2013). 
Chemistry-A European J., DOI: 10.1002/chem.201301381 'C#C': -0.64, 'C#N': -0.89, 'C-S': 0.43, 'O=S': -0.78,'S-H': 0.0, 'C-N': -0.13, 'C-Cl': 1.29, 'C-F': 0.55, # Table IX: Petersson GA (1998) J. of Chemical Physics, DOI: 10.1063/1.477794 'N-H': -0.42, 'N=O': 1.11, 'N-N': -1.87, 'N=N': -1.58,'N-O': 0.35, #Table 2: Ashcraft R (2007) J. Phys. Chem. B; DOI: 10.1021/jp073539t 'N#N': -2.0, 'O=O': -0.2, 'H-H': 1.1, # Unknown source } - elif modelChemistry in ['B3LYP/cbsb7', 'B3LYP/6-311G(2d,d,p)', 'B3LYP/6-311+G(3df,2p)', 'b3lyp/6-31G**']: + elif modelChemistry in ['b3lyp/cbsb7', 'b3lyp/6-311g(2d,d,p)', 'b3lyp/6-311+g(3df,2p)', 'b3lyp/6-31g**']: bondEnergies = { 'C-H': 0.25, 'C-C': -1.89, 'C=C': -0.40, 'C#C': -1.50, 'O-H': -1.09, 'C-O': -1.18, 'C=O': -0.01, 'N-H': 1.36, 'C-N': -0.44, 'C#N': 0.22, 'C-S': -2.35, 'O=S': -5.19, 'S-H': -0.52, } From f6288dc4c10d87263aa717fcc2c754db796b10e7 Mon Sep 17 00:00:00 2001 From: Colin Grambow Date: Fri, 23 Mar 2018 12:17:40 -0400 Subject: [PATCH 36/57] Add atomic energies for B3LYP/6-311+g(3df,2p) --- documentation/source/users/cantherm/input.rst | 2 +- rmgpy/cantherm/statmech.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/documentation/source/users/cantherm/input.rst b/documentation/source/users/cantherm/input.rst index 80bbdb3a29..e978837720 100644 --- a/documentation/source/users/cantherm/input.rst +++ b/documentation/source/users/cantherm/input.rst @@ -87,7 +87,7 @@ Model Chemistry AEC BC SOC Supported Eleme ``'FCI/cc-pVnZ'``, *n = D,T,Q* v v C ``'BMK/cbsb7'`` v v v H, C, N, O, P, S ``'BMK/6-311G(2d,d,p)'`` v v v H, C, N, O, P, S -``'B3LYP/6-311+G(3df,2p)'`` v +``'B3LYP/6-311+G(3df,2p)'`` v v v H, C, N, O, P, S ``'B3LYP/6-31G**'`` v v H, C, O, S ================================================ ===== ==== ==== ==================== diff --git a/rmgpy/cantherm/statmech.py b/rmgpy/cantherm/statmech.py index 6456f7f75f..25703f33d9 100644 --- a/rmgpy/cantherm/statmech.py +++ b/rmgpy/cantherm/statmech.py @@ 
-667,8 +667,10 @@ def applyEnergyCorrections(E0, modelChemistry, atoms, bonds, elif modelChemistry in ['bmk/cbsb7', 'bmk/6-311g(2d,d,p)']: atomEnergies = {'H':-0.498618853119+ SOC['H'], 'N':-54.5697851544+ SOC['N'], 'O':-75.0515210278+ SOC['O'], 'C':-37.8287310027+ SOC['C'], 'P':-341.167615941+ SOC['P'], 'S': -398.001619915+ SOC['S']} - elif modelChemistry == 'b3lyp/6-31g**': + elif modelChemistry == 'b3lyp/6-31g**': # Fitted to small molecules atomEnergies = {'H':-0.500426155, 'C':-37.850331697831, 'O':-75.0535872748806, 'S':-398.100820107242} + elif modelChemistry == 'b3lyp/6-311+g(3df,2p)': # Calculated atomic energies + atomEnergies = {'H':-0.502155915123 + SOC['H'], 'C':-37.8574709934 + SOC['C'], 'N':-54.6007233609 + SOC['N'], 'O':-75.0909131284 + SOC['O'], 'P':-341.281730319 + SOC['P'], 'S':-398.134489850 + SOC['S']} else: raise Exception('Unknown model chemistry "{}".'.format(modelChemistry)) From 8d87b2594d40f39f022b48db08c7f465b6945372 Mon Sep 17 00:00:00 2001 From: alongd Date: Tue, 27 Mar 2018 16:51:15 -0400 Subject: [PATCH 37/57] Raise error if self.densStates is None Otherwise, the user gets a 'Segmentation fault (core dumped)' error with no trace. 
--- rmgpy/pdep/configuration.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rmgpy/pdep/configuration.pyx b/rmgpy/pdep/configuration.pyx index ada50fcd55..5f90233ba7 100644 --- a/rmgpy/pdep/configuration.pyx +++ b/rmgpy/pdep/configuration.pyx @@ -318,6 +318,8 @@ cdef class Configuration: for spec in self.species: self.densStates *= spec.conformer.spinMultiplicity * spec.conformer.opticalIsomers self.sumStates *= spec.conformer.spinMultiplicity * spec.conformer.opticalIsomers + if self.densStates is None: + raise ValueError("Species {} has no active modes".format(species.label)) @cython.boundscheck(False) @cython.wraparound(False) From 79762ef6b3a0d38967d927015b0df05bdc292087 Mon Sep 17 00:00:00 2001 From: alongd Date: Sat, 17 Feb 2018 20:24:13 -0500 Subject: [PATCH 38/57] Don't add a PDep reaction if it exists as a LibraryReaction Unless the library reaction has the has_PDep_route flag set to True --- rmgpy/rmg/pdep.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/rmgpy/rmg/pdep.py b/rmgpy/rmg/pdep.py index 826651d0b6..f1cea6ac07 100644 --- a/rmgpy/rmg/pdep.py +++ b/rmgpy/rmg/pdep.py @@ -41,7 +41,9 @@ from rmgpy.pdep import Conformer, Configuration from rmgpy.rmg.react import react -from rmgpy.exceptions import PressureDependenceError +from rmgpy.exceptions import PressureDependenceError, NetworkError +from rmgpy.data.kinetics.library import LibraryReaction + ################################################################################ @@ -132,7 +134,7 @@ def __reduce__(self): """ A helper function used when pickling an object. 
""" - return (PDepNetwork, (self.index, self.source), self.__dict__ ) + return (PDepNetwork, (self.index, self.source), self.__dict__) def __setstate__(self,dict): self.__dict__.update(dict) @@ -227,7 +229,7 @@ def getMaximumLeakSpecies(self, T, P): # Make sure we've identified a species if maxSpecies is None: - raise UnirxnNetworkException('No unimolecular isomers left to explore!') + raise NetworkError('No unimolecular isomers left to explore!') # Return the species return maxSpecies @@ -584,10 +586,31 @@ def update(self, reactionModel, pdepSettings): # Place the net reaction in the core or edge if necessary # Note that leak reactions are not placed in the edge - if all([s in reactionModel.core.species for s in netReaction.reactants]) and all([s in reactionModel.core.species for s in netReaction.products]): - reactionModel.addReactionToCore(netReaction) + if all([s in reactionModel.core.species for s in netReaction.reactants]) \ + and all([s in reactionModel.core.species for s in netReaction.products]): + # Check whether netReaction already exists in the core as a LibraryReaction + for rxn in reactionModel.core.reactions: + if isinstance(rxn, LibraryReaction) \ + and rxn.isIsomorphic(netReaction, eitherDirection=True) \ + and not rxn.has_pdep_route: # if this reaction is flagged as having an additional PDep pathway, do add the network reaction + logging.info('Network reaction {0} matched an existing core reaction {1}' + ' from the {2} library, and was not added to the model'.format( + str(netReaction), str(rxn), rxn.library)) + break + else: + reactionModel.addReactionToCore(netReaction) else: - reactionModel.addReactionToEdge(netReaction) + # Check whether netReaction already exists in the edge as a LibraryReaction + for rxn in reactionModel.edge.reactions: + if isinstance(rxn, LibraryReaction) \ + and rxn.isIsomorphic(netReaction, eitherDirection=True) \ + and not rxn.has_pdep_route: # if this reaction is flagged as having an additional PDep 
pathway, do add the network reaction + logging.info('Network reaction {0} matched an existing edge reaction {1}' + ' from the {2} library, and was not added to the model'.format( + str(netReaction), str(rxn), rxn.library)) + break + else: + reactionModel.addReactionToEdge(netReaction) # Set/update the net reaction kinetics using interpolation model kdata = K[:,:,i,j].copy() From 20859c15e4fbb3cc3da9e84ba2b8ceee449bfdac Mon Sep 17 00:00:00 2001 From: alongd Date: Tue, 13 Mar 2018 21:27:13 -0400 Subject: [PATCH 39/57] Added the has_PDep_route attribute to Reaction --- rmgpy/reaction.pxd | 1 + rmgpy/reaction.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/rmgpy/reaction.pxd b/rmgpy/reaction.pxd index 8c742cf249..a3262e1c2e 100644 --- a/rmgpy/reaction.pxd +++ b/rmgpy/reaction.pxd @@ -49,6 +49,7 @@ cdef class Reaction: cdef public bint duplicate cdef public float _degeneracy cdef public list pairs + cdef public bint has_pdep_route cdef public str comment cdef public dict k_effective_cache diff --git a/rmgpy/reaction.py b/rmgpy/reaction.py index 8aa6863283..c033a71754 100644 --- a/rmgpy/reaction.py +++ b/rmgpy/reaction.py @@ -79,6 +79,7 @@ class Reaction: `duplicate` ``bool`` ``True`` if the reaction is known to be a duplicate, ``False`` if not `degeneracy` :class:`double` The reaction path degeneracy for the reaction `pairs` ``list`` Reactant-product pairings to use in converting reaction flux to species flux + `has_pdep_route` ``bool`` ``True`` if the reaction has an additional PDep pathway, ``False`` if not (by default), used for LibraryReactions `comment` ``str`` A description of the reaction source (optional) =================== =========================== ============================ @@ -96,6 +97,7 @@ def __init__(self, duplicate=False, degeneracy=1, pairs=None, + has_pdep_route=False, comment='' ): self.index = index @@ -109,6 +111,7 @@ def __init__(self, self.transitionState = transitionState self.duplicate = duplicate self.pairs = pairs + 
self.has_pdep_route = has_pdep_route self.comment = comment self.k_effective_cache = {} @@ -129,6 +132,7 @@ def __repr__(self): if self.duplicate: string += 'duplicate={0}, '.format(self.duplicate) if self.degeneracy != 1: string += 'degeneracy={0:.1f}, '.format(self.degeneracy) if self.pairs is not None: string += 'pairs={0}, '.format(self.pairs) + if self.has_pdep_route: string += 'has_pdep_route={0}'.format(self.has_pdep_route) if self.comment != '': string += 'comment={0!r}, '.format(self.comment) string = string[:-2] + ')' return string @@ -168,11 +172,13 @@ def __reduce__(self): self.duplicate, self.degeneracy, self.pairs, + self.has_pdep_route, self.comment )) def __getDegneneracy(self): return self._degeneracy + def __setDegeneracy(self, new): # modify rate if kinetics exists if self.kinetics is not None: @@ -1056,6 +1062,7 @@ def copy(self): other.transitionState = deepcopy(self.transitionState) other.duplicate = self.duplicate other.pairs = deepcopy(self.pairs) + other.has_pdep_route = self.has_pdep_route other.comment = deepcopy(self.comment) return other From 62607cae45520fe7a929ed9c2fc727df3ce267e2 Mon Sep 17 00:00:00 2001 From: alongd Date: Tue, 13 Mar 2018 21:29:40 -0400 Subject: [PATCH 40/57] Added the has_PDep_route attribute to LibraryReaction If a library reaction has more than one pathway, and the given rate does not relate to all pathways, RMG should explore this pathway via PDep (or at least not remove a PDep reaction with identical reactants+products if generated). This flags such cases. 
--- rmgpy/data/kinetics/library.py | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/rmgpy/data/kinetics/library.py b/rmgpy/data/kinetics/library.py index 24b8ba59b8..9e945621ab 100644 --- a/rmgpy/data/kinetics/library.py +++ b/rmgpy/data/kinetics/library.py @@ -73,6 +73,7 @@ def __init__(self, degeneracy=1, pairs=None, library=None, + has_pdep_route=False, entry=None ): Reaction.__init__(self, @@ -85,7 +86,8 @@ def __init__(self, transitionState=transitionState, duplicate=duplicate, degeneracy=degeneracy, - pairs=pairs + pairs=pairs, + has_pdep_route=has_pdep_route ) self.library = library self.family = library @@ -106,6 +108,7 @@ def __reduce__(self): self.degeneracy, self.pairs, self.library, + self.has_pdep_route, self.entry )) @@ -139,13 +142,13 @@ def getLibraryReactions(self): and returns at list of all of these LibraryReaction and TemplateReaction objects """ rxns = [] - for entry in self.entries.values(): + for entry in self.entries.values(): if entry._longDesc and 'Originally from reaction library: ' in entry._longDesc: lib = [line for line in entry._longDesc.split('\n') if 'Originally from reaction library: ' in line] lib = lib[0].replace('Originally from reaction library: ','') lib = lib.replace('\n','') - rxn = LibraryReaction(reactants=entry.item.reactants[:], products=entry.item.products[:],\ - library=lib, specificCollider=entry.item.specificCollider, kinetics=entry.data, duplicate=entry.item.duplicate,\ + rxn = LibraryReaction(reactants=entry.item.reactants[:], products=entry.item.products[:], + library=lib, specificCollider=entry.item.specificCollider, kinetics=entry.data, duplicate=entry.item.duplicate, reversible=entry.item.reversible ) elif entry._longDesc and 'rate rule' in entry._longDesc: #template reaction @@ -158,15 +161,16 @@ def getLibraryReactions(self): tstrings = tstring.split(';') tstrings[0] = tstrings[0][1:] tstrings[-1] = tstrings[-1][:-1] - rxn = 
TemplateReaction(reactants=entry.item.reactants[:], products=entry.item.products[:],\ - specificCollider=entry.item.specificCollider, kinetics=entry.data, duplicate=entry.item.duplicate,\ + rxn = TemplateReaction(reactants=entry.item.reactants[:], products=entry.item.products[:], + specificCollider=entry.item.specificCollider, kinetics=entry.data, duplicate=entry.item.duplicate, reversible=entry.item.reversible,family=familyname,template=tstrings ) - else: #pdep or standard library reaction - rxn = LibraryReaction(reactants=entry.item.reactants[:], products=entry.item.products[:],\ - library=self.label, specificCollider=entry.item.specificCollider, kinetics=entry.data, duplicate=entry.item.duplicate,\ - reversible=entry.item.reversible - ) + else: # pdep or standard library reaction + rxn = LibraryReaction(reactants=entry.item.reactants[:], products=entry.item.products[:], + library=self.label, specificCollider=entry.item.specificCollider, + kinetics=entry.data, duplicate=entry.item.duplicate, + reversible=entry.item.reversible, has_pdep_route=entry.item.has_pdep_route + ) rxns.append(rxn) return rxns @@ -179,10 +183,9 @@ def markValidDuplicates(self, reactions1, reactions2): for r1 in reactions1: for r2 in reactions2: if (r1.reactants == r2.reactants and - r1.products == r2.products and - r1.specificCollider == r2.specificCollider and - r1.reversible == r2.reversible - ): + r1.products == r2.products and + r1.specificCollider == r2.specificCollider and + r1.reversible == r2.reversible): r1.duplicate = True r2.duplicate = True @@ -389,6 +392,7 @@ def loadEntry(self, referenceType='', shortDesc='', longDesc='', + has_pdep_route=False, ): # reactants = [Species(label=reactant1.strip().splitlines()[0].strip(), molecule=[Molecule().fromAdjacencyList(reactant1)])] @@ -400,7 +404,8 @@ def loadEntry(self, # if product3 is not None: products.append(Species(label=product3.strip().splitlines()[0].strip(), molecule=[Molecule().fromAdjacencyList(product3)])) # # Make a blank 
reaction - rxn = Reaction(reactants=[], products=[], degeneracy=degeneracy, duplicate=duplicate, reversible=reversible) + rxn = Reaction(reactants=[], products=[], degeneracy=degeneracy, duplicate=duplicate, reversible=reversible, + has_pdep_route=has_pdep_route) # if not rxn.isBalanced(): # raise DatabaseError('Reaction {0} in kinetics library {1} was not balanced! Please reformulate.'.format(rxn, self.label)) # label = str(rxn) From 90a58c0b1239c7ad4238246f39365046c1a571f2 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 29 Mar 2018 13:53:10 -0400 Subject: [PATCH 41/57] Check for empty molecule when generating identifiers If the molecule does not have any atoms, identifier generation will fail. --- rmgpy/molecule/translator.py | 4 ++++ rmgpy/molecule/translatorTest.py | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index ea5de1db8d..2bed64ef9f 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -509,6 +509,10 @@ def _write(mol, identifier_type, backend): Returns a string identifier of the requested type. 
""" + # Check that the molecule is not empty + if not mol.atoms: + return '' + for option in _get_backend_list(backend): if option == 'rdkit': try: diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index db918870a7..68753215b0 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -44,6 +44,16 @@ from rmgpy.species import Species +class TranslatorTest(unittest.TestCase): + + def test_empty_molecule(self): + """Test that we can safely return a blank identifier for an empty molecule.""" + mol = Molecule() + + self.assertEqual(mol.toSMILES(), '') + self.assertEqual(mol.toInChI(), '') + + class InChIGenerationTest(unittest.TestCase): def compare(self, adjlist, aug_inchi): spc = Species(molecule=[Molecule().fromAdjacencyList(adjlist)]) From 66eb5f3b0ea3ede639c4633f571aa17889b26357 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 29 Mar 2018 14:45:43 -0400 Subject: [PATCH 42/57] Properly count elements in disconnected InChIs InChIs can represent molecules that are not connected, but we were not properly counting the elements leading to issues with InChI generation for disconnected molecules. 
--- rmgpy/molecule/translatorTest.py | 8 ++++++++ rmgpy/molecule/util.py | 5 ++++- rmgpy/molecule/utilTest.py | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index 68753215b0..08900cfd7b 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -406,6 +406,14 @@ def test_aromatic_resonance_structures(self): for inchi in inchi_list: self.assertEqual(inchi, expected_inchi) + def test_disconnected_molecule(self): + """Test that we can generate an InChI for a disconnected molecule.""" + mol = Molecule().fromSMILES('CCCCO.C=O') + + inchi = 'InChI=1S/C4H10O.CH2O/c1-2-3-4-5;1-2/h5H,2-4H2,1H3;1H2' + + self.assertEqual(mol.toInChI(), inchi) + class SMILESGenerationTest(unittest.TestCase): def compare(self, adjlist, smiles): diff --git a/rmgpy/molecule/util.py b/rmgpy/molecule/util.py index 7da222e410..8aa2cf3df1 100644 --- a/rmgpy/molecule/util.py +++ b/rmgpy/molecule/util.py @@ -48,7 +48,10 @@ def retrieveElementCount(obj): element, count = match.groups() if count is '': count = 1 - element_count[element] = int(count) + if element in element_count: + element_count[element] += int(count) + else: + element_count[element] = int(count) return element_count elif isinstance(obj, Molecule): diff --git a/rmgpy/molecule/utilTest.py b/rmgpy/molecule/utilTest.py index 4939a3205d..16b4b17af8 100644 --- a/rmgpy/molecule/utilTest.py +++ b/rmgpy/molecule/utilTest.py @@ -33,6 +33,30 @@ from .util import * + +class ElementCountTest(unittest.TestCase): + + def test_inchi_count(self): + """Test element counting for InChI""" + inchi = 'InChI=1S/C4H10O/c1-2-3-4-5/h5H,2-4H2,1H3' + + expected = {'C': 4, 'H': 10, 'O': 1} + + count = retrieveElementCount(inchi) + + self.assertEqual(count, expected) + + def test_inchi_count_disconnected(self): + """Test element counting for InChI with disconnected molecule""" + inchi = 
'InChI=1S/C4H10O.CH2O/c1-2-3-4-5;1-2/h5H,2-4H2,1H3;1H2' + + expected = {'C': 5, 'H': 12, 'O': 2} + + count = retrieveElementCount(inchi) + + self.assertEqual(count, expected) + + class PartitionTest(unittest.TestCase): def test_singleton(self): From 6ba5d7b7c788bbd489779b04c417da99a274e532 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 29 Mar 2018 15:54:08 -0400 Subject: [PATCH 43/57] Fix incorrect variable in _rdkit_translator --- rmgpy/molecule/translator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 2bed64ef9f..d60109ef50 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -355,7 +355,7 @@ def _rdkit_translator(input_object, identifier_type, mol=None): if identifier_type == 'inchi': output = Chem.inchi.MolToInchi(rdkitmol, options='-SNon') elif identifier_type == 'inchikey': - inchi = toInChI(mol) + inchi = toInChI(input_object) output = Chem.inchi.InchiToInchiKey(inchi) elif identifier_type == 'sma': output = Chem.MolToSmarts(rdkitmol) From 16d4d276f12574d8eb2d2eab98919ed8f8054ead Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 29 Mar 2018 15:57:34 -0400 Subject: [PATCH 44/57] Have toAdjacencyList return empty string if no atoms are provided --- rmgpy/molecule/adjlist.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rmgpy/molecule/adjlist.py b/rmgpy/molecule/adjlist.py index 2a9f44194e..7da72f7c2e 100644 --- a/rmgpy/molecule/adjlist.py +++ b/rmgpy/molecule/adjlist.py @@ -713,6 +713,9 @@ def toAdjacencyList(atoms, multiplicity, label=None, group=False, removeH=False, Convert a chemical graph defined by a list of `atoms` into a string adjacency list.
""" + if not atoms: + return '' + if oldStyle: return toOldAdjacencyList(atoms, multiplicity, label, group, removeH) From caa5d0aa31cba6c8f1cfa5432babe7996e93f5dc Mon Sep 17 00:00:00 2001 From: Max Liu Date: Thu, 29 Mar 2018 17:31:34 -0400 Subject: [PATCH 45/57] Allow _rdkit_translator to handle mol=None input argument Instantiate an empty Molecule object in that case. Mirrors the behavior of _openbabel_translator. --- rmgpy/molecule/translator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index d60109ef50..b42620fd05 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -345,6 +345,8 @@ def _rdkit_translator(input_object, identifier_type, mol=None): raise ValueError('Identifier type {0} is not supported for reading using RDKit.'.format(identifier_type)) if rdkitmol is None: raise ValueError("Could not interpret the identifier {0!r}".format(input_object)) + if mol is None: + mol = mm.Molecule() output = fromRDKitMol(mol, rdkitmol) elif isinstance(input_object, mm.Molecule): # We are converting from a molecule to a string identifier From ad56fcd925fe38090b4b00efc37273a03db478ce Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Mon, 16 Apr 2018 17:30:00 -0400 Subject: [PATCH 46/57] ensure filterReactions is set before the filterReactions occurrence --- rmgpy/rmg/main.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 0366ce37a5..63024367b6 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -372,7 +372,11 @@ def initialize(self, **kwargs): # Check input file self.checkInput() - + + #Properly set filterReactions to initialize flags properly + if len(self.modelSettingsList) > 0: + self.filterReactions = self.modelSettingsList[0].filterReactions + # See if memory profiling package is available try: import psutil @@ -540,7 +544,7 @@ def execute(self, **kwargs): self.Tmax = max([x.T for x in
self.reactionSystems]).value_si # Initiate first reaction discovery step after adding all core species - if self.modelSettingsList[0].filterReactions: + if self.filterReactions: # Run the reaction system to update threshold and react flags for index, reactionSystem in enumerate(self.reactionSystems): reactionSystem.initializeModel( From 27394a6d1b04b6cb417060c59c6352ea99fbe602 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Mon, 16 Apr 2018 17:30:58 -0400 Subject: [PATCH 47/57] make the algorithm go through filter checking if it is the first iteration --- rmgpy/rmg/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 63024367b6..724d1a8a3f 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -674,7 +674,7 @@ def execute(self, **kwargs): for objectToEnlarge in objectsToEnlarge: self.reactionModel.enlarge(objectToEnlarge) - if len(self.reactionModel.core.species) > numCoreSpecies: + if len(self.reactionModel.core.species) > numCoreSpecies or self.reactionModel.iterationNum == 1: tempModelSettings = deepcopy(modelSettings) tempModelSettings.fluxToleranceKeepInEdge = 0 # If there were core species added, then react the edge From 06b68dca3777685ccccead3c02e768eef923c643 Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Mon, 16 Apr 2018 17:32:18 -0400 Subject: [PATCH 48/57] prevent the algorithm from terminating if new reactions are generated --- rmgpy/rmg/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 724d1a8a3f..daaf17efb4 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -714,10 +714,14 @@ def execute(self, **kwargs): maximumEdgeSpecies=modelSettings.maximumEdgeSpecies, reactionSystems=self.reactionSystems) + oldEdgeSize = len(self.reactionModel.edge.reactions) + oldCoreSize = len(self.reactionModel.core.reactions) self.reactionModel.enlarge(reactEdge=True, unimolecularReact=self.unimolecularReact, 
bimolecularReact=self.bimolecularReact) + if oldEdgeSize != len(self.reactionModel.edge.reactions) or oldCoreSize != len(self.reactionModel.core.reactions): + reactorDone = False if not numpy.isinf(self.modelSettingsList[0].toleranceThermoKeepSpeciesInEdge): self.reactionModel.thermoFilterDown(maximumEdgeSpecies=modelSettings.maximumEdgeSpecies) From 5c4101364a35acdabec852a8ded6072ef592d560 Mon Sep 17 00:00:00 2001 From: Mark Payne Date: Thu, 19 Apr 2018 17:10:38 -0400 Subject: [PATCH 49/57] Update Documentation for rmgpy.molecule molecule.generator and molecule.parser have become molecule.converter and molecule.translator --- documentation/source/reference/molecule/converter.rst | 5 +++++ documentation/source/reference/molecule/generator.rst | 5 ----- documentation/source/reference/molecule/index.rst | 8 ++++---- documentation/source/reference/molecule/parser.rst | 5 ----- documentation/source/reference/molecule/translator.rst | 5 +++++ 5 files changed, 14 insertions(+), 14 deletions(-) create mode 100644 documentation/source/reference/molecule/converter.rst delete mode 100644 documentation/source/reference/molecule/generator.rst delete mode 100644 documentation/source/reference/molecule/parser.rst create mode 100644 documentation/source/reference/molecule/translator.rst diff --git a/documentation/source/reference/molecule/converter.rst b/documentation/source/reference/molecule/converter.rst new file mode 100644 index 0000000000..19f14c023d --- /dev/null +++ b/documentation/source/reference/molecule/converter.rst @@ -0,0 +1,5 @@ +************************ +rmgpy.molecule.converter +************************ + +.. 
automodule:: rmgpy.molecule.converter diff --git a/documentation/source/reference/molecule/generator.rst b/documentation/source/reference/molecule/generator.rst deleted file mode 100644 index 3f9396c8c7..0000000000 --- a/documentation/source/reference/molecule/generator.rst +++ /dev/null @@ -1,5 +0,0 @@ -************************ -rmgpy.molecule.generator -************************ - -.. automodule:: rmgpy.molecule.generator diff --git a/documentation/source/reference/molecule/index.rst b/documentation/source/reference/molecule/index.rst index 1518f41507..f853c3dc5d 100644 --- a/documentation/source/reference/molecule/index.rst +++ b/documentation/source/reference/molecule/index.rst @@ -94,8 +94,8 @@ Class Description :mod:`rmgpy.molecule.resonance` Resonance structure generation methods :mod:`rmgpy.molecule.kekulize` Kekule structure generation :mod:`rmgpy.molecule.pathfinder` Resonance path enumeration -:mod:`rmgpy.molecule.generator` Molecule string representation generator -:mod:`rmgpy.molecule.parser` Molecule string representation parser +:mod:`rmgpy.molecule.converter` Molecule object converter (RDKit/OpenBabel) +:mod:`rmgpy.molecule.translator` Molecule string representation translator ================================ ======================================================== @@ -164,8 +164,8 @@ Class Description resonance kekulize pathfinder - generator - parser + converter + translator adjlist symmetry moleculedrawer diff --git a/documentation/source/reference/molecule/parser.rst b/documentation/source/reference/molecule/parser.rst deleted file mode 100644 index 4d78304e3b..0000000000 --- a/documentation/source/reference/molecule/parser.rst +++ /dev/null @@ -1,5 +0,0 @@ -********************* -rmgpy.molecule.parser -********************* - -.. 
automodule:: rmgpy.molecule.parser diff --git a/documentation/source/reference/molecule/translator.rst b/documentation/source/reference/molecule/translator.rst new file mode 100644 index 0000000000..2c2c8cb669 --- /dev/null +++ b/documentation/source/reference/molecule/translator.rst @@ -0,0 +1,5 @@ +************************** +rmgpy.molecule.translator +************************** + +.. automodule:: rmgpy.molecule.translator From b0a4b12114861399fa9ed6fbb010ccedd6c1d77b Mon Sep 17 00:00:00 2001 From: Matt Johnson Date: Tue, 24 Apr 2018 16:14:07 -0400 Subject: [PATCH 50/57] fix values for number of opticalIsomers in 23dimethylpropoxy example --- examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py | 4 ++-- .../reactions/23dimethylpropoxy/dimetpropoxy_betasci.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py index 184663854f..00a9b6203d 100644 --- a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py +++ b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy.py @@ -8,7 +8,7 @@ spinMultiplicity = 2 -opticalIsomers = 1 +opticalIsomers = 2 energy = { 'M08SO': QchemLog('dimetpropoxy.out'), @@ -28,4 +28,4 @@ HinderedRotor(scanLog=QchemLog('c5h11o1scan3.out'), pivots=[1,7], top=[7,8,9,10], symmetry=3, fit='best'), ] -""" \ No newline at end of file +""" diff --git a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py index 478f428c70..82d27d8fe1 100644 --- a/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py +++ b/examples/cantherm/reactions/23dimethylpropoxy/dimetpropoxy_betasci.py @@ -8,7 +8,7 @@ spinMultiplicity = 2 -opticalIsomers = 1 +opticalIsomers = 2 energy = { 'M08SO': QchemLog('dimetpropoxy_betasci.out'), @@ -28,4 +28,4 @@ HinderedRotor(scanLog=QchemLog('c5h11o1scan3.out'), 
pivots=[1,7], top=[7,8,9,10], symmetry=3, fit='best'), ] -""" \ No newline at end of file +""" From 2451858e72a87c3121a8ff1da1275d29f4e36caa Mon Sep 17 00:00:00 2001 From: Richard West Date: Fri, 27 Apr 2018 10:13:35 -0400 Subject: [PATCH 51/57] Added detail to recommended citation. Added volume, page numbers, and doi. Also updated my email address. --- documentation/source/users/rmg/credits.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/documentation/source/users/rmg/credits.rst b/documentation/source/users/rmg/credits.rst index ba9d0d4563..e65a768b2f 100755 --- a/documentation/source/users/rmg/credits.rst +++ b/documentation/source/users/rmg/credits.rst @@ -9,7 +9,7 @@ RMG is based upon work supported by the Department of Energy, Office of Basic En Project Supervisors: - Prof. William H. Green (whgreen@mit.edu) -- Prof. Richard H. West (r.west@neu.edu) +- Prof. Richard H. West (r.west@northeastern.edu) Current Developers: (rmg_dev@mit.edu) @@ -43,5 +43,5 @@ Previous Developers: How to Cite *********** -Connie W. Gao, Joshua W. Allen, William H. Green, Richard H. West, "Reaction Mechanism Generator: automatic -construction of chemical kinetic mechanisms." *Computer Physics Communications* (2016). \ No newline at end of file +Connie W. Gao, Joshua W. Allen, William H. Green, Richard H. West, "Reaction Mechanism Generator: Automatic +construction of chemical kinetic mechanisms." *Computer Physics Communications* 203 (2016) 212-225. 
https://doi.org/10.1016/j.cpc.2016.02.013 From 126ffbcc3797d7f3feb27c0bf17f038b50af3f70 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 2 May 2018 23:28:08 -0400 Subject: [PATCH 52/57] Update RMG-Py version number to 2.1.9 --- meta.yaml | 2 +- rmgpy/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meta.yaml b/meta.yaml index b068c51866..a10c826439 100644 --- a/meta.yaml +++ b/meta.yaml @@ -77,7 +77,7 @@ requirements: - pyzmq - quantities - rdkit >=2015.09.2 - - rmgdatabase >=2.1.8 + - rmgdatabase >=2.1.9 - scipy - scoop - symmetry diff --git a/rmgpy/version.py b/rmgpy/version.py index 69e7c1ef82..2752c07d7d 100644 --- a/rmgpy/version.py +++ b/rmgpy/version.py @@ -34,4 +34,4 @@ This value can be accessed via `rmgpy.__version__`. """ -__version__ = '2.1.8' \ No newline at end of file +__version__ = '2.1.9' From e8a3a41527d09d81f29795635414a30ebbde5542 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Wed, 2 May 2018 23:30:12 -0400 Subject: [PATCH 53/57] Add release notes for v2.1.9 --- .../source/users/rmg/releaseNotes.rst | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/documentation/source/users/rmg/releaseNotes.rst b/documentation/source/users/rmg/releaseNotes.rst index 9fed704ef2..75c87e075b 100644 --- a/documentation/source/users/rmg/releaseNotes.rst +++ b/documentation/source/users/rmg/releaseNotes.rst @@ -4,6 +4,48 @@ Release Notes ************* +RMG-Py Version 2.1.9 +==================== +Date: May 1, 2018 + +- Cantherm: + - Atom counts are no longer necessary in input files and are automatically determined from geometries + - Custom atom energies can now be specified in input files + - Removed atom energies for a few ambiguous model chemistries + - Add atom energies for B3LYP/6-311+g(3df,2p) + +- Changes: + - Refactored molecule.parser and molecule.generator modules into molecule.converter and molecule.translator to improve code organization + - SMILES generation now outputs canonical SMILES + - 
Molecule.sortAtoms method restored for deterministic atom order + - PDep reactions which match an existing library reaction are no longer added to the model + +- Fixes: + - Fix issue with reaction filter initiation when using seed mechanisms + +RMG-database Version 2.1.9 +========================== +Date: May 1, 2018 + +- Chlorine: + - New Chlorinated_Hydrocarbons thermo library + - Added group additivity values and long distance corrections for chlorinated species + - Added chlorine groups and training reactions to H_Abstraction + +- Additions: + - New NOx2018 kinetics, thermo, and transport libraries + - New N-S_interactions kinetics library + - New SulfurHaynes thermo library + - Added species to SOxNOx thermo library from quantum calculations + +- Other changes: + - Renamed NOx and SOx kinetics libraries to PrimaryNitrogenLibrary and PrimarySulfurLibrary + - S2O2, SOO2, SO2O2, and N2SH were globally forbidden due to inability to optimize geometries + +- Fixes: + - Corrected some A-factor units in Nitrogen_Dean_and_Bozzelli kinetics library + + RMG-Py Version 2.1.8 ==================== Date: March 22, 2018 From a79e0bb241d7de3d55557fe1a7e81ea4f37f0d50 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Mon, 7 May 2018 11:35:07 -0400 Subject: [PATCH 54/57] Use anaconda compilers for conda recipe Update compiler requirements in meta.yaml Remove compiler env variables from build.sh and bld.bat These are automatically set by the new conda compilers --- bld.bat | 5 ----- build.sh | 4 ---- meta.yaml | 6 ++---- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/bld.bat b/bld.bat index 8e7bb3e6b3..7ba30de0b0 100644 --- a/bld.bat +++ b/bld.bat @@ -1,8 +1,3 @@ -set CC=gcc -set CXX=g++ -set F77=gfortran -set F90=gfortran - mingw32-make -j%CPU_COUNT% mingw32-make QM diff --git a/build.sh b/build.sh index 7be3ec95b7..475a92962d 100644 --- a/build.sh +++ b/build.sh @@ -1,7 +1,3 @@ -export CC=${PREFIX}/bin/gcc -export CXX=${PREFIX}/bin/g++ -export 
F77=${PREFIX}/bin/gfortran -export F90=${PREFIX}/bin/gfortran make -j${CPU_COUNT} make QM $PYTHON setup.py install diff --git a/meta.yaml b/meta.yaml index a10c826439..51d4eb3f8e 100644 --- a/meta.yaml +++ b/meta.yaml @@ -17,12 +17,10 @@ requirements: - cairocffi # [unix] - coverage - cython >=0.25.2 - - gcc # [unix] - - gcc ==4.8.2 # [linux32] + - gcc_linux-64 # [linux] + - clang_osx-64 # [osx] - gprof2dot - jinja2 - - libgcc # [unix] - - libgfortran ==1.0 # [linux] You may need to comment this out for mac osx - lpsolve55 - markupsafe - matplotlib >=1.5 From fb8283e1f26640917a3312ca7baab5695c31faa4 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Mon, 7 May 2018 17:09:13 -0400 Subject: [PATCH 55/57] Set version number based on git tag in meta.yaml __conda_version__ is no longer supported --- bld.bat | 3 --- build.sh | 3 --- meta.yaml | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/bld.bat b/bld.bat index 7ba30de0b0..a46a9b3a19 100644 --- a/bld.bat +++ b/bld.bat @@ -4,9 +4,6 @@ mingw32-make QM %PYTHON% setup.py install -:: Save version number stored in rmgpy/__init__.py file -%PYTHON% -c "from rmgpy import __version__; print __version__" > %SRC_DIR%\__conda_version__.txt - :: lazy "install" of everything in our 'external' folder. :: most of which should probably be elsewhere mkdir %SP_DIR%\external diff --git a/build.sh b/build.sh index 475a92962d..9617ea9a17 100644 --- a/build.sh +++ b/build.sh @@ -2,9 +2,6 @@ make -j${CPU_COUNT} make QM $PYTHON setup.py install -# Save version number stored in rmgpy/__init__.py file -$PYTHON -c 'from rmgpy import __version__; print __version__' > ${SRC_DIR}/__conda_version__.txt - # lazy "install" of everything in our 'external' folder. 
# most of which should probably be elsewhere cp -R ${SRC_DIR}/external ${SP_DIR} diff --git a/meta.yaml b/meta.yaml index 51d4eb3f8e..5412b151f1 100644 --- a/meta.yaml +++ b/meta.yaml @@ -1,7 +1,7 @@ # For conda build package: name: rmg - version: "0" # set by build.sh, which gets from rmgpy/__init__.py + version: {{ environ.get('GIT_DESCRIBE_TAG', '') }} source: path: . From 06ec1ca67b72bf54e9d752ba437fff2fdaaed344 Mon Sep 17 00:00:00 2001 From: Max Liu Date: Mon, 7 May 2018 17:52:36 -0400 Subject: [PATCH 56/57] Update dependencies in meta.yaml Tailor requirements for build and run environments --- meta.yaml | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/meta.yaml b/meta.yaml index 5412b151f1..c82d7af3a2 100644 --- a/meta.yaml +++ b/meta.yaml @@ -11,27 +11,16 @@ build: requirements: build: - - argparse # [py26] - boost ==1.56.0 - - cairo # [unix] - - cairocffi # [unix] - - coverage - cython >=0.25.2 - gcc_linux-64 # [linux] - clang_osx-64 # [osx] - - gprof2dot - jinja2 - lpsolve55 - - markupsafe - - matplotlib >=1.5 - - nose - numpy - openbabel - - psutil - pydas >=1.0.2 - - pydot ==1.2.2 - pydqed >=1.0.1 - - pyparsing - pyrdl - python - pyzmq @@ -40,23 +29,20 @@ requirements: - scipy - scoop - setuptools - - xlwt run: - argparse # [py26] - cairo # [unix] - cairocffi # [unix] - - cantera >=2.2 + - cantera >=2.3.0a3 - coolprop - coverage - - cython ==0.21 + - cython >=0.25.2 - ffmpeg - gprof2dot - graphviz - guppy - jinja2 - - libgcc # [unix] - - libgfortran ==1.0 # [linux] You may need to comment this out for mac osx - lpsolve55 - markupsafe - matplotlib >=1.5 @@ -69,6 +55,7 @@ requirements: - pydas >=1.0.2 - pydot ==1.2.2 - pydqed >=1.0.1 + - pymongo - pyparsing - pyrdl - python From c5590b9133fbf071b4500680d1d7b325005b6dae Mon Sep 17 00:00:00 2001 From: Max Liu Date: Mon, 7 May 2018 13:52:57 -0400 Subject: [PATCH 57/57] Update build scripts for conda recipe Remove make QM command which no longer exists Do not recompile 
extension modules during install --- bld.bat | 4 ++-- build.sh | 4 +++- setup.py | 4 ---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/bld.bat b/bld.bat index a46a9b3a19..64d18a30c2 100644 --- a/bld.bat +++ b/bld.bat @@ -1,7 +1,7 @@ - +:: Compile RMG mingw32-make -j%CPU_COUNT% -mingw32-make QM +:: Install RMG %PYTHON% setup.py install :: lazy "install" of everything in our 'external' folder. diff --git a/build.sh b/build.sh index 9617ea9a17..8c7ab05e56 100644 --- a/build.sh +++ b/build.sh @@ -1,5 +1,7 @@ +# Compile RMG make -j${CPU_COUNT} -make QM + +# Install RMG $PYTHON setup.py install # lazy "install" of everything in our 'external' folder. diff --git a/setup.py b/setup.py index 61a8dd271e..d591d03641 100644 --- a/setup.py +++ b/setup.py @@ -157,10 +157,6 @@ def getCanthermExtensionModules(): ################################################################################ ext_modules = [] -if 'install' in sys.argv: - # This is so users can still do simply `python setup.py install` - ext_modules.extend(getMainExtensionModules()) - ext_modules.extend(getSolverExtensionModules()) if 'main' in sys.argv: # This is for `python setup.py build_ext main` sys.argv.remove('main')