move extract block to molecule utils

marrink-lab · Nov 22, 2023 · 4ed2979 · 4ed2979
1 parent 362372e
commit 4ed2979
Show file tree

Hide file tree

Showing 4 changed files with 267 additions and 293 deletions.
diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py
@@ -19,8 +19,8 @@
 from .processor import Processor
 from .linalg_functions import (u_vect, center_of_geometry,
                                radius_of_gyration)
-from .topology import replace_defined_interaction
 from .linalg_functions import dih
+from .molecule_utils import extract_block
 """
 Processor generating coordinates for all residues of a meta_molecule
 matching those in the meta_molecule.molecule attribute.
@@ -216,80 +216,6 @@ def map_from_CoG(coords):
 
     return out_vectors
 
-def _relabel_interaction_atoms(interaction, mapping):
-    """
-    Relables the atoms in interaction according to the
-    rules defined in mapping.
-
-    Parameters
-    ----------
-    interaction: `vermouth.molecule.Interaction`
-    mapping: `:class:dict`
-
-    Returns
-    -------
-    interaction: `vermouth.molecule.Interaction`
-        the new interaction with updated atoms
-    """
-    new_atoms = [mapping[atom] for atom in interaction.atoms]
-    new_interaction = interaction._replace(atoms=new_atoms)
-    return new_interaction
-
-def extract_block(molecule, nodes, defines={}):
-    """
-    Given a `vermouth.molecule` and a `resname`
-    extract the information of a block from the
-    molecule definition and replace all defines
-    if any are found.
-
-    Parameters
-    ----------
-    molecule:  :class:vermouth.molecule.Molecule
-    nodes: abc.hashable
-        the nodes corresponding to the block to
-        extract
-    defines:   dict
-      dict of type define: value
-
-    Returns
-    -------
-    :class:vermouth.molecule.Block
-    """
-    resid = molecule.nodes[nodes[0]]["resid"]
-    resname = molecule.nodes[nodes[0]]["resname"]
-    block = vermouth.molecule.Block()
-
-    # select all nodes with the same first resid and
-    # make sure the block node labels are atomnames
-    # also build a correspondance dict between node
-    # label in the molecule and in the block for
-    # relabeling the interactions
-    mapping = {}
-    for node in nodes:
-        attr_dict = molecule.nodes[node]
-        if attr_dict["resid"] == resid:
-            block.add_node(attr_dict["atomname"], **attr_dict)
-            mapping[node] = attr_dict["atomname"]
-
-    for inter_type in molecule.interactions:
-        for interaction in molecule.interactions[inter_type]:
-            if all(atom in mapping for atom in interaction.atoms):
-                interaction = replace_defined_interaction(interaction, defines)
-                interaction = _relabel_interaction_atoms(interaction, mapping)
-                block.interactions[inter_type].append(interaction)
-
-    for inter_type in ["bonds", "constraints", "virtual_sitesn",
-                       "virtual_sites2", "virtual_sites3", "virtual_sites4"]:
-        block.make_edges_from_interaction_type(inter_type)
-
-    if not nx.is_connected(block):
-        msg = ('\n Residue {} with id {} consistes of two disconnected parts. '
-               'Make sure all atoms/particles in a residue are connected by bonds,'
-               ' constraints or virual-sites.')
-        raise IOError(msg.format(resname, resid))
-
-    return block
-
 class GenerateTemplates(Processor):
     """
     This processor takes a a class:`polyply.src.MetaMolecule` and

diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
@@ -11,234 +11,27 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import itertools
-from collections import defaultdict
 import numpy as np
 import networkx as nx
 import pysmiles
 import vermouth
 from vermouth.forcefield import ForceField
-from vermouth.molecule import Interaction
 from vermouth.gmx.itp_read import read_itp
 from polyply.src.topology import Topology
-from polyply.src.generate_templates import extract_block
+from polyply.src.molecule_utils import extract_block, extract_links
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 from polyply.src.charges import equalize_charges
-from polyply.tests.test_lib_files import _interaction_equal 
-
-def diffs_to_prefix(atoms, resid_diffs):
-    """
-    Given a list of atoms and corresponding differences
-    between their resids, generate the offset prefix for
-    the atomnames according to the vermouth sepcific offset
-    language.
-
-    The reference atom must have resid_diff value of 0.
-    Other atoms either get - or + signs
-    depending on their resid offset.
-
-    Parameters
-    ----------
-    atoms: abc.itertable[str]
-    resid_diff: abc.itertable[int]
-        the differences in resid with respeect to
-        the smallest/largest resid which is 0
-
-    Returns
-    -------
-    abc.itertable
-        list with prefixed atom names
-    """
-    prefixed_atoms = []
-    for atom, diff in zip(atoms, resid_diffs):
-        if diff > 0:
-            prefix = "".join(["+" for i in range(0, diff)])
-        else:
-            prefix = "".join(["-" for i in range(diff, 0)])
-        prefixed_atoms.append(prefix + atom)
-    return prefixed_atoms
-
-def _extract_edges_from_shortest_path(atoms, block, min_resid):
-    """
-    Given a list atoms generate a list of edges correspoding to
-    all edges required to connect all atoms by at least one
-    shortest path. Edges are retunred on atomname basis with
-    prefix relative to the `min_resid`. See diffs_to_prefix.
-
-    Paramters:
-    ----------
-    atoms: abc.itertable
-        the atoms to collect edges for
-    block: :class:`vermouth.molecule.Block`
-        the molecule which to servey for edges
-    min_resid: int
-        the resid to which the prefix indicate relative resid
-        distance
-
-    Returns
-    -------
-    list[tuple]
-        the edge list by atomname with prefix indicating relative
-        residue distance to min_resid
-    """
-    edges = []
-    had_edges = []
-    final_atoms = {}
-    resnames = {}
-    for origin, target in itertools.combinations(atoms, r=2):
-        path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0]
-        for edge in zip(path[:-1], path[1:]):
-            if edge not in had_edges:
-                resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid
-                atom_names = [block.nodes[node]["atomname"] for node in edge]
-                link_names = diffs_to_prefix(atom_names, resid_diffs)
-                final_atoms.update(dict(zip(edge, link_names)))
-                edges.append(link_names)
-                had_edges.append(edge)
-                resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge]))
-    return final_atoms, edges, resnames
-
-def extract_links(molecule):
-    """
-    Given a molecule that has the resid and resname attributes
-    correctly set, extract the interactions which span more than
-    a single residue and generate a link.
-
-    Parameters
-    ----------
-    molecule: :class:`vermouth.molecule.Molecule`
-        the molecule from which to extract interactions
-
-    Returns
-    -------
-    list[:class:`vermouth.molecule.Links`]
-        a list with a links found
-    """
-    links = []
-    # patterns are a sqeuence of atoms that define an interaction
-    # sometimes multiple interactions are defined for one pattern
-    # in that case they are all collected in this dictionary
-    patterns = defaultdict(dict)
-    # for each found pattern the resnames are collected; this is important
-    # because the same pattern may apply to residues with different name
-    resnames_for_patterns = defaultdict(dict)
-    link_atoms_for_patterns = defaultdict(list)
-    # as additional safe-gaurd against false links we also collect the edges
-    # that span the interaction by finding the shortest simple path between
-    # all atoms in patterns. Note that the atoms in patterns not always have
-    # to be directly bonded. For example, pairs are not directly bonded and
-    # can span multiple residues
-    #edges_for_patterns = defaultdict(list)
-    for inter_type in molecule.interactions:
-        #print("TYPE", inter_type)
-        for kdx, interaction in enumerate(molecule.interactions[inter_type]):
-            # extract resids and resname corresponding to interaction atoms
-            resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms])
-            resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms]
-            # compute the resid offset to be used for the atom prefixes
-            min_resid = min(resids)
-            diff = resids - min_resid
-            pattern = tuple(set(list(zip(diff, resnames))))
-
-            # in this case all interactions are in a block and we skip
-            if np.sum(diff) == 0:
-                continue
-
-            # we collect the edges corresponding to the simple paths between pairs of atoms
-            # in the interaction
-            mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid)
-            #print(kdx, resnames)
-            link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()}
-            link_atoms =  [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms]
-            link_inter = Interaction(atoms=link_atoms,
-                                     parameters=interaction.parameters,
-                                     meta={})
-            #print("inter number", kdx)
-            # here we deal with filtering redundancy
-            if pattern in patterns and inter_type in patterns[pattern]:
-                #print(pattern)
-           #     if pattern == ((0, 'PEO'), (1, 'PEO')):
-           #         print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n")
-
-                for other_inter in patterns[pattern].get(inter_type, []):
-                    if _interaction_equal(other_inter, link_inter, inter_type):
-                        break
-                else:
-                    patterns[pattern][inter_type].append(link_inter)
-                    resnames_for_patterns[pattern].update(resnames)
-                    link_atoms_for_patterns[pattern] += link_atoms
-            else:
-                patterns[pattern][inter_type] = [link_inter]
-                resnames_for_patterns[pattern].update(resnames)
-                #edges_for_patterns[pattern] += edges
-                link_atoms_for_patterns[pattern] += link_atoms
-            #print('resnames', resnames_for_patterns[pattern], '\n')
-#    for inter in patterns[list(patterns.keys())[0]]['angles']:
-#        print(inter)
-    # we make new links for each unique interaction per type
-    for pattern in patterns:
-        link = vermouth.molecule.Link()
-        link.add_nodes_from(set(link_atoms_for_patterns[pattern]))
-        #link.add_edges_from(edges_for_patterns[pattern])
-        resnames = resnames_for_patterns[pattern]
-     #   print(resnames)
-        nx.set_node_attributes(link, resnames, "resname")
-
-        had_parameters = []
-        for inter_type, inters in patterns[pattern].items():
-            for idx, interaction in enumerate(inters):
-                #new_parameters = interaction.parameters
-                new_meta = interaction.meta
-                #new_atoms = interaction.atoms
-                # to account for the fact when multiple interactions with the same
-                # atom patterns need to be written to ff
-                new_meta.update({"version": idx})
-                new_meta.update({"comment": "link"})
-                had_parameters.append(interaction.parameters)
-                # map atoms to proper atomnames ..
-                link.interactions[inter_type].append(interaction)
-
-        links.append(link)
-    #print(links)
-    return links
-
-def handle_chirality(molecule, chiral_centers):
-    pass
-
-def hcount(molecule, node):
-    hcounter = 0
-    for node in molecule.neighbors(node):
-        if molecule.nodes[node]["element"] == "H":
-            hcounter+= 1
-    return hcounter
-
-def set_charges(block, res_graph, name):
-    resnames = nx.get_node_attributes(res_graph, 'resname')
-    centrality = nx.betweenness_centrality(res_graph)
-    score = -1
-    most_central_node = None
-    for node, resname in resnames.items():
-        if resname == name and centrality[node] > score:
-            score = centrality[node]
-            most_central_node = node
-    charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge')
-    atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname')
-    charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()}
-    for node in block.nodes:
-        block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']]
-    return block
 
 def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
     """
     Main executable for itp to ff tool.
     """
+    # read the topology file
     if itppath.suffix == ".top":
-        # read the topology file
         top = Topology.from_gmx_topfile(itppath, name="test")
         mol = top.molecules[0].molecule
-
+    # read itp file
     if itppath.suffix == ".itp":
         with open(itppath, "r") as _file:
             lines = _file.readlines()
@@ -264,15 +57,10 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
         new_block.nrexcl = mol.nrexcl
         force_field.blocks[name] = new_block
         set_charges(new_block, res_graph, name)
-        #print("here")
         if itppath.suffix == ".top":
             equalize_charges(new_block, top)
 
-#    for node in mol.nodes:
-#        print(mol.nodes[node])
-
     force_field.links = extract_links(mol)
 
-    print("-----")
     with open(outpath, "w") as filehandle:
         ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()