From 4ed29798a32dd85f8cb9bd3ae4e66a2a1c46e2b8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 Nov 2023 16:09:33 +0100
Subject: [PATCH] move extract block to molecule utils

---
 polyply/src/generate_templates.py        |  76 +------
 polyply/src/itp_to_ff.py                 | 218 +------------------
 polyply/src/molecule_utils.py            | 260 +++++++++++++++++++++++
 polyply/tests/test_generate_templates.py |   6 +-
 4 files changed, 267 insertions(+), 293 deletions(-)
 create mode 100644 polyply/src/molecule_utils.py

diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py
index 509663d7..d5ccbe23 100644
--- a/polyply/src/generate_templates.py
+++ b/polyply/src/generate_templates.py
@@ -19,8 +19,8 @@
 from .processor import Processor
 from .linalg_functions import (u_vect, center_of_geometry,
                                radius_of_gyration)
-from .topology import replace_defined_interaction
 from .linalg_functions import dih
+from .molecule_utils import extract_block
 """
 Processor generating coordinates for all residues of a meta_molecule
 matching those in the meta_molecule.molecule attribute.
@@ -216,80 +216,6 @@ def map_from_CoG(coords):
 
     return out_vectors
 
-def _relabel_interaction_atoms(interaction, mapping):
-    """
-    Relables the atoms in interaction according to the
-    rules defined in mapping.
-
-    Parameters
-    ----------
-    interaction: `vermouth.molecule.Interaction`
-    mapping: `:class:dict`
-
-    Returns
-    -------
-    interaction: `vermouth.molecule.Interaction`
-        the new interaction with updated atoms
-    """
-    new_atoms = [mapping[atom] for atom in interaction.atoms]
-    new_interaction = interaction._replace(atoms=new_atoms)
-    return new_interaction
-
-def extract_block(molecule, nodes, defines={}):
-    """
-    Given a `vermouth.molecule` and a `resname`
-    extract the information of a block from the
-    molecule definition and replace all defines
-    if any are found.
-
-    Parameters
-    ----------
-    molecule:  :class:vermouth.molecule.Molecule
-    nodes: abc.hashable
-        the nodes corresponding to the block to
-        extract
-    defines:   dict
-      dict of type define: value
-
-    Returns
-    -------
-    :class:vermouth.molecule.Block
-    """
-    resid = molecule.nodes[nodes[0]]["resid"]
-    resname = molecule.nodes[nodes[0]]["resname"]
-    block = vermouth.molecule.Block()
-
-    # select all nodes with the same first resid and
-    # make sure the block node labels are atomnames
-    # also build a correspondance dict between node
-    # label in the molecule and in the block for
-    # relabeling the interactions
-    mapping = {}
-    for node in nodes:
-        attr_dict = molecule.nodes[node]
-        if attr_dict["resid"] == resid:
-            block.add_node(attr_dict["atomname"], **attr_dict)
-            mapping[node] = attr_dict["atomname"]
-
-    for inter_type in molecule.interactions:
-        for interaction in molecule.interactions[inter_type]:
-            if all(atom in mapping for atom in interaction.atoms):
-                interaction = replace_defined_interaction(interaction, defines)
-                interaction = _relabel_interaction_atoms(interaction, mapping)
-                block.interactions[inter_type].append(interaction)
-
-    for inter_type in ["bonds", "constraints", "virtual_sitesn",
-                       "virtual_sites2", "virtual_sites3", "virtual_sites4"]:
-        block.make_edges_from_interaction_type(inter_type)
-
-    if not nx.is_connected(block):
-        msg = ('\n Residue {} with id {} consistes of two disconnected parts. '
-               'Make sure all atoms/particles in a residue are connected by bonds,'
-               ' constraints or virual-sites.')
-        raise IOError(msg.format(resname, resid))
-
-    return block
-
 class GenerateTemplates(Processor):
     """
     This processor takes a a class:`polyply.src.MetaMolecule` and
diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py
index d8f6d0b0..dc03725c 100644
--- a/polyply/src/itp_to_ff.py
+++ b/polyply/src/itp_to_ff.py
@@ -11,234 +11,27 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import itertools
-from collections import defaultdict
 import numpy as np
 import networkx as nx
 import pysmiles
 import vermouth
 from vermouth.forcefield import ForceField
-from vermouth.molecule import Interaction
 from vermouth.gmx.itp_read import read_itp
 from polyply.src.topology import Topology
-from polyply.src.generate_templates import extract_block
+from polyply.src.molecule_utils import extract_block, extract_links
 from polyply.src.fragment_finder import FragmentFinder
 from polyply.src.ffoutput import ForceFieldDirectiveWriter
 from polyply.src.charges import equalize_charges
-from polyply.tests.test_lib_files import _interaction_equal 
-
-def diffs_to_prefix(atoms, resid_diffs):
-    """
-    Given a list of atoms and corresponding differences
-    between their resids, generate the offset prefix for
-    the atomnames according to the vermouth sepcific offset
-    language.
-
-    The reference atom must have resid_diff value of 0.
-    Other atoms either get - or + signs
-    depending on their resid offset.
-
-    Parameters
-    ----------
-    atoms: abc.itertable[str]
-    resid_diff: abc.itertable[int]
-        the differences in resid with respeect to
-        the smallest/largest resid which is 0
-
-    Returns
-    -------
-    abc.itertable
-        list with prefixed atom names
-    """
-    prefixed_atoms = []
-    for atom, diff in zip(atoms, resid_diffs):
-        if diff > 0:
-            prefix = "".join(["+" for i in range(0, diff)])
-        else:
-            prefix = "".join(["-" for i in range(diff, 0)])
-        prefixed_atoms.append(prefix + atom)
-    return prefixed_atoms
-
-def _extract_edges_from_shortest_path(atoms, block, min_resid):
-    """
-    Given a list atoms generate a list of edges correspoding to
-    all edges required to connect all atoms by at least one
-    shortest path. Edges are retunred on atomname basis with
-    prefix relative to the `min_resid`. See diffs_to_prefix.
-
-    Paramters:
-    ----------
-    atoms: abc.itertable
-        the atoms to collect edges for
-    block: :class:`vermouth.molecule.Block`
-        the molecule which to servey for edges
-    min_resid: int
-        the resid to which the prefix indicate relative resid
-        distance
-
-    Returns
-    -------
-    list[tuple]
-        the edge list by atomname with prefix indicating relative
-        residue distance to min_resid
-    """
-    edges = []
-    had_edges = []
-    final_atoms = {}
-    resnames = {}
-    for origin, target in itertools.combinations(atoms, r=2):
-        path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0]
-        for edge in zip(path[:-1], path[1:]):
-            if edge not in had_edges:
-                resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid
-                atom_names = [block.nodes[node]["atomname"] for node in edge]
-                link_names = diffs_to_prefix(atom_names, resid_diffs)
-                final_atoms.update(dict(zip(edge, link_names)))
-                edges.append(link_names)
-                had_edges.append(edge)
-                resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge]))
-    return final_atoms, edges, resnames
-
-def extract_links(molecule):
-    """
-    Given a molecule that has the resid and resname attributes
-    correctly set, extract the interactions which span more than
-    a single residue and generate a link.
-
-    Parameters
-    ----------
-    molecule: :class:`vermouth.molecule.Molecule`
-        the molecule from which to extract interactions
-
-    Returns
-    -------
-    list[:class:`vermouth.molecule.Links`]
-        a list with a links found
-    """
-    links = []
-    # patterns are a sqeuence of atoms that define an interaction
-    # sometimes multiple interactions are defined for one pattern
-    # in that case they are all collected in this dictionary
-    patterns = defaultdict(dict)
-    # for each found pattern the resnames are collected; this is important
-    # because the same pattern may apply to residues with different name
-    resnames_for_patterns = defaultdict(dict)
-    link_atoms_for_patterns = defaultdict(list)
-    # as additional safe-gaurd against false links we also collect the edges
-    # that span the interaction by finding the shortest simple path between
-    # all atoms in patterns. Note that the atoms in patterns not always have
-    # to be directly bonded. For example, pairs are not directly bonded and
-    # can span multiple residues
-    #edges_for_patterns = defaultdict(list)
-    for inter_type in molecule.interactions:
-        #print("TYPE", inter_type)
-        for kdx, interaction in enumerate(molecule.interactions[inter_type]):
-            # extract resids and resname corresponding to interaction atoms
-            resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms])
-            resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms]
-            # compute the resid offset to be used for the atom prefixes
-            min_resid = min(resids)
-            diff = resids - min_resid
-            pattern = tuple(set(list(zip(diff, resnames))))
-
-            # in this case all interactions are in a block and we skip
-            if np.sum(diff) == 0:
-                continue
-
-            # we collect the edges corresponding to the simple paths between pairs of atoms
-            # in the interaction
-            mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid)
-            #print(kdx, resnames)
-            link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()}
-            link_atoms =  [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms]
-            link_inter = Interaction(atoms=link_atoms,
-                                     parameters=interaction.parameters,
-                                     meta={})
-            #print("inter number", kdx)
-            # here we deal with filtering redundancy
-            if pattern in patterns and inter_type in patterns[pattern]:
-                #print(pattern)
-           #     if pattern == ((0, 'PEO'), (1, 'PEO')):
-           #         print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n")
-
-                for other_inter in patterns[pattern].get(inter_type, []):
-                    if _interaction_equal(other_inter, link_inter, inter_type):
-                        break
-                else:
-                    patterns[pattern][inter_type].append(link_inter)
-                    resnames_for_patterns[pattern].update(resnames)
-                    link_atoms_for_patterns[pattern] += link_atoms
-            else:
-                patterns[pattern][inter_type] = [link_inter]
-                resnames_for_patterns[pattern].update(resnames)
-                #edges_for_patterns[pattern] += edges
-                link_atoms_for_patterns[pattern] += link_atoms
-            #print('resnames', resnames_for_patterns[pattern], '\n')
-#    for inter in patterns[list(patterns.keys())[0]]['angles']:
-#        print(inter)
-    # we make new links for each unique interaction per type
-    for pattern in patterns:
-        link = vermouth.molecule.Link()
-        link.add_nodes_from(set(link_atoms_for_patterns[pattern]))
-        #link.add_edges_from(edges_for_patterns[pattern])
-        resnames = resnames_for_patterns[pattern]
-     #   print(resnames)
-        nx.set_node_attributes(link, resnames, "resname")
-
-        had_parameters = []
-        for inter_type, inters in patterns[pattern].items():
-            for idx, interaction in enumerate(inters):
-                #new_parameters = interaction.parameters
-                new_meta = interaction.meta
-                #new_atoms = interaction.atoms
-                # to account for the fact when multiple interactions with the same
-                # atom patterns need to be written to ff
-                new_meta.update({"version": idx})
-                new_meta.update({"comment": "link"})
-                had_parameters.append(interaction.parameters)
-                # map atoms to proper atomnames ..
-                link.interactions[inter_type].append(interaction)
-
-        links.append(link)
-    #print(links)
-    return links
-
-def handle_chirality(molecule, chiral_centers):
-    pass
-
-def hcount(molecule, node):
-    hcounter = 0
-    for node in molecule.neighbors(node):
-        if molecule.nodes[node]["element"] == "H":
-            hcounter+= 1
-    return hcounter
-
-def set_charges(block, res_graph, name):
-    resnames = nx.get_node_attributes(res_graph, 'resname')
-    centrality = nx.betweenness_centrality(res_graph)
-    score = -1
-    most_central_node = None
-    for node, resname in resnames.items():
-        if resname == name and centrality[node] > score:
-            score = centrality[node]
-            most_central_node = node
-    charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge')
-    atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname')
-    charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()}
-    for node in block.nodes:
-        block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']]
-    return block
 
 def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0):
     """
     Main executable for itp to ff tool.
     """
+    # read the topology file
     if itppath.suffix == ".top":
-        # read the topology file
         top = Topology.from_gmx_topfile(itppath, name="test")
         mol = top.molecules[0].molecule
-
+    # read itp file
     if itppath.suffix == ".itp":
         with open(itppath, "r") as _file:
             lines = _file.readlines()
@@ -264,15 +57,10 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0
         new_block.nrexcl = mol.nrexcl
         force_field.blocks[name] = new_block
         set_charges(new_block, res_graph, name)
-        #print("here")
         if itppath.suffix == ".top":
             equalize_charges(new_block, top)
 
-#    for node in mol.nodes:
-#        print(mol.nodes[node])
-
     force_field.links = extract_links(mol)
 
-    print("-----")
     with open(outpath, "w") as filehandle:
         ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write()
diff --git a/polyply/src/molecule_utils.py b/polyply/src/molecule_utils.py
new file mode 100644
index 00000000..7da9ce43
--- /dev/null
+++ b/polyply/src/molecule_utils.py
@@ -0,0 +1,260 @@
+# Copyright 2022 University of Groningen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+from collections import defaultdict
+import numpy as np
+import networkx as nx
+import vermouth
+from vermouth.molecule import Interaction
+from polyply.tests.test_lib_files import _interaction_equal
+from .topology import replace_defined_interaction
+
+def diffs_to_prefix(atoms, resid_diffs):
+    """
+    Given a list of atoms and corresponding differences
+    between their resids, generate the offset prefix for
+    the atomnames according to the vermouth sepcific offset
+    language.
+
+    The reference atom must have resid_diff value of 0.
+    Other atoms either get - or + signs
+    depending on their resid offset.
+
+    Parameters
+    ----------
+    atoms: abc.itertable[str]
+    resid_diff: abc.itertable[int]
+        the differences in resid with respeect to
+        the smallest/largest resid which is 0
+
+    Returns
+    -------
+    abc.itertable
+        list with prefixed atom names
+    """
+    prefixed_atoms = []
+    for atom, diff in zip(atoms, resid_diffs):
+        if diff > 0:
+            prefix = "".join(["+" for i in range(0, diff)])
+        else:
+            prefix = "".join(["-" for i in range(diff, 0)])
+        prefixed_atoms.append(prefix + atom)
+    return prefixed_atoms
+
+def _extract_edges_from_shortest_path(atoms, block, min_resid):
+    """
+    Given a list atoms generate a list of edges correspoding to
+    all edges required to connect all atoms by at least one
+    shortest path. Edges are retunred on atomname basis with
+    prefix relative to the `min_resid`. See diffs_to_prefix.
+
+    Paramters:
+    ----------
+    atoms: abc.itertable
+        the atoms to collect edges for
+    block: :class:`vermouth.molecule.Block`
+        the molecule which to servey for edges
+    min_resid: int
+        the resid to which the prefix indicate relative resid
+        distance
+
+    Returns
+    -------
+    list[tuple]
+        the edge list by atomname with prefix indicating relative
+        residue distance to min_resid
+    """
+    edges = []
+    had_edges = []
+    final_atoms = {}
+    resnames = {}
+    for origin, target in itertools.combinations(atoms, r=2):
+        path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0]
+        for edge in zip(path[:-1], path[1:]):
+            if edge not in had_edges:
+                resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid
+                atom_names = [block.nodes[node]["atomname"] for node in edge]
+                link_names = diffs_to_prefix(atom_names, resid_diffs)
+                final_atoms.update(dict(zip(edge, link_names)))
+                edges.append(link_names)
+                had_edges.append(edge)
+                resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge]))
+    return final_atoms, edges, resnames
+
+
+def extract_links(molecule):
+    """
+    Given a molecule that has the resid and resname attributes
+    correctly set, extract the interactions which span more than
+    a single residue and generate a link.
+
+    Parameters
+    ----------
+    molecule: :class:`vermouth.molecule.Molecule`
+        the molecule from which to extract interactions
+
+    Returns
+    -------
+    list[:class:`vermouth.molecule.Links`]
+        a list with a links found
+    """
+    links = []
+    # patterns are a sqeuence of atoms that define an interaction
+    # sometimes multiple interactions are defined for one pattern
+    # in that case they are all collected in this dictionary
+    patterns = defaultdict(dict)
+    # for each found pattern the resnames are collected; this is important
+    # because the same pattern may apply to residues with different name
+    resnames_for_patterns = defaultdict(dict)
+    link_atoms_for_patterns = defaultdict(list)
+    # as additional safe-gaurd against false links we also collect the edges
+    # that span the interaction by finding the shortest simple path between
+    # all atoms in patterns. Note that the atoms in patterns not always have
+    # to be directly bonded. For example, pairs are not directly bonded and
+    # can span multiple residues
+    for inter_type in molecule.interactions:
+        for kdx, interaction in enumerate(molecule.interactions[inter_type]):
+            # extract resids and resname corresponding to interaction atoms
+            resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms])
+            resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms]
+            # compute the resid offset to be used for the atom prefixes
+            min_resid = min(resids)
+            diff = resids - min_resid
+            pattern = tuple(set(list(zip(diff, resnames))))
+
+            # in this case all interactions are in a block and we skip
+            if np.sum(diff) == 0:
+                continue
+
+            # we collect the edges corresponding to the simple paths between pairs of atoms
+            # in the interaction
+            mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid)
+            link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()}
+            link_atoms =  [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms]
+            link_inter = Interaction(atoms=link_atoms,
+                                     parameters=interaction.parameters,
+                                     meta={})
+
+            # here we deal with filtering redundancy
+            if pattern in patterns and inter_type in patterns[pattern]:
+                for other_inter in patterns[pattern].get(inter_type, []):
+                    if _interaction_equal(other_inter, link_inter, inter_type):
+                        break
+                else:
+                    patterns[pattern][inter_type].append(link_inter)
+                    resnames_for_patterns[pattern].update(resnames)
+                    link_atoms_for_patterns[pattern] += link_atoms
+            else:
+                patterns[pattern][inter_type] = [link_inter]
+                resnames_for_patterns[pattern].update(resnames)
+                link_atoms_for_patterns[pattern] += link_atoms
+
+    # we make new links for each unique interaction per type
+    for pattern in patterns:
+        link = vermouth.molecule.Link()
+        link.add_nodes_from(set(link_atoms_for_patterns[pattern]))
+        resnames = resnames_for_patterns[pattern]
+        nx.set_node_attributes(link, resnames, "resname")
+
+        had_parameters = []
+        for inter_type, inters in patterns[pattern].items():
+            for idx, interaction in enumerate(inters):
+                #new_parameters = interaction.parameters
+                new_meta = interaction.meta
+                #new_atoms = interaction.atoms
+                # to account for the fact when multiple interactions with the same
+                # atom patterns need to be written to ff
+                new_meta.update({"version": idx})
+                new_meta.update({"comment": "link"})
+                had_parameters.append(interaction.parameters)
+                # map atoms to proper atomnames ..
+                link.interactions[inter_type].append(interaction)
+        links.append(link)
+    return links
+
+
+def _relabel_interaction_atoms(interaction, mapping):
+    """
+    Relables the atoms in interaction according to the
+    rules defined in mapping.
+
+    Parameters
+    ----------
+    interaction: `vermouth.molecule.Interaction`
+    mapping: `:class:dict`
+
+    Returns
+    -------
+    interaction: `vermouth.molecule.Interaction`
+        the new interaction with updated atoms
+    """
+    new_atoms = [mapping[atom] for atom in interaction.atoms]
+    new_interaction = interaction._replace(atoms=new_atoms)
+    return new_interaction
+
+
+def extract_block(molecule, nodes, defines={}):
+    """
+    Given a `vermouth.molecule` and a `resname`
+    extract the information of a block from the
+    molecule definition and replace all defines
+    if any are found.
+
+    Parameters
+    ----------
+    molecule:  :class:vermouth.molecule.Molecule
+    nodes: abc.hashable
+        the nodes corresponding to the block to
+        extract
+    defines:   dict
+      dict of type define: value
+
+    Returns
+    -------
+    :class:vermouth.molecule.Block
+    """
+    resid = molecule.nodes[nodes[0]]["resid"]
+    resname = molecule.nodes[nodes[0]]["resname"]
+    block = vermouth.molecule.Block()
+
+    # select all nodes with the same first resid and
+    # make sure the block node labels are atomnames
+    # also build a correspondance dict between node
+    # label in the molecule and in the block for
+    # relabeling the interactions
+    mapping = {}
+    for node in nodes:
+        attr_dict = molecule.nodes[node]
+        if attr_dict["resid"] == resid:
+            block.add_node(attr_dict["atomname"], **attr_dict)
+            mapping[node] = attr_dict["atomname"]
+
+    for inter_type in molecule.interactions:
+        for interaction in molecule.interactions[inter_type]:
+            if all(atom in mapping for atom in interaction.atoms):
+                interaction = replace_defined_interaction(interaction, defines)
+                interaction = _relabel_interaction_atoms(interaction, mapping)
+                block.interactions[inter_type].append(interaction)
+
+    for inter_type in ["bonds", "constraints", "virtual_sitesn",
+                       "virtual_sites2", "virtual_sites3", "virtual_sites4"]:
+        block.make_edges_from_interaction_type(inter_type)
+
+    if not nx.is_connected(block):
+        msg = ('\n Residue {} with id {} consistes of two disconnected parts. '
+               'Make sure all atoms/particles in a residue are connected by bonds,'
+               ' constraints or virual-sites.')
+        raise IOError(msg.format(resname, resid))
+
+    return block
diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py
index 6c21fc63..1d852cb7 100644
--- a/polyply/tests/test_generate_templates.py
+++ b/polyply/tests/test_generate_templates.py
@@ -27,10 +27,10 @@
 from polyply.src.linalg_functions import center_of_geometry
 from polyply.src.generate_templates import (find_atoms,
                                             _expand_inital_coords,
-                                            _relabel_interaction_atoms,
                                             compute_volume, map_from_CoG,
-                                            extract_block, GenerateTemplates,
-					    find_interaction_involving)
+                                            GenerateTemplates,
+					                        find_interaction_involving)
+from polyply.src.molecule_utils import (extract_block, _relabel_interaction_atoms)
 
 class TestGenTemps: