From 4ed29798a32dd85f8cb9bd3ae4e66a2a1c46e2b8 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 22 Nov 2023 16:09:33 +0100 Subject: [PATCH] move extract block to molecule utils --- polyply/src/generate_templates.py | 76 +------ polyply/src/itp_to_ff.py | 218 +------------------ polyply/src/molecule_utils.py | 260 +++++++++++++++++++++++ polyply/tests/test_generate_templates.py | 6 +- 4 files changed, 267 insertions(+), 293 deletions(-) create mode 100644 polyply/src/molecule_utils.py diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py index 509663d7..d5ccbe23 100644 --- a/polyply/src/generate_templates.py +++ b/polyply/src/generate_templates.py @@ -19,8 +19,8 @@ from .processor import Processor from .linalg_functions import (u_vect, center_of_geometry, radius_of_gyration) -from .topology import replace_defined_interaction from .linalg_functions import dih +from .molecule_utils import extract_block """ Processor generating coordinates for all residues of a meta_molecule matching those in the meta_molecule.molecule attribute. @@ -216,80 +216,6 @@ def map_from_CoG(coords): return out_vectors -def _relabel_interaction_atoms(interaction, mapping): - """ - Relables the atoms in interaction according to the - rules defined in mapping. - - Parameters - ---------- - interaction: `vermouth.molecule.Interaction` - mapping: `:class:dict` - - Returns - ------- - interaction: `vermouth.molecule.Interaction` - the new interaction with updated atoms - """ - new_atoms = [mapping[atom] for atom in interaction.atoms] - new_interaction = interaction._replace(atoms=new_atoms) - return new_interaction - -def extract_block(molecule, nodes, defines={}): - """ - Given a `vermouth.molecule` and a `resname` - extract the information of a block from the - molecule definition and replace all defines - if any are found. - - Parameters - ---------- - molecule: :class:vermouth.molecule.Molecule - nodes: abc.hashable - the nodes corresponding to the block to - extract - defines: dict - dict of type define: value - - Returns - ------- - :class:vermouth.molecule.Block - """ - resid = molecule.nodes[nodes[0]]["resid"] - resname = molecule.nodes[nodes[0]]["resname"] - block = vermouth.molecule.Block() - - # select all nodes with the same first resid and - # make sure the block node labels are atomnames - # also build a correspondance dict between node - # label in the molecule and in the block for - # relabeling the interactions - mapping = {} - for node in nodes: - attr_dict = molecule.nodes[node] - if attr_dict["resid"] == resid: - block.add_node(attr_dict["atomname"], **attr_dict) - mapping[node] = attr_dict["atomname"] - - for inter_type in molecule.interactions: - for interaction in molecule.interactions[inter_type]: - if all(atom in mapping for atom in interaction.atoms): - interaction = replace_defined_interaction(interaction, defines) - interaction = _relabel_interaction_atoms(interaction, mapping) - block.interactions[inter_type].append(interaction) - - for inter_type in ["bonds", "constraints", "virtual_sitesn", - "virtual_sites2", "virtual_sites3", "virtual_sites4"]: - block.make_edges_from_interaction_type(inter_type) - - if not nx.is_connected(block): - msg = ('\n Residue {} with id {} consistes of two disconnected parts. ' - 'Make sure all atoms/particles in a residue are connected by bonds,' - ' constraints or virual-sites.') - raise IOError(msg.format(resname, resid)) - - return block - class GenerateTemplates(Processor): """ This processor takes a a class:`polyply.src.MetaMolecule` and diff --git a/polyply/src/itp_to_ff.py b/polyply/src/itp_to_ff.py index d8f6d0b0..dc03725c 100644 --- a/polyply/src/itp_to_ff.py +++ b/polyply/src/itp_to_ff.py @@ -11,234 +11,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import itertools -from collections import defaultdict import numpy as np import networkx as nx import pysmiles import vermouth from vermouth.forcefield import ForceField -from vermouth.molecule import Interaction from vermouth.gmx.itp_read import read_itp from polyply.src.topology import Topology -from polyply.src.generate_templates import extract_block +from polyply.src.molecule_utils import extract_block, extract_links from polyply.src.fragment_finder import FragmentFinder from polyply.src.ffoutput import ForceFieldDirectiveWriter from polyply.src.charges import equalize_charges -from polyply.tests.test_lib_files import _interaction_equal - -def diffs_to_prefix(atoms, resid_diffs): - """ - Given a list of atoms and corresponding differences - between their resids, generate the offset prefix for - the atomnames according to the vermouth sepcific offset - language. - - The reference atom must have resid_diff value of 0. - Other atoms either get - or + signs - depending on their resid offset. - - Parameters - ---------- - atoms: abc.itertable[str] - resid_diff: abc.itertable[int] - the differences in resid with respeect to - the smallest/largest resid which is 0 - - Returns - ------- - abc.itertable - list with prefixed atom names - """ - prefixed_atoms = [] - for atom, diff in zip(atoms, resid_diffs): - if diff > 0: - prefix = "".join(["+" for i in range(0, diff)]) - else: - prefix = "".join(["-" for i in range(diff, 0)]) - prefixed_atoms.append(prefix + atom) - return prefixed_atoms - -def _extract_edges_from_shortest_path(atoms, block, min_resid): - """ - Given a list atoms generate a list of edges correspoding to - all edges required to connect all atoms by at least one - shortest path. Edges are retunred on atomname basis with - prefix relative to the `min_resid`. See diffs_to_prefix. - - Paramters: - ---------- - atoms: abc.itertable - the atoms to collect edges for - block: :class:`vermouth.molecule.Block` - the molecule which to servey for edges - min_resid: int - the resid to which the prefix indicate relative resid - distance - - Returns - ------- - list[tuple] - the edge list by atomname with prefix indicating relative - residue distance to min_resid - """ - edges = [] - had_edges = [] - final_atoms = {} - resnames = {} - for origin, target in itertools.combinations(atoms, r=2): - path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0] - for edge in zip(path[:-1], path[1:]): - if edge not in had_edges: - resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid - atom_names = [block.nodes[node]["atomname"] for node in edge] - link_names = diffs_to_prefix(atom_names, resid_diffs) - final_atoms.update(dict(zip(edge, link_names))) - edges.append(link_names) - had_edges.append(edge) - resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge])) - return final_atoms, edges, resnames - -def extract_links(molecule): - """ - Given a molecule that has the resid and resname attributes - correctly set, extract the interactions which span more than - a single residue and generate a link. - - Parameters - ---------- - molecule: :class:`vermouth.molecule.Molecule` - the molecule from which to extract interactions - - Returns - ------- - list[:class:`vermouth.molecule.Links`] - a list with a links found - """ - links = [] - # patterns are a sqeuence of atoms that define an interaction - # sometimes multiple interactions are defined for one pattern - # in that case they are all collected in this dictionary - patterns = defaultdict(dict) - # for each found pattern the resnames are collected; this is important - # because the same pattern may apply to residues with different name - resnames_for_patterns = defaultdict(dict) - link_atoms_for_patterns = defaultdict(list) - # as additional safe-gaurd against false links we also collect the edges - # that span the interaction by finding the shortest simple path between - # all atoms in patterns. Note that the atoms in patterns not always have - # to be directly bonded. For example, pairs are not directly bonded and - # can span multiple residues - #edges_for_patterns = defaultdict(list) - for inter_type in molecule.interactions: - #print("TYPE", inter_type) - for kdx, interaction in enumerate(molecule.interactions[inter_type]): - # extract resids and resname corresponding to interaction atoms - resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms]) - resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms] - # compute the resid offset to be used for the atom prefixes - min_resid = min(resids) - diff = resids - min_resid - pattern = tuple(set(list(zip(diff, resnames)))) - - # in this case all interactions are in a block and we skip - if np.sum(diff) == 0: - continue - - # we collect the edges corresponding to the simple paths between pairs of atoms - # in the interaction - mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid) - #print(kdx, resnames) - link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()} - link_atoms = [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms] - link_inter = Interaction(atoms=link_atoms, - parameters=interaction.parameters, - meta={}) - #print("inter number", kdx) - # here we deal with filtering redundancy - if pattern in patterns and inter_type in patterns[pattern]: - #print(pattern) - # if pattern == ((0, 'PEO'), (1, 'PEO')): - # print(kdx, link_inter.atoms, patterns[pattern].get(inter_type, []), "\n") - - for other_inter in patterns[pattern].get(inter_type, []): - if _interaction_equal(other_inter, link_inter, inter_type): - break - else: - patterns[pattern][inter_type].append(link_inter) - resnames_for_patterns[pattern].update(resnames) - link_atoms_for_patterns[pattern] += link_atoms - else: - patterns[pattern][inter_type] = [link_inter] - resnames_for_patterns[pattern].update(resnames) - #edges_for_patterns[pattern] += edges - link_atoms_for_patterns[pattern] += link_atoms - #print('resnames', resnames_for_patterns[pattern], '\n') -# for inter in patterns[list(patterns.keys())[0]]['angles']: -# print(inter) - # we make new links for each unique interaction per type - for pattern in patterns: - link = vermouth.molecule.Link() - link.add_nodes_from(set(link_atoms_for_patterns[pattern])) - #link.add_edges_from(edges_for_patterns[pattern]) - resnames = resnames_for_patterns[pattern] - # print(resnames) - nx.set_node_attributes(link, resnames, "resname") - - had_parameters = [] - for inter_type, inters in patterns[pattern].items(): - for idx, interaction in enumerate(inters): - #new_parameters = interaction.parameters - new_meta = interaction.meta - #new_atoms = interaction.atoms - # to account for the fact when multiple interactions with the same - # atom patterns need to be written to ff - new_meta.update({"version": idx}) - new_meta.update({"comment": "link"}) - had_parameters.append(interaction.parameters) - # map atoms to proper atomnames .. - link.interactions[inter_type].append(interaction) - - links.append(link) - #print(links) - return links - -def handle_chirality(molecule, chiral_centers): - pass - -def hcount(molecule, node): - hcounter = 0 - for node in molecule.neighbors(node): - if molecule.nodes[node]["element"] == "H": - hcounter+= 1 - return hcounter - -def set_charges(block, res_graph, name): - resnames = nx.get_node_attributes(res_graph, 'resname') - centrality = nx.betweenness_centrality(res_graph) - score = -1 - most_central_node = None - for node, resname in resnames.items(): - if resname == name and centrality[node] > score: - score = centrality[node] - most_central_node = node - charges_tmp = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'charge') - atomnames = nx.get_node_attributes(res_graph.nodes[most_central_node]['graph'], 'atomname') - charges = {atomname: charges_tmp[node] for node, atomname in atomnames.items()} - for node in block.nodes: - block.nodes[node]['charge'] = charges[block.nodes[node]['atomname']] - return block def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0): """ Main executable for itp to ff tool. """ + # read the topology file if itppath.suffix == ".top": - # read the topology file top = Topology.from_gmx_topfile(itppath, name="test") mol = top.molecules[0].molecule - + # read itp file if itppath.suffix == ".itp": with open(itppath, "r") as _file: lines = _file.readlines() @@ -264,15 +57,10 @@ def itp_to_ff(itppath, fragment_smiles, resnames, term_prefix, outpath, charge=0 new_block.nrexcl = mol.nrexcl force_field.blocks[name] = new_block set_charges(new_block, res_graph, name) - #print("here") if itppath.suffix == ".top": equalize_charges(new_block, top) -# for node in mol.nodes: -# print(mol.nodes[node]) - force_field.links = extract_links(mol) - print("-----") with open(outpath, "w") as filehandle: ForceFieldDirectiveWriter(forcefield=force_field, stream=filehandle).write() diff --git a/polyply/src/molecule_utils.py b/polyply/src/molecule_utils.py new file mode 100644 index 00000000..7da9ce43 --- /dev/null +++ b/polyply/src/molecule_utils.py @@ -0,0 +1,260 @@ +# Copyright 2022 University of Groningen +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools +from collections import defaultdict +import numpy as np +import networkx as nx +import vermouth +from vermouth.molecule import Interaction +from polyply.tests.test_lib_files import _interaction_equal +from .topology import replace_defined_interaction + +def diffs_to_prefix(atoms, resid_diffs): + """ + Given a list of atoms and corresponding differences + between their resids, generate the offset prefix for + the atomnames according to the vermouth sepcific offset + language. + + The reference atom must have resid_diff value of 0. + Other atoms either get - or + signs + depending on their resid offset. + + Parameters + ---------- + atoms: abc.itertable[str] + resid_diff: abc.itertable[int] + the differences in resid with respeect to + the smallest/largest resid which is 0 + + Returns + ------- + abc.itertable + list with prefixed atom names + """ + prefixed_atoms = [] + for atom, diff in zip(atoms, resid_diffs): + if diff > 0: + prefix = "".join(["+" for i in range(0, diff)]) + else: + prefix = "".join(["-" for i in range(diff, 0)]) + prefixed_atoms.append(prefix + atom) + return prefixed_atoms + +def _extract_edges_from_shortest_path(atoms, block, min_resid): + """ + Given a list atoms generate a list of edges correspoding to + all edges required to connect all atoms by at least one + shortest path. Edges are retunred on atomname basis with + prefix relative to the `min_resid`. See diffs_to_prefix. + + Paramters: + ---------- + atoms: abc.itertable + the atoms to collect edges for + block: :class:`vermouth.molecule.Block` + the molecule which to servey for edges + min_resid: int + the resid to which the prefix indicate relative resid + distance + + Returns + ------- + list[tuple] + the edge list by atomname with prefix indicating relative + residue distance to min_resid + """ + edges = [] + had_edges = [] + final_atoms = {} + resnames = {} + for origin, target in itertools.combinations(atoms, r=2): + path = list(nx.shortest_simple_paths(block, source=origin, target=target))[0] + for edge in zip(path[:-1], path[1:]): + if edge not in had_edges: + resid_diffs = np.array([block.nodes[node]['resid'] for node in edge]) - min_resid + atom_names = [block.nodes[node]["atomname"] for node in edge] + link_names = diffs_to_prefix(atom_names, resid_diffs) + final_atoms.update(dict(zip(edge, link_names))) + edges.append(link_names) + had_edges.append(edge) + resnames.update(zip(link_names, [ block.nodes[node]["resname"] for node in edge])) + return final_atoms, edges, resnames + + +def extract_links(molecule): + """ + Given a molecule that has the resid and resname attributes + correctly set, extract the interactions which span more than + a single residue and generate a link. + + Parameters + ---------- + molecule: :class:`vermouth.molecule.Molecule` + the molecule from which to extract interactions + + Returns + ------- + list[:class:`vermouth.molecule.Links`] + a list with a links found + """ + links = [] + # patterns are a sqeuence of atoms that define an interaction + # sometimes multiple interactions are defined for one pattern + # in that case they are all collected in this dictionary + patterns = defaultdict(dict) + # for each found pattern the resnames are collected; this is important + # because the same pattern may apply to residues with different name + resnames_for_patterns = defaultdict(dict) + link_atoms_for_patterns = defaultdict(list) + # as additional safe-gaurd against false links we also collect the edges + # that span the interaction by finding the shortest simple path between + # all atoms in patterns. Note that the atoms in patterns not always have + # to be directly bonded. For example, pairs are not directly bonded and + # can span multiple residues + for inter_type in molecule.interactions: + for kdx, interaction in enumerate(molecule.interactions[inter_type]): + # extract resids and resname corresponding to interaction atoms + resids = np.array([molecule.nodes[atom]["resid"] for atom in interaction.atoms]) + resnames = [molecule.nodes[atom]["resname"] for atom in interaction.atoms] + # compute the resid offset to be used for the atom prefixes + min_resid = min(resids) + diff = resids - min_resid + pattern = tuple(set(list(zip(diff, resnames)))) + + # in this case all interactions are in a block and we skip + if np.sum(diff) == 0: + continue + + # we collect the edges corresponding to the simple paths between pairs of atoms + # in the interaction + mol_atoms_to_link_atoms, edges, resnames = _extract_edges_from_shortest_path(interaction.atoms, molecule, min_resid) + link_to_mol_atoms = {value:key for key, value in mol_atoms_to_link_atoms.items()} + link_atoms = [mol_atoms_to_link_atoms[atom] for atom in interaction.atoms] + link_inter = Interaction(atoms=link_atoms, + parameters=interaction.parameters, + meta={}) + + # here we deal with filtering redundancy + if pattern in patterns and inter_type in patterns[pattern]: + for other_inter in patterns[pattern].get(inter_type, []): + if _interaction_equal(other_inter, link_inter, inter_type): + break + else: + patterns[pattern][inter_type].append(link_inter) + resnames_for_patterns[pattern].update(resnames) + link_atoms_for_patterns[pattern] += link_atoms + else: + patterns[pattern][inter_type] = [link_inter] + resnames_for_patterns[pattern].update(resnames) + link_atoms_for_patterns[pattern] += link_atoms + + # we make new links for each unique interaction per type + for pattern in patterns: + link = vermouth.molecule.Link() + link.add_nodes_from(set(link_atoms_for_patterns[pattern])) + resnames = resnames_for_patterns[pattern] + nx.set_node_attributes(link, resnames, "resname") + + had_parameters = [] + for inter_type, inters in patterns[pattern].items(): + for idx, interaction in enumerate(inters): + #new_parameters = interaction.parameters + new_meta = interaction.meta + #new_atoms = interaction.atoms + # to account for the fact when multiple interactions with the same + # atom patterns need to be written to ff + new_meta.update({"version": idx}) + new_meta.update({"comment": "link"}) + had_parameters.append(interaction.parameters) + # map atoms to proper atomnames .. + link.interactions[inter_type].append(interaction) + links.append(link) + return links + + +def _relabel_interaction_atoms(interaction, mapping): + """ + Relables the atoms in interaction according to the + rules defined in mapping. + + Parameters + ---------- + interaction: `vermouth.molecule.Interaction` + mapping: `:class:dict` + + Returns + ------- + interaction: `vermouth.molecule.Interaction` + the new interaction with updated atoms + """ + new_atoms = [mapping[atom] for atom in interaction.atoms] + new_interaction = interaction._replace(atoms=new_atoms) + return new_interaction + + +def extract_block(molecule, nodes, defines={}): + """ + Given a `vermouth.molecule` and a `resname` + extract the information of a block from the + molecule definition and replace all defines + if any are found. + + Parameters + ---------- + molecule: :class:vermouth.molecule.Molecule + nodes: abc.hashable + the nodes corresponding to the block to + extract + defines: dict + dict of type define: value + + Returns + ------- + :class:vermouth.molecule.Block + """ + resid = molecule.nodes[nodes[0]]["resid"] + resname = molecule.nodes[nodes[0]]["resname"] + block = vermouth.molecule.Block() + + # select all nodes with the same first resid and + # make sure the block node labels are atomnames + # also build a correspondance dict between node + # label in the molecule and in the block for + # relabeling the interactions + mapping = {} + for node in nodes: + attr_dict = molecule.nodes[node] + if attr_dict["resid"] == resid: + block.add_node(attr_dict["atomname"], **attr_dict) + mapping[node] = attr_dict["atomname"] + + for inter_type in molecule.interactions: + for interaction in molecule.interactions[inter_type]: + if all(atom in mapping for atom in interaction.atoms): + interaction = replace_defined_interaction(interaction, defines) + interaction = _relabel_interaction_atoms(interaction, mapping) + block.interactions[inter_type].append(interaction) + + for inter_type in ["bonds", "constraints", "virtual_sitesn", + "virtual_sites2", "virtual_sites3", "virtual_sites4"]: + block.make_edges_from_interaction_type(inter_type) + + if not nx.is_connected(block): + msg = ('\n Residue {} with id {} consistes of two disconnected parts. ' + 'Make sure all atoms/particles in a residue are connected by bonds,' + ' constraints or virual-sites.') + raise IOError(msg.format(resname, resid)) + + return block diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py index 6c21fc63..1d852cb7 100644 --- a/polyply/tests/test_generate_templates.py +++ b/polyply/tests/test_generate_templates.py @@ -27,10 +27,10 @@ from polyply.src.linalg_functions import center_of_geometry from polyply.src.generate_templates import (find_atoms, _expand_inital_coords, - _relabel_interaction_atoms, compute_volume, map_from_CoG, - extract_block, GenerateTemplates, - find_interaction_involving) + GenerateTemplates, + find_interaction_involving) +from polyply.src.molecule_utils import (extract_block, _relabel_interaction_atoms) class TestGenTemps: