Skip to content

Commit

Permalink
Merge branch 'marrink-lab:master' into extra-aa
Browse files Browse the repository at this point in the history
  • Loading branch information
csbrasnett authored Oct 4, 2024
2 parents 3c971f8 + f8fff22 commit beae068
Show file tree
Hide file tree
Showing 22 changed files with 1,979 additions and 1,793 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ tests/test_data/gen_seq/output/*.itp
doc/build
doc/source/api
doc/source/.doctrees

/polyply/data/martini3007/*
7 changes: 7 additions & 0 deletions bin/polyply
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
dna_group = parser_gen_itp.add_argument_group('DNA specifc options')
dna_group.add_argument('-dsdna', dest='dsdna', action='store_true',
help='complement single sequence to dsDNA sequence')
modifications_group = parser_gen_itp.add_argument_group('Modifications group')
modifications_group.add_argument('-mods', dest='mods', action='append',
default=[], type=lambda s: s.split(":"),
help=('Add a modification to a residue. The format is '
'<resname><resid>:modification_name, '
'e.g. ASP1:N-ter')
)

parser_gen_itp.set_defaults(func=gen_itp)

Expand Down
53 changes: 0 additions & 53 deletions polyply/data/martini3/aminoacids.ff
Original file line number Diff line number Diff line change
Expand Up @@ -803,59 +803,6 @@ CA BB
[ edges ]
CA BB

[ link ]
resname $protein_resnames
[ atoms ]
CA { }
BB { }
+BB { }
[ exclusions ]
#meta {"comment": "CA-BB"}
CA +BB
[ edges ]
BB +BB

;; Protein termini. These links should be applied last.
[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": 1}}
[ non-edges ]
BB -BB

[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": -1}}
[ non-edges ]
BB +BB

[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0}}
[ non-edges ]
BB +BB
BB -BB

[ link ]
resname $protein_resnames
[ molmeta ]
neutral_termini true
[ atoms ]
BB {"replace": {"atype": "P5", "charge": 0}}
[ non-edges ]
BB -BB

[ link ]
resname $protein_resnames
[ molmeta ]
neutral_termini true
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0}}
[ non-edges ]
BB +BB

;; Cystein bridge
[ link ]
resname "CYS"
Expand Down
37 changes: 37 additions & 0 deletions polyply/data/martini3/modifications.ff
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[ citations ]
Martini3

[ modification ]
C-ter
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": -1.0}}

[ modification ]
N-ter
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": 1.0}}

[ modification ]
zwitter
[ atoms ]
BB {"replace": {"atype": "Q5"}}

[ modification ]
COOH-ter
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0.0}}

[ modification ]
NH2-ter
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0.0}}

[ modification ]
CCAP-ter
[ atoms ]
BB {"replace": {"atype": "P1", "charge": 0.0}}

[ modification ]
NCAP-ter
[ atoms ]
BB {"replace": {"atype": "P2", "charge": 0.0}}
16 changes: 8 additions & 8 deletions polyply/src/apply_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,16 @@ def match_link_and_residue_atoms(meta_molecule, link, link_to_resid):
# relative resid has been asserted before so we can
# exclude it here
ignore = ['order', 'charge_group', 'replace', 'resid']
matchs = list(find_atoms(block, ignore=ignore, **attrs))
matches = list(find_atoms(block, ignore=ignore, **attrs))

if len(matchs) == 1:
link_to_mol[node] = matchs[0]
elif len(matchs) == 0:
msg = "Found no matchs for node {} in resiue {}. Cannot apply link."
if len(matches) == 1:
link_to_mol[node] = matches[0]
elif len(matches) == 0:
msg = "Found no matches for node {} in resiue {}. Cannot apply link."
raise MatchError(msg.format(node, resid))
else:
msg = "Found {} matches for node {} in resiue {}. Cannot apply link."
raise MatchError(msg.format(len(matchs), node, resid))
raise MatchError(msg.format(len(matches), node, resid))

return link_to_mol

Expand Down Expand Up @@ -475,8 +475,8 @@ def run_molecule(self, meta_molecule):
res_link,
node_match=_res_match,
edge_match=_linktype_match)
raw_matchs = GM.subgraph_isomorphisms_iter()
for match in raw_matchs:
raw_matches = GM.subgraph_isomorphisms_iter()
for match in raw_matches:
nodes = match.keys()
resids =[meta_molecule.nodes[node]["resid"] for node in nodes]
orders = [ res_link.nodes[match[node]]["order"] for node in nodes]
Expand Down
116 changes: 113 additions & 3 deletions polyply/src/apply_modifications.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 University of Groningen
# Copyright 2024 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,6 +11,116 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import vermouth.molecule
from vermouth.log_helpers import StyleAdapter, get_logger
from vermouth.processors.annotate_mut_mod import parse_residue_spec
LOGGER = StyleAdapter(get_logger(__name__))
from .processor import Processor

class ApplyModifications():
pass
protein_resnames = "GLY|ALA|CYS|VAL|LEU|ILE|MET|PRO|HYP|ASN|GLN|ASP|ASP0|GLU|GLU0|THR|SER|LYS|LYS0|ARG|ARG0|HIS|HISH|PHE|TYR|TRP"

def _patch_protein_termini(meta_molecule, ter_mods=['N-ter', 'C-ter']):
"""
make a resspec for a protein with correct terminal modification
"""
protein_termini = [({'resid': 1, 'resname': meta_molecule.nodes[0]['resname']}, ter_mods[0])]
max_resid = meta_molecule.max_resid
last_node = max_resid - 1
last_resname = meta_molecule.nodes[last_node]['resname']
if len(ter_mods) > 1:
last_mod = ({'resid': max_resid, 'resname': last_resname}, ter_mods[1])
protein_termini.append(last_mod)
else:
# if only one mod in ter_mods, apply the mod to both start and end residue
LOGGER.info("Only one terminal modification specified. "
f"Will apply {ter_mods[0]} to both {meta_molecule.nodes[0]['resname']}1 and {last_resname}{max_resid}")
protein_termini.append(({'resid': max_resid, 'resname': last_resname}, ter_mods[0]))

return protein_termini


def apply_mod(meta_molecule, modifications):
"""
Apply a terminal modification.
Note this assumes that the modification is additive, ie. no atoms are
removed
Parameters
----------
meta_molecule: :class:`polyply.src.meta_molecule.MetaMolecule`
modifications: list
list of (resspec, modification) pairs to apply
Returns
----------
meta_molecule
"""

molecule = meta_molecule.molecule

if not molecule.force_field.modifications:
LOGGER.warning('No modifications present in forcefield, none will be applied')
return meta_molecule

for target, desired_mod in modifications:

target_resid = target['resid']

mod_atoms = {}
for mod_atom in molecule.force_field.modifications[desired_mod].atoms:
if 'replace' in mod_atom:
mod_atoms[mod_atom['atomname']] = mod_atom['replace']
else:
mod_atoms[mod_atom['atomname']] = {}

target_residue = meta_molecule.nodes[target_resid - 1]
# takes care to skip all residues that come from an itp file
if not target_residue.get('from_itp', 'False'):
LOGGER.warning("meta_molecule has come from itp. Will not attempt to modify.")
continue
# checks that the resname is a protein resname as defined above
if not vermouth.molecule.attributes_match(target_residue,
{'resname': vermouth.molecule.Choice(protein_resnames.split("|"))}):
LOGGER.warning("The resname of your target residue is not recognised a protein resname. "
"Will not attempt to modify.")
continue

anum_dict = {}
# this gives you the correct node indices
for node in target_residue['graph'].nodes:
aname = molecule.nodes[node]['atomname']
if aname in mod_atoms.keys():
anum_dict[aname] = node + 1
for key, value in mod_atoms[aname].items():
molecule.nodes[node][key] = value

mod_interactions = molecule.force_field.modifications[desired_mod].interactions
for i in mod_interactions:
for j in molecule.force_field.modifications[desired_mod].interactions[i]:
molecule.add_interaction(i,
(anum_dict[j.atoms[0]]-1,
anum_dict[j.atoms[1]]-1),
j.parameters,
meta=j.meta)

return meta_molecule

class ApplyModifications(Processor):
"""
This processor takes a class:`polyply.src.MetaMolecule` and
based on modifications defined in the `force-field` attribute of the
MetaMolecule applies them when appropriate.
"""
def __init__(self, meta_molecule, modifications=[]):
self.target_mods = []
for resspec, val in modifications:
self.target_mods.append((parse_residue_spec(resspec), val))
if len(self.target_mods) == 0:
self.target_mods = _patch_protein_termini(meta_molecule)

def run_molecule(self, meta_molecule):

apply_mod(meta_molecule, self.target_mods)
return meta_molecule
22 changes: 17 additions & 5 deletions polyply/src/build_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(self, molecules, topology):
self.persistence_length = {}
self.templates = {}
self.current_template = None
self.resnames_to_hash = {}

@SectionLineParser.section_parser('molecule')
def _molecule(self, line, lineno=0):
Expand Down Expand Up @@ -152,6 +153,7 @@ def _template_atoms(self, line, lineno=0):
node_name, atype = tokens[0], tokens[1]
position = np.array(tokens[2:], dtype=float)
self.current_template.add_node(node_name,
atomname=node_name,
atype=atype,
position=position)

Expand Down Expand Up @@ -200,14 +202,17 @@ def finalize_section(self, previous_section, ended_section):
# if the volume is not defined yet compute the volume, this still
# can be overwritten by an explicit volume directive later
resname = self.current_template.name
if resname not in self.topology.volumes:
self.topology.volumes[resname] = compute_volume(self.current_template,
coords,
self.topology.nonbond_params,)
graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(self.current_template,
node_attr='atomname')
if graph_hash not in self.topology.volumes:
self.topology.volumes[graph_hash] = compute_volume(self.current_template,
coords,
self.topology.nonbond_params,)
# internally a template is defined as vectors from the
# center of geometry
mapped_coords = map_from_CoG(coords)
self.templates[resname] = mapped_coords
self.templates[graph_hash] = mapped_coords
self.resnames_to_hash[resname] = graph_hash
self.current_template = None

def finalize(self, lineno=0):
Expand All @@ -229,6 +234,13 @@ def finalize(self, lineno=0):

super().finalize(lineno=lineno)

# if template graphs and volumes are provided
# make sure that volumes are indexed by the hash
for resname, graph_hash in self.resnames_to_hash.items():
if resname in self.topology.volumes:
self.topology.volumes[graph_hash] = self.topology.volumes[resname]
del self.topology.volumes[resname]

@staticmethod
def _tag_nodes(molecule, keyword, option, molname=""):
resids = np.arange(option['start'], option['stop'], 1.)
Expand Down
6 changes: 5 additions & 1 deletion polyply/src/build_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,11 @@ def _compose_system(self, molecules):
vector_sphere)
if success:
self.nonbond_matrix = new_nonbond_matrix
self.nonbond_matrix.concatenate_trees()
# concatanation only makes sense for polymers
# longer than 10; it really hurts performance
# for for example 1 monomer solvents
if len(molecule) > 10:
self.nonbond_matrix.concatenate_trees()
mol_idx += 1
pbar.update(1)

Expand Down
6 changes: 1 addition & 5 deletions polyply/src/check_residue_equivalence.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,7 @@ def group_residues_by_hash(meta_molecule, template_graphs={}):
dict[`:class:nx.Graph`]
keys are the hash of the graph
"""
unique_graphs = {}
for graph in template_graphs.values():
graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
unique_graphs[graph_hash] = graph

unique_graphs = template_graphs
for node in meta_molecule.nodes:
graph = meta_molecule.nodes[node]["graph"]
graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
Expand Down
9 changes: 8 additions & 1 deletion polyply/src/gen_itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from polyply.src.graph_utils import find_missing_edges
from .load_library import load_ff_library
from .gen_dna import complement_dsDNA
from .apply_modifications import ApplyModifications

LOGGER = StyleAdapter(get_logger(__name__))

Expand All @@ -60,7 +61,10 @@ def split_seq_string(sequence):
monomers.append(Monomer(resname=resname, n_blocks=n_blocks))
return monomers

def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None, dsdna=False):
def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[],
lib=None, seq=None, seq_file=None,
dsdna=False, mods=[], protter=False):

"""
Top level function for running the polyply parameter generation.
Parameters seq and seq_file are mutually exclusive. Set the other
Expand Down Expand Up @@ -107,6 +111,9 @@ def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None,
LOGGER.info("applying links between residues", type="step")
meta_molecule = ApplyLinks().run_molecule(meta_molecule)

meta_molecule = ApplyModifications(modifications=mods,
meta_molecule=meta_molecule).run_molecule(meta_molecule)

# Raise warning if molecule is disconnected
msg = "Missing a link between residue {idxA} {resA} and residue {idxB} {resB}."
for missing in find_missing_edges(meta_molecule, meta_molecule.molecule):
Expand Down
Loading

0 comments on commit beae068

Please sign in to comment.