Skip to content

Commit

Permalink
Merge pull request #374 from csbrasnett/termini_patching
Browse files Browse the repository at this point in the history
(protein) termini patching
  • Loading branch information
csbrasnett authored Jul 26, 2024
2 parents 2e970b4 + 69411f2 commit c820aaa
Show file tree
Hide file tree
Showing 13 changed files with 1,877 additions and 1,748 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ tests/test_data/gen_seq/output/*.itp
doc/build
doc/source/api
doc/source/.doctrees

/polyply/data/martini3007/*
7 changes: 7 additions & 0 deletions bin/polyply
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
dna_group = parser_gen_itp.add_argument_group('DNA specifc options')
dna_group.add_argument('-dsdna', dest='dsdna', action='store_true',
help='complement single sequence to dsDNA sequence')
modifications_group = parser_gen_itp.add_argument_group('Modifications group')
modifications_group.add_argument('-mods', dest='mods', action='append',
default=[], type=lambda s: s.split(":"),
help=('Add a modification to a residue. The format is '
'<resname><resid>:modification_name, '
'e.g. ASP1:N-ter')
)

parser_gen_itp.set_defaults(func=gen_itp)

Expand Down
53 changes: 0 additions & 53 deletions polyply/data/martini3/aminoacids.ff
Original file line number Diff line number Diff line change
Expand Up @@ -803,59 +803,6 @@ CA BB
[ edges ]
CA BB

[ link ]
resname $protein_resnames
[ atoms ]
CA { }
BB { }
+BB { }
[ exclusions ]
#meta {"comment": "CA-BB"}
CA +BB
[ edges ]
BB +BB

;; Protein termini. These links should be applied last.
[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": 1}}
[ non-edges ]
BB -BB

[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": -1}}
[ non-edges ]
BB +BB

[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0}}
[ non-edges ]
BB +BB
BB -BB

[ link ]
resname $protein_resnames
[ molmeta ]
neutral_termini true
[ atoms ]
BB {"replace": {"atype": "P5", "charge": 0}}
[ non-edges ]
BB -BB

[ link ]
resname $protein_resnames
[ molmeta ]
neutral_termini true
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0}}
[ non-edges ]
BB +BB

;; Cystein bridge
[ link ]
resname "CYS"
Expand Down
37 changes: 37 additions & 0 deletions polyply/data/martini3/modifications.ff
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[ citations ]
Martini3

[ modification ]
C-ter
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": -1.0}}

[ modification ]
N-ter
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": 1.0}}

[ modification ]
zwitter
[ atoms ]
BB {"replace": {"atype": "Q5"}}

[ modification ]
COOH-ter
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0.0}}

[ modification ]
NH2-ter
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0.0}}

[ modification ]
CCAP-ter
[ atoms ]
BB {"replace": {"atype": "P1", "charge": 0.0}}

[ modification ]
NCAP-ter
[ atoms ]
BB {"replace": {"atype": "P2", "charge": 0.0}}
16 changes: 8 additions & 8 deletions polyply/src/apply_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,16 @@ def match_link_and_residue_atoms(meta_molecule, link, link_to_resid):
# relative resid has been asserted before so we can
# exclude it here
ignore = ['order', 'charge_group', 'replace', 'resid']
matchs = list(find_atoms(block, ignore=ignore, **attrs))
matches = list(find_atoms(block, ignore=ignore, **attrs))

if len(matchs) == 1:
link_to_mol[node] = matchs[0]
elif len(matchs) == 0:
msg = "Found no matchs for node {} in resiue {}. Cannot apply link."
if len(matches) == 1:
link_to_mol[node] = matches[0]
elif len(matches) == 0:
msg = "Found no matches for node {} in resiue {}. Cannot apply link."
raise MatchError(msg.format(node, resid))
else:
msg = "Found {} matches for node {} in resiue {}. Cannot apply link."
raise MatchError(msg.format(len(matchs), node, resid))
raise MatchError(msg.format(len(matches), node, resid))

return link_to_mol

Expand Down Expand Up @@ -475,8 +475,8 @@ def run_molecule(self, meta_molecule):
res_link,
node_match=_res_match,
edge_match=_linktype_match)
raw_matchs = GM.subgraph_isomorphisms_iter()
for match in raw_matchs:
raw_matches = GM.subgraph_isomorphisms_iter()
for match in raw_matches:
nodes = match.keys()
resids =[meta_molecule.nodes[node]["resid"] for node in nodes]
orders = [ res_link.nodes[match[node]]["order"] for node in nodes]
Expand Down
116 changes: 113 additions & 3 deletions polyply/src/apply_modifications.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 University of Groningen
# Copyright 2024 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,6 +11,116 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import vermouth.molecule
from vermouth.log_helpers import StyleAdapter, get_logger
from vermouth.processors.annotate_mut_mod import parse_residue_spec
LOGGER = StyleAdapter(get_logger(__name__))
from .processor import Processor

class ApplyModifications():
pass
protein_resnames = "GLY|ALA|CYS|VAL|LEU|ILE|MET|PRO|HYP|ASN|GLN|ASP|ASP0|GLU|GLU0|THR|SER|LYS|LYS0|ARG|ARG0|HIS|HISH|PHE|TYR|TRP"

def _patch_protein_termini(meta_molecule, ter_mods=['N-ter', 'C-ter']):
"""
make a resspec for a protein with correct terminal modification
"""
protein_termini = [({'resid': 1, 'resname': meta_molecule.nodes[0]['resname']}, ter_mods[0])]
max_resid = meta_molecule.max_resid
last_node = max_resid - 1
last_resname = meta_molecule.nodes[last_node]['resname']
if len(ter_mods) > 1:
last_mod = ({'resid': max_resid, 'resname': last_resname}, ter_mods[1])
protein_termini.append(last_mod)
else:
# if only one mod in ter_mods, apply the mod to both start and end residue
LOGGER.info("Only one terminal modification specified. "
f"Will apply {ter_mods[0]} to both {meta_molecule.nodes[0]['resname']}1 and {last_resname}{max_resid}")
protein_termini.append(({'resid': max_resid, 'resname': last_resname}, ter_mods[0]))

return protein_termini


def apply_mod(meta_molecule, modifications):
"""
Apply a terminal modification.
Note this assumes that the modification is additive, ie. no atoms are
removed
Parameters
----------
meta_molecule: :class:`polyply.src.meta_molecule.MetaMolecule`
modifications: list
list of (resspec, modification) pairs to apply
Returns
----------
meta_molecule
"""

molecule = meta_molecule.molecule

if not molecule.force_field.modifications:
LOGGER.warning('No modifications present in forcefield, none will be applied')
return meta_molecule

for target, desired_mod in modifications:

target_resid = target['resid']

mod_atoms = {}
for mod_atom in molecule.force_field.modifications[desired_mod].atoms:
if 'replace' in mod_atom:
mod_atoms[mod_atom['atomname']] = mod_atom['replace']
else:
mod_atoms[mod_atom['atomname']] = {}

target_residue = meta_molecule.nodes[target_resid - 1]
# takes care to skip all residues that come from an itp file
if not target_residue.get('from_itp', 'False'):
LOGGER.warning("meta_molecule has come from itp. Will not attempt to modify.")
continue
# checks that the resname is a protein resname as defined above
if not vermouth.molecule.attributes_match(target_residue,
{'resname': vermouth.molecule.Choice(protein_resnames.split("|"))}):
LOGGER.warning("The resname of your target residue is not recognised a protein resname. "
"Will not attempt to modify.")
continue

anum_dict = {}
# this gives you the correct node indices
for node in target_residue['graph'].nodes:
aname = molecule.nodes[node]['atomname']
if aname in mod_atoms.keys():
anum_dict[aname] = node + 1
for key, value in mod_atoms[aname].items():
molecule.nodes[node][key] = value

mod_interactions = molecule.force_field.modifications[desired_mod].interactions
for i in mod_interactions:
for j in molecule.force_field.modifications[desired_mod].interactions[i]:
molecule.add_interaction(i,
(anum_dict[j.atoms[0]]-1,
anum_dict[j.atoms[1]]-1),
j.parameters,
meta=j.meta)

return meta_molecule

class ApplyModifications(Processor):
"""
This processor takes a class:`polyply.src.MetaMolecule` and
based on modifications defined in the `force-field` attribute of the
MetaMolecule applies them when appropriate.
"""
def __init__(self, meta_molecule, modifications=[]):
self.target_mods = []
for resspec, val in modifications:
self.target_mods.append((parse_residue_spec(resspec), val))
if len(self.target_mods) == 0:
self.target_mods = _patch_protein_termini(meta_molecule)

def run_molecule(self, meta_molecule):

apply_mod(meta_molecule, self.target_mods)
return meta_molecule
9 changes: 8 additions & 1 deletion polyply/src/gen_itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from polyply.src.graph_utils import find_missing_edges
from .load_library import load_ff_library
from .gen_dna import complement_dsDNA
from .apply_modifications import ApplyModifications

LOGGER = StyleAdapter(get_logger(__name__))

Expand All @@ -60,7 +61,10 @@ def split_seq_string(sequence):
monomers.append(Monomer(resname=resname, n_blocks=n_blocks))
return monomers

def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None, dsdna=False):
def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[],
lib=None, seq=None, seq_file=None,
dsdna=False, mods=[], protter=False):

"""
Top level function for running the polyply parameter generation.
Parameters seq and seq_file are mutually exclusive. Set the other
Expand Down Expand Up @@ -107,6 +111,9 @@ def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None,
LOGGER.info("applying links between residues", type="step")
meta_molecule = ApplyLinks().run_molecule(meta_molecule)

meta_molecule = ApplyModifications(modifications=mods,
meta_molecule=meta_molecule).run_molecule(meta_molecule)

# Raise warning if molecule is disconnected
msg = "Missing a link between residue {idxA} {resA} and residue {idxB} {resB}."
for missing in find_missing_edges(meta_molecule, meta_molecule.molecule):
Expand Down
7 changes: 7 additions & 0 deletions polyply/tests/example_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def example_meta_molecule():
3 8 12
"""
force_field = vermouth.forcefield.ForceField("test")

# force_field.modifications.update({'N-ter':{'PTM_atom': False,
# 'replace': {'atype': 'Q5', 'charge': 1.0},
# 'order': 0, 'atomname': 'BB'}}
# )


block_A = Block(force_field=force_field)
block_A.add_nodes_from([(0 , {'resid': 1, 'atomname': 'BB', 'atype': 'A', 'charge': 0.0, 'other': 'A', 'resname': 'A'}),
(1 , {'resid': 1, 'atomname': 'BB1', 'atype': 'B', 'charge': 0.0, 'other': 'A', 'resname': 'A'}),
Expand Down
Loading

0 comments on commit c820aaa

Please sign in to comment.