-
Notifications
You must be signed in to change notification settings - Fork 0
/
protein_letters.py
79 lines (76 loc) · 1.84 KB
/
protein_letters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""
File forked and adapted from the 1.79 version of
Biopython.Bio.Data.IUPACData
"""
# Lookup table for the 20 standard amino acids: upper-case three-letter
# residue code -> one-letter code. Entries are ordered alphabetically by
# their one-letter code.
protein_letters_3to1 = dict(
    ALA="A",
    CYS="C",
    ASP="D",
    GLU="E",
    PHE="F",
    GLY="G",
    HIS="H",
    ILE="I",
    LYS="K",
    LEU="L",
    MET="M",
    ASN="N",
    PRO="P",
    GLN="Q",
    ARG="R",
    SER="S",
    THR="T",
    VAL="V",
    TRP="W",
    TYR="Y",
)
# Extended amino acid codes (ambiguous and non-standard residues):
# B = "Asx"; aspartic acid or asparagine (D or N)
# X = "Xaa" (also written "Xxx"); unknown or 'other' amino acid
# Z = "Glx"; glutamic acid or glutamine (E or Q)
# http://www.chem.qmul.ac.uk/iupac/AminoAcid/A2021.html#AA212
#
# J = "Xle"; leucine or isoleucine (L or I, used in NMR)
# Mentioned in http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
# Also the International Nucleotide Sequence Database Collaboration (INSDC)
# (i.e. GenBank, EMBL, DDBJ) adopted this in 2006
# http://www.ddbj.nig.ac.jp/insdc/icm2006-e.html
#
# Xle (J); Leucine or Isoleucine
# The residue abbreviations Xle (the three-letter abbreviation) and J
# (the one-letter abbreviation) are reserved for cases where leucine
# cannot be experimentally distinguished from isoleucine.
#
# U = "Sec"; selenocysteine
# http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
#
# O = "Pyl"; pyrrolysine
# http://www.chem.qmul.ac.uk/iubmb/newsletter/2009.html#item35
# Extended lookup table: upper-case three-letter residue code -> one-letter
# code. Holds the 20 standard amino acids followed by the ambiguity codes
# and the two rarer residues (selenocysteine, pyrrolysine).
protein_letters_3to1_extended = dict(
    # The 20 standard amino acids.
    ALA="A",
    CYS="C",
    ASP="D",
    GLU="E",
    PHE="F",
    GLY="G",
    HIS="H",
    ILE="I",
    LYS="K",
    LEU="L",
    MET="M",
    ASN="N",
    PRO="P",
    GLN="Q",
    ARG="R",
    SER="S",
    THR="T",
    VAL="V",
    TRP="W",
    TYR="Y",
    # Ambiguity codes.
    ASX="B",  # aspartic acid or asparagine
    XAA="X",  # unknown or 'other' amino acid
    GLX="Z",  # glutamic acid or glutamine
    XLE="J",  # leucine or isoleucine
    # Additional residues.
    SEC="U",  # selenocysteine
    PYL="O",  # pyrrolysine
)