weirdo 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weirdo/__init__.py +104 -0
- weirdo/amino_acid.py +33 -0
- weirdo/amino_acid_alphabet.py +158 -0
- weirdo/amino_acid_properties.py +358 -0
- weirdo/api.py +372 -0
- weirdo/blosum.py +74 -0
- weirdo/chou_fasman.py +73 -0
- weirdo/cli.py +597 -0
- weirdo/common.py +22 -0
- weirdo/data_manager.py +475 -0
- weirdo/distances.py +16 -0
- weirdo/matrices/BLOSUM30 +25 -0
- weirdo/matrices/BLOSUM50 +21 -0
- weirdo/matrices/BLOSUM62 +27 -0
- weirdo/matrices/__init__.py +0 -0
- weirdo/matrices/amino_acid_properties.txt +829 -0
- weirdo/matrices/helix_vs_coil.txt +28 -0
- weirdo/matrices/helix_vs_strand.txt +27 -0
- weirdo/matrices/pmbec.mat +21 -0
- weirdo/matrices/strand_vs_coil.txt +27 -0
- weirdo/model_manager.py +346 -0
- weirdo/peptide_vectorizer.py +78 -0
- weirdo/pmbec.py +85 -0
- weirdo/reduced_alphabet.py +61 -0
- weirdo/residue_contact_energies.py +74 -0
- weirdo/scorers/__init__.py +95 -0
- weirdo/scorers/base.py +223 -0
- weirdo/scorers/config.py +299 -0
- weirdo/scorers/mlp.py +1126 -0
- weirdo/scorers/reference.py +265 -0
- weirdo/scorers/registry.py +282 -0
- weirdo/scorers/similarity.py +386 -0
- weirdo/scorers/swissprot.py +510 -0
- weirdo/scorers/trainable.py +219 -0
- weirdo/static_data.py +17 -0
- weirdo-2.1.0.dist-info/METADATA +294 -0
- weirdo-2.1.0.dist-info/RECORD +41 -0
- weirdo-2.1.0.dist-info/WHEEL +5 -0
- weirdo-2.1.0.dist-info/entry_points.txt +2 -0
- weirdo-2.1.0.dist-info/licenses/LICENSE +201 -0
- weirdo-2.1.0.dist-info/top_level.txt +1 -0
weirdo/__init__.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from .amino_acid_alphabet import (
|
|
2
|
+
AminoAcid,
|
|
3
|
+
canonical_amino_acids,
|
|
4
|
+
canonical_amino_acid_letters,
|
|
5
|
+
extended_amino_acids,
|
|
6
|
+
extended_amino_acid_letters,
|
|
7
|
+
amino_acid_letter_indices,
|
|
8
|
+
amino_acid_name_indices,
|
|
9
|
+
)
|
|
10
|
+
from .peptide_vectorizer import PeptideVectorizer
|
|
11
|
+
from .distances import hamming
|
|
12
|
+
|
|
13
|
+
# High-level scoring API
|
|
14
|
+
from .api import (
|
|
15
|
+
score_peptide,
|
|
16
|
+
score_peptides,
|
|
17
|
+
create_scorer,
|
|
18
|
+
clear_cache,
|
|
19
|
+
get_available_presets,
|
|
20
|
+
get_preset_info,
|
|
21
|
+
# Model management
|
|
22
|
+
list_models,
|
|
23
|
+
load_model,
|
|
24
|
+
save_model,
|
|
25
|
+
get_available_scorers,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Data management
|
|
29
|
+
from .data_manager import (
|
|
30
|
+
DataManager,
|
|
31
|
+
get_data_manager,
|
|
32
|
+
ensure_data_available,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Model management
|
|
36
|
+
from .model_manager import (
|
|
37
|
+
ModelManager,
|
|
38
|
+
get_model_manager,
|
|
39
|
+
ModelInfo,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# Scorer classes (for advanced usage)
|
|
43
|
+
from .scorers import (
|
|
44
|
+
BaseScorer,
|
|
45
|
+
BatchScorer,
|
|
46
|
+
BaseReference,
|
|
47
|
+
StreamingReference,
|
|
48
|
+
TrainableScorer,
|
|
49
|
+
SwissProtReference,
|
|
50
|
+
ScorerConfig,
|
|
51
|
+
register_scorer,
|
|
52
|
+
register_reference,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# ML scorer
|
|
56
|
+
from .scorers import MLPScorer
|
|
57
|
+
|
|
58
|
+
# Package version string.
__version__ = "2.1.0"

# Names exported by `from weirdo import *`. Every entry is imported at the
# top of this module; entries are grouped by the area of the package they
# belong to.
__all__ = [
    # Amino acid data
    "AminoAcid",
    "canonical_amino_acids",
    "canonical_amino_acid_letters",
    "extended_amino_acids",
    "extended_amino_acid_letters",
    "amino_acid_letter_indices",
    "amino_acid_name_indices",
    # Vectorization
    "PeptideVectorizer",
    # Distances
    "hamming",
    # High-level scoring API
    "score_peptide",
    "score_peptides",
    "create_scorer",
    "clear_cache",
    "get_available_presets",
    "get_preset_info",
    "get_available_scorers",
    # Model management
    "list_models",
    "load_model",
    "save_model",
    "ModelManager",
    "get_model_manager",
    "ModelInfo",
    # Scorer classes
    "BaseScorer",
    "BatchScorer",
    "BaseReference",
    "StreamingReference",
    "TrainableScorer",
    "SwissProtReference",
    "ScorerConfig",
    "register_scorer",
    "register_reference",
    # ML scorer
    "MLPScorer",
    # Data management
    "DataManager",
    "get_data_manager",
    "ensure_data_available",
]
|
weirdo/amino_acid.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# you may not use this file except in compliance with the License.
|
|
3
|
+
# You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
class AminoAcid(object):
    """
    Description of a single amino acid (or amino acid token).

    Attributes
    ----------
    full_name : full name, e.g. "Alanine"
    short_name : three-letter code, e.g. "Ala"
    letter : one-letter code, e.g. "A"
    contains : collection of one-letter codes this token stands for;
        defaults to ``[letter]`` when not given (or given empty)
    """

    def __init__(
            self, full_name, short_name, letter, contains=None):
        self.letter = letter
        self.full_name = full_name
        self.short_name = short_name
        # A plain residue only "contains" itself; wildcard tokens pass an
        # explicit collection of the letters they can represent.
        if not contains:
            contains = [letter]
        self.contains = contains

    def __str__(self):
        # Argument order must follow the placeholders in the format string
        # (full_name, short_name, letter, contains).
        return (
            ("AminoAcid(full_name='%s', short_name='%s', letter='%s', "
             "contains=%s)") % (
                self.full_name, self.short_name, self.letter, self.contains))

    def __repr__(self):
        return str(self)

    def __eq__(self, other):
        # Two amino acids are equal when the other object is exactly an
        # AminoAcid and both share the same one-letter code.
        return other.__class__ is AminoAcid and self.letter == other.letter

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3;
        # hash on the same field equality compares so equal objects
        # hash equally.
        return hash(self.letter)
|
weirdo/amino_acid_alphabet.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# you may not use this file except in compliance with the License.
|
|
3
|
+
# You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
Amino acid alphabets (canonical, modified, wildcard, rare, extended) and helpers for converting between letters, names, and indices
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
|
|
20
|
+
from .amino_acid import AminoAcid
|
|
21
|
+
|
|
22
|
+
# The 20 canonical residues, built from (full name, three-letter code,
# one-letter code) triples. List order is significant: positions in this
# list are reused as indices elsewhere in this module.
canonical_amino_acids = [
    AminoAcid(full_name, short_name, letter)
    for (full_name, short_name, letter) in (
        ("Alanine", "Ala", "A"),
        ("Arginine", "Arg", "R"),
        ("Asparagine", "Asn", "N"),
        ("Aspartic Acid", "Asp", "D"),
        ("Cysteine", "Cys", "C"),
        ("Glutamic Acid", "Glu", "E"),
        ("Glutamine", "Gln", "Q"),
        ("Glycine", "Gly", "G"),
        ("Histidine", "His", "H"),
        ("Isoleucine", "Ile", "I"),
        ("Leucine", "Leu", "L"),
        ("Lysine", "Lys", "K"),
        ("Methionine", "Met", "M"),
        ("Phenylalanine", "Phe", "F"),
        ("Proline", "Pro", "P"),
        ("Serine", "Ser", "S"),
        ("Threonine", "Thr", "T"),
        ("Tryptophan", "Trp", "W"),
        ("Tyrosine", "Tyr", "Y"),
        ("Valine", "Val", "V"),
    )
]

# One-letter codes in the same order as canonical_amino_acids.
canonical_amino_acid_letters = [
    residue.letter for residue in canonical_amino_acids
]
|
|
46
|
+
|
|
47
|
+
###
|
|
48
|
+
# Post-translational modifications commonly detected by mass-spec
|
|
49
|
+
###
|
|
50
|
+
|
|
51
|
+
# TODO: figure out three letter codes for modified AAs
|
|
52
|
+
|
|
53
|
+
# Modified residues use lowercase one-letter codes. "???" is a placeholder
# where no three-letter code has been assigned yet.
modified_amino_acids = [
    AminoAcid(full_name, short_name, letter)
    for (full_name, short_name, letter) in (
        ("Phospho-Serine", "Sep", "s"),
        ("Phospho-Threonine", "???", "t"),
        ("Phospho-Tyrosine", "???", "y"),
        ("Cystine", "???", "c"),
        ("Methionine sulfoxide", "???", "m"),
        ("Pyroglutamate", "???", "q"),
        ("Pyroglutamic acid", "???", "n"),
    )
]
|
|
62
|
+
|
|
63
|
+
###
|
|
64
|
+
# Amino acid tokens which represent multiple canonical amino acids
|
|
65
|
+
###
|
|
66
|
+
# Each wildcard token records, in `contains`, the set of canonical letters
# it may stand for; "X" covers every canonical letter.
wildcard_amino_acids = [
    AminoAcid(full_name, short_name, letter, contains=members)
    for (full_name, short_name, letter, members) in (
        ("Unknown", "Xaa", "X", set(canonical_amino_acid_letters)),
        ("Asparagine-or-Aspartic-Acid", "Asx", "B", set("DN")),
        ("Glutamine-or-Glutamic-Acid", "Glx", "Z", set("EQ")),
        ("Leucine-or-Isoleucine", "Xle", "J", set("IL")),
    )
]

###
# Canonical amino acids + wildcard tokens
###

canonical_amino_acids_with_unknown = [
    *canonical_amino_acids,
    *wildcard_amino_acids,
]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
###
|
|
81
|
+
# Rare amino acids which aren't considered part of the core 20 "canonical"
|
|
82
|
+
###
|
|
83
|
+
|
|
84
|
+
rare_amino_acids = [
    AminoAcid(full_name="Selenocysteine", short_name="Sec", letter="U"),
    AminoAcid(full_name="Pyrrolysine", short_name="Pyl", letter="O"),
]

###
# Extended amino acids + wildcard tokens
###

# Order is canonical residues, then rare residues, then wildcard tokens.
# Positions in this list are the indices used by the lookup tables and
# index_to_* helpers in this module.
extended_amino_acids = [
    *canonical_amino_acids,
    *rare_amino_acids,
    *wildcard_amino_acids,
]

# One-letter codes, parallel to extended_amino_acids.
extended_amino_acid_letters = [
    residue.letter for residue in extended_amino_acids
]

# Full names, parallel to extended_amino_acids.
extended_amino_acids_with_unknown_names = [
    residue.full_name for residue in extended_amino_acids
]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Maps each one-letter code to its position in extended_amino_acid_letters.
amino_acid_letter_indices = dict(
    zip(
        extended_amino_acid_letters,
        range(len(extended_amino_acid_letters)),
    )
)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# Every ordered two-letter combination over the extended alphabet, with the
# first letter of the pair varying fastest ("AA", "RA", "NA", ...).
# Iterate over the letter list, not the AminoAcid objects: formatting the
# objects with %s would embed each object's full string form in the pair.
amino_acid_letter_pairs = [
    "%s%s" % (x, y)
    for y in extended_amino_acid_letters
    for x in extended_amino_acid_letters
]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Maps each full amino acid name to its position in
# extended_amino_acids_with_unknown_names.
amino_acid_name_indices = dict(
    zip(
        extended_amino_acids_with_unknown_names,
        range(len(extended_amino_acids_with_unknown_names)),
    )
)

# Maps each entry of amino_acid_letter_pairs to its position in that list.
amino_acid_pair_positions = dict(
    zip(
        amino_acid_letter_pairs,
        range(len(amino_acid_letter_pairs)),
    )
)
|
|
123
|
+
|
|
124
|
+
def index_to_full_name(idx):
    """
    Return the full name of the amino acid at position `idx` in
    extended_amino_acids.
    """
    residue = extended_amino_acids[idx]
    return residue.full_name
|
|
126
|
+
|
|
127
|
+
def index_to_short_name(idx):
    """
    Return the three-letter code of the amino acid at position `idx` in
    extended_amino_acids.
    """
    residue = extended_amino_acids[idx]
    return residue.short_name
|
|
129
|
+
|
|
130
|
+
def index_to_letter(idx):
    """
    Return the one-letter code of the amino acid at position `idx` in
    extended_amino_acids.
    """
    # Return the letter attribute, matching index_to_full_name and
    # index_to_short_name, rather than the AminoAcid object itself.
    return extended_amino_acids[idx].letter
|
|
132
|
+
|
|
133
|
+
def letter_to_index(x):
    """
    Look up the position index for a one-letter amino acid code.

    Fails an assertion (when assertions are enabled) if `x` is not a key
    of amino_acid_letter_indices.
    """
    is_known = x in amino_acid_letter_indices
    assert is_known, "Unknown amino acid: %s" % x
    position = amino_acid_letter_indices[x]
    return position
|
|
139
|
+
|
|
140
|
+
def peptide_to_indices(xs):
    """
    Convert a sequence of one-letter codes into a list of position indices.

    Raises KeyError for any letter missing from amino_acid_letter_indices.
    """
    return list(map(amino_acid_letter_indices.__getitem__, xs))
|
|
142
|
+
|
|
143
|
+
def letter_to_short_name(x):
    """
    Convert a one-letter amino acid code to its three-letter code.
    """
    position = letter_to_index(x)
    return index_to_short_name(position)
|
|
145
|
+
|
|
146
|
+
def peptide_to_short_amino_acid_names(xs):
    """
    Convert a sequence of one-letter codes into a list of three-letter codes.

    Raises KeyError for any letter missing from amino_acid_letter_indices.
    """
    # Look each letter up directly (instead of via letter_to_index) so an
    # unknown letter still raises KeyError, as the previous body did.
    return [
        extended_amino_acids[amino_acid_letter_indices[x]].short_name
        for x in xs
    ]
|
|
148
|
+
|
|
149
|
+
def dict_to_amino_acid_matrix(d, alphabet=canonical_amino_acids):
    """
    Build a square float32 matrix from a nested dict of pairwise values.

    `d` maps a row letter to a dict mapping a column letter to a value.
    Entry [i, j] of the result is d[alphabet[i].letter][alphabet[j].letter].

    Raises KeyError if a letter of `alphabet` is missing from `d` or from
    one of its rows.
    """
    # NOTE(review): the matrix side is len(d), not len(alphabet). If `d`
    # has more keys than `alphabet` has entries the extra rows/columns stay
    # zero - confirm whether callers rely on that shape.
    side = len(d)
    matrix = np.zeros((side, side), dtype="float32")
    for row_idx, row_aa in enumerate(alphabet):
        row_values = d[row_aa.letter]
        for col_idx, col_aa in enumerate(alphabet):
            matrix[row_idx, col_idx] = row_values[col_aa.letter]
    return matrix
|
|
158
|
+
|
weirdo/amino_acid_properties.py
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# you may not use this file except in compliance with the License.
|
|
3
|
+
# You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
|
+
Quantify amino acids by their physical/chemical properties
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .amino_acid_alphabet import letter_to_index
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def aa_dict_to_positional_list(aa_property_dict):
    """
    Turn a {letter: value} dict into a 20-element list ordered by
    amino acid index.

    Each letter is placed at the position given by letter_to_index, which
    must fall in [0, 20). Assertions fail if an index is out of that range
    or if any of the 20 slots is left unfilled.
    """
    n_slots = 20
    slots = [None] * n_slots
    for aa_letter, aa_value in aa_property_dict.items():
        position = letter_to_index(aa_letter)
        assert 0 <= position < n_slots
        slots[position] = aa_value
    has_gap = any(entry is None for entry in slots)
    assert not has_gap, \
        "Missing amino acids in:\n%s" % aa_property_dict.keys()
    return slots
|
|
31
|
+
|
|
32
|
+
def parse_property_table(table_string):
    """
    Parse a whitespace-delimited amino acid property table.

    Each non-blank line must start with a numeric value followed by a
    one-letter amino acid code; any further columns (such as a
    three-letter code) are ignored. Columns may be separated by any run
    of whitespace, including tabs.

    Returns a dict mapping each letter to its value as a float.

    Raises AssertionError if a line has fewer than two columns or a
    letter appears twice, and ValueError if a value is not a valid float.
    """
    value_dict = {}
    for line in table_string.splitlines():
        line = line.strip()
        if not line:
            continue
        # split() with no separator collapses runs of spaces and tabs, so
        # tab-delimited rows parse the same as space-delimited ones.
        fields = line.split()
        assert len(fields) >= 2, "Expected value and letter columns: " + line
        value, letter = fields[:2]
        assert letter not in value_dict, "Repeated amino acid " + line
        value_dict[letter] = float(value)
    return value_dict
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
"""
|
|
48
|
+
Amino acid property tables copied from the CRASP website
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
hydropathy = parse_property_table("""
|
|
52
|
+
1.80000 A ALA
|
|
53
|
+
-4.5000 R ARG
|
|
54
|
+
-3.5000 N ASN
|
|
55
|
+
-3.5000 D ASP
|
|
56
|
+
2.50000 C CYS
|
|
57
|
+
-3.5000 Q GLN
|
|
58
|
+
-3.5000 E GLU
|
|
59
|
+
-0.4000 G GLY
|
|
60
|
+
-3.2000 H HIS
|
|
61
|
+
4.50000 I ILE
|
|
62
|
+
3.80000 L LEU
|
|
63
|
+
-3.9000 K LYS
|
|
64
|
+
1.90000 M MET
|
|
65
|
+
2.80000 F PHE
|
|
66
|
+
-1.6000 P PRO
|
|
67
|
+
-0.8000 S SER
|
|
68
|
+
-0.7000 T THR
|
|
69
|
+
-0.9000 W TRP
|
|
70
|
+
-1.3000 Y TYR
|
|
71
|
+
4.20000 V VAL
|
|
72
|
+
""")
|
|
73
|
+
|
|
74
|
+
volume = parse_property_table("""
|
|
75
|
+
91.5000 A ALA
|
|
76
|
+
202.0000 R ARG
|
|
77
|
+
135.2000 N ASN
|
|
78
|
+
124.5000 D ASP
|
|
79
|
+
118.0000 C CYS
|
|
80
|
+
161.1000 Q GLN
|
|
81
|
+
155.1000 E GLU
|
|
82
|
+
66.40000 G GLY
|
|
83
|
+
167.3000 H HIS
|
|
84
|
+
168.8000 I ILE
|
|
85
|
+
167.9000 L LEU
|
|
86
|
+
171.3000 K LYS
|
|
87
|
+
170.8000 M MET
|
|
88
|
+
203.4000 F PHE
|
|
89
|
+
129.3000 P PRO
|
|
90
|
+
99.10000 S SER
|
|
91
|
+
122.1000 T THR
|
|
92
|
+
237.6000 W TRP
|
|
93
|
+
203.6000 Y TYR
|
|
94
|
+
141.7000 V VAL
|
|
95
|
+
""")
|
|
96
|
+
|
|
97
|
+
polarity = parse_property_table("""
|
|
98
|
+
0.0000 A ALA
|
|
99
|
+
52.000 R ARG
|
|
100
|
+
3.3800 N ASN
|
|
101
|
+
40.700 D ASP
|
|
102
|
+
1.4800 C CYS
|
|
103
|
+
3.5300 Q GLN
|
|
104
|
+
49.910 E GLU
|
|
105
|
+
0.0000 G GLY
|
|
106
|
+
51.600 H HIS
|
|
107
|
+
0.1500 I ILE
|
|
108
|
+
0.4500 L LEU
|
|
109
|
+
49.500 K LYS
|
|
110
|
+
1.4300 M MET
|
|
111
|
+
0.3500 F PHE
|
|
112
|
+
1.5800 P PRO
|
|
113
|
+
1.6700 S SER
|
|
114
|
+
1.6600 T THR
|
|
115
|
+
2.1000 W TRP
|
|
116
|
+
1.6100 Y TYR
|
|
117
|
+
0.1300 V VAL
|
|
118
|
+
""")
|
|
119
|
+
|
|
120
|
+
pK_side_chain = parse_property_table("""
|
|
121
|
+
0.0000 A ALA
|
|
122
|
+
12.480 R ARG
|
|
123
|
+
0.0000 N ASN
|
|
124
|
+
3.6500 D ASP
|
|
125
|
+
8.1800 C CYS
|
|
126
|
+
0.0000 Q GLN
|
|
127
|
+
4.2500 E GLU
|
|
128
|
+
0.0000 G GLY
|
|
129
|
+
6.0000 H HIS
|
|
130
|
+
0.0000 I ILE
|
|
131
|
+
0.0000 L LEU
|
|
132
|
+
10.530 K LYS
|
|
133
|
+
0.0000 M MET
|
|
134
|
+
0.0000 F PHE
|
|
135
|
+
0.0000 P PRO
|
|
136
|
+
0.0000 S SER
|
|
137
|
+
0.0000 T THR
|
|
138
|
+
0.0000 W TRP
|
|
139
|
+
10.700 Y TYR
|
|
140
|
+
0.0000 V VAL
|
|
141
|
+
""")
|
|
142
|
+
|
|
143
|
+
prct_exposed_residues = parse_property_table("""
|
|
144
|
+
15.0000 A ALA
|
|
145
|
+
67.0000 R ARG
|
|
146
|
+
49.0000 N ASN
|
|
147
|
+
50.0000 D ASP
|
|
148
|
+
5.00000 C CYS
|
|
149
|
+
56.0000 Q GLN
|
|
150
|
+
55.0000 E GLU
|
|
151
|
+
10.0000 G GLY
|
|
152
|
+
34.0000 H HIS
|
|
153
|
+
13.0000 I ILE
|
|
154
|
+
16.0000 L LEU
|
|
155
|
+
85.0000 K LYS
|
|
156
|
+
20.0000 M MET
|
|
157
|
+
10.0000 F PHE
|
|
158
|
+
45.0000 P PRO
|
|
159
|
+
32.0000 S SER
|
|
160
|
+
32.0000 T THR
|
|
161
|
+
17.0000 W TRP
|
|
162
|
+
41.0000 Y TYR
|
|
163
|
+
14.0000 V VAL
|
|
164
|
+
""")
|
|
165
|
+
|
|
166
|
+
hydrophilicity = parse_property_table("""
|
|
167
|
+
-0.5000 A ALA
|
|
168
|
+
3.00000 R ARG
|
|
169
|
+
0.20000 N ASN
|
|
170
|
+
3.00000 D ASP
|
|
171
|
+
-1.0000 C CYS
|
|
172
|
+
0.20000 Q GLN
|
|
173
|
+
3.00000 E GLU
|
|
174
|
+
0.00000 G GLY
|
|
175
|
+
-0.5000 H HIS
|
|
176
|
+
-1.8000 I ILE
|
|
177
|
+
-1.8000 L LEU
|
|
178
|
+
3.00000 K LYS
|
|
179
|
+
-1.3000 M MET
|
|
180
|
+
-2.5000 F PHE
|
|
181
|
+
0.00000 P PRO
|
|
182
|
+
0.30000 S SER
|
|
183
|
+
-0.4000 T THR
|
|
184
|
+
-3.4000 W TRP
|
|
185
|
+
-2.3000 Y TYR
|
|
186
|
+
-1.5000 V VAL
|
|
187
|
+
""")
|
|
188
|
+
|
|
189
|
+
accessible_surface_area = parse_property_table("""
|
|
190
|
+
27.8000 A ALA
|
|
191
|
+
94.7000 R ARG
|
|
192
|
+
60.1000 N ASN
|
|
193
|
+
60.6000 D ASP
|
|
194
|
+
15.5000 C CYS
|
|
195
|
+
68.7000 Q GLN
|
|
196
|
+
68.2000 E GLU
|
|
197
|
+
24.5000 G GLY
|
|
198
|
+
50.7000 H HIS
|
|
199
|
+
22.8000 I ILE
|
|
200
|
+
27.6000 L LEU
|
|
201
|
+
103.000 K LYS
|
|
202
|
+
33.5000 M MET
|
|
203
|
+
25.5000 F PHE
|
|
204
|
+
51.5000 P PRO
|
|
205
|
+
42.0000 S SER
|
|
206
|
+
45.0000 T THR
|
|
207
|
+
34.7000 W TRP
|
|
208
|
+
55.2000 Y TYR
|
|
209
|
+
23.7000 V VAL
|
|
210
|
+
""")
|
|
211
|
+
|
|
212
|
+
local_flexibility = parse_property_table("""
|
|
213
|
+
705.42000 A ALA
|
|
214
|
+
1484.2800 R ARG
|
|
215
|
+
513.46010 N ASN
|
|
216
|
+
34.960000 D ASP
|
|
217
|
+
2412.5601 C CYS
|
|
218
|
+
1087.8300 Q GLN
|
|
219
|
+
1158.6600 E GLU
|
|
220
|
+
33.180000 G GLY
|
|
221
|
+
1637.1300 H HIS
|
|
222
|
+
5979.3701 I ILE
|
|
223
|
+
4985.7300 L LEU
|
|
224
|
+
699.69000 K LYS
|
|
225
|
+
4491.6602 M MET
|
|
226
|
+
5203.8599 F PHE
|
|
227
|
+
431.96000 P PRO
|
|
228
|
+
174.76000 S SER
|
|
229
|
+
601.88000 T THR
|
|
230
|
+
6374.0698 W TRP
|
|
231
|
+
4291.1001 Y TYR
|
|
232
|
+
4474.4199 V VAL
|
|
233
|
+
""")
|
|
234
|
+
|
|
235
|
+
accessible_surface_area_folded = parse_property_table("""
|
|
236
|
+
31.5000 A ALA
|
|
237
|
+
93.8000 R ARG
|
|
238
|
+
62.2000 N ASN
|
|
239
|
+
60.9000 D ASP
|
|
240
|
+
13.9000 C CYS
|
|
241
|
+
74.0000 Q GLN
|
|
242
|
+
72.3000 E GLU
|
|
243
|
+
25.2000 G GLY
|
|
244
|
+
46.7000 H HIS
|
|
245
|
+
23.0000 I ILE
|
|
246
|
+
29.0000 L LEU
|
|
247
|
+
110.300 K LYS
|
|
248
|
+
30.5000 M MET
|
|
249
|
+
28.7000 F PHE
|
|
250
|
+
53.7000 P PRO
|
|
251
|
+
44.2000 S SER
|
|
252
|
+
46.0000 T THR
|
|
253
|
+
41.7000 W TRP
|
|
254
|
+
59.1000 Y TYR
|
|
255
|
+
23.5000 V VAL
|
|
256
|
+
""")
|
|
257
|
+
|
|
258
|
+
refractivity = parse_property_table("""
|
|
259
|
+
4.34000 A ALA
|
|
260
|
+
26.6600 R ARG
|
|
261
|
+
13.2800 N ASN
|
|
262
|
+
12.0000 D ASP
|
|
263
|
+
35.7700 C CYS
|
|
264
|
+
17.5600 Q GLN
|
|
265
|
+
17.2600 E GLU
|
|
266
|
+
0.00000 G GLY
|
|
267
|
+
21.8100 H HIS
|
|
268
|
+
19.0600 I ILE
|
|
269
|
+
18.7800 L LEU
|
|
270
|
+
21.2900 K LYS
|
|
271
|
+
21.6400 M MET
|
|
272
|
+
29.4000 F PHE
|
|
273
|
+
10.9300 P PRO
|
|
274
|
+
6.35000 S SER
|
|
275
|
+
11.0100 T THR
|
|
276
|
+
42.5300 W TRP
|
|
277
|
+
31.5300 Y TYR
|
|
278
|
+
13.9200 V VAL
|
|
279
|
+
""")
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
mass = parse_property_table("""
|
|
283
|
+
70.079 A ALA
|
|
284
|
+
156.188 R ARG
|
|
285
|
+
114.104 N ASN
|
|
286
|
+
115.089 D ASP
|
|
287
|
+
103.144 C CYS
|
|
288
|
+
128.131 Q GLN
|
|
289
|
+
129.116 E GLU
|
|
290
|
+
57.052 G GLY
|
|
291
|
+
137.142 H HIS
|
|
292
|
+
113.160 I ILE
|
|
293
|
+
113.160 L LEU
|
|
294
|
+
128.174 K LYS
|
|
295
|
+
131.198 M MET
|
|
296
|
+
147.177 F PHE
|
|
297
|
+
97.177 P PRO
|
|
298
|
+
87.078 S SER
|
|
299
|
+
101.105 T THR
|
|
300
|
+
186.213 W TRP
|
|
301
|
+
163.170 Y TYR
|
|
302
|
+
99.133 V VAL
|
|
303
|
+
""")
|
|
304
|
+
|
|
305
|
+
###
|
|
306
|
+
# Values copied from:
|
|
307
|
+
# "Solvent accessibility of AA in known protein structures"
|
|
308
|
+
# http://prowl.rockefeller.edu/aainfo/access.htm
|
|
309
|
+
###
|
|
310
|
+
"""
|
|
311
|
+
Solvent accessibility of AA in known protein structures
|
|
312
|
+
|
|
313
|
+
Figure 1.
|
|
314
|
+
|
|
315
|
+
S 0.70 0.20 0.10
|
|
316
|
+
T 0.71 0.16 0.13
|
|
317
|
+
A 0.48 0.35 0.17
|
|
318
|
+
G 0.51 0.36 0.13
|
|
319
|
+
P 0.78 0.13 0.09
|
|
320
|
+
C 0.32 0.54 0.14
|
|
321
|
+
D 0.81 0.09 0.10
|
|
322
|
+
E 0.93 0.04 0.03
|
|
323
|
+
Q 0.81 0.10 0.09
|
|
324
|
+
N 0.82 0.10 0.08
|
|
325
|
+
L 0.41 0.49 0.10
|
|
326
|
+
I 0.39 0.47 0.14
|
|
327
|
+
V 0.40 0.50 0.10
|
|
328
|
+
M 0.44 0.20 0.36
|
|
329
|
+
F 0.42 0.42 0.16
|
|
330
|
+
Y 0.67 0.20 0.13
|
|
331
|
+
W 0.49 0.44 0.07
|
|
332
|
+
K 0.93 0.02 0.05
|
|
333
|
+
R 0.84 0.05 0.11
|
|
334
|
+
H 0.66 0.19 0.15
|
|
335
|
+
"""
|
|
336
|
+
|
|
337
|
+
# Per-residue solvent accessibility value keyed by one-letter code. Each
# number equals the first numeric column of the "Figure 1" table quoted in
# the string literal preceding this definition.
solvent_exposed_area = {
    "S": 0.70,
    "T": 0.71,
    "A": 0.48,
    "G": 0.51,
    "P": 0.78,
    "C": 0.32,
    "D": 0.81,
    "E": 0.93,
    "Q": 0.81,
    "N": 0.82,
    "L": 0.41,
    "I": 0.39,
    "V": 0.40,
    "M": 0.44,
    "F": 0.42,
    "Y": 0.67,
    "W": 0.49,
    "K": 0.93,
    "R": 0.84,
    "H": 0.66,
}
|