weirdo 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__init__.py +25 -0
  2. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/__init__.cpython-38.pyc +0 -0
  3. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/amino_acid.cpython-38.pyc +0 -0
  4. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/amino_acid_alphabet.cpython-38.pyc +0 -0
  5. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/amino_acid_properties.cpython-38.pyc +0 -0
  6. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/blosum.cpython-38.pyc +0 -0
  7. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/chou_fasman.cpython-38.pyc +0 -0
  8. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/common.cpython-38.pyc +0 -0
  9. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/distances.cpython-38.pyc +0 -0
  10. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/peptide_vectorizer.cpython-38.pyc +0 -0
  11. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/pmbec.cpython-38.pyc +0 -0
  12. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/reduced_alphabet.cpython-38.pyc +0 -0
  13. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/residue_contact_energies.cpython-38.pyc +0 -0
  14. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/static_data.cpython-38.pyc +0 -0
  15. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/amino_acid.py +33 -0
  16. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/amino_acid_alphabet.py +158 -0
  17. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/amino_acid_properties.py +358 -0
  18. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/blosum.py +74 -0
  19. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/chou_fasman.py +74 -0
  20. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/common.py +22 -0
  21. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/distances.py +16 -0
  22. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/matrices/__init__.py +0 -0
  23. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/matrices/__pycache__/__init__.cpython-38.pyc +0 -0
  24. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/peptide_vectorizer.py +80 -0
  25. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/pmbec.py +87 -0
  26. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/reduced_alphabet.py +57 -0
  27. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/residue_contact_energies.py +74 -0
  28. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/static_data.py +17 -0
  29. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/PKG-INFO +66 -0
  30. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/SOURCES.txt +27 -0
  31. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/dependency_links.txt +1 -0
  32. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/requires.txt +3 -0
  33. Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/top_level.txt +1 -0
@@ -0,0 +1,25 @@
1
+ from .amino_acid_alphabet import (
2
+ AminoAcid,
3
+ canonical_amino_acids,
4
+ canonical_amino_acid_letters,
5
+ extended_amino_acids,
6
+ extended_amino_acid_letters,
7
+ amino_acid_letter_indices,
8
+ amino_acid_name_indices,
9
+ )
10
+ from .peptide_vectorizer import PeptideVectorizer
11
+ from .distances import hamming
12
+
13
+ __version__ = "1.0.0"
14
+
15
+ __all__ = [
16
+ "AminoAcid",
17
+ "canonical_amino_acids",
18
+ "canonical_amino_acid_letters",
19
+ "extended_amino_acids",
20
+ "extended_amino_acid_letters",
21
+ "amino_acid_letter_indices",
22
+ "amino_acid_name_indices",
23
+ "PeptideVectorizer",
24
+ "hamming",
25
+ ]
@@ -0,0 +1,33 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
class AminoAcid(object):
    """
    A single amino acid (or amino acid token) identified by its full name,
    short name and one-letter code.

    Parameters
    ----------
    full_name : str
        Full name, e.g. "Alanine"

    short_name : str
        Abbreviated name, e.g. "Ala"

    letter : str
        Letter code, e.g. "A"

    contains : collection of str, optional
        Letters represented by this token; when omitted or empty it
        defaults to a list holding only `letter`.
    """
    def __init__(
            self, full_name, short_name, letter, contains=None):
        self.letter = letter
        self.full_name = full_name
        self.short_name = short_name
        if not contains:
            contains = [letter]
        self.contains = contains

    def __str__(self):
        # The argument order must follow the order of the placeholders:
        # full_name, short_name, letter, contains.
        return (
            ("AminoAcid(full_name='%s', short_name='%s', letter='%s', "
             "contains=%s)") % (
                self.full_name, self.short_name, self.letter, self.contains))

    def __repr__(self):
        return str(self)

    def __eq__(self, other):
        # Only exact AminoAcid instances compare equal, and only by letter.
        return other.__class__ is AminoAcid and self.letter == other.letter

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable in Python 3;
        # hash on the same field that equality compares.
        return hash(self.letter)
@@ -0,0 +1,158 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+ """
15
+ Quantify amino acids by their physical/chemical properties
16
+ """
17
+
18
+ import numpy as np
19
+
20
+ from .amino_acid import AminoAcid
21
+
22
+ canonical_amino_acids = [
23
+ AminoAcid("Alanine", "Ala", "A"),
24
+ AminoAcid("Arginine", "Arg", "R"),
25
+ AminoAcid("Asparagine","Asn", "N"),
26
+ AminoAcid("Aspartic Acid", "Asp", "D"),
27
+ AminoAcid("Cysteine", "Cys", "C"),
28
+ AminoAcid("Glutamic Acid", "Glu", "E"),
29
+ AminoAcid("Glutamine", "Gln", "Q"),
30
+ AminoAcid("Glycine", "Gly", "G"),
31
+ AminoAcid("Histidine", "His", "H"),
32
+ AminoAcid("Isoleucine", "Ile", "I"),
33
+ AminoAcid("Leucine", "Leu", "L"),
34
+ AminoAcid("Lysine", "Lys", "K"),
35
+ AminoAcid("Methionine", "Met", "M"),
36
+ AminoAcid("Phenylalanine", "Phe", "F"),
37
+ AminoAcid("Proline", "Pro", "P"),
38
+ AminoAcid("Serine", "Ser", "S"),
39
+ AminoAcid("Threonine", "Thr", "T"),
40
+ AminoAcid("Tryptophan", "Trp", "W"),
41
+ AminoAcid("Tyrosine", "Tyr", "Y"),
42
+ AminoAcid("Valine", "Val", "V")
43
+ ]
44
+
45
+ canonical_amino_acid_letters = [aa.letter for aa in canonical_amino_acids]
46
+
47
+ ###
48
+ # Post-translation modifications commonly detected by mass-spec
49
+ ###
50
+
51
+ # TODO: figure out three letter codes for modified AAs
52
+
53
+ modified_amino_acids = [
54
+ AminoAcid("Phospho-Serine", "Sep", "s"),
55
+ AminoAcid("Phospho-Threonine", "???", "t"),
56
+ AminoAcid("Phospho-Tyrosine", "???", "y"),
57
+ AminoAcid("Cystine", "???", "c"),
58
+ AminoAcid("Methionine sulfoxide", "???", "m"),
59
+ AminoAcid("Pyroglutamate", "???", "q"),
60
+ AminoAcid("Pyroglutamic acid", "???", "n"),
61
+ ]
62
+
63
+ ###
64
+ # Amino acid tokens which represent multiple canonical amino acids
65
+ ###
66
+ wildcard_amino_acids = [
67
+ AminoAcid("Unknown", "Xaa", "X", contains=set(canonical_amino_acid_letters)),
68
+ AminoAcid("Asparagine-or-Aspartic-Acid", "Asx", "B", contains={"D", "N"}),
69
+ AminoAcid("Glutamine-or-Glutamic-Acid", "Glx", "Z", contains={"E", "Q"}),
70
+ AminoAcid("Leucine-or-Isoleucine", "Xle", "J", contains={"I", "L"})
71
+ ]
72
+
73
+ ###
74
+ # Canonical amino acids + wildcard tokens
75
+ ###
76
+
77
+ canonical_amino_acids_with_unknown = canonical_amino_acids + wildcard_amino_acids
78
+
79
+
80
+ ###
81
+ # Rare amino acids which aren't considered part of the core 20 "canonical"
82
+ ###
83
+
84
+ rare_amino_acids = [
85
+ AminoAcid("Selenocysteine", "Sec", "U"),
86
+ AminoAcid("Pyrrolysine", "Pyl", "O"),
87
+ ]
88
+
89
+ ###
90
+ # Extended amino acids + wildcard tokens
91
+ ###
92
+
93
+ extended_amino_acids = canonical_amino_acids + rare_amino_acids + wildcard_amino_acids
94
+ extended_amino_acid_letters = [
95
+ aa.letter for aa in extended_amino_acids
96
+ ]
97
+ extended_amino_acids_with_unknown_names = [
98
+ aa.full_name for aa in extended_amino_acids
99
+ ]
100
+
101
+
102
+ amino_acid_letter_indices = {
103
+ c: i for (i, c) in
104
+ enumerate(extended_amino_acid_letters)
105
+ }
106
+
107
+
108
# Every ordered two-letter combination of the extended alphabet. The pairs
# are built from the one-letter codes (not from the AminoAcid objects, whose
# string form is a long repr), with the first letter varying fastest.
amino_acid_letter_pairs = [
    "%s%s" % (x, y)
    for y in extended_amino_acid_letters
    for x in extended_amino_acid_letters
]
113
+
114
+
115
+ amino_acid_name_indices = {
116
+ aa_name: i for (i, aa_name)
117
+ in enumerate(extended_amino_acids_with_unknown_names)
118
+ }
119
+
120
+ amino_acid_pair_positions = {
121
+ pair: i for (i, pair) in enumerate(amino_acid_letter_pairs)
122
+ }
123
+
124
def index_to_full_name(idx):
    """
    Look up the full name of the amino acid stored at position `idx`
    of `extended_amino_acids`
    """
    amino_acid = extended_amino_acids[idx]
    return amino_acid.full_name
126
+
127
def index_to_short_name(idx):
    """
    Look up the short name of the amino acid stored at position `idx`
    of `extended_amino_acids`
    """
    amino_acid = extended_amino_acids[idx]
    return amino_acid.short_name
129
+
130
def index_to_letter(idx):
    """
    Convert from a position index in `extended_amino_acids` to that
    amino acid's letter code (the inverse of `letter_to_index`)
    """
    # Return the letter attribute, not the AminoAcid object itself.
    return extended_amino_acids[idx].letter
132
+
133
def letter_to_index(x):
    """
    Convert from an amino acid's letter code to its position index

    Fails an assertion when the letter is not a key of
    `amino_acid_letter_indices`.
    """
    is_known = x in amino_acid_letter_indices
    assert is_known, "Unknown amino acid: %s" % x
    return amino_acid_letter_indices[x]
139
+
140
def peptide_to_indices(xs):
    """
    Convert each letter of a peptide into its position index, raising
    KeyError for letters missing from `amino_acid_letter_indices`
    """
    indices = []
    for letter in xs:
        indices.append(amino_acid_letter_indices[letter])
    return indices
142
+
143
def letter_to_short_name(x):
    """
    Convert from an amino acid's letter code to its short name
    """
    idx = letter_to_index(x)
    return index_to_short_name(idx)
145
+
146
def peptide_to_short_amino_acid_names(xs):
    """
    Convert each letter of a peptide into the short name of the
    corresponding amino acid, raising KeyError for letters missing from
    `amino_acid_letter_indices`
    """
    # Map letter -> index -> AminoAcid so that names (not indices) come back.
    return [
        extended_amino_acids[amino_acid_letter_indices[x]].short_name
        for x in xs
    ]
148
+
149
def dict_to_amino_acid_matrix(d, alphabet=canonical_amino_acids):
    """
    Convert a nested dictionary of pairwise coefficients into a square
    float32 matrix whose rows and columns follow the order of `alphabet`.

    Parameters
    ----------
    d : dict
        Maps a row letter to a dictionary from column letter to value.
        Keys which are not letters of `alphabet` are ignored.

    alphabet : list of AminoAcid
        Amino acids whose `letter` attributes select and order the entries

    Returns a matrix with one row and one column per entry of `alphabet`.
    """
    letters = [aa.letter for aa in alphabet]
    # Size the matrix by the alphabet being indexed, not by the dictionary:
    # tables such as BLOSUM carry extra symbols which previously produced
    # trailing rows/columns of zeros.
    n_aa = len(letters)
    result_matrix = np.zeros((n_aa, n_aa), dtype="float32")
    for i, row_letter in enumerate(letters):
        d_row = d[row_letter]
        for j, col_letter in enumerate(letters):
            result_matrix[i, j] = d_row[col_letter]
    return result_matrix
158
+
@@ -0,0 +1,358 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """
14
+ Quantify amino acids by their physical/chemical properties
15
+ """
16
+
17
+ from .amino_acid_alphabet import letter_to_index
18
+
19
+
20
+
21
def aa_dict_to_positional_list(aa_property_dict):
    """
    Arrange the values of a letter -> value dictionary into a list of 20
    entries ordered by `letter_to_index`.

    Fails an assertion when a letter maps outside of the first 20
    positions or when any of the 20 positions is left unfilled.
    """
    value_list = [None for _ in range(20)]
    for letter in aa_property_dict:
        idx = letter_to_index(letter)
        assert 0 <= idx < 20
        value_list[idx] = aa_property_dict[letter]
    has_gap = any(elt is None for elt in value_list)
    assert not has_gap, \
        "Missing amino acids in:\n%s" % aa_property_dict.keys()
    return value_list
31
+
32
def parse_property_table(table_string):
    """
    Parse a table whose non-empty lines start with a numeric value followed
    by an amino acid letter (any further fields are ignored).

    Returns a dictionary from letter to float value. Fails an assertion
    when a line has fewer than two fields or when a letter is repeated.
    """
    value_dict = {}
    for raw_line in table_string.splitlines():
        line = raw_line.strip()
        if line:
            # fields are separated by runs of spaces
            fields = [tok for tok in line.split(" ") if tok.strip()]
            assert len(fields) >= 2
            value, letter = fields[0], fields[1]
            assert letter not in value_dict, "Repeated amino acid " + line
            value_dict[letter] = float(value)
    return value_dict
45
+
46
+
47
+ """
48
+ Amino acids property tables copied from CRASP website
49
+ """
50
+
51
+ hydropathy = parse_property_table("""
52
+ 1.80000 A ALA
53
+ -4.5000 R ARG
54
+ -3.5000 N ASN
55
+ -3.5000 D ASP
56
+ 2.50000 C CYS
57
+ -3.5000 Q GLN
58
+ -3.5000 E GLU
59
+ -0.4000 G GLY
60
+ -3.2000 H HIS
61
+ 4.50000 I ILE
62
+ 3.80000 L LEU
63
+ -3.9000 K LYS
64
+ 1.90000 M MET
65
+ 2.80000 F PHE
66
+ -1.6000 P PRO
67
+ -0.8000 S SER
68
+ -0.7000 T THR
69
+ -0.9000 W TRP
70
+ -1.3000 Y TYR
71
+ 4.20000 V VAL
72
+ """)
73
+
74
+ volume = parse_property_table("""
75
+ 91.5000 A ALA
76
+ 202.0000 R ARG
77
+ 135.2000 N ASN
78
+ 124.5000 D ASP
79
+ 118.0000 C CYS
80
+ 161.1000 Q GLN
81
+ 155.1000 E GLU
82
+ 66.40000 G GLY
83
+ 167.3000 H HIS
84
+ 168.8000 I ILE
85
+ 167.9000 L LEU
86
+ 171.3000 K LYS
87
+ 170.8000 M MET
88
+ 203.4000 F PHE
89
+ 129.3000 P PRO
90
+ 99.10000 S SER
91
+ 122.1000 T THR
92
+ 237.6000 W TRP
93
+ 203.6000 Y TYR
94
+ 141.7000 V VAL
95
+ """)
96
+
97
+ polarity = parse_property_table("""
98
+ 0.0000 A ALA
99
+ 52.000 R ARG
100
+ 3.3800 N ASN
101
+ 40.700 D ASP
102
+ 1.4800 C CYS
103
+ 3.5300 Q GLN
104
+ 49.910 E GLU
105
+ 0.0000 G GLY
106
+ 51.600 H HIS
107
+ 0.1500 I ILE
108
+ 0.4500 L LEU
109
+ 49.500 K LYS
110
+ 1.4300 M MET
111
+ 0.3500 F PHE
112
+ 1.5800 P PRO
113
+ 1.6700 S SER
114
+ 1.6600 T THR
115
+ 2.1000 W TRP
116
+ 1.6100 Y TYR
117
+ 0.1300 V VAL
118
+ """)
119
+
120
+ pK_side_chain = parse_property_table("""
121
+ 0.0000 A ALA
122
+ 12.480 R ARG
123
+ 0.0000 N ASN
124
+ 3.6500 D ASP
125
+ 8.1800 C CYS
126
+ 0.0000 Q GLN
127
+ 4.2500 E GLU
128
+ 0.0000 G GLY
129
+ 6.0000 H HIS
130
+ 0.0000 I ILE
131
+ 0.0000 L LEU
132
+ 10.530 K LYS
133
+ 0.0000 M MET
134
+ 0.0000 F PHE
135
+ 0.0000 P PRO
136
+ 0.0000 S SER
137
+ 0.0000 T THR
138
+ 0.0000 W TRP
139
+ 10.700 Y TYR
140
+ 0.0000 V VAL
141
+ """)
142
+
143
+ prct_exposed_residues = parse_property_table("""
144
+ 15.0000 A ALA
145
+ 67.0000 R ARG
146
+ 49.0000 N ASN
147
+ 50.0000 D ASP
148
+ 5.00000 C CYS
149
+ 56.0000 Q GLN
150
+ 55.0000 E GLU
151
+ 10.0000 G GLY
152
+ 34.0000 H HIS
153
+ 13.0000 I ILE
154
+ 16.0000 L LEU
155
+ 85.0000 K LYS
156
+ 20.0000 M MET
157
+ 10.0000 F PHE
158
+ 45.0000 P PRO
159
+ 32.0000 S SER
160
+ 32.0000 T THR
161
+ 17.0000 W TRP
162
+ 41.0000 Y TYR
163
+ 14.0000 V VAL
164
+ """)
165
+
166
+ hydrophilicity = parse_property_table("""
167
+ -0.5000 A ALA
168
+ 3.00000 R ARG
169
+ 0.20000 N ASN
170
+ 3.00000 D ASP
171
+ -1.0000 C CYS
172
+ 0.20000 Q GLN
173
+ 3.00000 E GLU
174
+ 0.00000 G GLY
175
+ -0.5000 H HIS
176
+ -1.8000 I ILE
177
+ -1.8000 L LEU
178
+ 3.00000 K LYS
179
+ -1.3000 M MET
180
+ -2.5000 F PHE
181
+ 0.00000 P PRO
182
+ 0.30000 S SER
183
+ -0.4000 T THR
184
+ -3.4000 W TRP
185
+ -2.3000 Y TYR
186
+ -1.5000 V VAL
187
+ """)
188
+
189
+ accessible_surface_area = parse_property_table("""
190
+ 27.8000 A ALA
191
+ 94.7000 R ARG
192
+ 60.1000 N ASN
193
+ 60.6000 D ASP
194
+ 15.5000 C CYS
195
+ 68.7000 Q GLN
196
+ 68.2000 E GLU
197
+ 24.5000 G GLY
198
+ 50.7000 H HIS
199
+ 22.8000 I ILE
200
+ 27.6000 L LEU
201
+ 103.000 K LYS
202
+ 33.5000 M MET
203
+ 25.5000 F PHE
204
+ 51.5000 P PRO
205
+ 42.0000 S SER
206
+ 45.0000 T THR
207
+ 34.7000 W TRP
208
+ 55.2000 Y TYR
209
+ 23.7000 V VAL
210
+ """)
211
+
212
+ local_flexibility = parse_property_table("""
213
+ 705.42000 A ALA
214
+ 1484.2800 R ARG
215
+ 513.46010 N ASN
216
+ 34.960000 D ASP
217
+ 2412.5601 C CYS
218
+ 1087.8300 Q GLN
219
+ 1158.6600 E GLU
220
+ 33.180000 G GLY
221
+ 1637.1300 H HIS
222
+ 5979.3701 I ILE
223
+ 4985.7300 L LEU
224
+ 699.69000 K LYS
225
+ 4491.6602 M MET
226
+ 5203.8599 F PHE
227
+ 431.96000 P PRO
228
+ 174.76000 S SER
229
+ 601.88000 T THR
230
+ 6374.0698 W TRP
231
+ 4291.1001 Y TYR
232
+ 4474.4199 V VAL
233
+ """)
234
+
235
+ accessible_surface_area_folded = parse_property_table("""
236
+ 31.5000 A ALA
237
+ 93.8000 R ARG
238
+ 62.2000 N ASN
239
+ 60.9000 D ASP
240
+ 13.9000 C CYS
241
+ 74.0000 Q GLN
242
+ 72.3000 E GLU
243
+ 25.2000 G GLY
244
+ 46.7000 H HIS
245
+ 23.0000 I ILE
246
+ 29.0000 L LEU
247
+ 110.300 K LYS
248
+ 30.5000 M MET
249
+ 28.7000 F PHE
250
+ 53.7000 P PRO
251
+ 44.2000 S SER
252
+ 46.0000 T THR
253
+ 41.7000 W TRP
254
+ 59.1000 Y TYR
255
+ 23.5000 V VAL
256
+ """)
257
+
258
+ refractivity = parse_property_table("""
259
+ 4.34000 A ALA
260
+ 26.6600 R ARG
261
+ 13.2800 N ASN
262
+ 12.0000 D ASP
263
+ 35.7700 C CYS
264
+ 17.5600 Q GLN
265
+ 17.2600 E GLU
266
+ 0.00000 G GLY
267
+ 21.8100 H HIS
268
+ 19.0600 I ILE
269
+ 18.7800 L LEU
270
+ 21.2900 K LYS
271
+ 21.6400 M MET
272
+ 29.4000 F PHE
273
+ 10.9300 P PRO
274
+ 6.35000 S SER
275
+ 11.0100 T THR
276
+ 42.5300 W TRP
277
+ 31.5300 Y TYR
278
+ 13.9200 V VAL
279
+ """)
280
+
281
+
282
# Residue masses keyed by amino acid letter.
# NOTE(review): values look like average residue masses in Daltons; confirm
# against the original source table. Alanine (was 70.079) and proline
# (was 97.177) were transcription typos and are corrected to the values
# implied by their residue formulas (C3H5NO and C5H7NO).
mass = parse_property_table("""
71.079 A ALA
156.188 R ARG
114.104 N ASN
115.089 D ASP
103.144 C CYS
128.131 Q GLN
129.116 E GLU
57.052 G GLY
137.142 H HIS
113.160 I ILE
113.160 L LEU
128.174 K LYS
131.198 M MET
147.177 F PHE
97.117 P PRO
87.078 S SER
101.105 T THR
186.213 W TRP
163.170 Y TYR
99.133 V VAL
""")
304
+
305
+ ###
306
+ # Values copied from:
307
+ # "Solvent accessibility of AA in known protein structures"
308
+ # http://prowl.rockefeller.edu/aainfo/access.htm
309
+ ###
310
+ """
311
+ Solvent accessibility of AA in known protein structures
312
+
313
+ Figure 1.
314
+
315
+ S 0.70 0.20 0.10
316
+ T 0.71 0.16 0.13
317
+ A 0.48 0.35 0.17
318
+ G 0.51 0.36 0.13
319
+ P 0.78 0.13 0.09
320
+ C 0.32 0.54 0.14
321
+ D 0.81 0.09 0.10
322
+ E 0.93 0.04 0.03
323
+ Q 0.81 0.10 0.09
324
+ N 0.82 0.10 0.08
325
+ L 0.41 0.49 0.10
326
+ I 0.39 0.47 0.14
327
+ V 0.40 0.50 0.10
328
+ M 0.44 0.20 0.36
329
+ F 0.42 0.42 0.16
330
+ Y 0.67 0.20 0.13
331
+ W 0.49 0.44 0.07
332
+ K 0.93 0.02 0.05
333
+ R 0.84 0.05 0.11
334
+ H 0.66 0.19 0.15
335
+ """
336
+
337
+ solvent_exposed_area = dict(
338
+ S=0.70,
339
+ T=0.71,
340
+ A=0.48,
341
+ G=0.51,
342
+ P=0.78,
343
+ C=0.32,
344
+ D=0.81,
345
+ E=0.93,
346
+ Q=0.81,
347
+ N=0.82,
348
+ L=0.41,
349
+ I=0.39,
350
+ V=0.40,
351
+ M=0.44,
352
+ F=0.42,
353
+ Y=0.67,
354
+ W=0.49,
355
+ K=0.93,
356
+ R=0.84,
357
+ H=0.66,
358
+ )
@@ -0,0 +1,74 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+ from os.path import join
15
+
16
+ from .static_data import MATRIX_DIR
17
+
18
+ from .amino_acid_alphabet import dict_to_amino_acid_matrix
19
+
20
def parse_blosum_table(table, coeff_type=int, key_type='row'):
    """
    Parse a table of pairwise amino acid coefficient (e.g. BLOSUM50)

    Parameters
    ----------
    table : str
        Text of the table: a header line of column labels followed by one
        line per row, each starting with the row label. Lines starting
        with '#' and blank (or whitespace-only) lines are skipped.

    coeff_type : callable
        Converts each coefficient string into a value (default int)

    key_type : str
        'row' : every key is a row label mapping to a dictionary for that row
        'pair' : every key is a (row label, column label) tuple
        'pair_string' : every key is the row and column labels concatenated

    Raises ValueError if the table is empty or its header has fewer than
    20 labels; fails an assertion for short rows or an unknown key type.
    """
    lines = []
    for line in table.split("\n"):
        # drop comments
        if line.startswith("#"):
            continue
        # drop CR endline characters
        line = line.replace("\r", "")
        # skip empty lines, including ones holding only whitespace, which
        # would otherwise be parsed as a malformed row
        if not line.strip():
            continue
        lines.append(line)

    if not lines:
        raise ValueError("Expected a table of coefficients but got no lines")

    labels = lines[0].split()

    if len(labels) < 20:
        raise ValueError(
            "Expected 20+ amino acids but first line '%s' has %d fields" % (
                lines[0],
                len(labels)))
    coeffs = {}
    for line in lines[1:]:
        fields = line.split()
        assert len(fields) >= 21, \
            "Expected AA and 20+ coefficients but '%s' has %d fields" % (
                line, len(fields))
        x = fields[0]
        for i, coeff_str in enumerate(fields[1:]):
            y = labels[i]
            coeff = coeff_type(coeff_str)
            if key_type == 'pair':
                coeffs[(x, y)] = coeff
            elif key_type == 'pair_string':
                coeffs[x + y] = coeff
            else:
                assert key_type == 'row', "Unknown key type: %s" % key_type
                coeffs.setdefault(x, {})[y] = coeff
    return coeffs
61
+
62
+
63
+ with open(join(MATRIX_DIR, 'BLOSUM30'), 'r') as f:
64
+ blosum30_dict = parse_blosum_table(f.read())
65
+ blosum30_matrix = dict_to_amino_acid_matrix(blosum30_dict)
66
+
67
+ with open(join(MATRIX_DIR, 'BLOSUM50'), 'r') as f:
68
+ blosum50_dict = parse_blosum_table(f.read())
69
+ blosum50_matrix = dict_to_amino_acid_matrix(blosum50_dict)
70
+
71
+ with open(join(MATRIX_DIR, 'BLOSUM62'), 'r') as f:
72
+ blosum62_dict = parse_blosum_table(f.read())
73
+ blosum62_matrix = dict_to_amino_acid_matrix(blosum62_dict)
74
+
@@ -0,0 +1,74 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ from __future__ import print_function, division, absolute_import
14
+
15
+ from .amino_acid_alphabet import amino_acid_name_indices
16
+
17
+ # Chou-Fasman table of structural properties from
18
+ # http://prowl.rockefeller.edu/aainfo/chou.htm
19
+ chou_fasman_table = """
20
+ Alanine 142 83 66 0.06 0.076 0.035 0.058
21
+ Arginine 98 93 95 0.070 0.106 0.099 0.085
22
+ Aspartic Acid 101 54 146 0.147 0.110 0.179 0.081
23
+ Asparagine 67 89 156 0.161 0.083 0.191 0.091
24
+ Cysteine 70 119 119 0.149 0.050 0.117 0.128
25
+ Glutamic Acid 151 037 74 0.056 0.060 0.077 0.064
26
+ Glutamine 111 110 98 0.074 0.098 0.037 0.098
27
+ Glycine 57 75 156 0.102 0.085 0.190 0.152
28
+ Histidine 100 87 95 0.140 0.047 0.093 0.054
29
+ Isoleucine 108 160 47 0.043 0.034 0.013 0.056
30
+ Leucine 121 130 59 0.061 0.025 0.036 0.070
31
+ Lysine 114 74 101 0.055 0.115 0.072 0.095
32
+ Methionine 145 105 60 0.068 0.082 0.014 0.055
33
+ Phenylalanine 113 138 60 0.059 0.041 0.065 0.065
34
+ Proline 57 55 152 0.102 0.301 0.034 0.068
35
+ Serine 77 75 143 0.120 0.139 0.125 0.106
36
+ Threonine 83 119 96 0.086 0.108 0.065 0.079
37
+ Tryptophan 108 137 96 0.077 0.013 0.064 0.167
38
+ Tyrosine 69 147 114 0.082 0.065 0.114 0.125
39
+ Valine 106 170 50 0.062 0.048 0.028 0.053
40
+ """
41
+
42
+
43
def parse_chou_fasman(table):
    """
    Parse a Chou-Fasman table whose non-empty lines hold an amino acid
    full name followed by numeric columns, of which the first three are
    read as integer alpha helix, beta sheet and turn scores.

    Returns three dictionaries (alpha helix, beta sheet, turn) keyed by
    the value that `amino_acid_name_indices` stores for each name.
    """
    alpha_scores, beta_scores, turn_scores = {}, {}, {}

    for raw_line in table.split("\n"):
        tokens = [tok for tok in raw_line.split(" ") if tok.strip()]
        if not tokens:
            continue

        # Two-word names ("Aspartic Acid", "Glutamic Acid") occupy two
        # tokens; shift so the scores start at position 1 either way.
        if tokens[1] == 'Acid':
            name = tokens[0] + " " + tokens[1]
            tokens = tokens[1:]
        else:
            name = tokens[0]

        assert name in amino_acid_name_indices, "Invalid amino acid name %s" % name
        # NOTE(review): the lookup yields a position index rather than a
        # letter, so the returned dictionaries are keyed by index; confirm
        # this is what callers expect.
        key = amino_acid_name_indices[name]
        alpha, beta, turn = int(tokens[1]), int(tokens[2]), int(tokens[3])
        alpha_scores[key] = alpha
        beta_scores[key] = beta
        turn_scores[key] = turn

    for scores in (alpha_scores, beta_scores, turn_scores):
        assert len(scores) == 20
    return alpha_scores, beta_scores, turn_scores
72
+
73
+ alpha_helix_score, beta_sheet_score, turn_score = \
74
+ parse_chou_fasman(chou_fasman_table)
@@ -0,0 +1,22 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ import numpy as np
14
+
15
def transform_peptide(peptide, property_dict):
    """
    Replace every amino acid of a peptide with its entry in
    `property_dict` and return the values as a NumPy array
    """
    values = [property_dict[residue] for residue in peptide]
    return np.array(values)
17
+
18
def transform_peptides(peptides, property_dict):
    """
    Replace every amino acid of every peptide with its entry in
    `property_dict` and return the rows of values as a NumPy array
    """
    rows = []
    for peptide in peptides:
        rows.append([property_dict[residue] for residue in peptide])
    return np.array(rows)
22
+
@@ -0,0 +1,16 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
def hamming(p1, p2):
    """
    Count the positions at which two sequences differ, comparing only
    the first min(len(p1), len(p2)) positions
    """
    overlap = min(len(p1), len(p2))
    mismatches = 0
    for pos in range(overlap):
        mismatches += p1[pos] != p2[pos]
    return mismatches
16
+
@@ -0,0 +1,80 @@
1
+ # Copyright (c) 2014-2016. Mount Sinai School of Medicine
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ from sklearn.feature_extraction.text import CountVectorizer
17
+ from sklearn.preprocessing import normalize
18
+
19
def make_count_vectorizer(reduced_alphabet, max_ngram):
    """
    Create a character-level CountVectorizer counting n-grams of length 1
    up to `max_ngram`.

    Parameters
    ----------
    reduced_alphabet : dict or None
        If given, every character of an input string is replaced by
        `reduced_alphabet[character]` before counting

    max_ngram : int
        Longest n-gram length to count
    """
    if reduced_alphabet is None:
        preprocessor = None
    else:
        def preprocessor(s):
            return "".join([reduced_alphabet[si] for si in s])

    return CountVectorizer(
        analyzer='char',
        ngram_range=(1, max_ngram),
        # np.float was only an alias of the builtin float and has been
        # removed from NumPy; np.float64 is the dtype it resolved to.
        dtype=np.float64,
        preprocessor=preprocessor)
30
+
31
class PeptideVectorizer(object):
    """
    Make n-gram frequency vectors from peptide sequences

    Parameters
    ----------
    max_ngram : int
        Longest n-gram length to count

    normalize_row : bool
        Rescale every row of the result to unit L1 norm

    reduced_alphabet : dict or None
        Optional mapping applied to every character before counting

    training_already_reduced : bool
        If True, the strings given to `fit`/`fit_transform` are counted
        without applying `reduced_alphabet`, and the resulting vocabulary
        is reused by the vectorizer that `transform` applies
    """
    def __init__(
            self,
            max_ngram=1,
            normalize_row=True,
            reduced_alphabet=None,
            training_already_reduced=False):
        self.reduced_alphabet = reduced_alphabet
        self.max_ngram = max_ngram
        self.normalize_row = normalize_row
        self.training_already_reduced = training_already_reduced
        self.count_vectorizer = None

    def __getstate__(self):
        return {
            'reduced_alphabet': self.reduced_alphabet,
            'count_vectorizer': self.count_vectorizer,
            'training_already_reduced': self.training_already_reduced,
            'normalize_row': self.normalize_row,
            'max_ngram': self.max_ngram,
        }

    def fit_transform(self, amino_acid_strings):
        """
        Learn the n-gram vocabulary from the given strings and return
        their (optionally row-normalized) count vectors as a dense array
        """
        self.count_vectorizer = \
            make_count_vectorizer(self.reduced_alphabet, self.max_ngram)

        # toarray() gives a plain ndarray; todense() gives np.matrix, which
        # recent scikit-learn versions refuse as input to normalize().
        if self.training_already_reduced:
            c = make_count_vectorizer(None, self.max_ngram)
            X = c.fit_transform(amino_acid_strings).toarray()
            self.count_vectorizer.vocabulary_ = c.vocabulary_
        else:
            c = self.count_vectorizer
            X = c.fit_transform(amino_acid_strings).toarray()

        if self.normalize_row:
            X = normalize(X, norm='l1')
        return X

    def fit(self, amino_acid_strings):
        """
        Learn the n-gram vocabulary from the given strings; returns self
        so that calls can be chained
        """
        self.fit_transform(amino_acid_strings)
        return self

    def transform(self, amino_acid_strings):
        """
        Return the (optionally row-normalized) count vectors of the given
        strings using the vocabulary learned by `fit`/`fit_transform`
        """
        assert self.count_vectorizer, "Must call 'fit' before 'transform'"
        X = self.count_vectorizer.transform(amino_acid_strings).toarray()
        if self.normalize_row:
            X = normalize(X, norm='l1')
        return X
@@ -0,0 +1,87 @@
1
+ # Copyright (c) 2014-2016. Mount Sinai School of Medicine
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from os.path import join
16
+
17
+ from .static_data import MATRIX_DIR
18
+
19
+ from .amino_acid_alphabet import dict_to_amino_acid_matrix
20
+
21
def read_pmbec_coefficients(
        key_type='row',
        verbose=True,
        filename=join(MATRIX_DIR, 'pmbec.mat')):
    """
    Read the PMBEC coefficient matrix from a text file into a dictionary.

    Parameters
    ------------

    filename : str
        Location of PMBEC coefficient matrix

    key_type : str
        'row' : every key is a single amino acid,
            which maps to a dictionary for that row
        'pair' : every key is a tuple of amino acids
        'pair_string' : every key is a string of two amino acid characters

    verbose : bool
        Print the header line and the parsed residue letters
    """
    d = {}
    # choose how a (row, column, value) triple gets stored in the result
    if key_type == 'row':
        def add_pair(row_letter, col_letter, value):
            d.setdefault(row_letter, {})[col_letter] = value
    elif key_type == 'pair':
        def add_pair(row_letter, col_letter, value):
            d[(row_letter, col_letter)] = value
    else:
        assert key_type == 'pair_string', \
            "Invalid dictionary key type: %s" % key_type

        def add_pair(row_letter, col_letter, value):
            d["%s%s" % (row_letter, col_letter)] = value

    with open(filename, 'r') as f:
        content = f.read()

    lines = [ln for ln in content.split('\n') if ln]
    header = lines[0]
    if verbose:
        print(header)
    # column labels are the single-character fields of the header
    residues = [tok for tok in header.split() if len(tok) == 1]
    assert len(residues) == 20
    if verbose:
        print(residues)
    for line in lines[1:]:
        cols = [tok for tok in line.split(' ') if tok and tok != '\t']
        assert len(cols) == 21, "Expected 20 values + letter, got %s" % cols
        row_letter = cols[0]
        for col_letter, col in zip(residues, cols[1:]):
            add_pair(row_letter, col_letter, float(col))
    return d
84
+
85
+ # dictionary of PMBEC coefficient accessed like pmbec_dict["V"]["R"]
86
+ pmbec_dict = read_pmbec_coefficients(key_type="row")
87
+ pmbec_matrix = dict_to_amino_acid_matrix(pmbec_dict)
@@ -0,0 +1,57 @@
1
+ # Copyright (c) 2014-2018. Mount Sinai School of Medicine
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Amino acid groupings from
17
+ 'Reduced amino acid alphabets improve the sensitivity...' by
18
+ Peterson, Kondev, et al.
19
+ http://www.rpgroup.caltech.edu/publications/Peterson2008.pdf
20
+ """
21
+
22
+
23
def dict_from_list(groups):
    """
    Build a reduced-alphabet lookup table from groups of amino acid letters.

    Parameters
    ------------

    groups : iterable of str
        Each string is one group of single-letter amino acid codes.

    Returns a dictionary mapping every letter to the first letter of the
    group that contains it. If a letter appears in several groups, the
    last group containing it wins. Empty groups contribute nothing.
    """
    return {
        letter: group[0]
        for group in groups
        for letter in group
    }
29
+
30
+ gbmr4 = dict_from_list(["ADKERNTSQ", "YFLIVMCWH", "G", "P"])  # 4 groups; each letter maps to its group's first letter
31
+
32
+ sdm12 = dict_from_list([  # 12 groups
33
+ "A", "D", "KER", "N", "TSQ", "YF", "LIVM", "C", "W", "H", "G", "P"
34
+ ])
35
+
36
+ hsdm17 = dict_from_list([  # 17 groups
37
+ "A", "D", "KE", "R", "N", "T", "S", "Q", "Y",
38
+ "F", "LIV", "M", "C", "W", "H", "G", "P"
39
+ ])
40
+
41
+ """
42
+ Other alphabets from
43
+ http://bio.math-inf.uni-greifswald.de/viscose/html/alphabets.html
44
+ """
45
+
46
+ # hydrophilic vs. hydrophobic (2 groups)
47
+ hp2 = dict_from_list(["AGTSNQDEHRKP", "CMFILVWY"])
48
+
49
+ murphy10 = dict_from_list([  # 10 groups
50
+ "LVIM", "C", "A", "G", "ST", "P", "FYW", "EDNQ", "KR", "H"
51
+ ])
52
+
53
+ alex6 = dict_from_list(["C", "G", "P", "FYW", "AVILM", "STNQRHKDE"])  # 6 groups
54
+
55
+ aromatic2 = dict_from_list(["FHWY", "ADKERNTSQLIVMCGP"])  # 2 groups: F/H/W/Y vs. all other residues
56
+
57
+ hp_vs_aromatic = dict_from_list(["H", "CMILV", "FWY", "ADKERNTSQGP"])  # 4 groups
@@ -0,0 +1,74 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ from os.path import join
14
+
15
+ from .amino_acid_alphabet import canonical_amino_acid_letters, dict_to_amino_acid_matrix
16
+ from .static_data import MATRIX_DIR
17
+
18
+
19
def parse_interaction_table(table, amino_acid_order="ARNDCQEGHILKMFPSTWYV"):
    """
    Parse a square table of pairwise amino acid coefficients.

    Parameters
    ------------

    table : str
        Text containing one row of numbers per amino acid. Values may be
        separated by any run of whitespace (spaces or tabs). Blank lines
        and lines starting with '#' are ignored.

    amino_acid_order : str
        Letters labelling the rows (top to bottom) and the columns
        (left to right) of the table.

    Returns a nested dictionary such that d[x][y] is the float found in
    the row of letter x and the column of letter y.

    Raises AssertionError if the number of rows or the number of values
    in a row differs from len(amino_acid_order), and ValueError if an
    entry cannot be parsed as a float.
    """
    expected = len(amino_acid_order)

    rows = []
    for raw_line in table.split("\n"):
        line = raw_line.strip()
        if line and not line.startswith("#"):
            rows.append(line)

    # Raised explicitly (not via `assert`) so the check survives `python -O`
    # while callers still see the same exception type as before.
    if len(rows) != expected:
        raise AssertionError("Malformed amino acid interaction table")

    d = {}
    for x, row in zip(amino_acid_order, rows):
        # split() with no argument tolerates tabs and repeated spaces,
        # so no separate whitespace-collapsing pass is needed.
        coeff_strings = row.split()
        if len(coeff_strings) != expected:
            raise AssertionError(
                "Malformed row in amino acid interaction table")
        d[x] = {
            y: float(coeff_str)
            for y, coeff_str in zip(amino_acid_order, coeff_strings)
        }
    return d
39
+
40
def transpose_interaction_dict(d):
    """
    Return a new nested dictionary with row and column letters swapped.

    For every pair of letters (x, y) drawn from
    canonical_amino_acid_letters, result[x][y] equals d[y][x]. The input
    dictionary is not modified.
    """
    letters = canonical_amino_acid_letters
    return {
        row_letter: {
            col_letter: d[col_letter][row_letter]
            for col_letter in letters
        }
        for row_letter in letters
    }
47
+
48
+
49
# Each table is parsed while its file is open; the matrix form and the
# transposed variants are then derived from the parsed dictionary.

# Strand vs. Coil
with open(join(MATRIX_DIR, 'strand_vs_coil.txt'), 'r') as f:
    strand_vs_coil_dict = parse_interaction_table(f.read())
strand_vs_coil_array = dict_to_amino_acid_matrix(strand_vs_coil_dict)

# Coil vs. Strand (transpose of the table above)
coil_vs_strand_dict = transpose_interaction_dict(strand_vs_coil_dict)
coil_vs_strand_array = dict_to_amino_acid_matrix(coil_vs_strand_dict)

# Helix vs. Strand
with open(join(MATRIX_DIR, 'helix_vs_strand.txt'), 'r') as f:
    helix_vs_strand_dict = parse_interaction_table(f.read())
helix_vs_strand_array = dict_to_amino_acid_matrix(helix_vs_strand_dict)

# Strand vs. Helix (transpose of the table above)
strand_vs_helix_dict = transpose_interaction_dict(helix_vs_strand_dict)
strand_vs_helix_array = dict_to_amino_acid_matrix(strand_vs_helix_dict)

# Helix vs. Coil
with open(join(MATRIX_DIR, 'helix_vs_coil.txt'), 'r') as f:
    helix_vs_coil_dict = parse_interaction_table(f.read())
helix_vs_coil_array = dict_to_amino_acid_matrix(helix_vs_coil_dict)

# Coil vs. Helix (transpose of the table above)
coil_vs_helix_dict = transpose_interaction_dict(helix_vs_coil_dict)
coil_vs_helix_array = dict_to_amino_acid_matrix(coil_vs_helix_dict)
@@ -0,0 +1,17 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+ from os.path import dirname, realpath, join
15
+
16
+ PACKAGE_DIR = dirname(realpath(__file__))  # directory containing this module, with symlinks resolved
17
+ MATRIX_DIR = join(PACKAGE_DIR, 'matrices')  # 'matrices' subdirectory inside PACKAGE_DIR
@@ -0,0 +1,66 @@
1
+ Metadata-Version: 2.1
2
+ Name: weirdo
3
+ Version: 1.0.0
4
+ Summary: Peptide similarity measures, distance functions, and attempts to quantify the 'self' proteome
5
+ Home-page: https://github.com/pirl-unc/weirdo
6
+ Author: Alex Rubinsteyn
7
+ Author-email: alex.rubinsteyn@unc.edu
8
+ License: http://www.apache.org/licenses/LICENSE-2.0.html
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Environment :: Console
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Programming Language :: Python
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+
19
+ <a href="https://travis-ci.org/openvax/weirdo">
20
+ <img src="https://travis-ci.org/openvax/weirdo.svg?branch=master" alt="Build Status" />
21
+ </a>
22
+ <a href="https://coveralls.io/github/openvax/weirdo?branch=master">
23
+ <img src="https://coveralls.io/repos/openvax/weirdo/badge.svg?branch=master&service=github" alt="Coverage Status" />
24
+ </a>
25
+ <a href="https://pypi.python.org/pypi/weirdo/">
26
+ <img src="https://img.shields.io/pypi/v/weirdo.svg?maxAge=1000" alt="PyPI" />
27
+ </a>
28
+
29
+ # weirdo
30
+
31
+ Metrics of immunological foreignness for candidate T-cell epitopes. An extension of the [pepdata](https://github.com/openvax/pepdata) library.
32
+
33
+ **Amino Acid Properties**
34
+
35
+ The `amino_acid` module contains a variety of physical/chemical properties for both single amino acid residues and interactions between pairs of residues.
36
+
37
+ Single-residue feature tables are parsed into `StringTransformer` objects, which can be used like dictionaries or can vectorize a string via their `transform_string` method.
38
+
39
+ Examples of single residue features:
40
+
41
+ - `hydropathy`
42
+ - `volume`
43
+ - `polarity`
44
+ - `pK_side_chain`
45
+ - `prct_exposed_residues`
46
+ - `hydrophilicity`
47
+ - `accessible_surface_area`
48
+ - `refractivity`
49
+ - `local_flexibility`
50
+ - `accessible_surface_area_folded`
51
+ - `alpha_helix_score` (Chou-Fasman)
52
+ - `beta_sheet_score` (Chou-Fasman)
53
+ - `turn_score` (Chou-Fasman)
54
+
55
+ Pairwise interaction tables are parsed into nested dictionaries, so that the interaction between amino acids `x` and `y` can be determined from `d[x][y]`.
56
+
57
+ Pairwise interaction dictionaries:
58
+
59
+ - `strand_vs_coil` (and its transpose `coil_vs_strand`)
60
+ - `helix_vs_strand` (and its transpose `strand_vs_helix`)
61
+ - `helix_vs_coil` (and its transpose `coil_vs_helix`)
62
+ - `blosum30`
63
+ - `blosum50`
64
+ - `blosum62`
65
+
66
+ There is also a function to parse the coefficients of the [PMBEC similarity matrix](http://www.biomedcentral.com/1471-2105/10/394), though this currently lives in the separate `pmbec` module.
@@ -0,0 +1,27 @@
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ test/test_amino_acids.py
5
+ test/test_blosum.py
6
+ test/test_hamming.py
7
+ test/test_ngram.py
8
+ test/test_pmbec.py
9
+ weirdo/__init__.py
10
+ weirdo/amino_acid.py
11
+ weirdo/amino_acid_alphabet.py
12
+ weirdo/amino_acid_properties.py
13
+ weirdo/blosum.py
14
+ weirdo/chou_fasman.py
15
+ weirdo/common.py
16
+ weirdo/distances.py
17
+ weirdo/peptide_vectorizer.py
18
+ weirdo/pmbec.py
19
+ weirdo/reduced_alphabet.py
20
+ weirdo/residue_contact_energies.py
21
+ weirdo/static_data.py
22
+ weirdo.egg-info/PKG-INFO
23
+ weirdo.egg-info/SOURCES.txt
24
+ weirdo.egg-info/dependency_links.txt
25
+ weirdo.egg-info/requires.txt
26
+ weirdo.egg-info/top_level.txt
27
+ weirdo/matrices/__init__.py