weirdo 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weirdo/__init__.py +104 -0
- weirdo/amino_acid.py +33 -0
- weirdo/amino_acid_alphabet.py +158 -0
- weirdo/amino_acid_properties.py +358 -0
- weirdo/api.py +372 -0
- weirdo/blosum.py +74 -0
- weirdo/chou_fasman.py +73 -0
- weirdo/cli.py +597 -0
- weirdo/common.py +22 -0
- weirdo/data_manager.py +475 -0
- weirdo/distances.py +16 -0
- weirdo/matrices/BLOSUM30 +25 -0
- weirdo/matrices/BLOSUM50 +21 -0
- weirdo/matrices/BLOSUM62 +27 -0
- weirdo/matrices/__init__.py +0 -0
- weirdo/matrices/amino_acid_properties.txt +829 -0
- weirdo/matrices/helix_vs_coil.txt +28 -0
- weirdo/matrices/helix_vs_strand.txt +27 -0
- weirdo/matrices/pmbec.mat +21 -0
- weirdo/matrices/strand_vs_coil.txt +27 -0
- weirdo/model_manager.py +346 -0
- weirdo/peptide_vectorizer.py +78 -0
- weirdo/pmbec.py +85 -0
- weirdo/reduced_alphabet.py +61 -0
- weirdo/residue_contact_energies.py +74 -0
- weirdo/scorers/__init__.py +95 -0
- weirdo/scorers/base.py +223 -0
- weirdo/scorers/config.py +299 -0
- weirdo/scorers/mlp.py +1126 -0
- weirdo/scorers/reference.py +265 -0
- weirdo/scorers/registry.py +282 -0
- weirdo/scorers/similarity.py +386 -0
- weirdo/scorers/swissprot.py +510 -0
- weirdo/scorers/trainable.py +219 -0
- weirdo/static_data.py +17 -0
- weirdo-2.1.0.dist-info/METADATA +294 -0
- weirdo-2.1.0.dist-info/RECORD +41 -0
- weirdo-2.1.0.dist-info/WHEEL +5 -0
- weirdo-2.1.0.dist-info/entry_points.txt +2 -0
- weirdo-2.1.0.dist-info/licenses/LICENSE +201 -0
- weirdo-2.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#H ZHAC000103
|
|
2
|
+
#D Environment-dependent residue contact energies (rows = helix, cols = coil)
|
|
3
|
+
#R PMID:10706611
|
|
4
|
+
#A Zhang, C. and Kim, S.H.
|
|
5
|
+
#T Environment-dependent residue contact energies for proteins
|
|
6
|
+
#J Proc. Natl. Acad. Sci. USA 97, 2550-2555 (2000)
|
|
7
|
+
#M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
|
|
8
|
+
0.12 1.17 0.84 0.90 -0.81 1.16 1.44 0.10 0.69 -0.81 -0.78 1.16 -0.22 -0.67 0.61 0.47 0.36 -0.72 -0.37 -0.43
|
|
9
|
+
0.98 1.65 1.16 0.60 -0.21 1.26 1.12 1.09 1.16 -0.04 -0.09 2.37 0.47 -0.04 1.22 1.05 0.92 -0.09 0.06 0.32
|
|
10
|
+
0.69 1.16 1.16 1.22 -0.06 1.23 1.45 0.96 0.88 0.26 0.12 1.48 0.32 0.03 1.14 0.73 0.62 0.62 0.53 0.23
|
|
11
|
+
0.90 0.40 1.06 1.45 0.58 1.88 2.18 1.13 0.69 0.43 0.65 0.95 0.75 0.33 1.41 0.39 0.54 -0.10 0.12 0.77
|
|
12
|
+
-0.83 0.10 0.40 0.12 -2.65 -0.24 0.96 -0.26 -0.26 -1.61 -1.77 0.80 -1.02 -1.47 -0.31 -0.31 -0.49 -1.30 -0.98 -1.62
|
|
13
|
+
1.13 1.10 1.28 1.37 0.14 1.62 1.84 1.29 1.31 0.05 -0.05 1.50 0.41 0.20 1.14 0.86 0.62 0.45 0.31 0.48
|
|
14
|
+
1.33 0.91 1.33 1.60 0.31 1.60 1.93 1.62 1.01 0.33 0.38 1.12 0.82 0.55 1.54 0.78 0.54 0.23 0.52 0.86
|
|
15
|
+
-0.22 0.72 0.27 0.47 -0.95 0.42 1.39 -0.23 0.40 -0.48 -0.81 1.04 -0.62 -0.36 0.41 0.23 -0.04 -0.71 0.08 -0.35
|
|
16
|
+
0.47 0.81 0.95 0.51 -1.56 0.90 0.89 0.86 0.20 -0.43 -0.48 1.31 -0.63 -0.41 0.56 0.40 0.28 -0.20 -0.22 -0.21
|
|
17
|
+
-0.58 0.17 0.61 0.46 -1.17 0.24 0.80 0.04 -0.16 -1.64 -1.66 0.87 -0.89 -1.56 -0.27 0.02 -0.32 -1.40 -1.13 -1.36
|
|
18
|
+
-0.44 0.20 0.50 0.71 -1.56 0.11 0.82 0.28 -0.15 -1.67 -1.62 0.72 -0.96 -1.55 0.02 0.19 -0.09 -1.46 -0.95 -1.32
|
|
19
|
+
1.07 2.48 1.75 0.98 0.42 1.68 1.04 1.31 1.39 0.41 0.29 2.95 0.98 0.27 1.63 1.51 1.48 0.32 0.60 0.64
|
|
20
|
+
-0.22 0.65 0.76 0.88 -0.95 0.68 1.92 0.27 0.31 -1.32 -1.04 1.02 -0.57 -1.60 0.07 0.47 0.04 -1.29 -0.85 -0.82
|
|
21
|
+
-0.33 -0.06 0.42 0.42 -1.90 0.25 0.64 0.12 -0.01 -1.64 -1.50 0.58 -1.36 -1.77 -0.30 0.02 0.04 -1.41 -1.36 -1.34
|
|
22
|
+
0.78 1.30 1.31 1.27 -0.04 1.44 1.71 0.69 0.84 0.05 0.15 1.68 0.38 0.27 1.05 1.19 0.83 -0.24 0.23 0.12
|
|
23
|
+
0.46 1.07 1.04 0.73 -0.31 1.47 1.23 0.57 0.58 -0.11 -0.24 1.37 0.08 -0.34 0.76 0.51 0.48 -0.04 0.47 0.18
|
|
24
|
+
0.50 0.90 0.75 0.91 -0.26 1.03 1.25 0.55 0.55 -0.20 -0.26 1.42 0.50 -0.22 0.88 0.69 0.56 0.41 0.11 -0.15
|
|
25
|
+
-0.41 -0.06 -0.19 0.32 -0.79 -0.14 0.58 0.07 -0.62 -1.58 -1.16 0.18 -1.03 -1.33 -0.56 0.15 -0.19 -1.83 -0.67 -0.92
|
|
26
|
+
-0.22 -0.07 0.52 0.46 -0.87 0.38 0.59 0.40 -0.17 -1.29 -1.15 0.83 -0.98 -1.16 -0.16 0.34 -0.12 -0.79 -0.77 -0.78
|
|
27
|
+
-0.51 0.49 0.48 0.67 -1.40 0.66 0.63 -0.06 0.28 -1.25 -1.50 1.14 -0.93 -1.36 -0.04 0.10 -0.01 -1.11 -0.82 -1.14
|
|
28
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#H ZHAC000102
|
|
2
|
+
#D Environment-dependent residue contact energies (rows = helix, cols = strand)
|
|
3
|
+
#R PMID:10706611
|
|
4
|
+
#A Zhang, C. and Kim, S.H.
|
|
5
|
+
#T Environment-dependent residue contact energies for proteins
|
|
6
|
+
#J Proc. Natl. Acad. Sci. USA 97, 2550-2555 (2000)
|
|
7
|
+
#M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
|
|
8
|
+
-0.94 1.26 0.55 0.76 -1.54 1.14 1.57 -0.78 0.44 -1.59 -1.64 1.91 -0.90 -1.49 0.28 0.20 -0.04 -0.92 -0.75 -1.45
|
|
9
|
+
0.56 1.79 2.31 0.79 -0.67 2.54 0.72 1.09 0.94 -0.01 0.01 3.68 0.89 -0.05 1.37 0.83 1.35 0.00 0.33 0.44
|
|
10
|
+
0.59 2.21 1.82 0.77 -0.90 0.46 3.06 -0.16 0.63 -0.33 0.20 2.43 0.99 0.63 0.54 0.24 0.63 0.11 -0.19 0.23
|
|
11
|
+
0.66 0.76 0.76 1.19 -0.21 1.66 2.22 0.29 0.57 0.59 0.79 1.13 1.41 0.49 1.70 1.03 1.19 1.85 0.18 0.86
|
|
12
|
+
-1.75 0.78 -1.00 0.32 -3.64 0.48 0.87 -1.67 -0.62 -2.77 -2.32 0.19 -1.22 -2.67 -1.62 -0.83 -1.14 -0.52 -1.94 -2.35
|
|
13
|
+
0.33 2.15 1.22 1.26 1.37 1.17 2.56 0.92 1.02 0.11 0.00 2.58 0.79 -0.26 0.53 1.19 1.11 0.21 0.39 0.15
|
|
14
|
+
0.82 1.05 2.18 2.11 0.01 2.42 2.58 1.15 0.97 0.20 0.31 1.31 1.25 0.12 2.00 1.09 1.13 0.58 0.31 0.39
|
|
15
|
+
-0.40 0.95 0.03 0.14 -1.00 0.34 0.99 -1.32 0.13 -1.40 -1.36 1.58 -0.90 -1.41 0.82 -0.27 0.21 -0.59 -1.27 -1.09
|
|
16
|
+
-0.75 2.19 0.13 0.68 -1.37 1.98 1.13 0.01 1.52 -0.83 -0.58 2.26 -0.82 -1.01 0.53 -0.17 0.02 -49.00 -0.61 -0.56
|
|
17
|
+
-1.99 0.25 -0.20 1.00 -2.44 -0.12 0.88 -1.54 -0.05 -2.64 -2.33 0.75 -1.85 -2.46 -1.06 -0.59 -0.65 -1.82 -1.88 -2.45
|
|
18
|
+
-2.02 0.34 -0.04 0.13 -2.29 0.24 0.73 -1.27 -0.46 -2.53 -2.44 0.67 -1.80 -2.28 -1.29 -0.40 -0.34 -1.76 -1.66 -2.26
|
|
19
|
+
0.60 3.11 2.23 1.06 0.50 1.80 1.65 0.82 1.25 0.10 0.34 3.51 0.98 -0.21 1.15 2.09 1.30 -0.14 0.28 0.13
|
|
20
|
+
-1.54 -0.06 -0.63 1.76 -2.51 0.14 0.72 -1.74 0.07 -2.27 -2.22 1.27 -1.77 -1.87 0.34 -0.02 -0.21 -0.93 -1.54 -1.81
|
|
21
|
+
-2.12 0.33 -0.70 0.17 -2.30 -0.59 0.26 -1.60 -0.88 -2.53 -2.44 -0.42 -1.83 -2.68 -1.40 -0.82 -0.61 -1.63 -1.83 -2.25
|
|
22
|
+
0.63 2.43 -0.19 1.31 -1.63 1.46 1.91 0.08 1.11 -0.20 0.47 1.94 -0.34 0.15 0.57 0.00 1.15 0.06 0.26 -0.06
|
|
23
|
+
-0.41 0.88 1.02 1.04 -0.21 1.27 0.94 0.04 0.75 -0.48 -0.67 2.28 0.45 -0.92 0.75 0.50 0.96 0.22 -0.19 -0.54
|
|
24
|
+
-0.32 1.48 0.35 0.43 -1.44 0.38 1.36 -0.38 0.20 -1.14 -1.00 1.38 -0.35 -0.97 -0.05 -0.16 0.29 -0.53 -0.76 -0.73
|
|
25
|
+
-1.85 0.45 -0.03 0.80 -1.64 -0.23 0.11 -0.95 0.67 -1.58 -2.13 0.61 -1.75 -1.59 -1.07 -0.34 -0.40 -1.29 -1.27 -1.79
|
|
26
|
+
-0.88 -0.20 -0.29 0.14 -1.31 0.09 0.71 -0.56 -0.57 -1.66 -1.38 1.40 -1.60 -1.97 -0.73 -0.32 -0.37 -1.40 -0.96 -1.38
|
|
27
|
+
-1.74 0.85 0.24 0.72 -2.25 0.45 0.81 -1.29 -0.24 -2.46 -2.38 0.37 -1.21 -2.16 -1.00 -0.10 -0.57 -1.34 -1.52 -2.31
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
A C D E F G H I K L M N P Q R S T V W Y
|
|
2
|
+
A 0.322860152036 0.0113750373506 -0.0156239175966 -0.00259952715456 -0.0508792185716 0.0382679273874 -0.0832539299638 -0.00196691041626 -0.0103729638696 -0.042393907322 -0.0651042403697 -0.0853704925231 0.0757409633086 -0.0483151514798 -0.0136431408498 0.038455041596 0.0520376087986 0.081101427454 -0.125564718844 -0.0747500389698
|
|
3
|
+
C 0.0113750373506 0.100680270274 0.0102951033136 0.0147570340938 0.0345785831581 0.00933463557214 -0.00750101609651 0.00476007239717 -0.0459237939975 -0.0182998264075 -0.0155971113182 0.0021128481374 -0.00860770840682 -0.0309903425175 -0.0482562439545 -0.0217965163697 -0.0227322740574 -0.0154276574266 0.0412325888637 0.00600631739163
|
|
4
|
+
D -0.0156239175966 0.0102951033136 0.157208255034 0.0724279735923 -0.0189545540921 -0.00870389879389 -0.0180188107498 -0.0283467966687 -0.0634240071162 -0.0279979457557 -0.0241192288182 0.0194310374127 0.042784078891 0.000437307476866 -0.0591268568576 -0.0104660502173 0.00656101264316 -0.0193560886308 0.00415097887978 -0.0191575919464
|
|
5
|
+
E -0.00259952715456 0.0147570340938 0.0724279735923 0.131775168933 -0.00519060032543 -0.00547805492393 -0.0335600821273 -0.0135417817213 -0.069471604426 0.00353800457557 -0.017166710134 0.00534055417468 0.022589833552 0.0281404974641 -0.0697402405064 -0.0172364513778 -0.0054830504799 -0.00806269508269 -0.00791955104235 -0.0231187170833
|
|
6
|
+
F -0.0508792185716 0.0345785831581 -0.0189545540921 -0.00519060032543 0.259179996995 -0.00445131805782 -0.00639743486807 0.0628717025094 -0.049227253611 0.0488666377736 0.0315353570161 -0.0223593028205 -0.0919732521492 -0.0930189756622 -0.0626297946351 -0.0868415233743 -0.0777292391855 -0.015794520965 0.0625957490761 0.0858189617896
|
|
7
|
+
G 0.0382679273874 0.00933463557214 -0.00870389879389 -0.00547805492393 -0.00445131805782 0.122499934434 -0.025558278086 -0.0207027221208 -0.0137316756786 -0.0326424665142 -0.0264215095016 -0.00403752148352 0.0094352664965 0.00425299048772 -0.0232280105465 0.0304312733191 0.00861853592388 -0.0127217072682 -0.0246539147339 -0.0205094859119
|
|
8
|
+
H -0.0832539299638 -0.00750101609651 -0.0180188107498 -0.0335600821273 -0.00639743486807 -0.025558278086 0.207657765989 -0.0888505073496 0.0761447053198 -0.0351727012494 -0.000760393877348 0.0353903619255 -0.0682087048807 -0.00886454093107 0.109052662874 0.00938179429131 -0.0234122309305 -0.0870188771708 0.0123622841944 0.0365879336865
|
|
9
|
+
I -0.00196691041626 0.00476007239717 -0.0283467966687 -0.0135417817213 0.0628717025094 -0.0207027221208 -0.0888505073496 0.27773187827 -0.0381642025534 0.0886112938313 0.0551293441776 -0.0593694184462 -0.039207153398 -0.0626883806129 -0.110160438997 -0.0618078497671 -0.0339233811197 0.091300054417 0.00138488610169 -0.0230596885329
|
|
10
|
+
K -0.0103729638696 -0.0459237939975 -0.0634240071162 -0.069471604426 -0.049227253611 -0.0137316756786 0.0761447053198 -0.0381642025534 0.273355694189 -0.0177282663533 -0.00817300753785 -0.0339854863013 -0.0484016323395 -0.0331603641198 0.21516548555 0.00476731287861 -0.0331318604828 -0.0400367780545 -0.0598522551401 -0.00464804635586
|
|
11
|
+
L -0.042393907322 -0.0182998264075 -0.0279979457557 0.00353800457557 0.0488666377736 -0.0326424665142 -0.0351727012494 0.0886112938313 -0.0177282663533 0.162738321535 0.0750528874999 -0.0111666419731 -0.051023845781 -0.00134001844501 -0.074934598492 -0.0584956357369 -0.031311528799 0.0449678806271 -0.00754567267671 -0.0137219703366
|
|
12
|
+
M -0.0651042403697 -0.0155971113182 -0.0241192288182 -0.017166710134 0.0315353570161 -0.0264215095016 -0.000760393877348 0.0551293441776 -0.00817300753785 0.0750528874999 0.156957428383 0.00753829785887 -0.091647674076 0.00190198496329 -0.0257018542091 -0.0295349216339 -0.0454820084051 -0.0120310888206 0.0210041287765 0.0126203200265
|
|
13
|
+
N -0.0853704925231 0.0021128481374 0.0194310374127 0.00534055417468 -0.0223593028205 -0.00403752148352 0.0353903619255 -0.0593694184462 -0.0339854863013 -0.0111666419731 0.00753829785887 0.151487423988 -0.0106077901881 0.0413965183445 -0.0338327997913 0.0170820288313 -0.00295174153884 -0.0436807942705 0.0296409813073 -0.00205806264345
|
|
14
|
+
P 0.0757409633086 -0.00860770840682 0.042784078891 0.022589833552 -0.0919732521492 0.0094352664965 -0.0682087048807 -0.039207153398 -0.0484016323395 -0.051023845781 -0.091647674076 -0.0106077901881 0.354629507834 0.0481497903134 -0.0377142358446 -0.00687173098621 0.0199181111388 0.0225294243984 -0.0525069717881 -0.0890062760945
|
|
15
|
+
Q -0.0483151514798 -0.0309903425175 0.000437307476866 0.0281404974641 -0.0930189756622 0.00425299048772 -0.00886454093107 -0.0626883806129 -0.0331603641198 -0.00134001844501 0.00190198496329 0.0413965183445 0.0481497903134 0.177175171536 0.00715630304762 0.0357241930907 0.027467611659 -0.032780800211 -0.0118972341632 -0.0487465602402
|
|
16
|
+
R -0.0136431408498 -0.0482562439545 -0.0591268568576 -0.0697402405064 -0.0626297946351 -0.0232280105465 0.109052662874 -0.110160438997 0.21516548555 -0.074934598492 -0.0257018542091 -0.0338327997913 -0.0377142358446 0.00715630304762 0.389022190137 0.0204288367942 -0.0408668326839 -0.0934989556047 -0.0557627605155 0.00827128508526
|
|
17
|
+
S 0.038455041596 -0.0217965163697 -0.0104660502173 -0.0172364513778 -0.0868415233743 0.0304312733191 0.00938179429131 -0.0618078497671 0.00476731287861 -0.0584956357369 -0.0295349216339 0.0170820288313 -0.00687173098621 0.0357241930907 0.0204288367942 0.161573840097 0.0839261885951 -0.00816241136786 -0.0444334801409 -0.0561239385213
|
|
18
|
+
T 0.0520376087986 -0.0227322740574 0.00656101264316 -0.0054830504799 -0.0777292391855 0.00861853592388 -0.0234122309305 -0.0339233811197 -0.0331318604828 -0.031311528799 -0.0454820084051 -0.00295174153884 0.0199181111388 0.027467611659 -0.0408668326839 0.0839261885951 0.142525860495 0.0493244941272 -0.0264928932645 -0.0468623824337
|
|
19
|
+
V 0.081101427454 -0.0154276574266 -0.0193560886308 -0.00806269508269 -0.015794520965 -0.0127217072682 -0.0870188771708 0.091300054417 -0.0400367780545 0.0449678806271 -0.0120310888206 -0.0436807942705 0.0225294243984 -0.032780800211 -0.0934989556047 -0.00816241136786 0.0493244941272 0.172778293246 -0.0289445753682 -0.0444846240282
|
|
20
|
+
W -0.125564718844 0.0412325888637 0.00415097887978 -0.00791955104235 0.0625957490761 -0.0246539147339 0.0123622841944 0.00138488610169 -0.0598522551401 -0.00754567267671 0.0210041287765 0.0296409813073 -0.0525069717881 -0.0118972341632 -0.0557627605155 -0.0444334801409 -0.0264928932645 -0.0289445753682 0.194048086876 0.0791543436022
|
|
21
|
+
Y -0.0747500389698 0.00600631739163 -0.0191575919464 -0.0231187170833 0.0858189617896 -0.0205094859119 0.0365879336865 -0.0230596885329 -0.00464804635586 -0.0137219703366 0.0126203200265 -0.00205806264345 -0.0890062760945 -0.0487465602402 0.00827128508526 -0.0561239385213 -0.0468623824337 -0.0444846240282 0.0791543436022 0.237788221516
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#H ZHAC000105
|
|
2
|
+
#D Environment-dependent residue contact energies (rows = strand, cols = coil)
|
|
3
|
+
#R PMID:10706611
|
|
4
|
+
#A Zhang, C. and Kim, S.H.
|
|
5
|
+
#T Environment-dependent residue contact energies for proteins
|
|
6
|
+
#J Proc. Natl. Acad. Sci. USA 97, 2550-2555 (2000)
|
|
7
|
+
#M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
|
|
8
|
+
-0.57 0.47 0.30 0.62 -1.60 0.45 0.61 -0.24 0.07 -1.64 -1.63 0.62 -1.03 -1.55 -0.11 -0.10 -0.34 -1.44 -0.39 -1.55
|
|
9
|
+
0.23 0.79 0.76 0.39 -0.41 0.92 0.76 0.52 0.51 -0.30 0.13 1.58 0.88 -0.07 0.60 0.65 0.37 0.14 0.32 0.17
|
|
10
|
+
-0.28 0.74 0.57 0.87 -0.68 0.52 1.00 -0.07 0.32 -0.31 -0.08 0.87 0.29 -0.17 0.57 0.11 0.19 0.04 0.24 -0.23
|
|
11
|
+
0.15 -0.25 0.46 0.69 -0.46 0.41 1.34 0.56 -0.51 -0.23 0.27 0.59 0.60 -0.38 1.02 0.08 0.05 -0.48 0.02 0.34
|
|
12
|
+
-1.19 -0.46 0.21 0.51 -3.30 0.26 0.20 -1.03 -0.72 -1.55 -1.71 0.27 -1.24 -1.70 -0.50 -0.55 -0.97 -0.67 -1.26 -1.62
|
|
13
|
+
0.63 1.18 0.92 1.37 -0.30 0.93 1.27 0.56 0.91 -0.28 -0.11 0.98 0.15 -0.30 0.64 0.88 0.68 -0.44 0.66 0.15
|
|
14
|
+
0.97 0.89 1.37 1.89 0.30 1.25 2.34 0.98 0.58 0.20 0.50 0.67 1.23 0.58 1.26 0.95 1.06 0.04 0.87 0.48
|
|
15
|
+
-0.64 0.12 0.27 0.31 -1.37 0.38 0.98 -0.40 -0.12 -1.58 -1.40 0.78 -0.46 -1.38 -0.21 0.05 -0.26 -1.41 -0.61 -1.13
|
|
16
|
+
-0.02 0.75 0.68 0.14 -0.58 0.73 0.84 0.41 -0.64 -0.75 0.03 1.46 -0.16 -0.49 0.52 0.31 -0.11 -1.00 -0.58 0.03
|
|
17
|
+
-0.94 -0.14 0.31 0.26 -1.70 0.07 0.46 -0.37 -0.50 -1.88 -1.79 0.84 -0.99 -1.82 -0.47 -0.05 -0.54 -1.65 -1.09 -1.64
|
|
18
|
+
-0.76 0.32 0.43 0.25 -1.63 0.22 0.68 -0.17 -0.40 -1.84 -1.70 0.47 -1.06 -1.76 -0.39 0.09 -0.42 -1.81 -1.15 -1.64
|
|
19
|
+
1.02 1.99 1.18 0.59 0.08 1.10 0.60 0.61 0.95 0.24 0.34 2.69 0.97 -0.03 1.23 1.07 0.83 0.00 0.26 0.36
|
|
20
|
+
-0.16 0.83 0.47 0.92 -1.63 0.36 0.71 -0.20 0.90 -1.00 -1.12 1.55 -0.31 -1.35 -0.01 0.34 0.20 -1.70 -0.60 -0.79
|
|
21
|
+
-0.70 0.03 0.63 0.15 -1.26 0.29 0.35 -0.11 -0.36 -1.73 -1.55 0.71 -0.97 -1.55 -0.28 -0.09 -0.32 -1.23 -0.91 -1.30
|
|
22
|
+
0.17 0.50 0.60 0.67 -1.31 0.50 0.94 0.02 -0.45 -1.26 -0.91 1.08 0.83 -0.87 0.63 0.31 0.26 -0.50 -0.55 -0.79
|
|
23
|
+
-0.06 0.99 0.73 0.86 -0.89 0.85 0.67 0.08 0.06 -0.22 -0.29 0.94 -0.08 -0.41 0.67 0.33 0.13 -1.01 0.13 -0.24
|
|
24
|
+
0.26 0.93 0.70 0.87 -0.78 0.58 1.20 0.12 0.52 -0.30 -0.24 1.11 0.01 -0.08 0.65 0.47 0.41 -0.31 0.12 -0.32
|
|
25
|
+
-0.03 -0.11 0.27 0.66 -1.50 0.65 0.50 -0.12 -0.32 -1.13 -1.01 0.52 -1.08 -1.04 -0.32 -0.03 -0.10 -0.67 -0.73 -0.64
|
|
26
|
+
-0.44 0.20 0.20 0.20 -1.26 0.16 0.10 -0.21 -0.52 -1.26 -1.30 0.60 -0.76 -1.17 -0.42 0.05 -0.27 -1.20 -0.75 -0.84
|
|
27
|
+
-0.83 0.20 0.48 0.62 -1.44 0.17 0.73 -0.12 -0.26 -1.64 -1.59 0.52 -0.70 -1.55 -0.28 0.12 -0.17 -1.16 -0.85 -1.42
|
weirdo/model_manager.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
"""Model manager for trained foreignness scorers.
|
|
2
|
+
|
|
3
|
+
Handles saving, loading, and listing trained ML models.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
|
+
|
|
12
|
+
# Default model storage directory
|
|
13
|
+
# Used by ModelManager when no (or a falsy) model_dir is supplied; lives under the current user's home directory.
DEFAULT_MODEL_DIR = Path.home() / '.weirdo' / 'models'
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ModelInfo:
    """Descriptive record for one saved model.

    The constructor arguments are stored as same-named attributes.
    ``params`` and ``metadata`` are replaced by a fresh empty dict when
    they are omitted or otherwise falsy.
    """

    def __init__(
        self,
        name: str,
        scorer_type: str,
        path: Path,
        created: Optional[str] = None,
        params: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        self.name, self.scorer_type, self.path = name, scorer_type, path
        self.created = created
        self.params = params if params else {}
        self.metadata = metadata if metadata else {}

    def __repr__(self) -> str:
        return f"ModelInfo(name='{self.name}', type='{self.scorer_type}')"

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict, with ``path`` rendered as a string."""
        field_names = (
            'name',
            'scorer_type',
            'path',
            'created',
            'params',
            'metadata',
        )
        summary = {field: getattr(self, field) for field in field_names}
        # Only the path needs converting; every other value is passed through.
        summary['path'] = str(self.path)
        return summary
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ModelManager:
    """Manager for trained foreignness scoring models.

    Handles saving, loading, and listing trained models in a
    centralized directory structure. A model is a sub-directory of the
    model directory that contains a ``config.json`` file; an optional
    ``metadata.json`` next to it is read when present.

    Parameters
    ----------
    model_dir : str or Path, optional
        Directory for storing models. Defaults to ~/.weirdo/models.
        The directory (and any missing parents) is created on construction.

    Example
    -------
    >>> mm = ModelManager()
    >>> mm.list_models()  # List available models
    >>> model = mm.load('my-mlp')  # Load a saved model
    >>> mm.save(scorer, 'my-new-model')  # Save a trained model
    """

    def __init__(self, model_dir: Optional[Union[str, Path]] = None):
        self._model_dir = Path(model_dir) if model_dir else DEFAULT_MODEL_DIR
        self._model_dir.mkdir(parents=True, exist_ok=True)

    @property
    def model_dir(self) -> Path:
        """Get model storage directory."""
        return self._model_dir

    def list_models(self) -> List['ModelInfo']:
        """List all available models.

        Returns
        -------
        models : list of ModelInfo
            Information about each saved model, in sorted path order.
            Directories whose info cannot be loaded are skipped.
        """
        models: List['ModelInfo'] = []

        if not self._model_dir.exists():
            return models

        for path in sorted(self._model_dir.iterdir()):
            if path.is_dir() and (path / 'config.json').exists():
                try:
                    info = self._load_model_info(path)
                except Exception:
                    # Deliberate best-effort: one unreadable/corrupted model
                    # must not prevent the remaining models from being listed.
                    continue
                models.append(info)

        return models

    def _load_model_info(self, path: Path) -> 'ModelInfo':
        """Load model info from a model directory.

        Reads ``config.json`` (required) and ``metadata.json`` (optional)
        from ``path``. Errors from opening or parsing either file propagate.
        """
        with open(path / 'config.json', 'r') as f:
            config = json.load(f)

        metadata = {}
        if (path / 'metadata.json').exists():
            with open(path / 'metadata.json', 'r') as f:
                metadata = json.load(f)

        # The directory's modification time is reported as the "created"
        # timestamp (local time, ISO 8601 format).
        created = datetime.fromtimestamp(path.stat().st_mtime).isoformat()

        return ModelInfo(
            name=path.name,
            scorer_type=config.get('scorer_type', 'unknown'),
            path=path,
            created=created,
            params=config.get('params', {}),
            metadata=metadata,
        )

    def get_model_info(self, name: str) -> Optional['ModelInfo']:
        """Get info for a specific model.

        Parameters
        ----------
        name : str
            Model name.

        Returns
        -------
        info : ModelInfo or None
            Model info if found, None otherwise.
        """
        path = self._model_dir / name
        if path.exists() and (path / 'config.json').exists():
            return self._load_model_info(path)
        return None

    def load(self, name: str) -> 'TrainableScorer':
        """Load a trained model by name.

        Parameters
        ----------
        name : str
            Model name (directory name in model storage).

        Returns
        -------
        scorer : TrainableScorer
            Loaded model ready for inference.

        Raises
        ------
        FileNotFoundError
            If no entry called ``name`` exists in the model directory.
        ValueError
            If the ``scorer_type`` recorded in ``config.json`` is not one
            of the known scorer classes.
        """
        # Imported lazily, at call time rather than module import time.
        from .scorers.trainable import TrainableScorer  # noqa: F401
        from .scorers.mlp import MLPScorer

        path = self._model_dir / name
        if not path.exists():
            raise FileNotFoundError(f"Model not found: {name}")

        # Load config to determine scorer type
        with open(path / 'config.json', 'r') as f:
            config = json.load(f)

        scorer_type = config.get('scorer_type', '')

        # Map to scorer class
        scorer_classes = {
            'MLPScorer': MLPScorer,
        }

        if scorer_type not in scorer_classes:
            raise ValueError(
                f"Unknown scorer type: {scorer_type}. "
                f"Available: {list(scorer_classes.keys())}"
            )

        return scorer_classes[scorer_type].load(path)

    def save(
        self,
        scorer: 'TrainableScorer',
        name: str,
        overwrite: bool = False,
    ) -> Path:
        """Save a trained model.

        Parameters
        ----------
        scorer : TrainableScorer
            Trained model to save.
        name : str
            Name for the saved model.
        overwrite : bool, default=False
            Whether to overwrite existing model with same name.

        Returns
        -------
        path : Path
            Path where model was saved.

        Raises
        ------
        FileExistsError
            If ``name`` already exists and ``overwrite`` is False.
        """
        path = self._model_dir / name

        if path.exists() and not overwrite:
            raise FileExistsError(
                f"Model already exists: {name}. "
                "Use overwrite=True to replace."
            )

        # Writing is delegated entirely to the scorer.
        scorer.save(path)
        return path

    def delete(self, name: str) -> bool:
        """Delete a saved model.

        Parameters
        ----------
        name : str
            Model name to delete.

        Returns
        -------
        deleted : bool
            True if model was deleted, False if nothing by that name exists.

        Raises
        ------
        ValueError
            If ``name`` does not resolve to a location strictly inside the
            model directory (for example ``''``, ``'.'`` or ``'..'``).
        """
        import shutil
        path = self._model_dir / name

        # An empty name, '.' or '..' would make ``path`` the model store
        # itself (or one of its ancestors); removing that tree would destroy
        # every saved model, so refuse anything that is not a descendant.
        store_root = self._model_dir.resolve()
        if store_root not in path.resolve().parents:
            raise ValueError(
                f"Invalid model name: {name!r}. "
                "It must refer to an entry inside the model directory."
            )

        if path.exists():
            shutil.rmtree(path)
            return True
        return False

    def print_models(self) -> None:
        """Print formatted list of available models."""
        models = self.list_models()

        if not models:
            print("No trained models found.")
            print(f"Model directory: {self._model_dir}")
            return

        print(f"Trained models ({len(models)}):")
        print("-" * 60)

        for model in models:
            print(f"\n {model.name}")
            print(f" Type: {model.scorer_type}")
            if model.created:
                # First 19 characters of the ISO timestamp: drops fractional seconds.
                print(f" Created: {model.created[:19]}")
            if 'n_train' in model.metadata:
                print(f" Training samples: {model.metadata['n_train']}")
            if 'n_epochs' in model.metadata:
                print(f" Epochs trained: {model.metadata['n_epochs']}")
            if 'final_train_loss' in model.metadata:
                print(f" Final train loss: {model.metadata['final_train_loss']:.4f}")
            if 'best_val_loss' in model.metadata:
                print(f" Best val loss: {model.metadata['best_val_loss']:.4f}")
            if 'k' in model.params:
                print(f" K-mer size: {model.params['k']}")

        print(f"\nModel directory: {self._model_dir}")
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# Singleton instance
|
|
269
|
+
# Created on first use by get_model_manager(), and replaced whenever that function is called with an explicit model_dir.
_model_manager: Optional[ModelManager] = None
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def get_model_manager(model_dir: Optional[Union[str, Path]] = None) -> ModelManager:
    """Return the shared module-level ModelManager.

    A new manager is built (and becomes the shared instance) when none
    exists yet or whenever ``model_dir`` is given; otherwise the existing
    instance is returned unchanged.

    Parameters
    ----------
    model_dir : str or Path, optional
        Custom model directory.

    Returns
    -------
    manager : ModelManager
    """
    global _model_manager
    must_rebuild = model_dir is not None or _model_manager is None
    if must_rebuild:
        _model_manager = ModelManager(model_dir)
    return _model_manager
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def list_models(model_dir: Optional[Union[str, Path]] = None) -> List[ModelInfo]:
    """Return info for every trained model known to the model manager.

    Convenience wrapper around ``get_model_manager(model_dir).list_models()``.

    Parameters
    ----------
    model_dir : str or Path, optional
        Custom model directory.

    Returns
    -------
    models : list of ModelInfo
    """
    manager = get_model_manager(model_dir)
    return manager.list_models()
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def load_model(name: str, model_dir: Optional[Union[str, Path]] = None) -> 'TrainableScorer':
    """Load the trained model called ``name`` through the model manager.

    Convenience wrapper around ``get_model_manager(model_dir).load(name)``.

    Parameters
    ----------
    name : str
        Model name.
    model_dir : str or Path, optional
        Custom model directory.

    Returns
    -------
    scorer : TrainableScorer
        Loaded model.
    """
    manager = get_model_manager(model_dir)
    return manager.load(name)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def save_model(
    scorer: 'TrainableScorer',
    name: str,
    model_dir: Optional[Union[str, Path]] = None,
    overwrite: bool = False,
) -> Path:
    """Store a trained model under ``name`` through the model manager.

    Convenience wrapper around
    ``get_model_manager(model_dir).save(scorer, name, overwrite)``.

    Parameters
    ----------
    scorer : TrainableScorer
        Trained model.
    name : str
        Model name.
    model_dir : str or Path, optional
        Custom model directory.
    overwrite : bool, default=False
        Overwrite existing model.

    Returns
    -------
    path : Path
    """
    manager = get_model_manager(model_dir)
    saved_path = manager.save(scorer, name, overwrite)
    return saved_path
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# you may not use this file except in compliance with the License.
|
|
3
|
+
# You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
|
15
|
+
from sklearn.preprocessing import normalize
|
|
16
|
+
|
|
17
|
+
class _ReducedAlphabetPreprocessor(object):
    """
    Callable that maps every character of a string through a reduced alphabet.

    Defined at module level (rather than as a lambda) so that a
    CountVectorizer holding it as its preprocessor can be pickled.
    """
    def __init__(self, reduced_alphabet):
        self.reduced_alphabet = reduced_alphabet

    def __call__(self, s):
        # Characters missing from the mapping raise KeyError.
        return "".join([self.reduced_alphabet[si] for si in s])


def make_count_vectorizer(reduced_alphabet, max_ngram):
    """
    Build a character-level n-gram CountVectorizer.

    Parameters
    ----------
    reduced_alphabet : mapping or None
        Object indexed by single characters that returns the replacement
        string for each one (presumably a dict from amino acid letter to
        group letter -- confirm against callers). When None, strings are
        passed to the vectorizer without a custom preprocessor.

    max_ngram : int
        Upper bound of the n-gram range; the lower bound is always 1.

    Returns
    -------
    CountVectorizer
        Unfitted vectorizer producing float64 counts.
    """
    if reduced_alphabet is None:
        preprocessor = None
    else:
        preprocessor = _ReducedAlphabetPreprocessor(reduced_alphabet)

    return CountVectorizer(
        analyzer='char',
        ngram_range=(1, max_ngram),
        dtype=np.float64,
        preprocessor=preprocessor)
|
|
28
|
+
|
|
29
|
+
class PeptideVectorizer(object):
    """
    Convert peptide strings into character n-gram vectors.

    Counts come from a CountVectorizer built by make_count_vectorizer;
    when ``normalize_row`` is true each row is L1-normalised.

    When ``training_already_reduced`` is true, the strings given to
    fit/fit_transform are fitted with a vectorizer that has no
    reduced-alphabet preprocessor, and the vocabulary learned that way is
    copied onto the vectorizer later used by transform.
    """
    def __init__(
            self,
            max_ngram=1,
            normalize_row=True,
            reduced_alphabet=None,
            training_already_reduced=False):
        self.max_ngram = max_ngram
        self.normalize_row = normalize_row
        self.reduced_alphabet = reduced_alphabet
        self.training_already_reduced = training_already_reduced
        # Set by fit_transform(); stays None until then.
        self.count_vectorizer = None

    def __getstate__(self):
        """Return the attributes that make up the pickled state."""
        field_names = (
            'reduced_alphabet',
            'count_vectorizer',
            'training_already_reduced',
            'normalize_row',
            'max_ngram',
        )
        return {field: getattr(self, field) for field in field_names}

    def fit_transform(self, amino_acid_strings):
        """Fit a new vectorizer on the strings and return their dense vectors."""
        vectorizer = make_count_vectorizer(
            self.reduced_alphabet, self.max_ngram)
        self.count_vectorizer = vectorizer

        if not self.training_already_reduced:
            dense = vectorizer.fit_transform(amino_acid_strings).toarray()
        else:
            # Training data is already in the reduced alphabet: learn the
            # vocabulary without the preprocessor, then hand it over.
            plain = make_count_vectorizer(None, self.max_ngram)
            dense = plain.fit_transform(amino_acid_strings).toarray()
            vectorizer.vocabulary_ = plain.vocabulary_

        return normalize(dense, norm='l1') if self.normalize_row else dense

    def fit(self, amino_acid_strings):
        """Fit on the strings, discarding the transformed output."""
        self.fit_transform(amino_acid_strings)

    def transform(self, amino_acid_strings):
        """Return dense vectors for the strings using the fitted vectorizer."""
        assert self.count_vectorizer, "Must call 'fit' before 'transform'"
        counts = self.count_vectorizer.transform(amino_acid_strings)
        dense = counts.toarray()
        return normalize(dense, norm='l1') if self.normalize_row else dense
|
weirdo/pmbec.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
2
|
+
# you may not use this file except in compliance with the License.
|
|
3
|
+
# You may obtain a copy of the License at
|
|
4
|
+
#
|
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
from os.path import join
|
|
14
|
+
|
|
15
|
+
from .static_data import MATRIX_DIR
|
|
16
|
+
|
|
17
|
+
from .amino_acid_alphabet import dict_to_amino_acid_matrix
|
|
18
|
+
|
|
19
|
+
def read_pmbec_coefficients(
        key_type='row',
        verbose=True,
        filename=join(MATRIX_DIR, 'pmbec.mat')):
    """
    Read a PMBEC coefficient matrix from a whitespace-delimited text file.

    The first non-blank line is the header naming the 20 column residues
    (single-character tokens). Every following non-blank line must hold a
    row letter followed by 20 numeric values.

    Parameters
    ------------

    filename : str
        Location of PMBEC coefficient matrix

    key_type : str
        'row' : every key is a single amino acid,
                which maps to a dictionary for that row
        'pair' : every key is a tuple of amino acids
        'pair_string' : every key is a string of two amino acid characters

    verbose : bool
        Print the header line and the list of residues parsed from it

    Returns
    -------
    dict
        Coefficients as floats, keyed according to `key_type`.
    """
    d = {}
    if key_type == 'row':
        def add_pair(row_letter, col_letter, value):
            d.setdefault(row_letter, {})[col_letter] = value
    elif key_type == 'pair':
        def add_pair(row_letter, col_letter, value):
            d[(row_letter, col_letter)] = value

    else:
        # NOTE(review): this check is an assert, so it is skipped when
        # Python runs with -O and any other key_type then behaves like
        # 'pair_string'.
        assert key_type == 'pair_string', \
            "Invalid dictionary key type: %s" % key_type

        def add_pair(row_letter, col_letter, value):
            d["%s%s" % (row_letter, col_letter)] = value

    with open(filename, 'r') as f:
        # Skip blank and whitespace-only lines; drop the line terminator so
        # the header prints exactly as it appears in the file.
        lines = [line.rstrip('\n') for line in f if line.strip()]

    header = lines[0]
    if verbose:
        print(header)
    # Column labels are the single-character tokens of the header.
    residues = [x for x in header.split() if len(x) == 1]
    assert len(residues) == 20
    if verbose:
        print(residues)
    for line in lines[1:]:
        # Split on any run of whitespace, exactly like the header, so that
        # tab- or multi-space-delimited rows parse as well.
        cols = line.split()
        assert len(cols) == 21, "Expected 20 values + letter, got %s" % cols
        row_letter = cols[0]
        for col_letter, col in zip(residues, cols[1:]):
            add_pair(row_letter, col_letter, float(col))
    return d
|
|
82
|
+
|
|
83
|
+
# dictionary of PMBEC coefficient accessed like pmbec_dict["V"]["R"]
|
|
84
|
+
# NOTE(review): evaluated at import time with the default verbose=True, so the header line and residue list are printed whenever this module is imported.
pmbec_dict = read_pmbec_coefficients(key_type="row")
|
|
85
|
+
# Same coefficients as pmbec_dict, converted by dict_to_amino_acid_matrix (see amino_acid_alphabet for the resulting layout).
pmbec_matrix = dict_to_amino_acid_matrix(pmbec_dict)
|