weirdo 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ #H ZHAC000103
2
+ #D Environment-dependent residue contact energies (rows = helix, cols = coil)
3
+ #R PMID:10706611
4
+ #A Zhang, C. and Kim, S.H.
5
+ #T Environment-dependent residue contact energies for proteins
6
+ #J Proc. Natl. Acad. Sci. USA 97, 2550-2555 (2000)
7
+ #M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
8
+ 0.12 1.17 0.84 0.90 -0.81 1.16 1.44 0.10 0.69 -0.81 -0.78 1.16 -0.22 -0.67 0.61 0.47 0.36 -0.72 -0.37 -0.43
9
+ 0.98 1.65 1.16 0.60 -0.21 1.26 1.12 1.09 1.16 -0.04 -0.09 2.37 0.47 -0.04 1.22 1.05 0.92 -0.09 0.06 0.32
10
+ 0.69 1.16 1.16 1.22 -0.06 1.23 1.45 0.96 0.88 0.26 0.12 1.48 0.32 0.03 1.14 0.73 0.62 0.62 0.53 0.23
11
+ 0.90 0.40 1.06 1.45 0.58 1.88 2.18 1.13 0.69 0.43 0.65 0.95 0.75 0.33 1.41 0.39 0.54 -0.10 0.12 0.77
12
+ -0.83 0.10 0.40 0.12 -2.65 -0.24 0.96 -0.26 -0.26 -1.61 -1.77 0.80 -1.02 -1.47 -0.31 -0.31 -0.49 -1.30 -0.98 -1.62
13
+ 1.13 1.10 1.28 1.37 0.14 1.62 1.84 1.29 1.31 0.05 -0.05 1.50 0.41 0.20 1.14 0.86 0.62 0.45 0.31 0.48
14
+ 1.33 0.91 1.33 1.60 0.31 1.60 1.93 1.62 1.01 0.33 0.38 1.12 0.82 0.55 1.54 0.78 0.54 0.23 0.52 0.86
15
+ -0.22 0.72 0.27 0.47 -0.95 0.42 1.39 -0.23 0.40 -0.48 -0.81 1.04 -0.62 -0.36 0.41 0.23 -0.04 -0.71 0.08 -0.35
16
+ 0.47 0.81 0.95 0.51 -1.56 0.90 0.89 0.86 0.20 -0.43 -0.48 1.31 -0.63 -0.41 0.56 0.40 0.28 -0.20 -0.22 -0.21
17
+ -0.58 0.17 0.61 0.46 -1.17 0.24 0.80 0.04 -0.16 -1.64 -1.66 0.87 -0.89 -1.56 -0.27 0.02 -0.32 -1.40 -1.13 -1.36
18
+ -0.44 0.20 0.50 0.71 -1.56 0.11 0.82 0.28 -0.15 -1.67 -1.62 0.72 -0.96 -1.55 0.02 0.19 -0.09 -1.46 -0.95 -1.32
19
+ 1.07 2.48 1.75 0.98 0.42 1.68 1.04 1.31 1.39 0.41 0.29 2.95 0.98 0.27 1.63 1.51 1.48 0.32 0.60 0.64
20
+ -0.22 0.65 0.76 0.88 -0.95 0.68 1.92 0.27 0.31 -1.32 -1.04 1.02 -0.57 -1.60 0.07 0.47 0.04 -1.29 -0.85 -0.82
21
+ -0.33 -0.06 0.42 0.42 -1.90 0.25 0.64 0.12 -0.01 -1.64 -1.50 0.58 -1.36 -1.77 -0.30 0.02 0.04 -1.41 -1.36 -1.34
22
+ 0.78 1.30 1.31 1.27 -0.04 1.44 1.71 0.69 0.84 0.05 0.15 1.68 0.38 0.27 1.05 1.19 0.83 -0.24 0.23 0.12
23
+ 0.46 1.07 1.04 0.73 -0.31 1.47 1.23 0.57 0.58 -0.11 -0.24 1.37 0.08 -0.34 0.76 0.51 0.48 -0.04 0.47 0.18
24
+ 0.50 0.90 0.75 0.91 -0.26 1.03 1.25 0.55 0.55 -0.20 -0.26 1.42 0.50 -0.22 0.88 0.69 0.56 0.41 0.11 -0.15
25
+ -0.41 -0.06 -0.19 0.32 -0.79 -0.14 0.58 0.07 -0.62 -1.58 -1.16 0.18 -1.03 -1.33 -0.56 0.15 -0.19 -1.83 -0.67 -0.92
26
+ -0.22 -0.07 0.52 0.46 -0.87 0.38 0.59 0.40 -0.17 -1.29 -1.15 0.83 -0.98 -1.16 -0.16 0.34 -0.12 -0.79 -0.77 -0.78
27
+ -0.51 0.49 0.48 0.67 -1.40 0.66 0.63 -0.06 0.28 -1.25 -1.50 1.14 -0.93 -1.36 -0.04 0.10 -0.01 -1.11 -0.82 -1.14
28
+
@@ -0,0 +1,27 @@
1
+ #H ZHAC000102
2
+ #D Environment-dependent residue contact energies (rows = helix, cols = strand)
3
+ #R PMID:10706611
4
+ #A Zhang, C. and Kim, S.H.
5
+ #T Environment-dependent residue contact energies for proteins
6
+ #J Proc. Natl. Acad. Sci. USA 97, 2550-2555 (2000)
7
+ #M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
8
+ -0.94 1.26 0.55 0.76 -1.54 1.14 1.57 -0.78 0.44 -1.59 -1.64 1.91 -0.90 -1.49 0.28 0.20 -0.04 -0.92 -0.75 -1.45
9
+ 0.56 1.79 2.31 0.79 -0.67 2.54 0.72 1.09 0.94 -0.01 0.01 3.68 0.89 -0.05 1.37 0.83 1.35 0.00 0.33 0.44
10
+ 0.59 2.21 1.82 0.77 -0.90 0.46 3.06 -0.16 0.63 -0.33 0.20 2.43 0.99 0.63 0.54 0.24 0.63 0.11 -0.19 0.23
11
+ 0.66 0.76 0.76 1.19 -0.21 1.66 2.22 0.29 0.57 0.59 0.79 1.13 1.41 0.49 1.70 1.03 1.19 1.85 0.18 0.86
12
+ -1.75 0.78 -1.00 0.32 -3.64 0.48 0.87 -1.67 -0.62 -2.77 -2.32 0.19 -1.22 -2.67 -1.62 -0.83 -1.14 -0.52 -1.94 -2.35
13
+ 0.33 2.15 1.22 1.26 1.37 1.17 2.56 0.92 1.02 0.11 0.00 2.58 0.79 -0.26 0.53 1.19 1.11 0.21 0.39 0.15
14
+ 0.82 1.05 2.18 2.11 0.01 2.42 2.58 1.15 0.97 0.20 0.31 1.31 1.25 0.12 2.00 1.09 1.13 0.58 0.31 0.39
15
+ -0.40 0.95 0.03 0.14 -1.00 0.34 0.99 -1.32 0.13 -1.40 -1.36 1.58 -0.90 -1.41 0.82 -0.27 0.21 -0.59 -1.27 -1.09
16
+ -0.75 2.19 0.13 0.68 -1.37 1.98 1.13 0.01 1.52 -0.83 -0.58 2.26 -0.82 -1.01 0.53 -0.17 0.02 -49.00 -0.61 -0.56
17
+ -1.99 0.25 -0.20 1.00 -2.44 -0.12 0.88 -1.54 -0.05 -2.64 -2.33 0.75 -1.85 -2.46 -1.06 -0.59 -0.65 -1.82 -1.88 -2.45
18
+ -2.02 0.34 -0.04 0.13 -2.29 0.24 0.73 -1.27 -0.46 -2.53 -2.44 0.67 -1.80 -2.28 -1.29 -0.40 -0.34 -1.76 -1.66 -2.26
19
+ 0.60 3.11 2.23 1.06 0.50 1.80 1.65 0.82 1.25 0.10 0.34 3.51 0.98 -0.21 1.15 2.09 1.30 -0.14 0.28 0.13
20
+ -1.54 -0.06 -0.63 1.76 -2.51 0.14 0.72 -1.74 0.07 -2.27 -2.22 1.27 -1.77 -1.87 0.34 -0.02 -0.21 -0.93 -1.54 -1.81
21
+ -2.12 0.33 -0.70 0.17 -2.30 -0.59 0.26 -1.60 -0.88 -2.53 -2.44 -0.42 -1.83 -2.68 -1.40 -0.82 -0.61 -1.63 -1.83 -2.25
22
+ 0.63 2.43 -0.19 1.31 -1.63 1.46 1.91 0.08 1.11 -0.20 0.47 1.94 -0.34 0.15 0.57 0.00 1.15 0.06 0.26 -0.06
23
+ -0.41 0.88 1.02 1.04 -0.21 1.27 0.94 0.04 0.75 -0.48 -0.67 2.28 0.45 -0.92 0.75 0.50 0.96 0.22 -0.19 -0.54
24
+ -0.32 1.48 0.35 0.43 -1.44 0.38 1.36 -0.38 0.20 -1.14 -1.00 1.38 -0.35 -0.97 -0.05 -0.16 0.29 -0.53 -0.76 -0.73
25
+ -1.85 0.45 -0.03 0.80 -1.64 -0.23 0.11 -0.95 0.67 -1.58 -2.13 0.61 -1.75 -1.59 -1.07 -0.34 -0.40 -1.29 -1.27 -1.79
26
+ -0.88 -0.20 -0.29 0.14 -1.31 0.09 0.71 -0.56 -0.57 -1.66 -1.38 1.40 -1.60 -1.97 -0.73 -0.32 -0.37 -1.40 -0.96 -1.38
27
+ -1.74 0.85 0.24 0.72 -2.25 0.45 0.81 -1.29 -0.24 -2.46 -2.38 0.37 -1.21 -2.16 -1.00 -0.10 -0.57 -1.34 -1.52 -2.31
@@ -0,0 +1,21 @@
1
+ A C D E F G H I K L M N P Q R S T V W Y
2
+ A 0.322860152036 0.0113750373506 -0.0156239175966 -0.00259952715456 -0.0508792185716 0.0382679273874 -0.0832539299638 -0.00196691041626 -0.0103729638696 -0.042393907322 -0.0651042403697 -0.0853704925231 0.0757409633086 -0.0483151514798 -0.0136431408498 0.038455041596 0.0520376087986 0.081101427454 -0.125564718844 -0.0747500389698
3
+ C 0.0113750373506 0.100680270274 0.0102951033136 0.0147570340938 0.0345785831581 0.00933463557214 -0.00750101609651 0.00476007239717 -0.0459237939975 -0.0182998264075 -0.0155971113182 0.0021128481374 -0.00860770840682 -0.0309903425175 -0.0482562439545 -0.0217965163697 -0.0227322740574 -0.0154276574266 0.0412325888637 0.00600631739163
4
+ D -0.0156239175966 0.0102951033136 0.157208255034 0.0724279735923 -0.0189545540921 -0.00870389879389 -0.0180188107498 -0.0283467966687 -0.0634240071162 -0.0279979457557 -0.0241192288182 0.0194310374127 0.042784078891 0.000437307476866 -0.0591268568576 -0.0104660502173 0.00656101264316 -0.0193560886308 0.00415097887978 -0.0191575919464
5
+ E -0.00259952715456 0.0147570340938 0.0724279735923 0.131775168933 -0.00519060032543 -0.00547805492393 -0.0335600821273 -0.0135417817213 -0.069471604426 0.00353800457557 -0.017166710134 0.00534055417468 0.022589833552 0.0281404974641 -0.0697402405064 -0.0172364513778 -0.0054830504799 -0.00806269508269 -0.00791955104235 -0.0231187170833
6
+ F -0.0508792185716 0.0345785831581 -0.0189545540921 -0.00519060032543 0.259179996995 -0.00445131805782 -0.00639743486807 0.0628717025094 -0.049227253611 0.0488666377736 0.0315353570161 -0.0223593028205 -0.0919732521492 -0.0930189756622 -0.0626297946351 -0.0868415233743 -0.0777292391855 -0.015794520965 0.0625957490761 0.0858189617896
7
+ G 0.0382679273874 0.00933463557214 -0.00870389879389 -0.00547805492393 -0.00445131805782 0.122499934434 -0.025558278086 -0.0207027221208 -0.0137316756786 -0.0326424665142 -0.0264215095016 -0.00403752148352 0.0094352664965 0.00425299048772 -0.0232280105465 0.0304312733191 0.00861853592388 -0.0127217072682 -0.0246539147339 -0.0205094859119
8
+ H -0.0832539299638 -0.00750101609651 -0.0180188107498 -0.0335600821273 -0.00639743486807 -0.025558278086 0.207657765989 -0.0888505073496 0.0761447053198 -0.0351727012494 -0.000760393877348 0.0353903619255 -0.0682087048807 -0.00886454093107 0.109052662874 0.00938179429131 -0.0234122309305 -0.0870188771708 0.0123622841944 0.0365879336865
9
+ I -0.00196691041626 0.00476007239717 -0.0283467966687 -0.0135417817213 0.0628717025094 -0.0207027221208 -0.0888505073496 0.27773187827 -0.0381642025534 0.0886112938313 0.0551293441776 -0.0593694184462 -0.039207153398 -0.0626883806129 -0.110160438997 -0.0618078497671 -0.0339233811197 0.091300054417 0.00138488610169 -0.0230596885329
10
+ K -0.0103729638696 -0.0459237939975 -0.0634240071162 -0.069471604426 -0.049227253611 -0.0137316756786 0.0761447053198 -0.0381642025534 0.273355694189 -0.0177282663533 -0.00817300753785 -0.0339854863013 -0.0484016323395 -0.0331603641198 0.21516548555 0.00476731287861 -0.0331318604828 -0.0400367780545 -0.0598522551401 -0.00464804635586
11
+ L -0.042393907322 -0.0182998264075 -0.0279979457557 0.00353800457557 0.0488666377736 -0.0326424665142 -0.0351727012494 0.0886112938313 -0.0177282663533 0.162738321535 0.0750528874999 -0.0111666419731 -0.051023845781 -0.00134001844501 -0.074934598492 -0.0584956357369 -0.031311528799 0.0449678806271 -0.00754567267671 -0.0137219703366
12
+ M -0.0651042403697 -0.0155971113182 -0.0241192288182 -0.017166710134 0.0315353570161 -0.0264215095016 -0.000760393877348 0.0551293441776 -0.00817300753785 0.0750528874999 0.156957428383 0.00753829785887 -0.091647674076 0.00190198496329 -0.0257018542091 -0.0295349216339 -0.0454820084051 -0.0120310888206 0.0210041287765 0.0126203200265
13
+ N -0.0853704925231 0.0021128481374 0.0194310374127 0.00534055417468 -0.0223593028205 -0.00403752148352 0.0353903619255 -0.0593694184462 -0.0339854863013 -0.0111666419731 0.00753829785887 0.151487423988 -0.0106077901881 0.0413965183445 -0.0338327997913 0.0170820288313 -0.00295174153884 -0.0436807942705 0.0296409813073 -0.00205806264345
14
+ P 0.0757409633086 -0.00860770840682 0.042784078891 0.022589833552 -0.0919732521492 0.0094352664965 -0.0682087048807 -0.039207153398 -0.0484016323395 -0.051023845781 -0.091647674076 -0.0106077901881 0.354629507834 0.0481497903134 -0.0377142358446 -0.00687173098621 0.0199181111388 0.0225294243984 -0.0525069717881 -0.0890062760945
15
+ Q -0.0483151514798 -0.0309903425175 0.000437307476866 0.0281404974641 -0.0930189756622 0.00425299048772 -0.00886454093107 -0.0626883806129 -0.0331603641198 -0.00134001844501 0.00190198496329 0.0413965183445 0.0481497903134 0.177175171536 0.00715630304762 0.0357241930907 0.027467611659 -0.032780800211 -0.0118972341632 -0.0487465602402
16
+ R -0.0136431408498 -0.0482562439545 -0.0591268568576 -0.0697402405064 -0.0626297946351 -0.0232280105465 0.109052662874 -0.110160438997 0.21516548555 -0.074934598492 -0.0257018542091 -0.0338327997913 -0.0377142358446 0.00715630304762 0.389022190137 0.0204288367942 -0.0408668326839 -0.0934989556047 -0.0557627605155 0.00827128508526
17
+ S 0.038455041596 -0.0217965163697 -0.0104660502173 -0.0172364513778 -0.0868415233743 0.0304312733191 0.00938179429131 -0.0618078497671 0.00476731287861 -0.0584956357369 -0.0295349216339 0.0170820288313 -0.00687173098621 0.0357241930907 0.0204288367942 0.161573840097 0.0839261885951 -0.00816241136786 -0.0444334801409 -0.0561239385213
18
+ T 0.0520376087986 -0.0227322740574 0.00656101264316 -0.0054830504799 -0.0777292391855 0.00861853592388 -0.0234122309305 -0.0339233811197 -0.0331318604828 -0.031311528799 -0.0454820084051 -0.00295174153884 0.0199181111388 0.027467611659 -0.0408668326839 0.0839261885951 0.142525860495 0.0493244941272 -0.0264928932645 -0.0468623824337
19
+ V 0.081101427454 -0.0154276574266 -0.0193560886308 -0.00806269508269 -0.015794520965 -0.0127217072682 -0.0870188771708 0.091300054417 -0.0400367780545 0.0449678806271 -0.0120310888206 -0.0436807942705 0.0225294243984 -0.032780800211 -0.0934989556047 -0.00816241136786 0.0493244941272 0.172778293246 -0.0289445753682 -0.0444846240282
20
+ W -0.125564718844 0.0412325888637 0.00415097887978 -0.00791955104235 0.0625957490761 -0.0246539147339 0.0123622841944 0.00138488610169 -0.0598522551401 -0.00754567267671 0.0210041287765 0.0296409813073 -0.0525069717881 -0.0118972341632 -0.0557627605155 -0.0444334801409 -0.0264928932645 -0.0289445753682 0.194048086876 0.0791543436022
21
+ Y -0.0747500389698 0.00600631739163 -0.0191575919464 -0.0231187170833 0.0858189617896 -0.0205094859119 0.0365879336865 -0.0230596885329 -0.00464804635586 -0.0137219703366 0.0126203200265 -0.00205806264345 -0.0890062760945 -0.0487465602402 0.00827128508526 -0.0561239385213 -0.0468623824337 -0.0444846240282 0.0791543436022 0.237788221516
@@ -0,0 +1,27 @@
1
+ #H ZHAC000105
2
+ #D Environment-dependent residue contact energies (rows = strand, cols = coil)
3
+ #R PMID:10706611
4
+ #A Zhang, C. and Kim, S.H.
5
+ #T Environment-dependent residue contact energies for proteins
6
+ #J Proc. Natl. Acad. Sci. USA 97, 2550-2555 (2000)
7
+ #M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
8
+ -0.57 0.47 0.30 0.62 -1.60 0.45 0.61 -0.24 0.07 -1.64 -1.63 0.62 -1.03 -1.55 -0.11 -0.10 -0.34 -1.44 -0.39 -1.55
9
+ 0.23 0.79 0.76 0.39 -0.41 0.92 0.76 0.52 0.51 -0.30 0.13 1.58 0.88 -0.07 0.60 0.65 0.37 0.14 0.32 0.17
10
+ -0.28 0.74 0.57 0.87 -0.68 0.52 1.00 -0.07 0.32 -0.31 -0.08 0.87 0.29 -0.17 0.57 0.11 0.19 0.04 0.24 -0.23
11
+ 0.15 -0.25 0.46 0.69 -0.46 0.41 1.34 0.56 -0.51 -0.23 0.27 0.59 0.60 -0.38 1.02 0.08 0.05 -0.48 0.02 0.34
12
+ -1.19 -0.46 0.21 0.51 -3.30 0.26 0.20 -1.03 -0.72 -1.55 -1.71 0.27 -1.24 -1.70 -0.50 -0.55 -0.97 -0.67 -1.26 -1.62
13
+ 0.63 1.18 0.92 1.37 -0.30 0.93 1.27 0.56 0.91 -0.28 -0.11 0.98 0.15 -0.30 0.64 0.88 0.68 -0.44 0.66 0.15
14
+ 0.97 0.89 1.37 1.89 0.30 1.25 2.34 0.98 0.58 0.20 0.50 0.67 1.23 0.58 1.26 0.95 1.06 0.04 0.87 0.48
15
+ -0.64 0.12 0.27 0.31 -1.37 0.38 0.98 -0.40 -0.12 -1.58 -1.40 0.78 -0.46 -1.38 -0.21 0.05 -0.26 -1.41 -0.61 -1.13
16
+ -0.02 0.75 0.68 0.14 -0.58 0.73 0.84 0.41 -0.64 -0.75 0.03 1.46 -0.16 -0.49 0.52 0.31 -0.11 -1.00 -0.58 0.03
17
+ -0.94 -0.14 0.31 0.26 -1.70 0.07 0.46 -0.37 -0.50 -1.88 -1.79 0.84 -0.99 -1.82 -0.47 -0.05 -0.54 -1.65 -1.09 -1.64
18
+ -0.76 0.32 0.43 0.25 -1.63 0.22 0.68 -0.17 -0.40 -1.84 -1.70 0.47 -1.06 -1.76 -0.39 0.09 -0.42 -1.81 -1.15 -1.64
19
+ 1.02 1.99 1.18 0.59 0.08 1.10 0.60 0.61 0.95 0.24 0.34 2.69 0.97 -0.03 1.23 1.07 0.83 0.00 0.26 0.36
20
+ -0.16 0.83 0.47 0.92 -1.63 0.36 0.71 -0.20 0.90 -1.00 -1.12 1.55 -0.31 -1.35 -0.01 0.34 0.20 -1.70 -0.60 -0.79
21
+ -0.70 0.03 0.63 0.15 -1.26 0.29 0.35 -0.11 -0.36 -1.73 -1.55 0.71 -0.97 -1.55 -0.28 -0.09 -0.32 -1.23 -0.91 -1.30
22
+ 0.17 0.50 0.60 0.67 -1.31 0.50 0.94 0.02 -0.45 -1.26 -0.91 1.08 0.83 -0.87 0.63 0.31 0.26 -0.50 -0.55 -0.79
23
+ -0.06 0.99 0.73 0.86 -0.89 0.85 0.67 0.08 0.06 -0.22 -0.29 0.94 -0.08 -0.41 0.67 0.33 0.13 -1.01 0.13 -0.24
24
+ 0.26 0.93 0.70 0.87 -0.78 0.58 1.20 0.12 0.52 -0.30 -0.24 1.11 0.01 -0.08 0.65 0.47 0.41 -0.31 0.12 -0.32
25
+ -0.03 -0.11 0.27 0.66 -1.50 0.65 0.50 -0.12 -0.32 -1.13 -1.01 0.52 -1.08 -1.04 -0.32 -0.03 -0.10 -0.67 -0.73 -0.64
26
+ -0.44 0.20 0.20 0.20 -1.26 0.16 0.10 -0.21 -0.52 -1.26 -1.30 0.60 -0.76 -1.17 -0.42 0.05 -0.27 -1.20 -0.75 -0.84
27
+ -0.83 0.20 0.48 0.62 -1.44 0.17 0.73 -0.12 -0.26 -1.64 -1.59 0.52 -0.70 -1.55 -0.28 0.12 -0.17 -1.16 -0.85 -1.42
@@ -0,0 +1,346 @@
1
+ """Model manager for trained foreignness scorers.
2
+
3
+ Handles saving, loading, and listing trained ML models.
4
+ """
5
+
6
+ import json
7
+ import os
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ # Default model storage directory
13
+ DEFAULT_MODEL_DIR = Path.home() / '.weirdo' / 'models'
14
+
15
+
16
class ModelInfo:
    """Plain record describing one model stored on disk.

    Attributes are set directly from the constructor arguments; ``params``
    and ``metadata`` fall back to a fresh empty dict when a falsy value
    (such as ``None``) is supplied.
    """

    def __init__(
        self,
        name: str,
        scorer_type: str,
        path: Path,
        created: Optional[str] = None,
        params: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        # Falsy inputs are swapped for a new dict so instances never share one
        if not params:
            params = {}
        if not metadata:
            metadata = {}

        self.name = name
        self.scorer_type = scorer_type
        self.path = path
        self.created = created
        self.params = params
        self.metadata = metadata

    def __repr__(self) -> str:
        return f"ModelInfo(name='{self.name}', type='{self.scorer_type}')"

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a dictionary, with ``path`` rendered as a string."""
        as_dict: Dict[str, Any] = {}
        as_dict['name'] = self.name
        as_dict['scorer_type'] = self.scorer_type
        as_dict['path'] = str(self.path)
        as_dict['created'] = self.created
        as_dict['params'] = self.params
        as_dict['metadata'] = self.metadata
        return as_dict
48
+
49
+
50
class ModelManager:
    """Manager for trained foreignness scoring models.

    Handles saving, loading, listing and deleting trained models, each of
    which lives in its own sub-directory of a single storage directory.

    Parameters
    ----------
    model_dir : str or Path, optional
        Directory for storing models. Defaults to ~/.weirdo/models.
        The directory is created if it does not exist.

    Example
    -------
    >>> mm = ModelManager()
    >>> mm.list_models() # List available models
    >>> model = mm.load('my-mlp') # Load a saved model
    >>> mm.save(scorer, 'my-new-model') # Save a trained model
    """

    def __init__(self, model_dir: Optional[Union[str, Path]] = None):
        self._model_dir = Path(model_dir) if model_dir else DEFAULT_MODEL_DIR
        self._model_dir.mkdir(parents=True, exist_ok=True)

    @property
    def model_dir(self) -> Path:
        """Get model storage directory."""
        return self._model_dir

    def _model_path(self, name: str) -> Path:
        """Map a model name to its directory, refusing names that escape storage.

        Without this check ``''`` or ``'.'`` would resolve to the storage
        directory itself and ``'..'`` to its parent, so ``delete`` could
        remove every saved model (or more) in one call.

        Raises
        ------
        ValueError
            If the name does not refer to a location strictly inside the
            storage directory.
        """
        root = os.path.abspath(str(self._model_dir))
        target = os.path.abspath(os.path.join(root, str(name)))
        try:
            inside = os.path.commonpath([root, target]) == root
        except ValueError:
            # commonpath rejects e.g. paths on different Windows drives
            inside = False
        if not inside or target == root:
            raise ValueError(f"Invalid model name: {name!r}")
        return self._model_dir / name

    def list_models(self) -> List["ModelInfo"]:
        """List all available models.

        A sub-directory counts as a model when it contains ``config.json``.
        Models whose files cannot be read or parsed are skipped.

        Returns
        -------
        models : list of ModelInfo
            Information about each saved model, sorted by path.
        """
        if not self._model_dir.exists():
            return []

        models = []
        for path in sorted(self._model_dir.iterdir()):
            if not (path.is_dir() and (path / 'config.json').exists()):
                continue
            try:
                models.append(self._load_model_info(path))
            except (OSError, ValueError):
                # Best-effort listing: unreadable or malformed models
                # (bad JSON is a ValueError) are left out, not fatal.
                continue
        return models

    def _load_model_info(self, path: Path) -> "ModelInfo":
        """Load model info from a model directory.

        Raises
        ------
        OSError
            If a file cannot be read.
        ValueError
            If a file is not valid JSON or ``config.json`` is not an object.
        """
        with open(path / 'config.json', 'r', encoding='utf-8') as f:
            config = json.load(f)
        if not isinstance(config, dict):
            raise ValueError(f"config.json in {path} must contain a JSON object")

        metadata = {}
        metadata_path = path / 'metadata.json'
        if metadata_path.exists():
            with open(metadata_path, 'r', encoding='utf-8') as f:
                metadata = json.load(f)

        # Reported as "created": this is the directory's last-modification time
        created = datetime.fromtimestamp(path.stat().st_mtime).isoformat()

        return ModelInfo(
            name=path.name,
            scorer_type=config.get('scorer_type', 'unknown'),
            path=path,
            created=created,
            params=config.get('params', {}),
            metadata=metadata,
        )

    def get_model_info(self, name: str) -> Optional["ModelInfo"]:
        """Get info for a specific model.

        Parameters
        ----------
        name : str
            Model name.

        Returns
        -------
        info : ModelInfo or None
            Model info if found, None otherwise (including when the name
            does not refer to a location inside the storage directory).
        """
        try:
            path = self._model_path(name)
        except ValueError:
            return None
        if path.exists() and (path / 'config.json').exists():
            return self._load_model_info(path)
        return None

    def load(self, name: str) -> 'TrainableScorer':
        """Load a trained model by name.

        Parameters
        ----------
        name : str
            Model name (directory name in model storage).

        Returns
        -------
        scorer : TrainableScorer
            Loaded model ready for inference.

        Raises
        ------
        ValueError
            If the name is invalid or the stored scorer type is unknown.
        FileNotFoundError
            If the model directory or its ``config.json`` does not exist.
        """
        # Kept from the original; only MLPScorer is referenced below.
        from .scorers.trainable import TrainableScorer  # noqa: F401
        from .scorers.mlp import MLPScorer

        path = self._model_path(name)
        if not path.exists():
            raise FileNotFoundError(f"Model not found: {name}")

        config_path = path / 'config.json'
        if not config_path.exists():
            raise FileNotFoundError(
                f"Model not found: {name} (missing config.json)"
            )

        # Load config to determine scorer type
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        scorer_type = config.get('scorer_type', '')

        # Map to scorer class
        scorer_classes = {
            'MLPScorer': MLPScorer,
        }

        if scorer_type not in scorer_classes:
            raise ValueError(
                f"Unknown scorer type: {scorer_type}. "
                f"Available: {list(scorer_classes.keys())}"
            )

        return scorer_classes[scorer_type].load(path)

    def save(
        self,
        scorer: 'TrainableScorer',
        name: str,
        overwrite: bool = False,
    ) -> Path:
        """Save a trained model.

        Parameters
        ----------
        scorer : TrainableScorer
            Trained model to save.
        name : str
            Name for the saved model.
        overwrite : bool, default=False
            Whether to overwrite existing model with same name.

        Returns
        -------
        path : Path
            Path where model was saved.

        Raises
        ------
        ValueError
            If the name does not refer to a location inside the storage
            directory.
        FileExistsError
            If the model already exists and ``overwrite`` is false.
        """
        path = self._model_path(name)

        if path.exists() and not overwrite:
            raise FileExistsError(
                f"Model already exists: {name}. "
                "Use overwrite=True to replace."
            )

        scorer.save(path)
        return path

    def delete(self, name: str) -> bool:
        """Delete a saved model.

        Parameters
        ----------
        name : str
            Model name to delete.

        Returns
        -------
        deleted : bool
            True if model was deleted, False if nothing existed under
            that name.

        Raises
        ------
        ValueError
            If the name does not refer to a location inside the storage
            directory (for example ``''`` or ``'..'``).
        """
        import shutil
        path = self._model_path(name)

        if not path.exists():
            return False

        if path.is_dir() and not path.is_symlink():
            shutil.rmtree(path)
        else:
            # rmtree refuses plain files and symlinks; remove the entry itself
            path.unlink()
        return True

    @staticmethod
    def _format_loss(value: Any) -> str:
        """Format a loss with four decimals, falling back to ``str`` for non-numbers."""
        try:
            return f"{value:.4f}"
        except (TypeError, ValueError):
            return str(value)

    def print_models(self) -> None:
        """Print formatted list of available models."""
        models = self.list_models()

        if not models:
            print("No trained models found.")
            print(f"Model directory: {self._model_dir}")
            return

        print(f"Trained models ({len(models)}):")
        print("-" * 60)

        for model in models:
            print(f"\n {model.name}")
            print(f" Type: {model.scorer_type}")
            if model.created:
                # Trim the ISO timestamp to whole seconds
                print(f" Created: {model.created[:19]}")
            if 'n_train' in model.metadata:
                print(f" Training samples: {model.metadata['n_train']}")
            if 'n_epochs' in model.metadata:
                print(f" Epochs trained: {model.metadata['n_epochs']}")
            if 'final_train_loss' in model.metadata:
                loss = self._format_loss(model.metadata['final_train_loss'])
                print(f" Final train loss: {loss}")
            if 'best_val_loss' in model.metadata:
                loss = self._format_loss(model.metadata['best_val_loss'])
                print(f" Best val loss: {loss}")
            if 'k' in model.params:
                print(f" K-mer size: {model.params['k']}")

        print(f"\nModel directory: {self._model_dir}")
266
+
267
+
268
# Shared manager instance, created on first use
_model_manager: Optional[ModelManager] = None


def get_model_manager(model_dir: Optional[Union[str, Path]] = None) -> ModelManager:
    """Return the shared model manager, building it when required.

    A new ``ModelManager`` is created, and stored as the shared instance,
    whenever no instance exists yet or ``model_dir`` is not ``None``.
    Passing a directory therefore also changes what later calls without
    an argument return.

    Parameters
    ----------
    model_dir : str or Path, optional
        Custom model directory.

    Returns
    -------
    manager : ModelManager
    """
    global _model_manager
    must_build = model_dir is not None or _model_manager is None
    if must_build:
        _model_manager = ModelManager(model_dir)
    return _model_manager
288
+
289
+
290
def list_models(model_dir: Optional[Union[str, Path]] = None) -> List[ModelInfo]:
    """Return info for every trained model known to the model manager.

    Parameters
    ----------
    model_dir : str or Path, optional
        Custom model directory, forwarded to ``get_model_manager``.

    Returns
    -------
    models : list of ModelInfo
    """
    manager = get_model_manager(model_dir)
    return manager.list_models()
303
+
304
+
305
def load_model(name: str, model_dir: Optional[Union[str, Path]] = None) -> 'TrainableScorer':
    """Load a trained model through the model manager.

    Parameters
    ----------
    name : str
        Model name.
    model_dir : str or Path, optional
        Custom model directory, forwarded to ``get_model_manager``.

    Returns
    -------
    scorer : TrainableScorer
        Loaded model.
    """
    manager = get_model_manager(model_dir)
    return manager.load(name)
321
+
322
+
323
def save_model(
    scorer: 'TrainableScorer',
    name: str,
    model_dir: Optional[Union[str, Path]] = None,
    overwrite: bool = False,
) -> Path:
    """Save a trained model through the model manager.

    Parameters
    ----------
    scorer : TrainableScorer
        Trained model.
    name : str
        Model name.
    model_dir : str or Path, optional
        Custom model directory, forwarded to ``get_model_manager``.
    overwrite : bool, default=False
        Overwrite existing model.

    Returns
    -------
    path : Path
        Whatever the manager's ``save`` returns.
    """
    manager = get_model_manager(model_dir)
    saved_to = manager.save(scorer, name, overwrite)
    return saved_to
@@ -0,0 +1,78 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ import numpy as np
14
+ from sklearn.feature_extraction.text import CountVectorizer
15
+ from sklearn.preprocessing import normalize
16
+
17
class _ReducedAlphabetPreprocessor(object):
    """Callable that rewrites every character of a string via a mapping.

    Implemented as a class rather than a lambda so that a CountVectorizer
    holding it can be pickled (lambdas cannot be).
    """

    def __init__(self, reduced_alphabet):
        self.reduced_alphabet = reduced_alphabet

    def __call__(self, s):
        # Raises KeyError for a character missing from the mapping
        return "".join([self.reduced_alphabet[si] for si in s])


def make_count_vectorizer(reduced_alphabet, max_ngram):
    """Build a character n-gram CountVectorizer for peptide strings.

    Parameters
    ----------
    reduced_alphabet : mapping or None
        When given, each input character is replaced by
        ``reduced_alphabet[character]`` before counting. ``None`` means
        no preprocessor is installed.

    max_ngram : int
        Upper bound of the n-gram range; n-grams of length 1 up to
        ``max_ngram`` are counted.

    Returns
    -------
    CountVectorizer configured with ``analyzer='char'`` and float64 counts.
    """
    if reduced_alphabet is None:
        preprocessor = None
    else:
        preprocessor = _ReducedAlphabetPreprocessor(reduced_alphabet)

    return CountVectorizer(
        analyzer='char',
        ngram_range=(1, max_ngram),
        dtype=np.float64,
        preprocessor=preprocessor)
28
+
29
class PeptideVectorizer(object):
    """
    Make n-gram frequency vectors from peptide sequences

    Parameters
    ----------
    max_ngram : int
        Longest n-gram to count (n-grams of length 1..max_ngram are used).

    normalize_row : bool
        When true, every output row is L1-normalized.

    reduced_alphabet : mapping or None
        Optional character mapping handed to the count vectorizer.

    training_already_reduced : bool
        When true, ``fit_transform`` learns the vocabulary from the
        training strings as given (no character mapping applied), while
        the stored vectorizer still applies ``reduced_alphabet`` on
        later ``transform`` calls.
    """
    def __init__(
            self,
            max_ngram=1,
            normalize_row=True,
            reduced_alphabet=None,
            training_already_reduced=False):
        self.reduced_alphabet = reduced_alphabet
        self.max_ngram = max_ngram
        self.normalize_row = normalize_row
        self.training_already_reduced = training_already_reduced
        # Set by fit / fit_transform
        self.count_vectorizer = None

    def __getstate__(self):
        return {
            'reduced_alphabet': self.reduced_alphabet,
            'count_vectorizer': self.count_vectorizer,
            'training_already_reduced': self.training_already_reduced,
            'normalize_row': self.normalize_row,
            'max_ngram': self.max_ngram,
        }

    def _maybe_normalize(self, X):
        """Apply L1 row normalization when ``normalize_row`` is set."""
        if self.normalize_row:
            X = normalize(X, norm='l1')
        return X

    def fit_transform(self, amino_acid_strings):
        """Fit the vocabulary on the given strings and return their vectors."""
        self.count_vectorizer = \
            make_count_vectorizer(self.reduced_alphabet, self.max_ngram)

        if self.training_already_reduced:
            # Fit without the character mapping, then hand the learned
            # vocabulary to the vectorizer that does apply it.
            c = make_count_vectorizer(None, self.max_ngram)
            X = c.fit_transform(amino_acid_strings).toarray()
            self.count_vectorizer.vocabulary_ = c.vocabulary_
        else:
            X = self.count_vectorizer.fit_transform(
                amino_acid_strings).toarray()

        return self._maybe_normalize(X)

    def fit(self, amino_acid_strings):
        """Fit the vocabulary; returns ``self`` so calls can be chained."""
        self.fit_transform(amino_acid_strings)
        return self

    def transform(self, amino_acid_strings):
        """Vectorize strings with the previously fitted vocabulary.

        Raises
        ------
        ValueError
            If neither ``fit`` nor ``fit_transform`` has been called.
        """
        # Explicit raise: an ``assert`` would vanish under ``python -O``
        if self.count_vectorizer is None:
            raise ValueError("Must call 'fit' before 'transform'")
        X = self.count_vectorizer.transform(amino_acid_strings).toarray()
        return self._maybe_normalize(X)
weirdo/pmbec.py ADDED
@@ -0,0 +1,85 @@
1
+ # Licensed under the Apache License, Version 2.0 (the "License");
2
+ # you may not use this file except in compliance with the License.
3
+ # You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ from os.path import join
14
+
15
+ from .static_data import MATRIX_DIR
16
+
17
+ from .amino_acid_alphabet import dict_to_amino_acid_matrix
18
+
19
def read_pmbec_coefficients(
        key_type='row',
        verbose=True,
        filename=join(MATRIX_DIR, 'pmbec.mat')):
    """
    Read a PMBEC coefficient matrix file into a dictionary.

    The file is expected to hold a header line naming 20 residues,
    followed by rows made of a residue letter and 20 numeric values,
    all separated by whitespace.

    Parameters
    ------------

    filename : str
        Location of PMBEC coefficient matrix

    key_type : str
        'row' : every key is a single amino acid,
                which maps to a dictionary for that row
        'pair' : every key is a tuple of amino acids
        'pair_string' : every key is a string of two amino acid characters

    verbose : bool
        Print the header line and the list of residues parsed from it

    Returns
    -------
    dict keyed as selected by `key_type`, with float values

    Raises
    ------
    ValueError
        If `key_type` is not one of the supported values, or the file
        does not have the expected shape.
    """
    # Validate before touching the file; a bare assert would be stripped
    # under ``python -O`` and silently fall through to 'pair_string'.
    if key_type not in ('row', 'pair', 'pair_string'):
        raise ValueError("Invalid dictionary key type: %s" % key_type)

    with open(filename, 'r') as f:
        # Skip empty and whitespace-only lines
        lines = [line for line in f.read().split('\n') if line.strip()]

    if not lines:
        raise ValueError("PMBEC matrix file is empty: %s" % filename)

    header = lines[0]
    if verbose:
        print(header)
    residues = [x for x in header.split() if len(x) == 1]
    if len(residues) != 20:
        raise ValueError(
            "Expected 20 residues in header, got %d" % len(residues))
    if verbose:
        print(residues)

    d = {}
    for line in lines[1:]:
        cols = line.split()
        if len(cols) != 21:
            raise ValueError("Expected 20 values + letter, got %s" % cols)
        row_letter = cols[0]
        for col_letter, col in zip(residues, cols[1:]):
            value = float(col)
            if key_type == 'row':
                d.setdefault(row_letter, {})[col_letter] = value
            elif key_type == 'pair':
                d[(row_letter, col_letter)] = value
            else:
                d["%s%s" % (row_letter, col_letter)] = value
    return d
82
+
83
+ # Row-keyed PMBEC coefficients, built when this module is imported and accessed like pmbec_dict["V"]["R"]; verbose is left at its default of True, so the reader prints the header and residue list here
84
+ pmbec_dict = read_pmbec_coefficients(key_type="row")
85
+ pmbec_matrix = dict_to_amino_acid_matrix(pmbec_dict)