PyPI - weirdo - Versions diffs - 1.0.0__tar.gz - Mend

weirdo 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+from .amino_acid_alphabet import (
+    AminoAcid,
+    canonical_amino_acids,
+    canonical_amino_acid_letters,
+    extended_amino_acids,
+    extended_amino_acid_letters,
+    amino_acid_letter_indices,
+    amino_acid_name_indices,
+)
+from .peptide_vectorizer import PeptideVectorizer
+from .distances import hamming
+__version__ = "1.0.0"
+__all__ = [
+    "AminoAcid",
+    "canonical_amino_acids",
+    "canonical_amino_acid_letters",
+    "extended_amino_acids",
+    "extended_amino_acid_letters",
+    "amino_acid_letter_indices",
+    "amino_acid_name_indices",
+    "PeptideVectorizer",
+    "hamming",
+]

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/amino_acid.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/amino_acid_alphabet.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/amino_acid_properties.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/blosum.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/chou_fasman.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/common.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/distances.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/peptide_vectorizer.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/pmbec.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/reduced_alphabet.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/residue_contact_energies.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/__pycache__/static_data.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/amino_acid.py ADDED Viewed

@@ -0,0 +1,33 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+class AminoAcid(object):
+    """
+    A single amino acid (or wildcard token) identified by its letter code.
+
+    Parameters
+    ------------
+    full_name : str
+        Full name, e.g. "Alanine"
+    short_name : str
+        Short code, e.g. "Ala"
+    letter : str
+        Single letter code, e.g. "A"
+    contains : collection of str, optional
+        Letters this token stands for; defaults to a list holding only
+        `letter` when omitted or empty.
+
+    Equality and hashing are both based solely on `letter`.
+    """
+    def __init__(
+            self, full_name, short_name, letter, contains=None):
+        self.letter = letter
+        self.full_name = full_name
+        self.short_name = short_name
+        if not contains:
+            contains = [letter]
+        self.contains = contains
+    def __str__(self):
+        # values are passed in the same order as the placeholders
+        return (
+            ("AminoAcid(full_name='%s', short_name='%s', letter='%s', "
+             "contains=%s)") % (
+            self.full_name, self.short_name, self.letter, self.contains))
+    def __repr__(self):
+        return str(self)
+    def __eq__(self, other):
+        return other.__class__ is AminoAcid and self.letter == other.letter
+    def __ne__(self, other):
+        # Python 2 does not derive != from ==
+        return not self.__eq__(other)
+    def __hash__(self):
+        # defining __eq__ alone makes instances unhashable on Python 3;
+        # hash on the same field that equality compares
+        return hash(self.letter)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/amino_acid_alphabet.py ADDED Viewed

@@ -0,0 +1,158 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Quantify amino acids by their physical/chemical properties
+"""
+import numpy as np
+from .amino_acid import AminoAcid
+canonical_amino_acids = [
+    AminoAcid("Alanine", "Ala", "A"),
+    AminoAcid("Arginine", "Arg", "R"),
+    AminoAcid("Asparagine","Asn", "N"),
+    AminoAcid("Aspartic Acid", "Asp", "D"),
+    AminoAcid("Cysteine", "Cys", "C"),
+    AminoAcid("Glutamic Acid", "Glu", "E"),
+    AminoAcid("Glutamine", "Gln", "Q"),
+    AminoAcid("Glycine", "Gly", "G"),
+    AminoAcid("Histidine", "His", "H"),
+    AminoAcid("Isoleucine",  "Ile", "I"),
+    AminoAcid("Leucine", "Leu", "L"),
+    AminoAcid("Lysine", "Lys", "K"),
+    AminoAcid("Methionine",  "Met", "M"),
+    AminoAcid("Phenylalanine", "Phe", "F"),
+    AminoAcid("Proline", "Pro", "P"),
+    AminoAcid("Serine", "Ser", "S"),
+    AminoAcid("Threonine", "Thr", "T"),
+    AminoAcid("Tryptophan", "Trp", "W"),
+    AminoAcid("Tyrosine", "Tyr", "Y"),
+    AminoAcid("Valine", "Val", "V")
+]
+canonical_amino_acid_letters = [aa.letter for aa in canonical_amino_acids]
+###
+# Post-translation modifications commonly detected by mass-spec
+###
+# TODO: figure out three letter codes for modified AAs
+modified_amino_acids = [
+    AminoAcid("Phospho-Serine", "Sep", "s"),
+    AminoAcid("Phospho-Threonine", "???", "t"),
+    AminoAcid("Phospho-Tyrosine", "???", "y"),
+    AminoAcid("Cystine", "???", "c"),
+    AminoAcid("Methionine sulfoxide", "???", "m"),
+    AminoAcid("Pyroglutamate", "???", "q"),
+    AminoAcid("Pyroglutamic acid", "???", "n"),
+]
+###
+# Amino acid tokens which represent multiple canonical amino acids
+###
+wildcard_amino_acids = [
+    AminoAcid("Unknown", "Xaa", "X", contains=set(canonical_amino_acid_letters)),
+    AminoAcid("Asparagine-or-Aspartic-Acid", "Asx",  "B", contains={"D", "N"}),
+    AminoAcid("Glutamine-or-Glutamic-Acid", "Glx", "Z", contains={"E", "Q"}),
+    AminoAcid("Leucine-or-Isoleucine", "Xle", "J", contains={"I", "L"})
+]
+###
+# Canonical amino acids + wildcard tokens
+###
+canonical_amino_acids_with_unknown = canonical_amino_acids + wildcard_amino_acids
+###
+# Rare amino acids which aren't considered part of the core 20 "canonical"
+###
+rare_amino_acids = [
+    AminoAcid("Selenocysteine", "Sec", "U"),
+    AminoAcid("Pyrrolysine", "Pyl", "O"),
+]
+###
+# Extended amino acids + wildcard tokens
+###
+extended_amino_acids = canonical_amino_acids + rare_amino_acids + wildcard_amino_acids
+extended_amino_acid_letters = [
+    aa.letter for aa in extended_amino_acids
+]
+extended_amino_acids_with_unknown_names = [
+    aa.full_name for aa in extended_amino_acids
+]
+amino_acid_letter_indices = {
+    c: i for (i, c) in
+    enumerate(extended_amino_acid_letters)
+}
+# Two-letter strings for every ordered pair of extended amino acid letters.
+# Iterates over the letters (not the AminoAcid objects, whose string form is
+# a full repr); the first letter varies fastest, as in the original nesting.
+amino_acid_letter_pairs = [
+    "%s%s" % (x, y)
+    for y in extended_amino_acid_letters
+    for x in extended_amino_acid_letters
+]
+amino_acid_name_indices = {
+    aa_name: i for (i, aa_name)
+    in enumerate(extended_amino_acids_with_unknown_names)
+}
+amino_acid_pair_positions = {
+    pair: i for (i, pair) in enumerate(amino_acid_letter_pairs)
+}
+def index_to_full_name(idx):
+    """
+    Return the full name of the entry at position `idx` of
+    `extended_amino_acids`.
+    """
+    amino_acid = extended_amino_acids[idx]
+    return amino_acid.full_name
+def index_to_short_name(idx):
+    """
+    Return the short name of the entry at position `idx` of
+    `extended_amino_acids`.
+    """
+    amino_acid = extended_amino_acids[idx]
+    return amino_acid.short_name
+def index_to_letter(idx):
+    """
+    Return the single letter code of the entry at position `idx` of
+    `extended_amino_acids`.
+    """
+    # return the letter itself rather than the whole AminoAcid object
+    return extended_amino_acids[idx].letter
+def letter_to_index(x):
+    """
+    Look up the position index for an amino acid letter code.
+
+    Fails an assertion when `x` is not a key of `amino_acid_letter_indices`.
+    """
+    is_known = x in amino_acid_letter_indices
+    assert is_known, "Unknown amino acid: %s" % x
+    index = amino_acid_letter_indices[x]
+    return index
+def peptide_to_indices(xs):
+    """
+    Convert each letter of `xs` to its entry in `amino_acid_letter_indices`.
+    """
+    indices = []
+    for letter in xs:
+        indices.append(amino_acid_letter_indices[letter])
+    return indices
+def letter_to_short_name(x):
+    """
+    Convert an amino acid letter code to its short name by way of its
+    position index.
+    """
+    idx = letter_to_index(x)
+    return index_to_short_name(idx)
+def peptide_to_short_amino_acid_names(xs):
+    """
+    Convert each letter of `xs` to the short name of the matching entry
+    in `extended_amino_acids`.
+
+    Raises KeyError for a letter missing from `amino_acid_letter_indices`.
+    """
+    # map letter -> index -> short name; the index alone is not a name
+    return [
+        extended_amino_acids[amino_acid_letter_indices[x]].short_name
+        for x in xs
+    ]
+def dict_to_amino_acid_matrix(d, alphabet=canonical_amino_acids):
+    """
+    Convert a nested dictionary of pairwise coefficients into a square
+    float32 matrix.
+
+    Parameters
+    ------------
+    d : dict
+        Maps row letter -> column letter -> value; must contain every
+        letter of `alphabet` in both positions (extra keys are ignored).
+    alphabet : list of AminoAcid
+        Determines both the size of the matrix and the order of its
+        rows and columns.
+    """
+    # size by the alphabet being indexed, not by the dictionary, which may
+    # carry extra rows that would otherwise show up as zero padding
+    n_aa = len(alphabet)
+    result_matrix = np.zeros((n_aa, n_aa), dtype="float32")
+    for i, aa_row in enumerate(alphabet):
+        d_row = d[aa_row.letter]
+        for j, aa_col in enumerate(alphabet):
+            result_matrix[i, j] = d_row[aa_col.letter]
+    return result_matrix

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/amino_acid_properties.py ADDED Viewed

@@ -0,0 +1,358 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Quantify amino acids by their physical/chemical properties
+"""
+from .amino_acid_alphabet import letter_to_index
+def aa_dict_to_positional_list(aa_property_dict):
+    """
+    Arrange per-letter property values into a 20-element list, placing
+    each value at the index `letter_to_index` gives for its letter.
+
+    Fails an assertion if an index falls outside 0..19 or if any of the
+    20 positions is left unfilled.
+    """
+    positional = [None for _ in range(20)]
+    for aa_letter, aa_value in aa_property_dict.items():
+        position = letter_to_index(aa_letter)
+        assert position >= 0
+        assert position < 20
+        positional[position] = aa_value
+    any_missing = any(entry is None for entry in positional)
+    assert not any_missing, \
+        "Missing amino acids in:\n%s" % aa_property_dict.keys()
+    return positional
+def parse_property_table(table_string):
+    """
+    Parse a space-delimited table whose rows start with a numeric value
+    followed by a key (e.g. "1.80000 A ALA").
+
+    Returns a dictionary mapping the second field of every non-blank row
+    to the float value of its first field. Fails an assertion on rows
+    with fewer than two fields or on a repeated key.
+    """
+    parsed = {}
+    for raw_line in table_string.splitlines():
+        row = raw_line.strip()
+        if row:
+            tokens = [tok for tok in row.split(" ") if tok.strip()]
+            assert len(tokens) >= 2
+            number, key = tokens[:2]
+            assert key not in parsed, "Repeated amino acid " + row
+            parsed[key] = float(number)
+    return parsed
+"""
+Amino acid property tables copied from the CRASP website
+"""
+hydropathy = parse_property_table("""
+1.80000 A ALA
+-4.5000 R ARG
+-3.5000 N ASN
+-3.5000 D ASP
+2.50000 C CYS
+-3.5000 Q GLN
+-3.5000 E GLU
+-0.4000 G GLY
+-3.2000 H HIS
+4.50000 I ILE
+3.80000 L LEU
+-3.9000 K LYS
+1.90000 M MET
+2.80000 F PHE
+-1.6000 P PRO
+-0.8000 S SER
+-0.7000 T THR
+-0.9000 W TRP
+-1.3000 Y TYR
+4.20000 V VAL
+""")
+volume = parse_property_table("""
+91.5000 A ALA
+202.0000 R ARG
+135.2000 N ASN
+124.5000 D ASP
+118.0000 C CYS
+161.1000 Q GLN
+155.1000 E GLU
+66.40000 G GLY
+167.3000 H HIS
+168.8000 I ILE
+167.9000 L LEU
+171.3000 K LYS
+170.8000 M MET
+203.4000 F PHE
+129.3000 P PRO
+99.10000 S SER
+122.1000 T THR
+237.6000 W TRP
+203.6000 Y TYR
+141.7000 V VAL
+""")
+polarity = parse_property_table("""
+0.0000 A ALA
+52.000 R ARG
+3.3800 N ASN
+40.700 D ASP
+1.4800 C CYS
+3.5300 Q GLN
+49.910 E GLU
+0.0000 G GLY
+51.600 H HIS
+0.1500 I ILE
+0.4500 L LEU
+49.500 K LYS
+1.4300 M MET
+0.3500 F PHE
+1.5800 P PRO
+1.6700 S SER
+1.6600 T THR
+2.1000 W TRP
+1.6100 Y TYR
+0.1300 V VAL
+""")
+pK_side_chain = parse_property_table("""
+0.0000 A ALA
+12.480 R ARG
+0.0000 N ASN
+3.6500 D ASP
+8.1800 C CYS
+0.0000 Q GLN
+4.2500 E GLU
+0.0000 G GLY
+6.0000 H HIS
+0.0000 I ILE
+0.0000 L LEU
+10.530 K LYS
+0.0000 M MET
+0.0000 F PHE
+0.0000 P PRO
+0.0000 S SER
+0.0000 T THR
+0.0000 W TRP
+10.700 Y TYR
+0.0000 V VAL
+""")
+prct_exposed_residues = parse_property_table("""
+15.0000 A ALA
+67.0000 R ARG
+49.0000 N ASN
+50.0000 D ASP
+5.00000 C CYS
+56.0000 Q GLN
+55.0000 E GLU
+10.0000 G GLY
+34.0000 H HIS
+13.0000 I ILE
+16.0000 L LEU
+85.0000 K LYS
+20.0000 M MET
+10.0000 F PHE
+45.0000 P PRO
+32.0000 S SER
+32.0000 T THR
+17.0000 W TRP
+41.0000 Y TYR
+14.0000 V VAL
+""")
+hydrophilicity = parse_property_table("""
+-0.5000 A ALA
+3.00000 R ARG
+0.20000 N ASN
+3.00000 D ASP
+-1.0000 C CYS
+0.20000 Q GLN
+3.00000 E GLU
+0.00000 G GLY
+-0.5000 H HIS
+-1.8000 I ILE
+-1.8000 L LEU
+3.00000 K LYS
+-1.3000 M MET
+-2.5000 F PHE
+0.00000 P PRO
+0.30000 S SER
+-0.4000 T THR
+-3.4000 W TRP
+-2.3000 Y TYR
+-1.5000 V VAL
+""")
+accessible_surface_area = parse_property_table("""
+27.8000 A ALA
+94.7000 R ARG
+60.1000 N ASN
+60.6000 D ASP
+15.5000 C CYS
+68.7000 Q GLN
+68.2000 E GLU
+24.5000 G GLY
+50.7000 H HIS
+22.8000 I ILE
+27.6000 L LEU
+103.000 K LYS
+33.5000 M MET
+25.5000 F PHE
+51.5000 P PRO
+42.0000 S SER
+45.0000 T THR
+34.7000 W TRP
+55.2000 Y TYR
+23.7000 V VAL
+""")
+local_flexibility = parse_property_table("""
+705.42000 A ALA
+1484.2800 R ARG
+513.46010 N ASN
+34.960000 D ASP
+2412.5601 C CYS
+1087.8300 Q GLN
+1158.6600 E GLU
+33.180000 G GLY
+1637.1300 H HIS
+5979.3701 I ILE
+4985.7300 L LEU
+699.69000 K LYS
+4491.6602 M MET
+5203.8599 F PHE
+431.96000 P PRO
+174.76000 S SER
+601.88000 T THR
+6374.0698 W TRP
+4291.1001 Y TYR
+4474.4199 V VAL
+""")
+accessible_surface_area_folded = parse_property_table("""
+31.5000 A ALA
+93.8000 R ARG
+62.2000 N ASN
+60.9000 D ASP
+13.9000 C CYS
+74.0000 Q GLN
+72.3000 E GLU
+25.2000 G GLY
+46.7000 H HIS
+23.0000 I ILE
+29.0000 L LEU
+110.300 K LYS
+30.5000 M MET
+28.7000 F PHE
+53.7000 P PRO
+44.2000 S SER
+46.0000 T THR
+41.7000 W TRP
+59.1000 Y TYR
+23.5000 V VAL
+""")
+refractivity = parse_property_table("""
+4.34000 A ALA
+26.6600 R ARG
+13.2800 N ASN
+12.0000 D ASP
+35.7700 C CYS
+17.5600 Q GLN
+17.2600 E GLU
+0.00000 G GLY
+21.8100 H HIS
+19.0600 I ILE
+18.7800 L LEU
+21.2900 K LYS
+21.6400 M MET
+29.4000 F PHE
+10.9300 P PRO
+6.35000 S SER
+11.0100 T THR
+42.5300 W TRP
+31.5300 Y TYR
+13.9200 V VAL
+""")
+# Per-letter mass values, parsed into a dict keyed by one-letter code.
+# NOTE(review): these look like average residue masses in Daltons - confirm
+# the units against the table this was copied from.
+# NOTE(review): A (70.079) and P (97.177) differ from the commonly tabulated
+# residue masses 71.079 and 97.117; possibly transcription typos - verify
+# against the source before relying on them.
+mass = parse_property_table("""
+70.079 A ALA
+156.188 R ARG
+114.104 N ASN
+115.089 D ASP
+103.144 C CYS
+128.131 Q GLN
+129.116 E GLU
+57.052 G GLY
+137.142 H HIS
+113.160 I ILE
+113.160 L LEU
+128.174 K LYS
+131.198 M MET
+147.177 F PHE
+97.177 P PRO
+87.078 S SER
+101.105 T THR
+186.213 W TRP
+163.170 Y TYR
+99.133 V VAL
+""")
+###
+# Values copied from:
+# "Solvent accessibility of AA in known protein structures"
+# http://prowl.rockefeller.edu/aainfo/access.htm
+###
+"""
+Solvent accessibility of AA in known protein structures
+Figure 1.
+S   0.70    0.20    0.10
+T   0.71    0.16    0.13
+A   0.48    0.35    0.17
+G   0.51    0.36    0.13
+P   0.78    0.13    0.09
+C   0.32    0.54    0.14
+D   0.81    0.09    0.10
+E   0.93    0.04    0.03
+Q   0.81    0.10    0.09
+N   0.82    0.10    0.08
+L   0.41    0.49    0.10
+I   0.39    0.47    0.14
+V   0.40    0.50    0.10
+M   0.44    0.20    0.36
+F   0.42    0.42    0.16
+Y   0.67    0.20    0.13
+W   0.49    0.44    0.07
+K   0.93    0.02    0.05
+R   0.84    0.05    0.11
+H   0.66    0.19    0.15
+"""
+solvent_exposed_area = dict(
+    S=0.70,
+    T=0.71,
+    A=0.48,
+    G=0.51,
+    P=0.78,
+    C=0.32,
+    D=0.81,
+    E=0.93,
+    Q=0.81,
+    N=0.82,
+    L=0.41,
+    I=0.39,
+    V=0.40,
+    M=0.44,
+    F=0.42,
+    Y=0.67,
+    W=0.49,
+    K=0.93,
+    R=0.84,
+    H=0.66,
+)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/blosum.py ADDED Viewed

@@ -0,0 +1,74 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from os.path import join
+from .static_data import MATRIX_DIR
+from .amino_acid_alphabet import dict_to_amino_acid_matrix
+def parse_blosum_table(table, coeff_type=int, key_type='row'):
+    """
+    Parse a table of pairwise amino acid coefficients (e.g. BLOSUM50)
+
+    Parameters
+    ------------
+    table : str
+        Text of the table: optional '#' comment lines, a header line of
+        column labels, then one row per amino acid consisting of the row
+        label followed by its coefficients.
+    coeff_type : callable
+        Converts each coefficient string to a value (default: int)
+    key_type : str
+        'row' : every key is a single amino acid,
+           which maps to a dictionary for that row
+        'pair' : every key is a tuple of amino acids
+        'pair_string' : every key is a string of two amino acid characters
+
+    Raises ValueError if the table has no header line or the header has
+    fewer than 20 labels. Fails an assertion on an unknown `key_type` or
+    on a row with fewer than 21 fields.
+    """
+    # validate once up front instead of once per coefficient
+    assert key_type in ('row', 'pair', 'pair_string'), \
+        "Unknown key type: %s" % key_type
+    lines = table.split("\n")
+    # drop comments
+    lines = [line for line in lines if not line.startswith("#")]
+    # drop CR endline characters
+    lines = [line.replace("\r", "") for line in lines]
+    # skip empty and whitespace-only lines, which carry no fields
+    lines = [line for line in lines if line.strip()]
+    if not lines:
+        raise ValueError("Expected a header line but table is empty")
+    labels = lines[0].split()
+    if len(labels) < 20:
+        raise ValueError(
+            "Expected 20+ amino acids but first line '%s' has %d fields" % (
+                lines[0],
+                len(labels)))
+    coeffs = {}
+    for line in lines[1:]:
+        fields = line.split()
+        assert len(fields) >= 21, \
+            "Expected AA and 20+ coefficients but '%s' has %d fields" % (
+                line, len(fields))
+        x = fields[0]
+        for i, coeff_str in enumerate(fields[1:]):
+            y = labels[i]
+            coeff = coeff_type(coeff_str)
+            if key_type == 'pair':
+                coeffs[(x, y)] = coeff
+            elif key_type == 'pair_string':
+                coeffs[x + y] = coeff
+            else:
+                coeffs.setdefault(x, {})[y] = coeff
+    return coeffs
+# Load the BLOSUM30/50/62 tables from MATRIX_DIR when this module is
+# imported. Each file is parsed with the default key_type ('row') into a
+# nested dict (row letter -> column letter -> coefficient), which is then
+# converted to a matrix with dict_to_amino_acid_matrix's default alphabet.
+with open(join(MATRIX_DIR, 'BLOSUM30'), 'r') as f:
+    blosum30_dict = parse_blosum_table(f.read())
+    blosum30_matrix = dict_to_amino_acid_matrix(blosum30_dict)
+with open(join(MATRIX_DIR, 'BLOSUM50'), 'r') as f:
+    blosum50_dict = parse_blosum_table(f.read())
+    blosum50_matrix = dict_to_amino_acid_matrix(blosum50_dict)
+with open(join(MATRIX_DIR, 'BLOSUM62'), 'r') as f:
+    blosum62_dict = parse_blosum_table(f.read())
+    blosum62_matrix = dict_to_amino_acid_matrix(blosum62_dict)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/chou_fasman.py ADDED Viewed

@@ -0,0 +1,74 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function, division, absolute_import
+from .amino_acid_alphabet import amino_acid_name_indices
+# Chou-Fasman of structural properties from
+# http://prowl.rockefeller.edu/aainfo/chou.htm
+chou_fasman_table = """
+Alanine        142     83       66      0.06    0.076   0.035   0.058
+Arginine        98     93       95      0.070   0.106   0.099   0.085
+Aspartic Acid  101     54      146      0.147   0.110   0.179   0.081
+Asparagine      67     89      156      0.161   0.083   0.191   0.091
+Cysteine        70    119      119      0.149   0.050   0.117   0.128
+Glutamic Acid  151    037       74      0.056   0.060   0.077   0.064
+Glutamine      111    110       98      0.074   0.098   0.037   0.098
+Glycine         57     75      156      0.102   0.085   0.190   0.152
+Histidine      100     87       95      0.140   0.047   0.093   0.054
+Isoleucine     108    160       47      0.043   0.034   0.013   0.056
+Leucine        121    130       59      0.061   0.025   0.036   0.070
+Lysine         114     74      101      0.055   0.115   0.072   0.095
+Methionine     145    105       60      0.068   0.082   0.014   0.055
+Phenylalanine  113    138       60      0.059   0.041   0.065   0.065
+Proline         57     55      152      0.102   0.301   0.034   0.068
+Serine          77     75      143      0.120   0.139   0.125   0.106
+Threonine       83    119       96      0.086   0.108   0.065   0.079
+Tryptophan     108    137       96      0.077   0.013   0.064   0.167
+Tyrosine        69    147      114      0.082   0.065   0.114   0.125
+Valine         106    170       50      0.062   0.048   0.028   0.053
+"""
+def parse_chou_fasman(table):
+    """
+    Parse the Chou-Fasman table text into three dictionaries holding the
+    first three integer columns of every row (alpha helix, beta sheet and
+    turn scores, in that order).
+
+    Keys are the values stored in `amino_acid_name_indices` for each
+    amino acid's full name. Fails an assertion on an unrecognized name
+    or if any dictionary does not end up with exactly 20 entries.
+    """
+    helix_scores = {}
+    sheet_scores = {}
+    turn_scores = {}
+    for row in table.split("\n"):
+        tokens = [tok for tok in row.split(" ") if len(tok.strip()) > 0]
+        if not tokens:
+            continue
+        if tokens[1] == 'Acid':
+            # two-word names ("Aspartic Acid") occupy two tokens; shift by
+            # one so the score columns line up with single-word rows
+            name = tokens[0] + " " + tokens[1]
+            tokens = tokens[1:]
+        else:
+            name = tokens[0]
+        assert name in amino_acid_name_indices, "Invalid amino acid name %s" % name
+        key = amino_acid_name_indices[name]
+        alpha, beta, turn = int(tokens[1]), int(tokens[2]), int(tokens[3])
+        helix_scores[key] = alpha
+        sheet_scores[key] = beta
+        turn_scores[key] = turn
+    for score_dict in (helix_scores, sheet_scores, turn_scores):
+        assert len(score_dict) == 20
+    return helix_scores, sheet_scores, turn_scores
+alpha_helix_score, beta_sheet_score, turn_score = \
+    parse_chou_fasman(chou_fasman_table)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/common.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+def transform_peptide(peptide, property_dict):
+    """
+    Return an array holding `property_dict[aa]` for every element `aa`
+    of `peptide`, in order.
+    """
+    values = []
+    for residue in peptide:
+        values.append(property_dict[residue])
+    return np.array(values)
+def transform_peptides(peptides, property_dict):
+    """
+    Return an array built from one row per peptide, where each row holds
+    `property_dict[aa]` for every element `aa` of that peptide.
+    """
+    rows = []
+    for peptide in peptides:
+        rows.append([property_dict[aa] for aa in peptide])
+    return np.array(rows)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/distances.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+def hamming(p1, p2):
+    """
+    Count the positions at which `p1` and `p2` differ, comparing only up
+    to the length of the shorter input.
+    """
+    shared_length = min(len(p1), len(p2))
+    mismatches = 0
+    for position in range(shared_length):
+        if p1[position] != p2[position]:
+            mismatches += 1
+    return mismatches

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/matrices/__init__.py ADDED Viewed

File without changes

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/matrices/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/peptide_vectorizer.py ADDED Viewed

@@ -0,0 +1,80 @@
+# Copyright (c) 2014-2016. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.preprocessing import normalize
+def make_count_vectorizer(reduced_alphabet, max_ngram):
+    """
+    Build a character-level CountVectorizer counting n-grams of length 1
+    through `max_ngram`.
+
+    Parameters
+    ------------
+    reduced_alphabet : dict or None
+        If given, every character of an input string is replaced by
+        `reduced_alphabet[char]` before counting.
+    max_ngram : int
+        Longest n-gram to count
+    """
+    if reduced_alphabet is None:
+        preprocessor = None
+    else:
+        preprocessor = lambda s: "".join([reduced_alphabet[si] for si in s])
+    return CountVectorizer(
+        analyzer='char',
+        ngram_range=(1, max_ngram),
+        # np.float was only an alias of the builtin float and has been
+        # removed from NumPy; np.float64 is the same dtype
+        dtype=np.float64,
+        preprocessor=preprocessor)
+class PeptideVectorizer(object):
+    """
+    Make n-gram frequency vectors from peptide sequences
+
+    Parameters
+    ------------
+    max_ngram : int
+        Longest n-gram to count
+    normalize_row : bool
+        If True, every output row is L1-normalized
+    reduced_alphabet : dict or None
+        Optional mapping applied to every character before counting
+    training_already_reduced : bool
+        If True, the strings passed to `fit_transform` are counted as-is
+        (without applying `reduced_alphabet`), while later calls to
+        `transform` still apply it.
+    """
+    def __init__(
+            self,
+            max_ngram=1,
+            normalize_row=True,
+            reduced_alphabet=None,
+            training_already_reduced=False):
+        self.reduced_alphabet = reduced_alphabet
+        self.max_ngram = max_ngram
+        self.normalize_row = normalize_row
+        self.training_already_reduced = training_already_reduced
+        self.count_vectorizer = None
+    def __getstate__(self):
+        return {
+            'reduced_alphabet': self.reduced_alphabet,
+            'count_vectorizer': self.count_vectorizer,
+            'training_already_reduced': self.training_already_reduced,
+            'normalize_row': self.normalize_row,
+            'max_ngram': self.max_ngram,
+        }
+    def fit_transform(self, amino_acid_strings):
+        """
+        Learn the n-gram vocabulary from `amino_acid_strings` and return
+        their (optionally row-normalized) count matrix.
+        """
+        self.count_vectorizer = \
+            make_count_vectorizer(self.reduced_alphabet, self.max_ngram)
+        if self.training_already_reduced:
+            # count the training strings without re-applying the reduced
+            # alphabet, then reuse the learned vocabulary
+            c = make_count_vectorizer(None, self.max_ngram)
+            X = c.fit_transform(amino_acid_strings).todense()
+            self.count_vectorizer.vocabulary_ = c.vocabulary_
+        else:
+            c = self.count_vectorizer
+            X = c.fit_transform(amino_acid_strings).todense()
+        # NOTE(review): todense() yields np.matrix; confirm that the
+        # installed scikit-learn's normalize() still accepts it
+        if self.normalize_row:
+            X = normalize(X, norm='l1')
+        return X
+    def fit(self, amino_acid_strings):
+        """
+        Learn the n-gram vocabulary from `amino_acid_strings`.
+
+        Returns self so calls can be chained, as scikit-learn estimators do.
+        """
+        self.fit_transform(amino_acid_strings)
+        return self
+    def transform(self, amino_acid_strings):
+        """
+        Return the (optionally row-normalized) count matrix for
+        `amino_acid_strings` using the previously learned vocabulary.
+        """
+        assert self.count_vectorizer, "Must call 'fit' before 'transform'"
+        X = self.count_vectorizer.transform(amino_acid_strings).todense()
+        if self.normalize_row:
+            X = normalize(X, norm='l1')
+        return X

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/pmbec.py ADDED Viewed

@@ -0,0 +1,87 @@
+# Copyright (c) 2014-2016. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from os.path import join
+from .static_data import MATRIX_DIR
+from .amino_acid_alphabet import dict_to_amino_acid_matrix
+def read_pmbec_coefficients(
+        key_type='row',
+        verbose=True,
+        filename=join(MATRIX_DIR, 'pmbec.mat')):
+    """
+    Read the PMBEC coefficient matrix into a dictionary.
+
+    Parameters
+    ------------
+    filename : str
+        Location of PMBEC coefficient matrix
+    key_type : str
+        'row' : every key is a single amino acid,
+           which maps to a dictionary for that row
+        'pair' : every key is a tuple of amino acids
+        'pair_string' : every key is a string of two amino acid characters
+    verbose : bool
+        Print the header line and the residue list as we read them
+
+    Fails an assertion on an unknown `key_type`, if the header does not
+    contain exactly 20 single-character labels, or if a row does not
+    have a letter followed by 20 values.
+    """
+    d = {}
+    if key_type == 'row':
+        def add_pair(row_letter, col_letter, value):
+            if row_letter not in d:
+                d[row_letter] = {}
+            d[row_letter][col_letter] = value
+    elif key_type == 'pair':
+        def add_pair(row_letter, col_letter, value):
+            d[(row_letter, col_letter)] = value
+    else:
+        assert key_type == 'pair_string', \
+            "Invalid dictionary key type: %s" % key_type
+        def add_pair(row_letter, col_letter, value):
+            d["%s%s" % (row_letter, col_letter)] = value
+    with open(filename, 'r') as f:
+        # whitespace-only lines carry no fields, so drop them with the
+        # empty ones
+        lines = [line for line in f.read().split('\n') if line.strip()]
+    header = lines[0]
+    if verbose:
+        print(header)
+    # split() never yields whitespace tokens, so only the length matters
+    residues = [x for x in header.split() if len(x) == 1]
+    assert len(residues) == 20
+    if verbose:
+        print(residues)
+    for line in lines[1:]:
+        # split rows on any whitespace, the same way as the header, so
+        # tab-delimited files parse too
+        cols = line.split()
+        assert len(cols) == 21, "Expected 20 values + letter, got %s" % cols
+        row_letter = cols[0]
+        for col_letter, col in zip(residues, cols[1:]):
+            add_pair(row_letter, col_letter, float(col))
+    return d
+# dictionary of PMBEC coefficient accessed like pmbec_dict["V"]["R"]
+# NOTE: read_pmbec_coefficients defaults to verbose=True, so this call
+# prints the header line and residue list when the module is imported.
+pmbec_dict = read_pmbec_coefficients(key_type="row")
+# the same coefficients as a matrix over the default alphabet of
+# dict_to_amino_acid_matrix
+pmbec_matrix = dict_to_amino_acid_matrix(pmbec_dict)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/reduced_alphabet.py ADDED Viewed

@@ -0,0 +1,57 @@
+# Copyright (c) 2014-2018. Mount Sinai School of Medicine
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Amino acid groupings from
+'Reduced amino acid alphabets improve the sensitivity...' by
+Peterson, Kondev, et al.
+http://www.rpgroup.caltech.edu/publications/Peterson2008.pdf
+"""
+def dict_from_list(groups):
+    """
+    Map every character of every group to the first character of its
+    group, which serves as the group's representative.
+
+    If a character occurs in more than one group, the last group wins.
+    """
+    # the group's position was never used, so iterate the groups directly
+    return {c: group[0] for group in groups for c in group}
+gbmr4 = dict_from_list(["ADKERNTSQ", "YFLIVMCWH", "G", "P"])
+sdm12 = dict_from_list([
+    "A", "D", "KER", "N", "TSQ", "YF", "LIVM", "C", "W", "H", "G", "P"
+])
+hsdm17 = dict_from_list([
+    "A", "D", "KE", "R", "N", "T", "S", "Q", "Y",
+    "F", "LIV", "M", "C", "W", "H", "G", "P"
+])
+"""
+Other alphabets from
+http://bio.math-inf.uni-greifswald.de/viscose/html/alphabets.html
+"""
+# hydrophilic vs. hydrophobic
+hp2 = dict_from_list(["AGTSNQDEHRKP", "CMFILVWY"])
+murphy10 = dict_from_list([
+    "LVIM", "C", "A", "G", "ST", "P", "FYW", "EDNQ", "KR", "H"
+])
+alex6 = dict_from_list(["C", "G", "P", "FYW", "AVILM", "STNQRHKDE"])
+aromatic2 = dict_from_list(["FHWY", "ADKERNTSQLIVMCGP"])
+hp_vs_aromatic = dict_from_list(["H", "CMILV", "FWY", "ADKERNTSQGP"])

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/residue_contact_energies.py ADDED Viewed

@@ -0,0 +1,74 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from os.path import join
+from .amino_acid_alphabet import canonical_amino_acid_letters, dict_to_amino_acid_matrix
+from .static_data import MATRIX_DIR
def parse_interaction_table(table, amino_acid_order="ARNDCQEGHILKMFPSTWYV"):
    """
    Parse a 20x20 text table of pairwise amino acid coefficients.

    Blank lines and lines starting with '#' are skipped. Values within a row
    may be separated by any run of whitespace (single spaces, aligned
    columns, or tabs).

    Parameters
    ----------
    table : str
        Text containing 20 rows of 20 numbers each.
    amino_acid_order : str
        Labels for the rows and columns: row i is labelled
        amino_acid_order[i] and column j is labelled amino_acid_order[j].

    Returns
    -------
    dict
        Nested dictionary where d[x][y] is the float found in the row
        labelled x and the column labelled y.

    Raises
    ------
    AssertionError
        If the table does not have exactly 20 data rows, or a row does not
        have exactly 20 values. Raised explicitly instead of through an
        `assert` statement so the check still runs under `python -O`.
    ValueError
        If a value cannot be converted to float.
    """
    stripped_lines = (line.strip() for line in table.split("\n"))
    rows = [
        line for line in stripped_lines
        if line and not line.startswith("#")
    ]
    if len(rows) != 20:
        raise AssertionError("Malformed amino acid interaction table")
    d = {}
    for i, row in enumerate(rows):
        # split() with no argument splits on any whitespace run, so there is
        # no need to collapse repeated spaces first and tabs work as well.
        coeff_strings = row.split()
        if len(coeff_strings) != 20:
            raise AssertionError(
                "Malformed row in amino acid interaction table")
        d[amino_acid_order[i]] = {
            amino_acid_order[j]: float(coeff_str)
            for j, coeff_str in enumerate(coeff_strings)
        }
    return d
def transpose_interaction_dict(d):
    """
    Return a new nested dictionary with the two key levels of `d` swapped.

    Only the canonical amino acid letters are used as keys, so the result
    satisfies transposed[x][y] == d[y][x] for every pair of canonical
    letters. The input dictionary is not modified.
    """
    letters = canonical_amino_acid_letters
    return {
        row: {col: d[col][row] for col in letters}
        for row in letters
    }
# Strand vs. Coil: only the read needs the open file handle.
with open(join(MATRIX_DIR, 'strand_vs_coil.txt'), 'r') as f:
    strand_vs_coil_dict = parse_interaction_table(f.read())
strand_vs_coil_array = dict_to_amino_acid_matrix(strand_vs_coil_dict)
# Coil vs. Strand: the same table with its two key levels swapped.
coil_vs_strand_dict = transpose_interaction_dict(strand_vs_coil_dict)
coil_vs_strand_array = dict_to_amino_acid_matrix(coil_vs_strand_dict)
# Helix vs. Strand: only the read needs the open file handle.
with open(join(MATRIX_DIR, 'helix_vs_strand.txt'), 'r') as f:
    helix_vs_strand_dict = parse_interaction_table(f.read())
helix_vs_strand_array = dict_to_amino_acid_matrix(helix_vs_strand_dict)
# Strand vs. Helix: the same table with its two key levels swapped.
strand_vs_helix_dict = transpose_interaction_dict(helix_vs_strand_dict)
strand_vs_helix_array = dict_to_amino_acid_matrix(strand_vs_helix_dict)
# Helix vs. Coil: only the read needs the open file handle.
with open(join(MATRIX_DIR, 'helix_vs_coil.txt'), 'r') as f:
    helix_vs_coil_dict = parse_interaction_table(f.read())
helix_vs_coil_array = dict_to_amino_acid_matrix(helix_vs_coil_dict)
# Coil vs. Helix: the same table with its two key levels swapped.
coil_vs_helix_dict = transpose_interaction_dict(helix_vs_coil_dict)
coil_vs_helix_array = dict_to_amino_acid_matrix(coil_vs_helix_dict)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo/static_data.py ADDED Viewed

@@ -0,0 +1,17 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from os.path import dirname, realpath, join
# Directory that holds this module; realpath() resolves symlinks first.
PACKAGE_DIR = dirname(
    realpath(__file__)
)
# Sub-directory of the package from which the interaction tables are read.
MATRIX_DIR = join(
    PACKAGE_DIR,
    'matrices',
)

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,66 @@
+Metadata-Version: 2.1
+Name: weirdo
+Version: 1.0.0
+Summary: Peptide similarity measures, distance functions, and attempts to quantify the 'self' proteome
+Home-page: https://github.com/pirl-unc/weirdo
+Author: Alex Rubinsteyn
+Author-email: alex.rubinsteyn@unc.edu
+License: http://www.apache.org/licenses/LICENSE-2.0.html
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Description-Content-Type: text/markdown
+License-File: LICENSE
+<a href="https://travis-ci.org/openvax/weirdo">
+    <img src="https://travis-ci.org/openvax/weirdo.svg?branch=master" alt="Build Status" />
+</a>
+<a href="https://coveralls.io/github/openvax/weirdo?branch=master">
+    <img src="https://coveralls.io/repos/openvax/weirdo/badge.svg?branch=master&service=github" alt="Coverage Status" />
+</a>
+<a href="https://pypi.python.org/pypi/weirdo/">
+    <img src="https://img.shields.io/pypi/v/weirdo.svg?maxAge=1000" alt="PyPI" />
+</a>
+# weirdo
+Metrics of immunological foreignness for candidate T-cell epitopes. An extension of the [pepdata](https://github.com/openvax/pepdata) library.
+**Amino Acid Properties**
+The `amino_acid` module contains a variety of physical/chemical properties for both single amino acid residues and interactions between pairs of residues.
+Single residue feature tables are parsed into `StringTransformer` objects, which can be used like dictionaries or, through their `transform_string` method, to vectorize a string.
+Examples of single residue features:
+- `hydropathy`
+- `volume`
+- `polarity`
+- `pK_side_chain`
+- `prct_exposed_residues`
+- `hydrophilicity`
+- `accessible_surface_area`
+- `refractivity`
+- `local_flexibility`
+- `accessible_surface_area_folded`
+- `alpha_helix_score` (Chou-Fasman)
+- `beta_sheet_score` (Chou-Fasman)
+- `turn_score` (Chou-Fasman)
+Pairwise interaction tables are parsed into nested dictionaries, so that the interaction between amino acids `x` and `y` can be determined from `d[x][y]`.
+Pairwise interaction dictionaries:
+- `strand_vs_coil` (and its transpose `coil_vs_strand`)
+- `helix_vs_strand` (and its transpose `strand_vs_helix`)
+- `helix_vs_coil` (and its transpose `coil_vs_helix`)
+- `blosum30`
+- `blosum50`
+- `blosum62`
+There is also a function to parse the coefficients of the [PMBEC similarity matrix](http://www.biomedcentral.com/1471-2105/10/394), though this currently lives in the separate `pmbec` module.

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,27 @@
+LICENSE
+README.md
+setup.py
+test/test_amino_acids.py
+test/test_blosum.py
+test/test_hamming.py
+test/test_ngram.py
+test/test_pmbec.py
+weirdo/__init__.py
+weirdo/amino_acid.py
+weirdo/amino_acid_alphabet.py
+weirdo/amino_acid_properties.py
+weirdo/blosum.py
+weirdo/chou_fasman.py
+weirdo/common.py
+weirdo/distances.py
+weirdo/peptide_vectorizer.py
+weirdo/pmbec.py
+weirdo/reduced_alphabet.py
+weirdo/residue_contact_energies.py
+weirdo/static_data.py
+weirdo.egg-info/PKG-INFO
+weirdo.egg-info/SOURCES.txt
+weirdo.egg-info/dependency_links.txt
+weirdo.egg-info/requires.txt
+weirdo.egg-info/top_level.txt
+weirdo/matrices/__init__.py

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,3 @@
+numpy
+scikit-learn
+pandas

Users/iskander/miniconda3/lib/python3.8/site-packages/weirdo-1.0.0-py3.8.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ weirdo