stcrpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. examples/__init__.py +0 -0
  2. examples/egnn.py +425 -0
  3. stcrpy/__init__.py +5 -0
  4. stcrpy/tcr_datasets/__init__.py +0 -0
  5. stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  6. stcrpy/tcr_datasets/tcr_selector.py +0 -0
  7. stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  8. stcrpy/tcr_datasets/utils.py +350 -0
  9. stcrpy/tcr_formats/__init__.py +0 -0
  10. stcrpy/tcr_formats/tcr_formats.py +114 -0
  11. stcrpy/tcr_formats/tcr_haddock.py +556 -0
  12. stcrpy/tcr_geometry/TCRCoM.py +350 -0
  13. stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  14. stcrpy/tcr_geometry/TCRDock.py +261 -0
  15. stcrpy/tcr_geometry/TCRGeom.py +450 -0
  16. stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  17. stcrpy/tcr_geometry/__init__.py +0 -0
  18. stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  19. stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  20. stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  21. stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  22. stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  23. stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  24. stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  25. stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  26. stcrpy/tcr_interactions/PLIPParser.py +147 -0
  27. stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  28. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  29. stcrpy/tcr_interactions/__init__.py +0 -0
  30. stcrpy/tcr_interactions/utils.py +170 -0
  31. stcrpy/tcr_methods/__init__.py +0 -0
  32. stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  33. stcrpy/tcr_methods/tcr_methods.py +150 -0
  34. stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  35. stcrpy/tcr_metrics/__init__.py +2 -0
  36. stcrpy/tcr_metrics/constants.py +39 -0
  37. stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  38. stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  39. stcrpy/tcr_ml/__init__.py +0 -0
  40. stcrpy/tcr_ml/geometry_predictor.py +3 -0
  41. stcrpy/tcr_processing/AGchain.py +89 -0
  42. stcrpy/tcr_processing/Chemical_components.py +48915 -0
  43. stcrpy/tcr_processing/Entity.py +301 -0
  44. stcrpy/tcr_processing/Fragment.py +58 -0
  45. stcrpy/tcr_processing/Holder.py +24 -0
  46. stcrpy/tcr_processing/MHC.py +449 -0
  47. stcrpy/tcr_processing/MHCchain.py +149 -0
  48. stcrpy/tcr_processing/Model.py +37 -0
  49. stcrpy/tcr_processing/Select.py +145 -0
  50. stcrpy/tcr_processing/TCR.py +532 -0
  51. stcrpy/tcr_processing/TCRIO.py +47 -0
  52. stcrpy/tcr_processing/TCRParser.py +1230 -0
  53. stcrpy/tcr_processing/TCRStructure.py +148 -0
  54. stcrpy/tcr_processing/TCRchain.py +160 -0
  55. stcrpy/tcr_processing/__init__.py +3 -0
  56. stcrpy/tcr_processing/annotate.py +480 -0
  57. stcrpy/tcr_processing/utils/__init__.py +0 -0
  58. stcrpy/tcr_processing/utils/common.py +67 -0
  59. stcrpy/tcr_processing/utils/constants.py +367 -0
  60. stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  61. stcrpy/utils/__init__.py +0 -0
  62. stcrpy/utils/error_stream.py +12 -0
  63. stcrpy-1.0.0.dist-info/METADATA +173 -0
  64. stcrpy-1.0.0.dist-info/RECORD +68 -0
  65. stcrpy-1.0.0.dist-info/WHEEL +5 -0
  66. stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
  67. stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  68. stcrpy-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,148 @@
1
+ """
2
+ Created on 10 May 2017
3
+ @author: leem
4
+ Based on the ABDB.AbPDB.AntibodyStructure class.
5
+ """
6
+
7
+ from Bio import SeqUtils
8
+ from .Entity import Entity
9
+ from .TCR import TCR
10
+ from .MHC import MHC
11
+
12
+
13
class TCRStructure(Entity):
    """
    The TCRStructure class contains a collection of models.

    Iterating over the structure yields its models, iterating over a model
    yields its "holders" (TCR objects, MHC objects, or a holder whose id is
    "TCRchain"), and iterating over a holder yields chains.
    """

    def __init__(self, identifier):
        """Initialise the structure with an identifier and an empty header."""
        # The level tag is assigned before Entity.__init__ runs; this order is
        # kept deliberately in case the base initialiser relies on it.
        self.level = "TS"
        Entity.__init__(self, identifier)
        self.header = {}

    def __repr__(self):
        return "<Structure id=%s>" % self.get_id()

    def _sort(self, m1, m2):
        """Sort models.

        This sorting function sorts the Model instances in the Structure
        instance. The sorting is done based on the model id, which is a simple
        int that reflects the order of the models in the PDB file.

        Arguments:
        o m1, m2 - Model instances

        Returns -1, 0 or 1 (old-style ``cmp`` result).
        """
        first_id, second_id = m1.get_id(), m2.get_id()
        return (first_id > second_id) - (first_id < second_id)

    def _set_numbering_scheme(self, scheme=None):
        """
        Set the numbering scheme used.
        """
        self.numbering_scheme = scheme

    # Public
    def set_header(self, header):
        """
        Set the header as the parsed header dictionary from biopython
        """
        self.header = header

    def get_header(self):
        """Return the header previously stored with ``set_header``."""
        return self.header

    def get_models(self):
        """Yield every model of the structure."""
        yield from self

    def get_holders(self):
        """Yield every child ("holder") of every model."""
        for model in self.get_models():
            yield from model

    def get_TCRs(self):
        """
        Get any instance of the TCR object.
        Hierarchy:
            TCRStructure
                |
                |______ TCR
                |
                |______ MHC
        """
        for holder in self.get_holders():
            if isinstance(holder, TCR):
                yield holder

    def get_TCRchains(self):
        """Gets all TCR chains"""
        for holder in self.get_holders():
            # Chains live either in the "TCRchain" holder or inside a TCR
            # object; both are iterated the same way.
            if holder.id == "TCRchain" or isinstance(holder, TCR):
                yield from holder

    def get_MHCs(self):
        """
        Get any instance of the MHC object.
        Hierarchy:
            TCRStructure
                |
                |______ TCR
                |
                |______ MHC
        """
        for holder in self.get_holders():
            if isinstance(holder, MHC):
                yield holder

    def get_antigens(self):
        """
        This gets the 'antigen' chains in the structure,
        that have been assigned to a TCR or an MHC.

        Each antigen is yielded once, even when several holders reference it.
        """
        seen = set()
        for holder in self.get_holders():
            if isinstance(holder, (MHC, TCR)) or holder.id == "TCRchain":
                for antigen in holder.antigen:
                    if antigen not in seen:
                        seen.add(antigen)
                        yield antigen

    def get_unpaired_TCRchains(self):
        """
        This gets the TCR chains that are not paired
        """
        for holder in self.get_holders():
            if holder.id == "TCRchain":
                yield from holder

    def get_chains(self):
        """Yield every chain of every holder."""
        for holder in self.get_holders():
            yield from holder

    def get_residues(self):
        """Yield every residue of every chain."""
        for chain in self.get_chains():
            yield from chain

    def get_atoms(self):
        """Yield every atom of every residue."""
        for residue in self.get_residues():
            yield from residue

    def get_seq(self, model=0):
        """
        Return the one-letter sequences of the children of a model.

        Arguments:
        o model - key of the model to read (default 0)

        Returns a single string with one sequence per child of the model,
        separated by "/". Water molecules and residues whose three-letter
        name is not a standard amino acid are skipped.
        """
        # The Biopython table is keyed by capitalised names ("Ala"), whereas
        # PDB residue names are upper-case ("ALA"), so normalise before lookup.
        three_to_one = SeqUtils.IUPACData.protein_letters_3to1
        sequences = []
        for child in self[model]:
            letters = []
            for residue in child.get_residues():
                # Skip over water molecules
                if residue.resname == "HOH":
                    continue
                letter = three_to_one.get(residue.resname.capitalize())
                if letter is None:
                    # Unrecognised residue name (e.g. a ligand or UNK).
                    continue
                letters.append(letter)
            sequences.append("".join(letters))
        return "/".join(sequences)
@@ -0,0 +1,160 @@
1
+ from Bio.PDB import Chain
2
+ from Bio import SeqUtils
3
+ from .utils.region_definitions import get_region
4
+ from .Entity import Entity
5
+ from .Fragment import Fragment
6
+
7
# Region (fragment) names for each TCR chain type, in sequence order:
# framework 1, CDR 1, framework 2, CDR 2, framework 3, CDR 3, framework 4.
regions = {
    chain_type: [
        prefix + chain_type.lower() + str(number)
        for number in (1, 2, 3)
        for prefix in ("fw", "cdr")
    ]
    + ["fw" + chain_type.lower() + "4"]
    for chain_type in ("B", "A", "D", "G")
}
13
+
14
+
15
class TCRchain(Chain.Chain, Entity):
    """
    A class to hold a TCR chain.

    The chain type (B, A, D or G) is not known at construction time; it is
    assigned by ``set_chain_type`` (normally through ``analyse``).
    """

    def __init__(self, identifier):
        """Initialise both base classes and the per-chain bookkeeping."""
        Chain.Chain.__init__(self, identifier)
        Entity.__init__(self, identifier)
        self.level = "C"
        self.mhc = []  # MHC objects registered with _add_mhc
        self.antigen = []  # antigens registered with _add_antigen (no duplicates)
        self.unnumbered = []  # ids of residues registered with add_unnumbered
        self.sequence = {}  # residue key -> one-letter code, see set_sequence
        self.residue_order = {}  # residue key -> position in the chain
        self.engineered = False

    def __repr__(self):
        # chain_type only exists after set_chain_type()/analyse(); fall back
        # to None so repr() never raises on a freshly constructed chain.
        return "<TCRchain %s type: %s>" % (
            self.id,
            getattr(self, "chain_type", None),
        )

    def _add_mhc(self, mhc=None):
        """Register an MHC with this chain (duplicates are not filtered)."""
        self.mhc.append(mhc)

    def _add_antigen(self, antigen=None):
        """Register an antigen with this chain unless already present."""
        if antigen not in self.antigen:
            self.antigen.append(antigen)

    def is_bound(self):
        """
        Check whether there is an antigen bound to the TCR
        """
        return bool(self.get_antigen())

    def analyse(self, chain_type):
        """
        Set the chain type, then build fragments, annotate residues and atoms,
        and record the sequence. The order matters: fragments must exist
        before the children are annotated.
        """
        self.set_chain_type(chain_type)
        self._init_fragments()
        self.annotate_children()
        self.set_sequence()

    def set_chain_type(self, chain_type):
        """
        Set the chain type to B, A, D, or G
        """
        self.chain_type = chain_type

    def set_sequence(self):
        """
        Fill ``self.sequence`` and ``self.residue_order`` from the residues.

        Keys are the chain type followed by the residue number and insertion
        code, with surrounding whitespace stripped. Residues whose name is not
        a standard three-letter amino-acid code (e.g. UNK) are skipped and do
        not consume a position.
        """
        # NOTE: change this to use our chemical components.
        three_to_one = SeqUtils.IUPACData.protein_letters_3to1
        position = 0
        for residue in self:
            letter = three_to_one.get(residue.get_resname().capitalize())
            if letter is None:
                # skip the residue if the code is not recognised - e.g. UNK
                continue
            _hetflag, resseq, icode = residue.get_id()
            key = (self.chain_type + str(resseq) + str(icode)).strip()
            self.sequence[key] = letter
            self.residue_order[key] = position
            position += 1

    def set_engineered(self, engineered):
        """Store the truthiness of ``engineered`` as the engineered flag."""
        self.engineered = bool(engineered)

    def add_unnumbered(self, residue):
        """Record the id of a residue that could not be numbered."""
        self.unnumbered.append(residue.id)

    def _get_region(self, residue):
        """
        Return the region name of a residue, or "?" when the residue is not
        flagged as IMGT-numbered.
        """
        if getattr(residue, "imgt_numbered", False):
            return get_region((residue.id[1], residue.id[2]), self.chain_type)
        return "?"

    def annotate_children(self):
        """
        Tag every residue and atom with the chain type and region, and add
        each residue with a known region to the matching fragment.
        """
        for residue in self:
            residue.chain_type = self.chain_type
            residue.region = self._get_region(residue)
            for atom in residue:
                atom.chain_type = self.chain_type
                atom.region = residue.region

            if residue.region != "?":
                self.fragments.child_dict[residue.region].add(residue)

    def _init_fragments(self):
        """Create one empty Fragment per region of this chain type."""
        self.fragments = Entity("Fragments")
        self.fragments.set_parent(self)
        for region in regions[self.chain_type]:
            self.fragments.add(Fragment(region))

    def is_engineered(self):
        """Return the engineered flag."""
        return self.engineered

    def get_MHC(self):
        """Return the list of MHC objects registered with this chain."""
        return self.mhc

    def get_antigen(self):
        """Return the list of antigens registered with this chain."""
        return self.antigen

    def get_fragments(self):
        """Yield every fragment of the chain."""
        yield from self.fragments

    def get_CDRs(self):
        """Yield the fragments whose id starts with "cdr" (case-insensitive)."""
        for fragment in self.fragments:
            if fragment.id.lower().startswith("cdr"):
                yield fragment

    def get_frameworks(self):
        """
        Obtain framework regions from a TCRChain object.
        """
        for fragment in self.get_fragments():
            if "fw" in fragment.id:
                yield fragment

    def get_sequence(self, type=dict):
        """
        Return the chain sequence, computing it first if it is empty.

        Arguments:
        o type - ``dict`` (default) returns the key -> letter mapping itself,
          ``str`` returns the letters joined in chain order, anything else
          returns a list of (key, letter) pairs in chain order.
        """
        if not self.sequence:
            self.set_sequence()
        if type is dict:
            return self.sequence

        ordered = sorted(
            self.sequence.items(), key=lambda item: self.residue_order[item[0]]
        )
        if type is str:
            return "".join(letter for _key, letter in ordered)
        return ordered

    def get_unnumbered(self):
        """Yield the residues whose ids were recorded by ``add_unnumbered``."""
        for residue_id in self.unnumbered:
            yield self.child_dict[residue_id]

    def get_germline_assignments(self):
        """
        Return the "genetic_origin" entry of ``self.xtra``.

        NOTE(review): the subscript raises if that entry has not been set;
        presumably the parser fills it in - confirm against the caller.
        """
        return self.xtra["genetic_origin"]
@@ -0,0 +1,3 @@
1
+ from .TCR import TCR, abTCR, gdTCR, dbTCR
2
+ from .MHC import MHC
3
+ from .MHCchain import MHCchain