stcrpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. examples/__init__.py +0 -0
  2. examples/egnn.py +425 -0
  3. stcrpy/__init__.py +5 -0
  4. stcrpy/tcr_datasets/__init__.py +0 -0
  5. stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  6. stcrpy/tcr_datasets/tcr_selector.py +0 -0
  7. stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  8. stcrpy/tcr_datasets/utils.py +350 -0
  9. stcrpy/tcr_formats/__init__.py +0 -0
  10. stcrpy/tcr_formats/tcr_formats.py +114 -0
  11. stcrpy/tcr_formats/tcr_haddock.py +556 -0
  12. stcrpy/tcr_geometry/TCRCoM.py +350 -0
  13. stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  14. stcrpy/tcr_geometry/TCRDock.py +261 -0
  15. stcrpy/tcr_geometry/TCRGeom.py +450 -0
  16. stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  17. stcrpy/tcr_geometry/__init__.py +0 -0
  18. stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  19. stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  20. stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  21. stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  22. stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  23. stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  24. stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  25. stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  26. stcrpy/tcr_interactions/PLIPParser.py +147 -0
  27. stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  28. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  29. stcrpy/tcr_interactions/__init__.py +0 -0
  30. stcrpy/tcr_interactions/utils.py +170 -0
  31. stcrpy/tcr_methods/__init__.py +0 -0
  32. stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  33. stcrpy/tcr_methods/tcr_methods.py +150 -0
  34. stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  35. stcrpy/tcr_metrics/__init__.py +2 -0
  36. stcrpy/tcr_metrics/constants.py +39 -0
  37. stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  38. stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  39. stcrpy/tcr_ml/__init__.py +0 -0
  40. stcrpy/tcr_ml/geometry_predictor.py +3 -0
  41. stcrpy/tcr_processing/AGchain.py +89 -0
  42. stcrpy/tcr_processing/Chemical_components.py +48915 -0
  43. stcrpy/tcr_processing/Entity.py +301 -0
  44. stcrpy/tcr_processing/Fragment.py +58 -0
  45. stcrpy/tcr_processing/Holder.py +24 -0
  46. stcrpy/tcr_processing/MHC.py +449 -0
  47. stcrpy/tcr_processing/MHCchain.py +149 -0
  48. stcrpy/tcr_processing/Model.py +37 -0
  49. stcrpy/tcr_processing/Select.py +145 -0
  50. stcrpy/tcr_processing/TCR.py +532 -0
  51. stcrpy/tcr_processing/TCRIO.py +47 -0
  52. stcrpy/tcr_processing/TCRParser.py +1230 -0
  53. stcrpy/tcr_processing/TCRStructure.py +148 -0
  54. stcrpy/tcr_processing/TCRchain.py +160 -0
  55. stcrpy/tcr_processing/__init__.py +3 -0
  56. stcrpy/tcr_processing/annotate.py +480 -0
  57. stcrpy/tcr_processing/utils/__init__.py +0 -0
  58. stcrpy/tcr_processing/utils/common.py +67 -0
  59. stcrpy/tcr_processing/utils/constants.py +367 -0
  60. stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  61. stcrpy/utils/__init__.py +0 -0
  62. stcrpy/utils/error_stream.py +12 -0
  63. stcrpy-1.0.0.dist-info/METADATA +173 -0
  64. stcrpy-1.0.0.dist-info/RECORD +68 -0
  65. stcrpy-1.0.0.dist-info/WHEEL +5 -0
  66. stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
  67. stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  68. stcrpy-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,261 @@
1
+ #!/usr/bin/env python
2
+
3
+ """
4
+ TCRDock.py
5
+ Description: Calculate the TCR/pMHC docking angle.
6
+ Based on the method by Rudolph, Stanfield, Wilson (Annu Rev Immunol 2006, 24:419-466).
7
+ This method is preferred for cases when the antigen is not a peptide.
8
+ Formula uses SVD.
9
+ Jun 12, 2017
10
+ """
11
+ # from TCRDB.TcrPDB.TCR import TCR
12
+ from ..tcr_processing import TCR, MHC, MHCchain
13
+ from ..utils.error_stream import ErrorStream
14
+
15
+ import sys
16
+ import warnings
17
+ import numpy as np
18
+
19
+
20
class TCRDock(object):
    """
    Calculate the docking angle between a TCR and its MHC.

    The angle is measured between two vectors:
      * the vector joining the centroids of the SG atoms of residues 23 and 104
        of each of the two variable domains, and
      * the first principal component (via SVD) of the CA atoms of the MHC
        helix residues.

    Problems found while preparing these vectors are written to ``self.warnings``
    and flagged through ``self.abort``; in that case ``calculate_docking_angle``
    returns ``numpy.nan`` instead of raising.
    """

    # Residue numbers (after ``% 1000``) whose CA atoms define the helix for
    # MH1 / CD1 / MR1 and lone MHC chains.
    _HELIX_RANGE = range(50, 87)
    # Chain types accepted when only a single MHC chain (not an MHC object) is present.
    _LONE_CHAIN_TYPES = ("MH1", "CD1", "MR1", "GA1")
    # A lone chain may miss at most this many helix residues and still be used.
    _MAX_MISSING_HELIX_RESIDUES = 10

    def __init__(self, tcr, QUIET=False):
        """
        Prepare the vectors needed for the docking angle between TCR and pMHC.

        @param tcr: input a TCR object (abTCR or gdTCR or dbTCR).
        @param QUIET: when true, collected warnings are not written to stderr.
        """
        self.warnings = ErrorStream()
        self.QUIET = QUIET
        self._warnings_reported = False

        self.TCR = tcr
        self.angle = np.nan
        self.abort = False

        # Get the MHC for the TCR.
        self.MHC = tcr.get_MHC()
        if not self.MHC:
            self._abort_with(
                "The TCR structure does not have a detected MHC molecule. No docking angle will be calculated.\n"
            )
            return
        self.MHC = self.MHC[0]

        if not isinstance(self.TCR, TCR):
            self._abort_with(
                "The TCR structure is an unpaired TCR chain. No docking angle will be calculated.\n"
            )
            return

        if not isinstance(self.MHC, MHC):
            # Not a full MHC object: only a lone chain of a suitable type that
            # still carries (most of) the helix residues can be used.
            if not self._is_usable_lone_chain(self.MHC):
                self._abort_with(
                    "An MHC molecule was not found. No docking angle will be calculated.\n"
                )
                return
            self.warnings.write(
                "Warning: detected an MHC chain of type %s; doesn't seem to have an associated B2M molecule.\n"
                % self.MHC.chain_type
            )

        # Resolve the vectors for the TCR
        self._resolve_vectors()

    def _abort_with(self, message):
        """Record ``message`` in the warning stream and flag the calculation as aborted."""
        self.warnings.write(message)
        self.abort = True

    def _is_usable_lone_chain(self, chain):
        """
        Return True when ``chain`` is an MHCchain of an accepted type that has
        enough residues numbered inside the helix range.
        """
        if not isinstance(chain, MHCchain):
            return False
        if chain.chain_type not in self._LONE_CHAIN_TYPES:
            return False
        n_helix = sum(
            1 for res in chain.get_residues() if res.id[1] % 1000 in self._HELIX_RANGE
        )
        return n_helix >= len(self._HELIX_RANGE) - self._MAX_MISSING_HELIX_RESIDUES

    @staticmethod
    def _collect_ca_coords(residues, accepted, wrap=True):
        """
        Return the CA coordinates of the residues whose number is in ``accepted``.

        With ``wrap`` the residue number is reduced modulo 1000 first, so that
        e.g. 50-86 and 1050-1086 are both selected. Residues without a CA atom
        are skipped.
        """
        coords = []
        for res in residues:
            number = res.id[1] % 1000 if wrap else res.id[1]
            if number in accepted and "CA" in res:
                coords.append(res["CA"].coord)
        return coords

    def _resolve_vectors(self):
        """Compute the cysteine vector and the helix CA atoms unless already aborted."""
        if self.abort:
            return
        # Get the vector between cysteine centroids.
        self._get_cysteine_vector()
        if self.abort:
            # Without the cysteine vector there is nothing to compare the helix to.
            return

        # Get the vector of helices
        self._get_helix_vectors()

    def _get_cysteine_vector(self):
        """
        Get the centroids of the disulphide bridge and calculate a vector through it.

        Sets ``self.vec_centroid``; on failure writes a warning and sets ``self.abort``.
        """
        # Get variable domains
        tcr_type = self.TCR.get_TCR_type()
        if tcr_type == "abTCR":
            vbg, vda = self.TCR.get_VB(), self.TCR.get_VA()
        elif tcr_type == "gdTCR":
            vbg, vda = self.TCR.get_VD(), self.TCR.get_VG()
        elif tcr_type == "dbTCR":
            vbg, vda = self.TCR.get_VB(), self.TCR.get_VD()
        else:
            # Previously this fell through with the domains unbound and crashed.
            self._abort_with(
                "TCR type %s is not supported. No docking angle will be calculated.\n"
                % tcr_type
            )
            return

        try:
            # Get sulphur atoms of each of the cysteines
            bg_23, bg_104 = vbg[23]["SG"], vbg[104]["SG"]
            da_23, da_104 = vda[23]["SG"], vda[104]["SG"]
        except KeyError:
            self._abort_with(
                "Cysteine(s) or sulphur atom(s) not detected. Check for IMGT residues 23/104 in beta/alpha/delta/gamma chains.\n"
            )
            return

        bg_centroid = np.mean((bg_23.coord, bg_104.coord), axis=0)
        da_centroid = np.mean((da_23.coord, da_104.coord), axis=0)

        # Compute the vector between the centroids
        self.vec_centroid = bg_centroid - da_centroid

    def _get_helix_vectors(self):
        """
        Collect the CA atoms used for the best fit vector of the MHC helices.

        For MH1 and MH2, the atoms are based on the positions from Rudolph et al., 2006 with the IMGT numbering;
        For CD1 and MR1, we use the same rules as MH1.

        Sets ``self.ca`` (array of CA coordinates); on failure sets ``self.abort``.
        """
        try:
            mhc_type = self.MHC.get_MHC_type()
            if mhc_type == "MH1":
                # CA atoms of 50-86 and 1050-1086 (A140-A176 on Rudolph et al).
                # The modulus folds 1050-1086 onto 50-86.
                coords = self._collect_ca_coords(
                    self.MHC.get_alpha().get_residues(), self._HELIX_RANGE
                )
            elif mhc_type == "CD1":
                coords = self._collect_ca_coords(
                    self.MHC.get_CD1().get_residues(), self._HELIX_RANGE
                )
            elif mhc_type == "MR1":
                coords = self._collect_ca_coords(
                    self.MHC.get_MR1().get_residues(), self._HELIX_RANGE
                )
            elif mhc_type == "MH2":
                # CA atoms of 50-87 on the GA chain and 52-87 on the GB chain,
                # matched on the raw residue number (no modulus here).
                alpha_range = range(50, 88)
                beta_range = range(52, 88)
                coords = self._collect_ca_coords(
                    self.MHC.get_GA().get_residues(), alpha_range, wrap=False
                )
                coords += self._collect_ca_coords(
                    self.MHC.get_GB().get_residues(), beta_range, wrap=False
                )
            else:
                coords = None
        except AttributeError:
            # self.MHC does not offer the MHC-object API; treat it as a lone chain.
            if self.MHC.chain_type in self._LONE_CHAIN_TYPES:
                coords = self._collect_ca_coords(
                    self.MHC.get_residues(), self._HELIX_RANGE
                )
            else:
                self.abort = True
                return
        else:
            if coords is None:
                # Previously an unknown type left the atoms unbound and crashed.
                self._abort_with(
                    "MHC type %s is not supported. No docking angle will be calculated.\n"
                    % mhc_type
                )
                return

        if len(coords) < 2:
            # A direction cannot be fitted through fewer than two points.
            self._abort_with(
                "Fewer than two CA atoms were found in the MHC helix region. No docking angle will be calculated.\n"
            )
            return

        self.ca = np.array(coords)

    def _report_warnings(self):
        """Write the collected warnings to stderr once, unless QUIET is set."""
        if self.QUIET or getattr(self, "_warnings_reported", False):
            return
        if self.warnings.log:
            sys.stderr.write("\n".join(self.warnings.log))
            sys.stderr.write("\n")
        self._warnings_reported = True

    def calculate_docking_angle(self, force=False):
        """
        Return the docking angle in degrees, or ``numpy.nan`` when it cannot be computed.

        A previously computed angle is returned as is. With ``force`` the vectors
        are resolved again before the calculation (this has no effect once the
        instance is flagged as aborted).
        """
        if not np.isnan(self.angle):
            return self.angle
        if force:
            self._resolve_vectors()

        vectors_ready = (
            getattr(self, "ca", None) is not None
            and getattr(self, "vec_centroid", None) is not None
        )
        if self.abort or not vectors_ready:
            # Show why no angle is available instead of failing silently
            # (or crashing on the missing vectors, as ``force=True`` used to).
            self._report_warnings()
            return np.nan

        # Compute the mean and calculate the vector using SVD
        # https://stackoverflow.com/a/2333251
        ca_centroid = self.ca.mean(axis=0)
        centred_dat = self.ca - ca_centroid
        # Only the right singular vectors are needed; the reduced form avoids
        # building the full n x n left matrix.
        _, _, vh = np.linalg.svd(centred_dat, full_matrices=False)

        # The first row of vh is the 1st principal component.
        self.V = vh[0]

        self.angle = self._angle(self.V, self.vec_centroid)

        self._report_warnings()

        return self.angle

    def _angle(self, v1, v2):
        """
        Return the angle between two vectors in degrees, folded into 0-90.

        Returns ``numpy.nan`` when either vector has zero length.
        """
        # The sign of the dot product is discarded: the singular-value decomposition
        # used for the best fit may return the axis with either sign.
        # https://stackoverflow.com/questions/17682626/singular-value-decomposition-different-results-with-jama-pcolt-and-numpy
        # https://math.stackexchange.com/questions/2359992/how-to-resolve-the-sign-issue-in-a-svd-problem
        denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
        if denominator == 0:
            return np.nan

        cosine = abs(np.dot(v1, v2)) / denominator

        # Rounding can push the cosine of parallel vectors marginally above 1;
        # clamp it so that case yields 0 degrees rather than an invalid arccos.
        return np.degrees(np.arccos(min(cosine, 1.0)))