stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
TCRDock.py
|
|
5
|
+
Description: Calculate the TCR/pMHC docking angle.
|
|
6
|
+
Based on the method by Rudolph, Stanfield, Wilson (Annu Rev Immunol 2006, 24:419-466).
|
|
7
|
+
This method is preferred for cases when the antigen is not a peptide.
|
|
8
|
+
Formula uses SVD.
|
|
9
|
+
Jun 12, 2017
|
|
10
|
+
"""
|
|
11
|
+
# from TCRDB.TcrPDB.TCR import TCR
|
|
12
|
+
from ..tcr_processing import TCR, MHC, MHCchain
|
|
13
|
+
from ..utils.error_stream import ErrorStream
|
|
14
|
+
|
|
15
|
+
import sys
|
|
16
|
+
import warnings
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TCRDock(object):
    """
    Calculate the TCR/pMHC docking angle.

    The angle is measured between two vectors:
      * the vector joining the centroids of the conserved disulphide bridges
        (SG atoms of IMGT residues 23 and 104) of the two TCR variable domains;
      * the best-fit line (first principal component, obtained by SVD) through
        the CA atoms of the selected MHC helix residues.

    Problems found while preparing the calculation are recorded in
    ``self.warnings`` and flagged by ``self.abort``; in that case
    ``calculate_docking_angle`` returns ``np.nan``.
    """

    # Chain types that are handled with the MH1 helix residue selection.
    _MH1_LIKE_CHAIN_TYPES = ("MH1", "CD1", "MR1", "GA1")
    # Residue numbers (taken modulo 1000) selected for MH1-like chains: 50-86.
    _MH1_HELIX_POSITIONS = range(50, 87)
    # Residue numbers selected for the MH2 GA chain (50-87) and GB chain (52-87).
    _MH2_ALPHA_POSITIONS = range(50, 88)
    _MH2_BETA_POSITIONS = range(52, 88)

    def __init__(self, tcr, QUIET=False):
        """
        Prepare the vectors needed to calculate the docking angle.

        @param tcr: input a TCR object (abTCR or gdTCR or dbTCR).
        @param QUIET: if True, collected warnings are not written to stderr
                      when the angle is calculated.
        """
        self.warnings = ErrorStream()
        self.QUIET = QUIET

        self.TCR = tcr
        self.angle = np.nan
        self.abort = False

        # Defined up front so that an aborted instance still exposes them.
        self.vec_centroid = None
        self.ca = None
        self.V = None

        # Get the MHC for the TCR.
        self.MHC = tcr.get_MHC()
        if not self.MHC:
            self._abort(
                "The TCR structure does not have a detected MHC molecule. No docking angle will be calculated.\n"
            )
            return

        # Only the first detected MHC is used.
        self.MHC = self.MHC[0]

        if not isinstance(self.TCR, TCR):
            self._abort(
                "The TCR structure is an unpaired TCR chain. No docking angle will be calculated.\n"
            )
            return

        if not isinstance(self.MHC, MHC):
            # A lone MHC chain is accepted only when it is of an MH1-like type
            # and carries most of the helix residues used for the fit.
            usable_chain = (
                isinstance(self.MHC, MHCchain)
                and self.MHC.chain_type in self._MH1_LIKE_CHAIN_TYPES
            )
            if not usable_chain:
                self._abort(
                    "An MHC molecule was not found. No docking angle will be calculated.\n"
                )
                return

            helix_residues = [
                r
                for r in self.MHC.get_residues()
                if r.id[1] % 1000 in self._MH1_HELIX_POSITIONS
            ]
            # Tolerate up to 10 missing helix residues.
            if len(helix_residues) < len(self._MH1_HELIX_POSITIONS) - 10:
                self._abort(
                    "An MHC molecule was not found. No docking angle will be calculated.\n"
                )
                return

            self.warnings.write(
                "Warning: detected an MHC chain of type %s; doesn't seem to have an associated B2M molecule.\n"
                % self.MHC.chain_type
            )

        # Resolve the vectors for the TCR
        self._resolve_vectors()

    def _abort(self, message):
        """Record ``message`` in the warning stream and flag the calculation as aborted."""
        self.warnings.write(message)
        self.abort = True

    def _resolve_vectors(self):
        """Compute the cysteine centroid vector and the helix CA coordinates, unless aborted."""
        if self.abort:
            return
        # Get the vector between cysteine centroids.
        self._get_cysteine_vector()

        # Get the vector of helices
        self._get_helix_vectors()

    def _get_cysteine_vector(self):
        """
        Get the centroids of the disulphide bridge and calculate a vector through it.

        Sets ``self.vec_centroid``; sets ``self.abort`` if the TCR type is not
        recognised or a cysteine / sulphur atom is missing.
        """
        # Get variable domains
        tcr_type = self.TCR.get_TCR_type()
        if tcr_type == "abTCR":
            vbg, vda = self.TCR.get_VB(), self.TCR.get_VA()
        elif tcr_type == "gdTCR":
            vbg, vda = self.TCR.get_VD(), self.TCR.get_VG()
        elif tcr_type == "dbTCR":
            vbg, vda = self.TCR.get_VB(), self.TCR.get_VD()
        else:
            # Without this branch the domains would be unbound below.
            self._abort(
                "Unrecognised TCR type %s. No docking angle will be calculated.\n"
                % tcr_type
            )
            return

        try:
            # Get sulphur atoms of each of the cysteines
            bg_23, bg_104 = vbg[23]["SG"], vbg[104]["SG"]
            da_23, da_104 = vda[23]["SG"], vda[104]["SG"]
        except KeyError:
            self._abort(
                "Cysteine(s) or sulphur atom(s) not detected. Check for IMGT residues 23/104 in beta/alpha/delta/gamma chains.\n"
            )
            return

        bg_centroid = np.mean((bg_23.coord, bg_104.coord), axis=0)
        da_centroid = np.mean((da_23.coord, da_104.coord), axis=0)

        # Compute the vector between the centroids
        self.vec_centroid = bg_centroid - da_centroid

    @staticmethod
    def _collect_ca_coords(chain, positions, modulo=None):
        """
        Return the CA coordinates of the residues of ``chain`` selected by number.

        @param chain: object providing ``get_residues()``.
        @param positions: container of accepted residue numbers.
        @param modulo: if given, residue numbers are reduced modulo this value
                       before the membership test (e.g. 1050 -> 50 for 1000).
        """
        coords = []
        for residue in chain.get_residues():
            number = residue.id[1]
            if modulo is not None:
                number = number % modulo
            if number in positions and "CA" in residue:
                coords.append(residue["CA"].coord)
        return coords

    def _get_helix_vectors(self):
        """
        Get the CA atoms used for the best fit vector of the MHC helices.
        For MH1 and MH2, the atoms are based on the positions from Rudolph et al., 2006 with the IMGT numbering;
        For CD1 and MR1, we use the same rules as MH1.

        Sets ``self.ca``; sets ``self.abort`` if the MHC type is not
        recognised or fewer than two CA atoms are found.
        """
        try:
            mhc_type = self.MHC.get_MHC_type()

            if mhc_type == "MH1":
                # Get CA atoms of 50-86 and 1050-1086 (A140-A176 on Rudolph et al).
                # The modulus maps 1050-1086 onto 50-86.
                ca_atoms = self._collect_ca_coords(
                    self.MHC.get_alpha(), self._MH1_HELIX_POSITIONS, modulo=1000
                )
            elif mhc_type == "CD1":
                ca_atoms = self._collect_ca_coords(
                    self.MHC.get_CD1(), self._MH1_HELIX_POSITIONS, modulo=1000
                )
            elif mhc_type == "MR1":
                ca_atoms = self._collect_ca_coords(
                    self.MHC.get_MR1(), self._MH1_HELIX_POSITIONS, modulo=1000
                )
            elif mhc_type == "MH2":
                # Get CA atoms of GA 50-87 and GB 52-87 (no modulus is applied here).
                ca_atoms = self._collect_ca_coords(
                    self.MHC.get_GA(), self._MH2_ALPHA_POSITIONS
                )
                ca_atoms += self._collect_ca_coords(
                    self.MHC.get_GB(), self._MH2_BETA_POSITIONS
                )
            else:
                self._abort(
                    "Unrecognised MHC type %s. No docking angle will be calculated.\n"
                    % mhc_type
                )
                return

        except AttributeError:
            # self.MHC does not offer the MHC accessors used above (e.g. it is a
            # single MHC chain); fall back to the chain itself.
            if getattr(self.MHC, "chain_type", None) in self._MH1_LIKE_CHAIN_TYPES:
                ca_atoms = self._collect_ca_coords(
                    self.MHC, self._MH1_HELIX_POSITIONS, modulo=1000
                )
            else:
                self._abort(
                    "Could not determine the MHC type. No docking angle will be calculated.\n"
                )
                return

        # A line cannot be fitted through fewer than two points.
        if len(ca_atoms) < 2:
            self._abort(
                "Fewer than two helix CA atoms were found. No docking angle will be calculated.\n"
            )
            return

        self.ca = np.array(ca_atoms)

    def calculate_docking_angle(self, force=False):
        """
        Return the docking angle in degrees, or ``np.nan`` if it cannot be calculated.

        @param force: if True and no angle has been calculated yet, the vectors
                      are resolved again before the calculation.
        """
        if not np.isnan(self.angle):
            return self.angle

        if force:
            self._resolve_vectors()

        # Checked after the optional re-resolution so that an aborted instance
        # never reaches the SVD with missing vectors.
        if self.abort:
            return np.nan

        # Compute the mean and calculate the vector using SVD
        # https://stackoverflow.com/a/2333251
        ca_centroid = self.ca.mean(axis=0)
        centred_dat = self.ca - ca_centroid
        _, _, vt = np.linalg.svd(centred_dat)

        # The first row of vt is the 1st principal component.
        self.V = vt[0]

        self.angle = self._angle(self.V, self.vec_centroid)

        if not self.QUIET and self.warnings.log:
            sys.stderr.write("\n".join(self.warnings.log))
            sys.stderr.write("\n")

        return self.angle

    def _angle(self, v1, v2):
        """
        Return the angle between two vectors in degrees, in the range 0-90.

        The sign of the dot product is discarded, so antiparallel vectors give 0.
        Returns ``np.nan`` if either vector has zero length.
        """
        # The sign is discarded because the singular-value decomposition used for
        # the best fit may return the axis pointing in either direction.
        # https://stackoverflow.com/questions/17682626/singular-value-decomposition-different-results-with-jama-pcolt-and-numpy
        # https://math.stackexchange.com/questions/2359992/how-to-resolve-the-sign-issue-in-a-svd-problem
        denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
        if denominator == 0:
            return np.nan

        cosine = abs(np.dot(v1, v2)) / denominator

        # Rounding can push the cosine of parallel vectors marginally above 1;
        # that corresponds to an angle of 0 degrees.
        return np.degrees(np.arccos(min(cosine, 1.0)))
|