stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from scipy.stats import norm, gamma
|
|
3
|
+
from sklearn.mixture import GaussianMixture
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# DEFAULT PARAMETERS FROM FIT TO STCRDAB (SAMPLED JULY 2024)
|
|
8
|
+
|
|
9
|
+
# Scanning angle is fit with a unimodal (single-component) Gaussian
|
|
10
|
+
SCANNING_ANGLE_MEAN = np.array([[67.92200037]])
|
|
11
|
+
SCANNING_ANGLE_VARIANCE = np.array([[202.11710875]])
|
|
12
|
+
|
|
13
|
+
# Pitch angle is fit with gamma distribution
|
|
14
|
+
PITCH_ANGLE_ALPHA = 1.1993765213484138
|
|
15
|
+
PITCH_ANGLE_LOC = 0.036500005492399054
|
|
16
|
+
PITCH_ANGLE_SCALE = 5.781270719307026
|
|
17
|
+
|
|
18
|
+
# Z distance of the TCR CoM is fit with a bimodal (two-component) Gaussian mixture
|
|
19
|
+
Z_DIST_WEIGHTS = np.array([0.43420871, 0.56579129])
|
|
20
|
+
Z_DIST_VARIANCE = np.array([[[0.46845205]], [[0.90790909]]])
|
|
21
|
+
Z_DIST_MEAN = np.array([[27.04673494], [28.62071785]])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DockingGeometryFilter:
    """Score docking geometries by a weighted sum of log-probabilities.

    Three PDFs are held, one each for the scanning angle, the pitch angle and
    the Z distance. They are initialised from the module-level default
    parameters and can be swapped for bespoke ``DockingGeometryPDF`` instances
    through the ``set_*_pdf`` methods.
    """

    def __init__(self):
        """Build the three PDFs from the module-level default parameters."""
        self.scanning_angle_pdf = ScanningAnglePDF(
            mean=SCANNING_ANGLE_MEAN, variance=SCANNING_ANGLE_VARIANCE
        )
        self.pitch_angle_pdf = PitchAnglePDF(
            alpha=PITCH_ANGLE_ALPHA, loc=PITCH_ANGLE_LOC, scale=PITCH_ANGLE_SCALE
        )
        self.z_dist_pdf = ZDistPDF(
            weights=Z_DIST_WEIGHTS, mean=Z_DIST_MEAN, variance=Z_DIST_VARIANCE
        )

    def __call__(self, *args, **kwargs):
        """Forward to :meth:`score_docking_geometry`."""
        return self.score_docking_geometry(*args, **kwargs)

    def score_docking_geometry(
        self, scanning_angles, pitch_angles, z_dists, weights=np.array([1, 1, 1])
    ):
        """Return the weighted sum of the three log-probabilities.

        Args:
            scanning_angles: Scalar or array passed to the scanning angle PDF.
            pitch_angles: Scalar or array passed to the pitch angle PDF.
            z_dists: Scalar or array passed to the Z distance PDF.
            weights: Indexable of three factors applied, in order, to the
                scanning angle, pitch angle and Z distance log-probabilities.

        Returns:
            A Python scalar when the combined score holds exactly one value,
            otherwise the array of scores.
        """
        scanning_angle_log_probs = self.scanning_angle_pdf.log_probs(scanning_angles)
        pitch_angle_log_probs = self.pitch_angle_pdf.log_probs(pitch_angles)
        z_dist_log_probs = self.z_dist_pdf.log_probs(z_dists)

        # np.asarray lets NumPy scalars (which have no len()) and length-1
        # arrays share the same single-value branch.
        scores = np.asarray(
            weights[0] * scanning_angle_log_probs
            + weights[1] * pitch_angle_log_probs
            + weights[2] * z_dist_log_probs
        )
        if scores.size == 1:
            return scores.item()
        return scores

    def _set_pdf(self, attribute, new_pdf, probe_value, label):
        """Validate ``new_pdf`` on one probe value and store it on ``attribute``.

        A PDF of the wrong type raises ``AssertionError``. A PDF that fails
        the probe only triggers a warning and leaves the current PDF in place.
        """
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if not isinstance(new_pdf, DockingGeometryPDF):
            raise AssertionError(
                f"Bespoke {label} PDF must inherit from DockingGeometryPDF"
            )

        title = label[0].upper() + label[1:]
        not_set = f"{title} PDF has not been set, using default."
        try:
            # Accept any single negative value: a Python/NumPy float or a
            # length-1 array are both valid results for a scalar probe.
            log_prob = np.asarray(new_pdf.log_probs(probe_value), dtype=float)
            if log_prob.size != 1 or not log_prob.item() < 0.0:
                raise ValueError(
                    f"Log probs returned by new PDF not correctly configured. {not_set}"
                )
        except Exception as e:
            # Deliberate best-effort: a broken bespoke PDF must not take the
            # filter down, so report the problem and keep the existing PDF.
            warnings.warn(
                f"Bespoke {label} PDF failed test with error {str(e)}. {not_set}",
                stacklevel=3,
            )
            return
        setattr(self, attribute, new_pdf)

    def set_scanning_angle_pdf(self, new_scanning_angle_pdf):
        """Replace the scanning angle PDF after probing it at 47.2."""
        self._set_pdf(
            "scanning_angle_pdf", new_scanning_angle_pdf, 47.2, "scanning angle"
        )

    def set_zdist_angle_pdf(self, new_zdist_pdf):
        """Replace the Z distance PDF after probing it at 29.3."""
        # Must target ``z_dist_pdf``, the attribute score_docking_geometry
        # reads; writing to any other name leaves the default PDF in use.
        self._set_pdf("z_dist_pdf", new_zdist_pdf, 29.3, "Z distance")

    def set_pitch_angle_pdf(self, new_pitch_angle_pdf):
        """Replace the pitch angle PDF after probing it at 12.3."""
        self._set_pdf("pitch_angle_pdf", new_pitch_angle_pdf, 12.3, "pitch angle")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class DockingGeometryPDF:
    """Base class for one-dimensional docking-geometry probability densities.

    Subclasses implement ``_pdf_from_parameters`` (use stored parameters),
    ``fit`` (estimate parameters from data) and ``pdf`` (evaluate the
    density). They are expected to store their parameters in ``self.params``
    before calling this constructor.
    """

    def __init__(self, data=None):
        """Initialise from stored parameters, or fit to ``data`` when given.

        Args:
            data: Observations to fit. When ``None`` the PDF is built from
                the subclass's parameters instead.
        """
        if data is None:
            self._pdf_from_parameters()
            return

        # A subclass that has not defined ``params`` has, by definition,
        # supplied no parameters; fall back to an empty tuple so fitting from
        # data does not fail with AttributeError.
        if any(param is not None for param in getattr(self, "params", ())):
            warnings.warn(
                """Some parameters have been provided alongside data. Using data to fit new parameters."""
            )
        self.fit(data)

    def _pdf_from_parameters(self):
        """Set up the density from ``self.params``; subclasses must override."""
        raise NotImplementedError

    def fit(self, data):
        """Estimate the density parameters from ``data``; subclasses must override."""
        raise NotImplementedError

    def pdf(self, x):
        """Evaluate the density at ``x``; subclasses must override."""
        # Raise like the other abstract hooks instead of returning None,
        # which would only surface later as an obscure error in log_probs.
        raise NotImplementedError

    def log_probs(self, x):
        """Return the natural logarithm of ``pdf(x)``."""
        return np.log(self.pdf(x))

    def plot(self, data=None, plot_min=0, plot_max=150, save_as=None):
        """Plot the density, optionally over a histogram of ``data``.

        Args:
            data: Optional observations. When given they are drawn as a
                50-bin normalised histogram and the plotted range becomes the
                data range padded by 10% on each side, overriding
                ``plot_min`` and ``plot_max``.
            plot_min: Lower bound of the plotted range when no data is given.
            plot_max: Upper bound of the plotted range when no data is given.
            save_as: Path to save the figure to. When falsy the figure is
                shown instead.
        """
        # Imported lazily so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt

        plt.figure()
        if data is not None:
            plt.hist(data, bins=50, density=True)
            data_range = max(data) - min(data)
            plot_min = min(data) - (0.1 * data_range)
            plot_max = max(data) + (0.1 * data_range)
        x = np.linspace(plot_min, plot_max, 100)
        plt.plot(x, self.pdf(x))
        if save_as:
            plt.savefig(save_as)
            print(f"PDF distribution plot saved to: {save_as}")
        else:
            plt.show()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class GaussianMixturePDF(DockingGeometryPDF):
    """One-dimensional Gaussian mixture density.

    The mixture is either defined directly by ``weights``, ``means`` and
    ``covariances`` or fitted to ``data`` with scikit-learn's
    ``GaussianMixture``.
    """

    def __init__(
        self,
        data=None,
        n_components=None,
        weights=None,
        means=None,
        covariances=None,
    ):
        """Create the mixture from parameters or by fitting to data.

        Args:
            data: Observations to fit. Takes precedence over parameters.
            n_components: Number of components to fit. Only used with
                ``data``, where it defaults to 1.
            weights: Component weights. Uniform when omitted and the mixture
                is defined from parameters.
            means: Component means, indexed as ``means[i, 0]``.
            covariances: Component variances, indexed as
                ``covariances[i, 0]``.

        Raises:
            ValueError: If neither data nor both means and covariances are
                provided.
        """
        has_parameters = means is not None and covariances is not None

        if data is None and not has_parameters:
            raise ValueError(
                "GMM needs to be initialised form parameters or data. Check input arguments."
            )

        if has_parameters:
            if data is not None:
                warnings.warn(
                    "Data and parameters provided for GMM. Default to fitting GMM from data"
                )
            elif n_components is not None:
                warnings.warn(
                    "Nr of components for Gaussian Mixture Model is only set when fitting to data.\n"
                    "When initialising the PDF from parameters the number of components is inferred from the parameters. "
                )

            if weights is None:  # uniform weights when none are provided
                weights = np.array([1.0 / len(means) for _ in range(len(means))])
            self.n_components = len(means)

        # Always define ``params``: the base constructor inspects it even when
        # fitting from data, so leaving it unset made data-only construction
        # fail with AttributeError.
        self.params = (weights, means, covariances)

        if data is not None:
            # When fitting, the component count comes from the argument
            # (default 1), not from any parameters that were also passed.
            self.n_components = n_components if n_components is not None else 1

        self.model = GaussianMixture(n_components=self.n_components)
        super().__init__(data)

    def _pdf_from_parameters(self):
        """Adopt the stored weights, means and covariances as the mixture."""
        assert all(
            param is not None for param in self.params
        ), """Weights, means and covariances must be provided to define PDF from parameters instead of fitting to data.
Please provide parameters or data to fit."""
        self.weights, self.means, self.covariances = self.params

    def fit(self, data):
        """Fit the mixture to ``data`` and store the fitted parameters.

        Args:
            data: Array-like of observations. One-dimensional input is
                reshaped to a single-feature column.
        """
        # np.asarray lets plain sequences be fitted as well as arrays.
        data = np.asarray(data)
        if data.ndim == 1:
            data = data.reshape(-1, 1)
        self.model.fit(data)
        self.weights, self.means, self.covariances = (
            self.model.weights_,
            self.model.means_,
            self.model.covariances_,
        )

    def pdf(self, x):
        """Evaluate the mixture density at ``x``.

        Each component contributes ``weights[i]`` times a normal density with
        mean ``means[i, 0]`` and standard deviation
        ``sqrt(covariances[i, 0])``. The result follows NumPy broadcasting of
        ``x`` against ``covariances[i, 0]``, so for covariances with three
        dimensions a scalar ``x`` yields a length-1 array.
        """
        pdf = np.asarray(
            [
                self.weights[i]
                * norm.pdf(x, self.means[i, 0], np.sqrt(self.covariances[i, 0]))
                for i in range(self.n_components)
            ]
        )
        pdf = np.sum(pdf, axis=0)
        return pdf
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class GammaPDF(DockingGeometryPDF):
    """Gamma density backed by ``scipy.stats.gamma``.

    The density is defined by ``alpha``, ``loc`` and ``scale`` or by fitting
    those three values to ``data``.
    """

    def __init__(self, data=None, alpha=None, loc=None, scale=None):
        """Store the parameters and defer to the base constructor.

        Args:
            data: Observations to fit; when ``None`` the parameters are used.
            alpha: Shape parameter, passed to scipy as ``a``.
            loc: Location parameter.
            scale: Scale parameter.
        """
        self.model = gamma  # gamma from scipy.stats
        self.params = (alpha, loc, scale)
        super().__init__(data)

    def _pdf_from_parameters(self):
        """Adopt the stored alpha, loc and scale as the density parameters."""
        parameters_complete = all(value is not None for value in self.params)
        assert (
            parameters_complete
        ), """Alpha, location and scale must be provided to define PDF from parameters instead of fitting to data.
Please provide parameters or data to fit."""
        alpha, loc, scale = self.params
        self.alpha = alpha
        self.loc = loc
        self.scale = scale

    def fit(self, data):
        """Fit alpha, loc and scale to ``data`` with ``scipy.stats.gamma.fit``."""
        fitted = self.model.fit(data)
        self.alpha, self.loc, self.scale = fitted

    def pdf(self, x):
        """Evaluate the gamma density at ``x``."""
        return self.model.pdf(x, a=self.alpha, loc=self.loc, scale=self.scale)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class ScanningAnglePDF(GaussianMixturePDF):
    """Gaussian mixture density for the scanning angle.

    Defaults to the module-level single-component parameters.
    """

    def __init__(
        self,
        scanning_angles=None,
        mean=SCANNING_ANGLE_MEAN,
        variance=SCANNING_ANGLE_VARIANCE,
    ):
        """Forward the scanning angle data and parameters to the mixture.

        Args:
            scanning_angles: Optional observations to fit.
            mean: Component means handed on as ``means``.
            variance: Component variances handed on as ``covariances``.
        """
        mixture_arguments = {
            "data": scanning_angles,
            "means": mean,
            "covariances": variance,
        }
        super().__init__(**mixture_arguments)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class ZDistPDF(GaussianMixturePDF):
    """Gaussian mixture density for the Z distance.

    Defaults to the module-level two-component parameters.
    """

    def __init__(
        self,
        z_dists=None,
        n_components=None,
        weights=Z_DIST_WEIGHTS,
        mean=Z_DIST_MEAN,
        variance=Z_DIST_VARIANCE,
    ):
        """Forward the Z distance data and parameters to the mixture.

        Args:
            z_dists: Optional observations to fit.
            n_components: Number of components, handed on unchanged.
            weights: Component weights.
            mean: Component means handed on as ``means``.
            variance: Component variances handed on as ``covariances``.
        """
        mixture_arguments = {
            "data": z_dists,
            "n_components": n_components,
            "weights": weights,
            "means": mean,
            "covariances": variance,
        }
        super().__init__(**mixture_arguments)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class PitchAnglePDF(GammaPDF):
    """Gamma density for the pitch angle.

    Defaults to the module-level alpha, loc and scale.
    """

    def __init__(
        self,
        pitch_angles=None,
        alpha=PITCH_ANGLE_ALPHA,
        loc=PITCH_ANGLE_LOC,
        scale=PITCH_ANGLE_SCALE,
    ):
        """Forward the pitch angle data and gamma parameters to ``GammaPDF``.

        Args:
            pitch_angles: Optional observations to fit.
            alpha: Shape parameter.
            loc: Location parameter.
            scale: Scale parameter.
        """
        gamma_arguments = {
            "data": pitch_angles,
            "alpha": alpha,
            "loc": loc,
            "scale": scale,
        }
        super().__init__(**gamma_arguments)
|
|
File without changes
|
|
File without changes
|