stcrpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. examples/__init__.py +0 -0
  2. examples/egnn.py +425 -0
  3. stcrpy/__init__.py +5 -0
  4. stcrpy/tcr_datasets/__init__.py +0 -0
  5. stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  6. stcrpy/tcr_datasets/tcr_selector.py +0 -0
  7. stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  8. stcrpy/tcr_datasets/utils.py +350 -0
  9. stcrpy/tcr_formats/__init__.py +0 -0
  10. stcrpy/tcr_formats/tcr_formats.py +114 -0
  11. stcrpy/tcr_formats/tcr_haddock.py +556 -0
  12. stcrpy/tcr_geometry/TCRCoM.py +350 -0
  13. stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  14. stcrpy/tcr_geometry/TCRDock.py +261 -0
  15. stcrpy/tcr_geometry/TCRGeom.py +450 -0
  16. stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  17. stcrpy/tcr_geometry/__init__.py +0 -0
  18. stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  19. stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  20. stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  21. stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  22. stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  23. stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  24. stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  25. stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  26. stcrpy/tcr_interactions/PLIPParser.py +147 -0
  27. stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  28. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  29. stcrpy/tcr_interactions/__init__.py +0 -0
  30. stcrpy/tcr_interactions/utils.py +170 -0
  31. stcrpy/tcr_methods/__init__.py +0 -0
  32. stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  33. stcrpy/tcr_methods/tcr_methods.py +150 -0
  34. stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  35. stcrpy/tcr_metrics/__init__.py +2 -0
  36. stcrpy/tcr_metrics/constants.py +39 -0
  37. stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  38. stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  39. stcrpy/tcr_ml/__init__.py +0 -0
  40. stcrpy/tcr_ml/geometry_predictor.py +3 -0
  41. stcrpy/tcr_processing/AGchain.py +89 -0
  42. stcrpy/tcr_processing/Chemical_components.py +48915 -0
  43. stcrpy/tcr_processing/Entity.py +301 -0
  44. stcrpy/tcr_processing/Fragment.py +58 -0
  45. stcrpy/tcr_processing/Holder.py +24 -0
  46. stcrpy/tcr_processing/MHC.py +449 -0
  47. stcrpy/tcr_processing/MHCchain.py +149 -0
  48. stcrpy/tcr_processing/Model.py +37 -0
  49. stcrpy/tcr_processing/Select.py +145 -0
  50. stcrpy/tcr_processing/TCR.py +532 -0
  51. stcrpy/tcr_processing/TCRIO.py +47 -0
  52. stcrpy/tcr_processing/TCRParser.py +1230 -0
  53. stcrpy/tcr_processing/TCRStructure.py +148 -0
  54. stcrpy/tcr_processing/TCRchain.py +160 -0
  55. stcrpy/tcr_processing/__init__.py +3 -0
  56. stcrpy/tcr_processing/annotate.py +480 -0
  57. stcrpy/tcr_processing/utils/__init__.py +0 -0
  58. stcrpy/tcr_processing/utils/common.py +67 -0
  59. stcrpy/tcr_processing/utils/constants.py +367 -0
  60. stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  61. stcrpy/utils/__init__.py +0 -0
  62. stcrpy/utils/error_stream.py +12 -0
  63. stcrpy-1.0.0.dist-info/METADATA +173 -0
  64. stcrpy-1.0.0.dist-info/RECORD +68 -0
  65. stcrpy-1.0.0.dist-info/WHEEL +5 -0
  66. stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
  67. stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  68. stcrpy-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,273 @@
1
+ import warnings
2
+ from scipy.stats import norm, gamma
3
+ from sklearn.mixture import GaussianMixture
4
+ import numpy as np
5
+
6
+
7
# Default distribution parameters, obtained from a fit to STCRDab
# (sampled July 2024).

# Scanning angle: modelled with a single (unimodal) Gaussian.
SCANNING_ANGLE_MEAN = np.array([[67.92200037]])
SCANNING_ANGLE_VARIANCE = np.array([[202.11710875]])

# Pitch angle: modelled with a gamma distribution (shape, location, scale).
PITCH_ANGLE_ALPHA = 1.1993765213484138
PITCH_ANGLE_LOC = 0.036500005492399054
PITCH_ANGLE_SCALE = 5.781270719307026

# Z distance of the TCR CoM: modelled with a two-component Gaussian mixture.
Z_DIST_WEIGHTS = np.array([0.43420871, 0.56579129])
Z_DIST_VARIANCE = np.array(
    [
        [[0.46845205]],
        [[0.90790909]],
    ]
)
Z_DIST_MEAN = np.array(
    [
        [27.04673494],
        [28.62071785],
    ]
)
22
+
23
+
24
class DockingGeometryFilter:
    """Score docking geometries against reference probability densities.

    The filter holds one PDF object each for the scanning angle, the pitch
    angle and the Z distance. A score is the weighted sum of the three
    log-probabilities. Each PDF can be swapped for a bespoke
    ``DockingGeometryPDF`` instance through the ``set_*_pdf`` methods.
    """

    def __init__(self):
        """Build the three PDFs from the module-level default parameters."""
        self.scanning_angle_pdf = ScanningAnglePDF(
            mean=SCANNING_ANGLE_MEAN, variance=SCANNING_ANGLE_VARIANCE
        )
        self.pitch_angle_pdf = PitchAnglePDF(
            alpha=PITCH_ANGLE_ALPHA, loc=PITCH_ANGLE_LOC, scale=PITCH_ANGLE_SCALE
        )
        self.z_dist_pdf = ZDistPDF(
            weights=Z_DIST_WEIGHTS, mean=Z_DIST_MEAN, variance=Z_DIST_VARIANCE
        )

    def __call__(self, *args, **kwargs):
        """Shorthand for :meth:`score_docking_geometry`."""
        return self.score_docking_geometry(*args, **kwargs)

    def score_docking_geometry(
        self, scanning_angles, pitch_angles, z_dists, weights=np.array([1, 1, 1])
    ):
        """Return the weighted sum of the three log-probabilities.

        Args:
            scanning_angles: Value(s) passed to the scanning angle PDF.
            pitch_angles: Value(s) passed to the pitch angle PDF.
            z_dists: Value(s) passed to the Z distance PDF.
            weights: Indexable of three factors applied, in order, to the
                scanning angle, pitch angle and Z distance log-probabilities.

        Returns:
            A Python scalar when the combined score holds exactly one
            element, otherwise the array of scores.
        """
        scanning_angle_log_probs = self.scanning_angle_pdf.log_probs(scanning_angles)
        pitch_angle_log_probs = self.pitch_angle_pdf.log_probs(pitch_angles)
        z_dist_log_probs = self.z_dist_pdf.log_probs(z_dists)

        # np.asarray makes 0-d results (every PDF returning a bare scalar)
        # sized, so the single-value check cannot fail with "object has no
        # len()".
        scores = np.asarray(
            weights[0] * scanning_angle_log_probs
            + weights[1] * pitch_angle_log_probs
            + weights[2] * z_dist_log_probs
        )
        if scores.size == 1:
            return scores.item()
        return scores

    def _install_pdf(self, attribute, new_pdf, probe_value, name):
        """Validate ``new_pdf`` and, if it passes, store it on ``attribute``.

        The candidate is probed with ``probe_value``; it is accepted when its
        ``log_probs`` yields exactly one numeric value below zero. A plain
        float and a one-element array are both accepted. On any failure a
        warning is issued and the current PDF is left untouched.

        Raises:
            AssertionError: If ``new_pdf`` is not a ``DockingGeometryPDF``.
        """
        label = name[0].upper() + name[1:]
        # Raised explicitly (not via ``assert``) so the check still runs under
        # ``python -O`` while keeping the exception type of the original code.
        if not isinstance(new_pdf, DockingGeometryPDF):
            raise AssertionError(
                f"Bespoke {name} PDF must inherit from DockingGeometryPDF"
            )
        try:
            log_probs = np.asarray(new_pdf.log_probs(probe_value), dtype=float)
            if log_probs.size != 1 or not log_probs.item() < 0.0:
                raise ValueError(
                    f"Log probs returned by new PDF not correctly configured. {label} PDF has not been set, using default."
                )
        except Exception as e:
            warnings.warn(
                f"Bespoke {name} PDF failed test with error {str(e)}. {label} PDF has not been set, using default."
            )
            return
        setattr(self, attribute, new_pdf)

    def set_scanning_angle_pdf(self, new_scanning_angle_pdf):
        """Replace the scanning angle PDF after probing it at 47.2."""
        self._install_pdf(
            "scanning_angle_pdf", new_scanning_angle_pdf, 47.2, "scanning angle"
        )

    def set_zdist_angle_pdf(self, new_zdist_pdf):
        """Replace the Z distance PDF after probing it at 29.3.

        The method name is kept as-is for backward compatibility. The PDF is
        stored on ``z_dist_pdf``, the attribute that scoring actually reads.
        """
        self._install_pdf("z_dist_pdf", new_zdist_pdf, 29.3, "Z distance")

    def set_pitch_angle_pdf(self, new_pitch_angle_pdf):
        """Replace the pitch angle PDF after probing it at 12.3."""
        self._install_pdf(
            "pitch_angle_pdf", new_pitch_angle_pdf, 12.3, "pitch angle"
        )
104
+
105
+
106
class DockingGeometryPDF:
    """Base class for a probability density over one docking-geometry value.

    Subclasses implement ``_pdf_from_parameters``, ``fit`` and ``pdf``. If a
    subclass sets ``self.params`` (an iterable of its parameters, ``None``
    for any that were not supplied) before calling ``super().__init__``, a
    warning is issued when parameters and data are given together.
    """

    def __init__(self, data=None):
        """Define the PDF from parameters, or fit it when ``data`` is given.

        Args:
            data: Samples to fit. When ``None`` the subclass hook
                ``_pdf_from_parameters`` is called instead of ``fit``.
        """
        if data is None:
            self._pdf_from_parameters()
            return

        # Subclasses are not forced to define ``params``; fitting from data
        # must not fail just because the attribute is missing.
        params = getattr(self, "params", None)
        if params is not None and any(param is not None for param in params):
            warnings.warn(
                """Some parameters have been provided alongside data. Using data to fit new parameters."""
            )
        self.fit(data)

    def _pdf_from_parameters(self):
        """Set up the PDF from explicit parameters (subclass hook)."""
        raise NotImplementedError

    def fit(self, data):
        """Estimate the PDF parameters from ``data`` (subclass hook)."""
        raise NotImplementedError

    def pdf(self, x):
        """Return the density evaluated at ``x`` (subclass hook).

        Raises NotImplementedError, like the other hooks, rather than
        returning ``None``, which would surface later as an opaque error
        inside ``np.log`` or the plotting code.
        """
        raise NotImplementedError

    def log_probs(self, x):
        """Return the natural logarithm of ``pdf(x)``."""
        return np.log(self.pdf(x))

    def plot(self, data=None, plot_min=0, plot_max=150, save_as=None):
        """Plot the PDF, optionally over a histogram of ``data``.

        Args:
            data: Optional samples. When given, a 50-bin density histogram is
                drawn and the plotted range becomes the data range widened by
                10% on each side, overriding ``plot_min`` / ``plot_max``.
            plot_min: Lower bound of the plotted range when ``data`` is None.
            plot_max: Upper bound of the plotted range when ``data`` is None.
            save_as: If truthy, the figure is saved to this path; otherwise
                it is shown interactively.
        """
        # Imported lazily so matplotlib is only needed for plotting.
        import matplotlib.pyplot as plt

        plt.figure()
        if data is not None:
            plt.hist(data, bins=50, density=True)
            lowest, highest = min(data), max(data)
            margin = 0.1 * (highest - lowest)
            plot_min = lowest - margin
            plot_max = highest + margin
        x = np.linspace(plot_min, plot_max, 100)
        plt.plot(x, self.pdf(x))
        if save_as:
            plt.savefig(save_as)
            print(f"PDF distribution plot saved to: {save_as}")
        else:
            plt.show()
145
+
146
+
147
class GaussianMixturePDF(DockingGeometryPDF):
    """One-dimensional Gaussian mixture, defined by parameters or fit to data.

    When ``data`` is given the mixture is fit with scikit-learn's
    ``GaussianMixture``; otherwise ``means`` and ``covariances`` (and
    optionally ``weights``) define it directly.
    """

    def __init__(
        self,
        data=None,
        n_components=None,
        weights=None,
        means=None,
        covariances=None,
    ):
        """Initialise the mixture.

        Args:
            data: Samples to fit. Takes precedence over explicit parameters.
            n_components: Number of mixture components used when fitting to
                data (defaults to 1). Ignored, with a warning, when the PDF
                is defined from parameters.
            weights: Component weights. Defaults to equal weights when
                ``means`` and ``covariances`` are both given.
            means: Component means, indexed as ``means[i, 0]``.
            covariances: Component variances, indexed as ``covariances[i, 0]``.

        Raises:
            ValueError: If neither data nor both means and covariances are
                provided.
        """
        has_parameters = means is not None and covariances is not None
        if data is None and not has_parameters:
            raise ValueError(
                "GMM needs to be initialised form parameters or data. Check input arguments."
            )

        if has_parameters:
            if data is not None:
                warnings.warn(
                    "Data and parameters provided for GMM. Default to fitting GMM from data"
                )
            elif n_components is not None:
                warnings.warn(
                    "Nr of components for Gaussian Mixture Model is only set when fitting to data.\n"
                    "When initialising the PDF from parameters the number of components is inferred from the parameters. "
                )
            if weights is None:  # equal weights when none are provided
                weights = np.array([1.0 / len(means) for _ in range(len(means))])
            self.n_components = len(means)

        # Always defined, so the base-class check on ``self.params`` also
        # works when the mixture is fit from data without any parameters.
        self.params = (weights, means, covariances)

        if data is not None:
            # When fitting, the component count comes from the argument.
            self.n_components = n_components if n_components is not None else 1

        self.model = GaussianMixture(n_components=self.n_components)
        super().__init__(data)

    def _pdf_from_parameters(self):
        """Adopt the weights, means and covariances stored in ``self.params``."""
        assert all(param is not None for param in self.params), (
            "Weights, means and covariances must be provided to define PDF from parameters instead of fitting to data.\n"
            "Please provide parameters or data to fit."
        )
        self.weights, self.means, self.covariances = self.params

    def fit(self, data):
        """Fit the mixture to ``data`` and store the fitted parameters.

        Args:
            data: Array-like of samples. A 1-D input is reshaped into a
                single-feature column, as the scikit-learn model expects.
        """
        data = np.asarray(data)
        if data.ndim == 1:
            data = data.reshape(-1, 1)
        self.model.fit(data)
        self.weights = self.model.weights_
        self.means = self.model.means_
        self.covariances = self.model.covariances_

    def pdf(self, x):
        """Return the mixture density at ``x``.

        Each component contributes ``weights[i]`` times a normal density with
        mean ``means[i, 0]`` and standard deviation
        ``sqrt(covariances[i, 0])``.

        NOTE: when ``covariances`` is 3-D (for example shape ``(n, 1, 1)``),
        ``covariances[i, 0]`` is a length-1 array, so a scalar ``x`` yields a
        shape ``(1,)`` result rather than a bare scalar. That shape is kept
        unchanged because callers may depend on it.
        """
        components = [
            self.weights[i]
            * norm.pdf(x, self.means[i, 0], np.sqrt(self.covariances[i, 0]))
            for i in range(self.n_components)
        ]
        return np.sum(np.asarray(components), axis=0)
214
+
215
+
216
class GammaPDF(DockingGeometryPDF):
    """Gamma-distributed PDF backed by ``scipy.stats.gamma``.

    The distribution is either defined by ``alpha``, ``loc`` and ``scale``
    or fit to ``data``.
    """

    def __init__(self, data=None, alpha=None, loc=None, scale=None):
        """Store the parameters and defer to the base-class initialisation."""
        self.model = gamma  # scipy.stats.gamma distribution object
        self.params = (alpha, loc, scale)
        super().__init__(data)

    def _pdf_from_parameters(self):
        """Adopt alpha, loc and scale from ``self.params``; all are required."""
        assert not any(value is None for value in self.params), (
            "Alpha, location and scale must be provided to define PDF from parameters instead of fitting to data.\n"
            "Please provide parameters or data to fit."
        )
        alpha, loc, scale = self.params
        self.alpha = alpha
        self.loc = loc
        self.scale = scale

    def fit(self, data):
        """Fit the gamma distribution to ``data`` and store the result."""
        fitted = self.model.fit(data)
        self.alpha, self.loc, self.scale = fitted

    def pdf(self, x):
        """Return the gamma density at ``x`` for the stored parameters."""
        return self.model.pdf(x, a=self.alpha, loc=self.loc, scale=self.scale)
235
+
236
+
237
class ScanningAnglePDF(GaussianMixturePDF):
    """Gaussian mixture PDF for the scanning angle.

    Defaults to the module-level scanning angle mean and variance. Passing
    ``scanning_angles`` makes the parent class fit to that data instead.
    """

    def __init__(
        self,
        scanning_angles=None,
        mean=SCANNING_ANGLE_MEAN,
        variance=SCANNING_ANGLE_VARIANCE,
    ):
        # Translate this class's argument names to the parent's keywords.
        gmm_arguments = {
            "data": scanning_angles,
            "means": mean,
            "covariances": variance,
        }
        super().__init__(**gmm_arguments)
245
+
246
+
247
class ZDistPDF(GaussianMixturePDF):
    """Gaussian mixture PDF for the Z distance.

    Defaults to the module-level Z distance weights, means and variances.
    Passing ``z_dists`` makes the parent class fit to that data instead,
    using ``n_components`` mixture components.
    """

    def __init__(
        self,
        z_dists=None,
        n_components=None,
        weights=Z_DIST_WEIGHTS,
        mean=Z_DIST_MEAN,
        variance=Z_DIST_VARIANCE,
    ):
        # Translate this class's argument names to the parent's keywords.
        gmm_arguments = {
            "data": z_dists,
            "n_components": n_components,
            "weights": weights,
            "means": mean,
            "covariances": variance,
        }
        super().__init__(**gmm_arguments)
263
+
264
+
265
class PitchAnglePDF(GammaPDF):
    """Gamma PDF for the pitch angle.

    Defaults to the module-level pitch angle alpha, location and scale.
    Passing ``pitch_angles`` makes the parent class fit to that data instead.
    """

    def __init__(
        self,
        pitch_angles=None,
        alpha=PITCH_ANGLE_ALPHA,
        loc=PITCH_ANGLE_LOC,
        scale=PITCH_ANGLE_SCALE,
    ):
        # Translate this class's argument names to the parent's keywords.
        gamma_arguments = {
            "data": pitch_angles,
            "alpha": alpha,
            "loc": loc,
            "scale": scale,
        }
        super().__init__(**gamma_arguments)
File without changes
File without changes