stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
from importlib import reload
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from ..tcr_processing.TCRParser import TCRParser
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import plip
|
|
10
|
+
from plip.basic.remote import VisualizerData
|
|
11
|
+
from plip.visualization.visualize import visualize_in_pymol
|
|
12
|
+
except ModuleNotFoundError as e:
|
|
13
|
+
if "pymol" in str(e):
|
|
14
|
+
warnings.warn(
|
|
15
|
+
"""\nPymol package not found. \nInteraction profiler initialising without visualisation capabilitites. \nTo enable pymol visualisations, install pymol with:
|
|
16
|
+
\nconda install -c conda-forge -c schrodinger numpy pymol-bundle\n\n"""
|
|
17
|
+
)
|
|
18
|
+
elif "plip" in str(e):
|
|
19
|
+
warnings.warn(
|
|
20
|
+
"""\n\nPLIP package not found. \nProfiling interactions will not be possible \nTo enable interaction profiling, install PLIP with:
|
|
21
|
+
\npip install plip --no-deps\n\n"""
|
|
22
|
+
)
|
|
23
|
+
except ImportError as e:
|
|
24
|
+
if "pymol" in str(e):
|
|
25
|
+
warnings.warn(
|
|
26
|
+
f"""pymol was not imported. Interactions were not visualised. \nThis is due to an import error. Perhaps try reinstalling pymol?
|
|
27
|
+
\nThe error trace was: {str(e)}
|
|
28
|
+
"""
|
|
29
|
+
)
|
|
30
|
+
elif "plip" in str(e):
|
|
31
|
+
warnings.warn(
|
|
32
|
+
f"""\n\nPLIP was not imported. \nProfiling interactions will not be possible
|
|
33
|
+
\nThis is due to an import error. Perhaps try reinstalling plip?
|
|
34
|
+
\nThe error trace was: {str(e)}"""
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
from . import utils as plip_utils
|
|
39
|
+
from .PLIPParser import PLIPParser
|
|
40
|
+
from .TCRpMHC_PLIP_Model_Parser import TCRpMHC_PLIP_Model_Parser
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TCRInteractionProfiler:
|
|
44
|
+
|
|
45
|
+
def __init__(self, **kwargs):
    """Create the parsers and load a fresh copy of the PLIP configuration.

    Args:
        **kwargs: Optional PLIP detection thresholds. When any are given
            they are forwarded to ``set_interaction_parameters``.
    """
    self.tcr_parser = TCRParser()
    self.model_parser = TCRpMHC_PLIP_Model_Parser()
    self.plip_parser = PLIPParser()

    # Reloads plip.basic.config and stores the module on self.config.
    self.reset_parameters()

    if kwargs:
        self.set_interaction_parameters(**kwargs)
|
|
57
|
+
|
|
58
|
+
def reset_parameters(self):
    """Reload ``plip.basic.config`` and keep the reloaded module on ``self.config``."""
    from plip.basic import config as plip_config

    # importlib.reload re-executes the module, discarding earlier overrides.
    self.config = reload(plip_config)
|
|
63
|
+
|
|
64
|
+
def set_interaction_parameters(self, **kwargs):
    """
    Function to set global PLIP detection parameters, ie. the stcrpy API for PLIP config parameters.
    See https://github.com/pharmai/plip/blob/master/plip/plipcmd.py for how these are set in native PLIP
    See https://github.com/pharmai/plip/blob/master/plip/basic/config.py for the default values

    The configuration is reloaded before the new values are applied. If any
    value is rejected, the configuration is reloaded again before the error
    is re-raised, so a failed call never leaves a half-applied configuration.

    Default Parameters (from PLIP distribution):
        BS_DIST = 7.5  # Determines maximum distance to include binding site residues
        AROMATIC_PLANARITY = 5.0  # Determines allowed deviation from planarity in aromatic rings
        MIN_DIST = 0.5  # Minimum distance for all distance thresholds
        HYDROPH_DIST_MAX = 4.0  # Distance cutoff for detection of hydrophobic contacts
        HBOND_DIST_MAX = 4.1  # Max. distance between hydrogen bond donor and acceptor (Hubbard & Haider, 2001) + 0.6 A
        HBOND_DON_ANGLE_MIN = 100  # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10
        PISTACK_DIST_MAX = 5.5  # Max. distance for parallel or offset pistacking (McGaughey, 1998)
        PISTACK_ANG_DEV = 30  # Max. Deviation from parallel or perpendicular orientation (in degrees)
        PISTACK_OFFSET_MAX = 2.0  # Maximum offset of the two rings (corresponds to the radius of benzene + 0.5 A)
        PICATION_DIST_MAX = 6.0  # Max. distance between charged atom and aromatic ring center (Gallivan and Dougherty, 1999)
        SALTBRIDGE_DIST_MAX = 5.5  # Max. distance between centers of charge for salt bridges (Barlow and Thornton, 1983) + 1.5
        HALOGEN_DIST_MAX = 4.0  # Max. distance between oxy. and halogen (Halogen bonds in biological molecules., Auffinger)+0.5
        HALOGEN_ACC_ANGLE = 120  # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger)
        HALOGEN_DON_ANGLE = 165  # Optimal donor angle (Halogen bonds in biological molecules., Auffinger)
        HALOGEN_ANGLE_DEV = 30  # Max. deviation from optimal angle
        WATER_BRIDGE_MINDIST = 2.5  # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1
        WATER_BRIDGE_MAXDIST = 4.1  # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5
        WATER_BRIDGE_OMEGA_MIN = 71  # Min. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) - 9
        WATER_BRIDGE_OMEGA_MAX = 140  # Max. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005)
        WATER_BRIDGE_THETA_MIN = 100  # Min. angle between water oxygen, donor hydrogen and donor atom (Jiang et al., 2005)
        METAL_DIST_MAX = 3.0  # Max. distance between metal ion and interacting atom (Harding, 2001)
        MAX_COMPOSITE_LENGTH = 200  # Filter out ligands with more than 200 fragments

    Args:
        **kwargs: Parameter names (as in the PLIP config module) mapped to
            their new values.

    Raises:
        AttributeError: Raised if parameter being modified does not exist in config
        ValueError: Raised if value being set is not permitted.
    """

    self.reset_parameters()  # reset to ensure no leaks between configurations
    try:
        for param, value in kwargs.items():
            if not hasattr(self.config, param):
                raise AttributeError(f"PLIP self.config has no parameter {param}")

            # Any parameter whose name contains "ANGLE" is range-checked.
            if "ANGLE" in param and not 0 < value < 180:
                raise ValueError(
                    "Threshold for angles need to have values within 0 and 180."
                )
            # Any parameter whose name contains "DIST" is treated as a distance.
            if "DIST" in param:
                if value > 10:
                    raise ValueError(
                        "Threshold for distances must not be larger than 10 Angstrom."
                    )
                if value > self.config.BS_DIST + 1:
                    # Dynamically adapt the search space for binding site residues
                    self.config.BS_DIST = value + 1
            setattr(self.config, param, value)

        # Check additional conditions for interdependent thresholds
        if not self.config.HALOGEN_ACC_ANGLE > self.config.HALOGEN_ANGLE_DEV:
            raise ValueError(
                "The halogen acceptor angle has to be larger than the halogen angle deviation."
            )
        if not self.config.HALOGEN_DON_ANGLE > self.config.HALOGEN_ANGLE_DEV:
            raise ValueError(
                "The halogen donor angle has to be larger than the halogen angle deviation."
            )
        if not self.config.WATER_BRIDGE_MINDIST < self.config.WATER_BRIDGE_MAXDIST:
            raise ValueError(
                "The water bridge minimum distance has to be smaller than the water bridge maximum distance."
            )
        if not self.config.WATER_BRIDGE_OMEGA_MIN < self.config.WATER_BRIDGE_OMEGA_MAX:
            raise ValueError(
                "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle"
            )
    except Exception:
        # Values set before the failure would otherwise stay in the config
        # module; restore the defaults and let the original error propagate.
        self.reset_parameters()
        raise
|
|
137
|
+
|
|
138
|
+
def _visualize_interactions(self, complex: "plip.structure.preparation.PDBComplex"):
    """Characterise every ligand of a PLIP complex and visualise the first binding site.

    Switches the PLIP ``config.PYMOL`` flag on, characterises each ligand in
    ``complex.ligands``, wraps every interaction set that has interacting
    residues in ``VisualizerData`` and passes the first one to
    ``visualize_in_pymol``.

    Args:
        complex: PLIP complex whose ligands should be characterised.

    Returns:
        None. A warning is issued (and nothing is visualised) when no
        interaction set has interacting residues, or when the PLIP/pymol
        visualisation function is unavailable (``NameError``).
    """

    from plip.basic import config

    if not config.PYMOL:
        config.PYMOL = True
    for ligand in complex.ligands:
        complex.characterize_complex(ligand)
    visualizer_complexes = [
        VisualizerData(complex, site)
        for site in sorted(complex.interaction_sets)
        if len(complex.interaction_sets[site].interacting_res) != 0
    ]
    if not visualizer_complexes:
        # Indexing [0] below would otherwise fail with a bare IndexError.
        warnings.warn(
            "Interactions could not be visualised. No binding site with interacting residues was found."
        )
        return
    try:
        visualize_in_pymol(visualizer_complexes[0])
    except NameError as e:
        # visualize_in_pymol is undefined when its module-level import failed.
        warnings.warn(
            f"""Interactions could not be visualised. Raised error {e}.
            \nTo enable pymol visualisations please install pymol in a conda environment with:
            \nconda install -c conda-forge -c schrodinger numpy pymol-bundle\n\n
            """
        )
        return
|
|
161
|
+
|
|
162
|
+
def create_pymol_session(
    self,
    tcr_pmhc: "TCR",
    save_as=None,
    antigen_residues_to_highlight=None,
):
    """Profile a TCR-pMHC complex and save a PyMOL session showing its interactions.

    The complex is converted to a PLIP model, analysed and visualised. The
    ``.pse`` file whose name starts with the upper-cased PLIP ``pymol_name``
    is then looked up in the current working directory and loaded, the TCR
    structure is added from a temporary PDB file, and the antigen chain is
    shown as sticks. The combined session is saved and the intermediate
    files are removed, also when a later step fails.

    Args:
        tcr_pmhc: TCR object with its antigen and MHC.
        save_as: Output path of the session. Defaults to
            ``"<structure id>_<tcr id>_interactions.pse"``.
        antigen_residues_to_highlight: Residue number, or iterable of residue
            numbers, of the antigen chain whose carbon atoms are coloured
            red. Only applied when exactly one antigen is present.

    Returns:
        The path the session was saved to, or None if pymol cannot be imported.

    Raises:
        FileNotFoundError: If no session file for the complex is found in the
            current working directory after visualisation.
    """

    try:
        import pymol
        from pymol import cmd
    except ImportError as e:
        warnings.warn(
            f"""pymol could not be imported. Raised error: {str(e)}.
            \nTo enable pymol visualisations please install pymol in a conda environment with:
            \nconda install -c conda-forge -c schrodinger numpy pymol-bundle\n\n
            """
        )
        return

    import os
    import re

    pymol.finish_launching(["pymol", "-qc"])

    mol, _, _ = self.model_parser.parse_tcr_pmhc_complex(
        tcr_pmhc, renumber=True, delete_tmp_files=True
    )
    mol.analyze()
    try:
        self.plip_parser.parse_complex(mol)
        self._visualize_interactions(mol)
    except (
        pymol.CmdException
    ):  # for some reason sometimes this only works the second time? Probably to do with latency in pymol loading and object selection
        self.plip_parser.parse_complex(mol)
        self._visualize_interactions(mol)

    # Escape the name so regex metacharacters in it are matched literally.
    session_pattern = re.compile(rf"^{re.escape(mol.pymol_name.upper())}.*\.pse$")
    pymol_session = next(
        (f for f in os.listdir(".") if session_pattern.match(f)),
        None,
    )
    if pymol_session is None:
        raise FileNotFoundError(
            f"No PyMOL session file starting with {mol.pymol_name.upper()} was found in the current working directory."
        )

    tcr_name = f"{tcr_pmhc.parent.parent.id}_{tcr_pmhc.id}"
    tmp_file = f"tmp_for_vis_{tcr_name}.pdb"
    try:
        cmd.load(pymol_session)

        # create temporary file containing the TCR and its MHC and antigen.
        from ..tcr_processing import TCRIO

        tcrio = TCRIO.TCRIO()
        tcrio.save(tcr_pmhc, save_as=tmp_file)
        cmd.load(tmp_file)

        if len(tcr_pmhc.antigen) == 1:
            antigen_chain = tcr_pmhc.antigen[0].id
            cmd.show("sticks", f"chain {antigen_chain}")
            cmd.hide("cartoon", f"chain {antigen_chain}")

            if antigen_residues_to_highlight is not None:
                if isinstance(antigen_residues_to_highlight, int):
                    antigen_residues_to_highlight = [antigen_residues_to_highlight]
                for res_nr in antigen_residues_to_highlight:
                    cmd.color(
                        "red",
                        f"chain {antigen_chain} and res {str(res_nr)} and elem C",
                    )
        elif len(tcr_pmhc.antigen) == 0:
            warnings.warn(
                f"""Could not highlight antigen, no antigen found for TCR {tcr_pmhc.parent.parent.id}_{tcr_pmhc.id}"""
            )
        else:
            warnings.warn(
                f"""Could not highlight antigen, multiple antigen {tcr_pmhc.antigen} found for TCR {tcr_pmhc.parent.parent.id}_{tcr_pmhc.id}"""
            )

        if save_as is None:
            save_as = f"{tcr_name}_interactions.pse"

        cmd.save(save_as)
        cmd.delete("all")
    finally:
        # remove temporary files, even if loading or saving failed
        for leftover in (pymol_session, tmp_file):
            if os.path.exists(leftover):
                os.remove(leftover)

    return save_as
|
|
254
|
+
|
|
255
|
+
def get_interactions(self, tcr, renumber=True, save_as_csv=None):
    """Run PLIP on a TCR-pMHC complex and return the parsed interactions.

    Args:
        tcr: TCR object handed to the model parser and the PLIP parser.
        renumber: Forwarded to ``parse_tcr_pmhc_complex``. When True the
            model parser also returns the renumbering and domain assignment,
            which are passed on to the PLIP parser; otherwise both are None.
        save_as_csv: Optional path; when given, the result's ``to_csv`` is
            called with it.

    Returns:
        The object returned by ``self.plip_parser.parse_complex``.
    """
    parsed = self.model_parser.parse_tcr_pmhc_complex(tcr, renumber=renumber)
    renumbering, domains = None, None
    if renumber:
        mol, renumbering, domains = parsed
    else:
        mol = parsed
    mol.analyze()

    interactions_df = self.plip_parser.parse_complex(
        mol, tcr, renumbering=renumbering, domain_assignment=domains
    )

    if save_as_csv is not None:
        interactions_df.to_csv(save_as_csv)

    return interactions_df
|
|
272
|
+
|
|
273
|
+
def get_interaction_heatmap(self, tcr, renumber=True, **plotting_kwargs):
    """Compute the interactions of a TCR and plot them as per-chain heatmaps.

    Args:
        tcr: TCR object; its first antigen determines the peptide length.
        renumber: Forwarded to ``get_interactions``.
        **plotting_kwargs: Forwarded to ``_interaction_heatmap``.

    Returns:
        The value returned by ``_interaction_heatmap``.
    """
    interactions_df = self.get_interactions(tcr, renumber=renumber)

    tcr_label = f"{tcr.parent.parent.id}_{tcr.id}"
    n_peptide_residues = len(tcr.antigen[0])
    return self._interaction_heatmap(
        interactions_df,
        tcr_name=tcr_label,
        peptide_length=n_peptide_residues,
        **plotting_kwargs,
    )
|
|
283
|
+
|
|
284
|
+
@staticmethod
def _interaction_heatmap(
    interactions_df,
    tcr_name=None,
    peptide_length=10,
    save_as=None,
    interaction_type=None,
    antigen_name=None,
    mutation_index=None,
):
    """Count TCR-to-peptide interactions per residue pair and plot them.

    Rows whose ``domain`` is "VA" or "VD" are counted in the alpha heatmap,
    rows whose ``domain`` is "VB" in the beta heatmap. Peptide positions are
    derived from the largest ``ligand_number`` in ``interactions_df`` and
    ``peptide_length``.

    Args:
        interactions_df: DataFrame with the columns ``type``, ``domain``,
            ``protein_residue``, ``protein_number``, ``ligand_residue`` and
            ``ligand_number``. TCR tick labels are only drawn when an
            ``original_numbering`` column is present.
        tcr_name: Name used in the plot titles.
        peptide_length: Number of peptide positions (heatmap columns).
        save_as: Optional path the figure is saved to.
        interaction_type: If given, only rows of this ``type`` are counted.
        antigen_name: Name used in the plot titles; defaults to "peptide".
        mutation_index: Peptide tick label, or list of labels, of the form
            "<ligand_number>-<ligand_residue>" to outline in the plots.

    Returns:
        dict with keys "alpha" and "beta", each a numpy array of shape
        (126, peptide_length) holding interaction counts.
    """

    if interaction_type is not None:
        df = interactions_df[interactions_df.type == interaction_type]
    else:
        df = interactions_df
        interaction_type = "all"  # only used for the plot titles from here on

    if antigen_name is None:
        antigen_name = "peptide"

    alpha_rows = df[df.domain.isin(["VA", "VD"])]
    beta_rows = df[df.domain == "VB"]

    heatmap_a = np.zeros((126, peptide_length))
    heatmap_b = np.zeros((126, peptide_length))

    # check peptide numbering; without any interaction there is nothing to
    # align, so the offset is irrelevant and max() must not be called.
    ligand_numbers = set(interactions_df.ligand_number)
    offset = max(ligand_numbers) + 1 - peptide_length if ligand_numbers else 0
    ligand_number_mapping = {x + int(offset): x for x in range(peptide_length)}

    # Tick mappings are [positions, labels]; an empty list means "no ticks".
    tcr_a_mapping = []
    tcr_b_mapping = []
    if "original_numbering" in interactions_df.columns:
        labelled_residues = list(
            zip(
                interactions_df.domain,
                interactions_df.protein_number,
                interactions_df.original_numbering,
                interactions_df.protein_residue,
            )
        )
        tcr_a_mapping = list(
            zip(
                *{
                    (number, f"{original}-{residue}")
                    for domain, number, original, residue in labelled_residues
                    if domain in ["VA", "VD"]
                }
            )
        )
        tcr_b_mapping = list(
            zip(
                *{
                    (number, f"{original}-{residue}")
                    for domain, number, original, residue in labelled_residues
                    if domain == "VB"
                }
            )
        )
    peptide_mapping = list(
        zip(
            *{
                (number - offset, f"{number}-{residue}")
                for number, residue in zip(
                    interactions_df.ligand_number, interactions_df.ligand_residue
                )
            }
        )
    )
    # label -> plot position
    peptide_mapping_dict = dict(zip(*reversed(peptide_mapping)))

    if mutation_index is not None:
        if isinstance(mutation_index, str):
            mutation_index = [mutation_index]
        try:
            plot_index = [peptide_mapping_dict[m_idx] for m_idx in mutation_index]
        except KeyError:
            plot_index = []
            warnings.warn(
                f"Mutation index could not be resolved. Peptide residues are: {list(peptide_mapping_dict.keys())}"
            )
    else:
        plot_index = []

    fig, (ax_alpha, ax_beta) = plt.subplots(2, 1, figsize=(18, 4))

    plt.subplots_adjust(hspace=0.5)

    panels = (
        (ax_alpha, heatmap_a, alpha_rows, tcr_a_mapping, "alpha"),
        (ax_beta, heatmap_b, beta_rows, tcr_b_mapping, "beta"),
    )
    for ax, heatmap, rows, tcr_mapping, chain_label in panels:
        # Iterate the columns directly: DataFrame.apply(axis=1) on an empty
        # frame returns a DataFrame, whose iteration yields column names.
        for protein_number, ligand_number in zip(
            rows["protein_number"], rows["ligand_number"]
        ):
            heatmap[
                int(protein_number), ligand_number_mapping[int(ligand_number)]
            ] += 1

        ax.imshow(heatmap.T, cmap="PuRd")

        for i in plot_index:
            ax.axhline(y=i - 0.5, color="blue", linewidth=1)
            ax.axhline(y=i + 0.5, color="blue", linewidth=1)
        ax.set_title(
            f"{tcr_name} TCR {chain_label} chain to {antigen_name}; {interaction_type} interactions"
        )
        if len(tcr_mapping) > 0:
            ax.set_xticks(tcr_mapping[0], tcr_mapping[1], rotation=90)
            ax.set_yticks(peptide_mapping[0], peptide_mapping[1])
        else:
            ax.set_xticks([], [], rotation=90)
            ax.set_yticks([], [])

    if save_as is not None:
        fig.savefig(save_as, bbox_inches="tight", dpi=200)

    return {"alpha": heatmap_a, "beta": heatmap_b}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import warnings
|
|
3
|
+
import copy
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from plip.structure.preparation import PDBComplex
|
|
7
|
+
except ModuleNotFoundError:
|
|
8
|
+
warnings.warn(
|
|
9
|
+
"""\n\nPLIP package not found. \nProfiling interactions will not be possible \nTo enable interaction profiling, install PLIP with:
|
|
10
|
+
\npip install plip --no-deps\n\n"""
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from rdkit import Chem
|
|
14
|
+
from Bio import PDB
|
|
15
|
+
from Bio.PDB.PDBParser import PDBParser
|
|
16
|
+
|
|
17
|
+
from ..tcr_processing.TCRParser import TCRParser
|
|
18
|
+
from ..tcr_processing.TCR import TCR
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TCRpMHC_PLIP_Model_Parser:
|
|
22
|
+
def __init__(self, tmp_dir=None):
    """Create the PDB and TCR parsers and choose where temporary files go.

    Args:
        tmp_dir: Directory for temporary files; "./" is used when None.
    """
    self.parser = PDBParser()
    self.tcr_parser = TCRParser()
    if tmp_dir is None:
        tmp_dir = "./"
    self.tmp_dir = tmp_dir
|
|
26
|
+
|
|
27
|
+
def parse_tcr_pmhc_complex(
    self,
    tcr_pmhc_complex: TCR,
    delete_tmp_files=True,
    renumber=True,
) -> "PDBComplex":
    """Build a PLIP ``PDBComplex`` with the peptide as ligand and TCR + MHC as protein.

    The peptide chain is written to a PDB file and converted to SDF and back
    to a PDB block with RDKit. The TCR and MHC chains are written to a second
    PDB file, from which only the ``ATOM`` records are kept. Both parts are
    concatenated and loaded into PLIP from a string.

    Only copies of the chains are renumbered and written, so the chains of
    ``tcr_pmhc_complex`` are not re-parented into the temporary model.

    Args:
        tcr_pmhc_complex: TCR object providing ``antigen``, ``get_chains()``,
            ``get_MHC()`` and ``get_domain_assignment()``. Exactly one
            antigen chain is required.
        delete_tmp_files: Remove the three temporary files before returning
            (also when an error occurs). When False their paths are returned.
        renumber: Renumber the residues of every TCR/MHC chain copy from 1 to
            N before writing, and return the mapping back to the original ids.

    Returns:
        - ``delete_tmp_files`` and ``renumber``:
          ``(complex, renumbering, domain_assignment)``
        - ``delete_tmp_files`` only: ``complex``
        - ``renumber`` only: ``(complex, tcr_mhc_file, peptide_pdb_file,
          peptide_sdf_file, renumbering, domain_assignment)``
        - neither: ``(complex, tcr_mhc_file, peptide_pdb_file, peptide_sdf_file)``

        ``renumbering`` maps chain id -> {new residue id: original residue id}.

    Raises:
        AssertionError: If the complex does not have exactly one antigen chain.
    """

    peptide_chain = tcr_pmhc_complex.antigen
    assert (
        len(peptide_chain) == 1
    ), f"More or less than one peptide chain found: {peptide_chain}"
    peptide_model = PDB.Model.Model(id=0)
    peptide_model.add(peptide_chain[0].copy())

    tcr_and_mhc_chains = [
        c.copy()
        for c in list(tcr_pmhc_complex.get_chains())
        + list(tcr_pmhc_complex.get_MHC()[0].get_chains())
    ]
    if renumber:
        # renumber each chain from one to N to avoid automated renumbering issues related to plip and openbabel
        renumbering = {}
        for chain in tcr_and_mhc_chains:
            chain_map = renumbering[chain.id] = {}
            for new_idx, res in enumerate(chain.get_residues(), start=1):
                new_id = (" ", new_idx, " ")
                chain_map[new_id] = res.id
                res.id = new_id
        domain_assignment = tcr_pmhc_complex.get_domain_assignment()

    tcr_mhc_file = os.path.join(self.tmp_dir, "tcr_mhc.pdb")
    peptide_pdb_file = os.path.join(self.tmp_dir, "peptide.pdb")
    peptide_sdf_file = os.path.join(self.tmp_dir, "peptide.sdf")

    try:
        pdb_writer = PDB.PDBIO()
        pdb_writer.set_structure(peptide_model)
        pdb_writer.save(peptide_pdb_file)

        # Write the (renumbered) copies rather than the original chains: the
        # renumbering map is keyed by the new ids, and adding the originals
        # to a new Model would change their parent.
        tcr_mhc_model = PDB.Model.Model(id=0)
        for chain in tcr_and_mhc_chains:
            tcr_mhc_model.add(chain)

        pdb_writer = PDB.PDBIO()
        pdb_writer.set_structure(tcr_mhc_model)
        pdb_writer.save(tcr_mhc_file)

        # Round-trip the peptide PDB -> SDF -> PDB block through RDKit.
        rdkit_mol = Chem.MolFromPDBFile(peptide_pdb_file)
        Chem.MolToMolFile(rdkit_mol, peptide_sdf_file)
        with open(tcr_mhc_file, "r") as f:
            protein_lines = [
                line for line in f.read().split("\n") if line.startswith("ATOM")
            ]
        ligand_mol = Chem.MolFromMolFile(peptide_sdf_file)
        ligand_pdb_block = Chem.MolToPDBBlock(ligand_mol)
        complex_pdb_block = "\n".join(protein_lines) + "\n" + ligand_pdb_block
        plip_complex = PDBComplex()
        plip_complex.load_pdb(complex_pdb_block, as_string=True)
    finally:
        if delete_tmp_files:
            # Some files may not exist if an earlier step failed.
            for tmp_path in (tcr_mhc_file, peptide_pdb_file, peptide_sdf_file):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

    if delete_tmp_files:
        if renumber:
            return plip_complex, renumbering, domain_assignment
        return plip_complex

    if renumber:
        return (
            plip_complex,
            tcr_mhc_file,
            peptide_pdb_file,
            peptide_sdf_file,
            renumbering,
            domain_assignment,
        )
    return plip_complex, tcr_mhc_file, peptide_pdb_file, peptide_sdf_file
|
|
115
|
+
|
|
116
|
+
def map_amino_acids_to_ligands(self, ligand_pdb, ligand_sdf):
    """Map each atom coordinate of the SDF ligand to its residue in the PDB ligand.

    Args:
        ligand_pdb: Path of the ligand PDB file, parsed with ``self.parser``.
        ligand_sdf: Path of the ligand SDF file; only its first molecule is used.

    Returns:
        dict mapping each SDF atom coordinate (as a tuple) to
        ``(residue name, residue number)`` of the PDB atom whose squared
        distance to that coordinate is below 0.0001.

    Raises:
        AssertionError: If a coordinate does not match exactly one PDB atom.
    """
    ligand_structure = self.parser.get_structure("tmp", ligand_pdb)
    first_mol = list(Chem.SDMolSupplier(ligand_sdf))[0]

    coord_to_aa = {}
    for coord in first_mol.GetConformer().GetPositions():
        matches = []
        for atom in ligand_structure.get_atoms():
            if sum((atom.coord - coord) ** 2) < 0.0001:
                matches.append(atom)
        assert len(matches) == 1
        residue = matches[0].parent
        coord_to_aa[tuple(coord)] = (residue.resname, residue.id[1])
    return coord_to_aa
|
|
File without changes
|