stcrpy 1.0.0__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stcrpy/__init__.py +1 -1
- stcrpy/tcr_formats/tcr_formats.py +20 -1
- stcrpy/tcr_geometry/TCRAngle.py +177 -0
- stcrpy/tcr_geometry/TCRDock.py +4 -1
- stcrpy/tcr_geometry/reference_data/Acoreset.txt +30 -0
- stcrpy/tcr_geometry/reference_data/Bcoreset.txt +30 -0
- stcrpy/tcr_geometry/reference_data/consensus_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/consensus_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/consensus_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/consensus_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/pcA.txt +3 -0
- stcrpy/tcr_geometry/reference_data/pcB.txt +3 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +1 -1
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +21 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +14 -10
- stcrpy/tcr_methods/tcr_methods.py +23 -22
- stcrpy/tcr_metrics/tcr_dockq.py +404 -0
- stcrpy/tcr_processing/Chemical_components.py +4 -4
- stcrpy/tcr_processing/Entity.py +15 -16
- stcrpy/tcr_processing/MHC.py +456 -4
- stcrpy/tcr_processing/TCR.py +462 -14
- stcrpy/tcr_processing/TCRParser.py +364 -193
- stcrpy/tcr_processing/annotate.py +35 -24
- stcrpy/tcr_processing/utils/common.py +3 -2
- stcrpy/tcr_processing/utils/constants.py +4 -3
- stcrpy/tcr_processing/utils/region_definitions.py +9 -0
- stcrpy/tcr_processing/utils/symmetry_mates.py +90 -0
- stcrpy-1.0.5.dist-info/METADATA +285 -0
- {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/RECORD +33 -22
- {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/WHEEL +1 -1
- stcrpy-1.0.0.dist-info/METADATA +0 -173
- {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/licenses/LICENCE +0 -0
- {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +0 -0
- {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/top_level.txt +0 -0
|
@@ -8,11 +8,14 @@ TCRParser object which is based on ABDB's AntibodyParser and BioPython's PDB par
|
|
|
8
8
|
from itertools import combinations, product
|
|
9
9
|
import sys
|
|
10
10
|
import os
|
|
11
|
+
import tempfile
|
|
11
12
|
from collections import defaultdict
|
|
13
|
+
import warnings
|
|
12
14
|
|
|
13
15
|
from Bio.PDB.PDBParser import PDBParser
|
|
14
16
|
from Bio.PDB.MMCIFParser import MMCIFParser
|
|
15
17
|
from Bio.PDB import NeighborSearch
|
|
18
|
+
from Bio.PDB import PDBIO
|
|
16
19
|
|
|
17
20
|
# TCRDB
|
|
18
21
|
from .annotate import annotate, extract_sequence, align_numbering
|
|
@@ -22,7 +25,7 @@ from ..utils.error_stream import ErrorStream
|
|
|
22
25
|
from .TCRStructure import TCRStructure
|
|
23
26
|
from .Model import Model
|
|
24
27
|
from .TCR import TCR, abTCR, gdTCR
|
|
25
|
-
from .MHC import MHC, MH1, MH2, CD1, MR1, scMH1, scCD1
|
|
28
|
+
from .MHC import MHC, MH1, MH2, CD1, MR1, scMH1, scCD1, scMH2
|
|
26
29
|
from .Holder import Holder
|
|
27
30
|
from .TCRchain import TCRchain
|
|
28
31
|
from .MHCchain import MHCchain
|
|
@@ -57,6 +60,10 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
57
60
|
self.numbering_scheme = "imgt"
|
|
58
61
|
self.definition = "imgt"
|
|
59
62
|
|
|
63
|
+
self.current_file = (
|
|
64
|
+
None # the current file being processed, populated by get_tcr_structure
|
|
65
|
+
)
|
|
66
|
+
|
|
60
67
|
def _create_chain(self, chain, new_chain_id, numbering, chain_type):
|
|
61
68
|
"""
|
|
62
69
|
Create a new TCR or MHC chain.
|
|
@@ -193,19 +200,62 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
193
200
|
|
|
194
201
|
return newchain1, newchain2
|
|
195
202
|
|
|
196
|
-
def
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
203
|
+
def _number_and_annotate_chain(self, chain, prenumbering=None, ali_dict={}):
|
|
204
|
+
# try to number the sequence found in the structure
|
|
205
|
+
if prenumbering and chain.id in prenumbering:
|
|
206
|
+
if len(prenumbering[chain.id]) == 2:
|
|
207
|
+
numbering = [{}, {}]
|
|
208
|
+
region_types = ["", ""]
|
|
201
209
|
|
|
202
|
-
|
|
203
|
-
|
|
210
|
+
numbering[0], region_types[0] = self._prenumbered(
|
|
211
|
+
chain, prenumbering, ali_dict, n=0
|
|
212
|
+
)
|
|
213
|
+
numbering[1], region_types[1] = self._prenumbered(
|
|
214
|
+
chain, prenumbering, ali_dict, n=1
|
|
215
|
+
)
|
|
216
|
+
rtypes = sorted(region_types)
|
|
204
217
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
218
|
+
# Check that we have a beta/alpha domain or gamma/delta domain
|
|
219
|
+
if rtypes == ["A", "B"] or rtypes == ["D", "G"]:
|
|
220
|
+
chain_type = "".join(region_types)
|
|
221
|
+
scTCR = True
|
|
222
|
+
# if not, just take the first region and warn the user
|
|
223
|
+
else:
|
|
224
|
+
chain_type = region_types[0]
|
|
225
|
+
numbering = numbering[0]
|
|
226
|
+
scTCR = False
|
|
227
|
+
print(
|
|
228
|
+
"Warning multiple variable regions of the same type (%s) found on chain %s.\nTaking the first variable region only."
|
|
229
|
+
% (chain_type, chain.id),
|
|
230
|
+
file=self.warnings,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
elif prenumbering[chain.id][0][-1] not in ["B", "A", "D", "G"]:
|
|
234
|
+
numbering, chain_type, scTCR = annotate(chain)
|
|
235
|
+
|
|
236
|
+
else:
|
|
237
|
+
numbering, chain_type = self._prenumbered(
|
|
238
|
+
chain, prenumbering, ali_dict, n=0
|
|
239
|
+
)
|
|
240
|
+
scTCR = False
|
|
241
|
+
|
|
242
|
+
else:
|
|
243
|
+
numbering, chain_type, germline_info, scTCR = annotate(chain)
|
|
244
|
+
|
|
245
|
+
return numbering, chain_type, germline_info, scTCR
|
|
246
|
+
|
|
247
|
+
def _get_header_info(self, tcrstructure, chain, germline_info):
|
|
248
|
+
if chain.id in tcrstructure.header["chain_details"]: # clean this up!!!
|
|
249
|
+
engineered = tcrstructure.header["chain_details"][chain.id]["engineered"]
|
|
250
|
+
details = tcrstructure.header["chain_details"][chain.id]
|
|
251
|
+
else:
|
|
252
|
+
engineered = False
|
|
253
|
+
details = {"molecule": "unknown", "engineered": False}
|
|
254
|
+
|
|
255
|
+
details["genetic_origin"] = germline_info
|
|
256
|
+
return details, engineered
|
|
257
|
+
|
|
258
|
+
def _read_structure_file(self, file, id):
|
|
209
259
|
# get a structure object from biopython.
|
|
210
260
|
_, ext = os.path.splitext(file)
|
|
211
261
|
if ext.lower() == ".pdb":
|
|
@@ -224,6 +274,45 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
224
274
|
# Set and analyse header information
|
|
225
275
|
tcrstructure.set_header(structure.header)
|
|
226
276
|
self._analyse_header(tcrstructure)
|
|
277
|
+
return structure, tcrstructure
|
|
278
|
+
|
|
279
|
+
def _initialise_model(self, model):
|
|
280
|
+
newmodel = Model(model.id)
|
|
281
|
+
|
|
282
|
+
# initialise holder objects for holding TCR, MHC and non-TCR/non-MHC (antigen) chains.
|
|
283
|
+
agchains = Holder("Antigen")
|
|
284
|
+
trchains = Holder("TCRchain")
|
|
285
|
+
mhchains = Holder("MHCchain")
|
|
286
|
+
newmodel.add(agchains)
|
|
287
|
+
newmodel.add(trchains)
|
|
288
|
+
newmodel.add(mhchains)
|
|
289
|
+
return newmodel, agchains, trchains, mhchains
|
|
290
|
+
|
|
291
|
+
def get_tcr_structure(
|
|
292
|
+
self,
|
|
293
|
+
id,
|
|
294
|
+
file,
|
|
295
|
+
prenumbering=None,
|
|
296
|
+
ali_dict={},
|
|
297
|
+
crystal_contacts=[],
|
|
298
|
+
include_symmetry_mates=True,
|
|
299
|
+
):
|
|
300
|
+
"""
|
|
301
|
+
Post processing of the TCRPDB.Bio.PDB structure object into a TCR context.
|
|
302
|
+
|
|
303
|
+
id: a string to identify the structure
|
|
304
|
+
file: the path to the .pdb file
|
|
305
|
+
|
|
306
|
+
optional:
|
|
307
|
+
prenumbering: prenumbering for the chains in the structure.
|
|
308
|
+
"""
|
|
309
|
+
self.warnings = ErrorStream()
|
|
310
|
+
self.include_symmetry_mates = include_symmetry_mates
|
|
311
|
+
self.current_file = file
|
|
312
|
+
|
|
313
|
+
structure, tcrstructure = self._read_structure_file(
|
|
314
|
+
file, id
|
|
315
|
+
) # structure: Bio.PDB.Structure from file; tcrstructure: initialised empty TCRStructure object to be populated
|
|
227
316
|
|
|
228
317
|
# iterate over the models in the structure
|
|
229
318
|
# iterate backwards through the model list - delete old structure as we go
|
|
@@ -232,70 +321,18 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
232
321
|
for mid in range(len(structure.child_list) - 1, -1, -1):
|
|
233
322
|
# add a model to the TCR structure
|
|
234
323
|
model = structure.child_list[mid]
|
|
235
|
-
newmodel =
|
|
324
|
+
newmodel, agchains, trchains, mhchains = self._initialise_model(model)
|
|
236
325
|
tcrstructure.add(newmodel)
|
|
237
326
|
|
|
238
|
-
# initialise holder objects for holding TCR, MHC and non-TCR/non-MHC (antigen) chains.
|
|
239
|
-
agchains = Holder("Antigen")
|
|
240
|
-
trchains = Holder("TCRchain")
|
|
241
|
-
mhchains = Holder("MHCchain")
|
|
242
|
-
newmodel.add(agchains)
|
|
243
|
-
newmodel.add(trchains)
|
|
244
|
-
newmodel.add(mhchains)
|
|
245
|
-
|
|
246
327
|
# iterate over the chains in the model
|
|
247
328
|
for chain in model.get_list():
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
numbering = [{}, {}]
|
|
252
|
-
region_types = ["", ""]
|
|
253
|
-
|
|
254
|
-
numbering[0], region_types[0] = self._prenumbered(
|
|
255
|
-
chain, prenumbering, ali_dict, n=0
|
|
256
|
-
)
|
|
257
|
-
numbering[1], region_types[1] = self._prenumbered(
|
|
258
|
-
chain, prenumbering, ali_dict, n=1
|
|
259
|
-
)
|
|
260
|
-
rtypes = sorted(region_types)
|
|
261
|
-
|
|
262
|
-
# Check that we have a beta/alpha domain or gamma/delta domain
|
|
263
|
-
if rtypes == ["A", "B"] or rtypes == ["D", "G"]:
|
|
264
|
-
chain_type = "".join(region_types)
|
|
265
|
-
scTCR = True
|
|
266
|
-
# if not, just take the first region and warn the user
|
|
267
|
-
else:
|
|
268
|
-
chain_type = region_types[0]
|
|
269
|
-
numbering = numbering[0]
|
|
270
|
-
scTCR = False
|
|
271
|
-
print(
|
|
272
|
-
"Warning multiple variable regions of the same type (%s) found on chain %s.\nTaking the first variable region only."
|
|
273
|
-
% (chain_type, chain.id),
|
|
274
|
-
file=self.warnings,
|
|
275
|
-
)
|
|
276
|
-
|
|
277
|
-
elif prenumbering[chain.id][0][-1] not in ["B", "A", "D", "G"]:
|
|
278
|
-
numbering, chain_type, scTCR = annotate(chain)
|
|
279
|
-
|
|
280
|
-
else:
|
|
281
|
-
numbering, chain_type = self._prenumbered(
|
|
282
|
-
chain, prenumbering, ali_dict, n=0
|
|
283
|
-
)
|
|
284
|
-
scTCR = False
|
|
285
|
-
|
|
286
|
-
else:
|
|
287
|
-
numbering, chain_type, germline_info, scTCR = annotate(chain)
|
|
288
|
-
|
|
289
|
-
if chain.id in tcrstructure.header["chain_details"]: # clean this up!!!
|
|
290
|
-
engineered = tcrstructure.header["chain_details"][chain.id][
|
|
291
|
-
"engineered"
|
|
292
|
-
]
|
|
293
|
-
details = tcrstructure.header["chain_details"][chain.id]
|
|
294
|
-
else:
|
|
295
|
-
engineered = False
|
|
296
|
-
details = {"molecule": "unknown", "engineered": False}
|
|
329
|
+
numbering, chain_type, germline_info, scTCR = (
|
|
330
|
+
self._number_and_annotate_chain(chain, prenumbering, ali_dict)
|
|
331
|
+
)
|
|
297
332
|
|
|
298
|
-
details
|
|
333
|
+
details, engineered = self._get_header_info(
|
|
334
|
+
tcrstructure, chain, germline_info
|
|
335
|
+
)
|
|
299
336
|
|
|
300
337
|
if numbering and chain_type in ["G", "D", "B", "A"]:
|
|
301
338
|
# create a new TCR chain
|
|
@@ -349,7 +386,9 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
349
386
|
elif not obs_chaintypes - set(["G", "D"]):
|
|
350
387
|
tcr = gdTCR(chain1, chain2)
|
|
351
388
|
elif not obs_chaintypes - set(["B", "D"]):
|
|
352
|
-
tcr = abTCR(
|
|
389
|
+
tcr = abTCR(
|
|
390
|
+
chain1, chain2
|
|
391
|
+
) # initial way to deal with anarci missclassification of alpha chains as delta chains
|
|
353
392
|
# tcr = dbTCR(chain1, chain2)
|
|
354
393
|
|
|
355
394
|
tcr.scTCR = True #
|
|
@@ -461,6 +500,7 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
461
500
|
newmodel.add(mhc)
|
|
462
501
|
|
|
463
502
|
# allow instantiation of single chain MH1 type MH class if the alpha helices forming chain has been observed
|
|
503
|
+
# allow instantiation of single chain MH2 type MH class if one of the GA or GB chain has been observed
|
|
464
504
|
ids_to_detach = []
|
|
465
505
|
for mhc_chain in mhchains:
|
|
466
506
|
if mhc_chain.chain_type in ["MH1", "GA1", "GA2"]:
|
|
@@ -471,6 +511,13 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
471
511
|
ids_to_detach.append(mhc_chain.id)
|
|
472
512
|
sc_mhc = scCD1(mhc_chain)
|
|
473
513
|
newmodel.add(sc_mhc)
|
|
514
|
+
elif mhc_chain.chain_type in ["GA", "GB"]:
|
|
515
|
+
ids_to_detach.append(mhc_chain.id)
|
|
516
|
+
sc_mhc = scMH2(mhc_chain)
|
|
517
|
+
newmodel.add(sc_mhc)
|
|
518
|
+
warnings.warn(
|
|
519
|
+
f"Single chain MH class II instantiated with chain type {mhc_chain.chain_type}. It is possible the other MHC class II chain has not been identified."
|
|
520
|
+
)
|
|
474
521
|
|
|
475
522
|
for mhc_chain_id in ids_to_detach:
|
|
476
523
|
mhchains.detach_child(mhc_chain_id)
|
|
@@ -494,6 +541,7 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
494
541
|
sys.stderr.write("\n")
|
|
495
542
|
tcrstructure.warnings = self.warnings
|
|
496
543
|
|
|
544
|
+
self.current_file = None # reset the current file
|
|
497
545
|
return tcrstructure
|
|
498
546
|
|
|
499
547
|
def _analyse_header(self, header):
|
|
@@ -778,19 +826,120 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
778
826
|
|
|
779
827
|
return hetatoms, sugars
|
|
780
828
|
|
|
781
|
-
def
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
829
|
+
def _prepare_tcr(self, tr, cdr_atoms, antigen_hetatoms, antigen_sugars):
|
|
830
|
+
for cdr in tr.get_CDRs():
|
|
831
|
+
# Only get CDR3?
|
|
832
|
+
if "3" not in cdr.id:
|
|
833
|
+
continue
|
|
834
|
+
# only look at CA or CB atoms of the CDR; this is used later.
|
|
835
|
+
cdr_atoms[tr.id] += [
|
|
836
|
+
atom for atom in cdr.get_atoms() if atom.id == "CB" or atom.id == "CA"
|
|
837
|
+
]
|
|
787
838
|
|
|
788
|
-
|
|
789
|
-
""
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
839
|
+
# get TCR type and get chain's hetatoms accordingly
|
|
840
|
+
if isinstance(tr, TCR) and tr.get_TCR_type() == "abTCR":
|
|
841
|
+
beta_chain = tr.get_VB()
|
|
842
|
+
alpha_chain = tr.get_VA()
|
|
843
|
+
|
|
844
|
+
antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = self._find_chain_hetatoms(
|
|
845
|
+
beta_chain
|
|
846
|
+
)
|
|
847
|
+
antigen_hetatoms[tr.VA], antigen_sugars[tr.VA] = self._find_chain_hetatoms(
|
|
848
|
+
alpha_chain
|
|
849
|
+
)
|
|
793
850
|
|
|
851
|
+
elif isinstance(tr, TCR) and tr.get_TCR_type() == "gdTCR":
|
|
852
|
+
delta_chain = tr.get_VD()
|
|
853
|
+
gamma_chain = tr.get_VG()
|
|
854
|
+
antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = self._find_chain_hetatoms(
|
|
855
|
+
delta_chain
|
|
856
|
+
)
|
|
857
|
+
antigen_hetatoms[tr.VG], antigen_sugars[tr.VG] = self._find_chain_hetatoms(
|
|
858
|
+
gamma_chain
|
|
859
|
+
)
|
|
860
|
+
|
|
861
|
+
elif isinstance(tr, TCR) and tr.get_TCR_type() == "dbTCR":
|
|
862
|
+
beta_chain = tr.get_VB()
|
|
863
|
+
delta_chain = tr.get_VD()
|
|
864
|
+
antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = self._find_chain_hetatoms(
|
|
865
|
+
beta_chain
|
|
866
|
+
)
|
|
867
|
+
antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = self._find_chain_hetatoms(
|
|
868
|
+
delta_chain
|
|
869
|
+
)
|
|
870
|
+
|
|
871
|
+
# Unpaired TCR chain
|
|
872
|
+
elif isinstance(tr, TCRchain):
|
|
873
|
+
antigen_hetatoms[tr.id], antigen_sugars[tr.id] = self._find_chain_hetatoms(
|
|
874
|
+
tr
|
|
875
|
+
)
|
|
876
|
+
|
|
877
|
+
def _prepare_mhc(self, mh, mh_atoms, antigen_hetatoms, antigen_sugars):
|
|
878
|
+
# Keep G domain atoms; Get the Helix region of MHC
|
|
879
|
+
mh_atoms[mh.id] = [
|
|
880
|
+
atom
|
|
881
|
+
for atom in mh.get_atoms()
|
|
882
|
+
if (atom.id == "CB" or atom.id == "CA") and atom.region == "Helix"
|
|
883
|
+
]
|
|
884
|
+
if isinstance(mh, MHC) and mh.MHC_type == "MH1":
|
|
885
|
+
MH1, B2M = mh.get_MH1(), mh.get_B2M()
|
|
886
|
+
if MH1 is not None:
|
|
887
|
+
antigen_hetatoms[mh.MH1], antigen_sugars[mh.MH1] = (
|
|
888
|
+
self._find_chain_hetatoms(MH1)
|
|
889
|
+
)
|
|
890
|
+
else:
|
|
891
|
+
GA1 = mh.get_GA1()
|
|
892
|
+
antigen_hetatoms[mh.GA1], antigen_sugars[mh.GA1] = (
|
|
893
|
+
self._find_chain_hetatoms(GA1)
|
|
894
|
+
)
|
|
895
|
+
if B2M is not None: # handle single chain MH1 case
|
|
896
|
+
antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
|
|
897
|
+
self._find_chain_hetatoms(B2M)
|
|
898
|
+
)
|
|
899
|
+
|
|
900
|
+
elif isinstance(mh, MHC) and mh.MHC_type == "CD1":
|
|
901
|
+
CD1, B2M = mh.get_CD1(), mh.get_B2M()
|
|
902
|
+
if CD1 is not None:
|
|
903
|
+
antigen_hetatoms[mh.CD1], antigen_sugars[mh.CD1] = (
|
|
904
|
+
self._find_chain_hetatoms(CD1)
|
|
905
|
+
)
|
|
906
|
+
if B2M is not None:
|
|
907
|
+
antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
|
|
908
|
+
self._find_chain_hetatoms(B2M)
|
|
909
|
+
)
|
|
910
|
+
|
|
911
|
+
elif isinstance(mh, MHC) and mh.MHC_type == "MR1":
|
|
912
|
+
MR1, B2M = mh.get_MR1(), mh.get_B2M()
|
|
913
|
+
antigen_hetatoms[mh.MR1], antigen_sugars[mh.MR1] = (
|
|
914
|
+
self._find_chain_hetatoms(MR1)
|
|
915
|
+
)
|
|
916
|
+
antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
|
|
917
|
+
self._find_chain_hetatoms(B2M)
|
|
918
|
+
)
|
|
919
|
+
|
|
920
|
+
elif isinstance(mh, MHC) and mh.MHC_type == "MH2":
|
|
921
|
+
GA, GB = mh.get_GA(), mh.get_GB()
|
|
922
|
+
antigen_hetatoms[mh.GA], antigen_sugars[mh.GA] = self._find_chain_hetatoms(
|
|
923
|
+
GA
|
|
924
|
+
)
|
|
925
|
+
antigen_hetatoms[mh.GB], antigen_sugars[mh.GB] = self._find_chain_hetatoms(
|
|
926
|
+
GB
|
|
927
|
+
)
|
|
928
|
+
|
|
929
|
+
# Unpaired MHC chains -- if any, go here.
|
|
930
|
+
elif isinstance(mh, MHCchain):
|
|
931
|
+
antigen_hetatoms[mh.id], antigen_sugars[mh.id] = self._find_chain_hetatoms(
|
|
932
|
+
mh
|
|
933
|
+
)
|
|
934
|
+
|
|
935
|
+
def _prepare_tcrs_mhcs_and_antigens_for_pairing(
|
|
936
|
+
self,
|
|
937
|
+
model,
|
|
938
|
+
tcell_receptors,
|
|
939
|
+
mhc_complexes,
|
|
940
|
+
agchains,
|
|
941
|
+
crystal_contacts,
|
|
942
|
+
):
|
|
794
943
|
# Initialise 5 dictionaries which carries a list of atoms per chain ID.
|
|
795
944
|
antigen_atoms, cdr_atoms, mh_atoms, antigen_hetatoms, antigen_sugars = (
|
|
796
945
|
defaultdict(list),
|
|
@@ -802,113 +951,11 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
802
951
|
|
|
803
952
|
# Look through TCR and MHC and see if there are any weird hetatoms and sugars in the structure.
|
|
804
953
|
for tr in tcell_receptors:
|
|
805
|
-
|
|
806
|
-
# Only get CDR3?
|
|
807
|
-
if "3" not in cdr.id:
|
|
808
|
-
continue
|
|
809
|
-
# only look at CA or CB atoms of the CDR; this is used later.
|
|
810
|
-
cdr_atoms[tr.id] += [
|
|
811
|
-
atom
|
|
812
|
-
for atom in cdr.get_atoms()
|
|
813
|
-
if atom.id == "CB" or atom.id == "CA"
|
|
814
|
-
]
|
|
815
|
-
|
|
816
|
-
# get TCR type and get chain's hetatoms accordingly
|
|
817
|
-
if isinstance(tr, TCR) and tr.get_TCR_type() == "abTCR":
|
|
818
|
-
beta_chain = tr.get_VB()
|
|
819
|
-
alpha_chain = tr.get_VA()
|
|
820
|
-
|
|
821
|
-
antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = (
|
|
822
|
-
self._find_chain_hetatoms(beta_chain)
|
|
823
|
-
)
|
|
824
|
-
antigen_hetatoms[tr.VA], antigen_sugars[tr.VA] = (
|
|
825
|
-
self._find_chain_hetatoms(alpha_chain)
|
|
826
|
-
)
|
|
827
|
-
|
|
828
|
-
elif isinstance(tr, TCR) and tr.get_TCR_type() == "gdTCR":
|
|
829
|
-
delta_chain = tr.get_VD()
|
|
830
|
-
gamma_chain = tr.get_VG()
|
|
831
|
-
antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = (
|
|
832
|
-
self._find_chain_hetatoms(delta_chain)
|
|
833
|
-
)
|
|
834
|
-
antigen_hetatoms[tr.VG], antigen_sugars[tr.VG] = (
|
|
835
|
-
self._find_chain_hetatoms(gamma_chain)
|
|
836
|
-
)
|
|
837
|
-
|
|
838
|
-
elif isinstance(tr, TCR) and tr.get_TCR_type() == "dbTCR":
|
|
839
|
-
beta_chain = tr.get_VB()
|
|
840
|
-
delta_chain = tr.get_VD()
|
|
841
|
-
antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = (
|
|
842
|
-
self._find_chain_hetatoms(beta_chain)
|
|
843
|
-
)
|
|
844
|
-
antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = (
|
|
845
|
-
self._find_chain_hetatoms(delta_chain)
|
|
846
|
-
)
|
|
847
|
-
|
|
848
|
-
# Unpaired TCR chain
|
|
849
|
-
elif isinstance(tr, TCRchain):
|
|
850
|
-
antigen_hetatoms[tr.id], antigen_sugars[tr.id] = (
|
|
851
|
-
self._find_chain_hetatoms(tr)
|
|
852
|
-
)
|
|
954
|
+
self._prepare_tcr(tr, cdr_atoms, antigen_hetatoms, antigen_sugars)
|
|
853
955
|
|
|
854
956
|
# Do the same for MHC.
|
|
855
957
|
for mh in mhc_complexes:
|
|
856
|
-
|
|
857
|
-
mh_atoms[mh.id] = [
|
|
858
|
-
atom
|
|
859
|
-
for atom in mh.get_atoms()
|
|
860
|
-
if (atom.id == "CB" or atom.id == "CA") and atom.region == "Helix"
|
|
861
|
-
]
|
|
862
|
-
if isinstance(mh, MHC) and mh.MHC_type == "MH1":
|
|
863
|
-
MH1, B2M = mh.get_MH1(), mh.get_B2M()
|
|
864
|
-
if MH1 is not None:
|
|
865
|
-
antigen_hetatoms[mh.MH1], antigen_sugars[mh.MH1] = (
|
|
866
|
-
self._find_chain_hetatoms(MH1)
|
|
867
|
-
)
|
|
868
|
-
else:
|
|
869
|
-
GA1 = mh.get_GA1()
|
|
870
|
-
antigen_hetatoms[mh.GA1], antigen_sugars[mh.GA1] = (
|
|
871
|
-
self._find_chain_hetatoms(GA1)
|
|
872
|
-
)
|
|
873
|
-
if B2M is not None: # handle single chain MH1 case
|
|
874
|
-
antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
|
|
875
|
-
self._find_chain_hetatoms(B2M)
|
|
876
|
-
)
|
|
877
|
-
|
|
878
|
-
elif isinstance(mh, MHC) and mh.MHC_type == "CD1":
|
|
879
|
-
CD1, B2M = mh.get_CD1(), mh.get_B2M()
|
|
880
|
-
if CD1 is not None:
|
|
881
|
-
antigen_hetatoms[mh.CD1], antigen_sugars[mh.CD1] = (
|
|
882
|
-
self._find_chain_hetatoms(CD1)
|
|
883
|
-
)
|
|
884
|
-
if B2M is not None:
|
|
885
|
-
antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
|
|
886
|
-
self._find_chain_hetatoms(B2M)
|
|
887
|
-
)
|
|
888
|
-
|
|
889
|
-
elif isinstance(mh, MHC) and mh.MHC_type == "MR1":
|
|
890
|
-
MR1, B2M = mh.get_MR1(), mh.get_B2M()
|
|
891
|
-
antigen_hetatoms[mh.MR1], antigen_sugars[mh.MR1] = (
|
|
892
|
-
self._find_chain_hetatoms(MR1)
|
|
893
|
-
)
|
|
894
|
-
antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
|
|
895
|
-
self._find_chain_hetatoms(B2M)
|
|
896
|
-
)
|
|
897
|
-
|
|
898
|
-
elif isinstance(mh, MHC) and mh.MHC_type == "MH2":
|
|
899
|
-
GA, GB = mh.get_GA(), mh.get_GB()
|
|
900
|
-
antigen_hetatoms[mh.GA], antigen_sugars[mh.GA] = (
|
|
901
|
-
self._find_chain_hetatoms(GA)
|
|
902
|
-
)
|
|
903
|
-
antigen_hetatoms[mh.GB], antigen_sugars[mh.GB] = (
|
|
904
|
-
self._find_chain_hetatoms(GB)
|
|
905
|
-
)
|
|
906
|
-
|
|
907
|
-
# Unpaired MHC chains -- if any, go here.
|
|
908
|
-
elif isinstance(mh, MHCchain):
|
|
909
|
-
antigen_hetatoms[mh.id], antigen_sugars[mh.id] = (
|
|
910
|
-
self._find_chain_hetatoms(mh)
|
|
911
|
-
)
|
|
958
|
+
self._prepare_mhc(mh, mh_atoms, antigen_hetatoms, antigen_sugars)
|
|
912
959
|
|
|
913
960
|
for antigen in agchains:
|
|
914
961
|
antigen_atoms[antigen.id] = [
|
|
@@ -954,7 +1001,18 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
954
1001
|
|
|
955
1002
|
# If a TCR does not have a detected MHC chain, then skip the remaining MHC-specific parsing bits.
|
|
956
1003
|
if not mhc_complexes:
|
|
957
|
-
return
|
|
1004
|
+
return (
|
|
1005
|
+
model,
|
|
1006
|
+
tcell_receptors,
|
|
1007
|
+
mhc_complexes,
|
|
1008
|
+
agchains,
|
|
1009
|
+
crystal_contacts,
|
|
1010
|
+
antigen_atoms,
|
|
1011
|
+
cdr_atoms,
|
|
1012
|
+
mh_atoms,
|
|
1013
|
+
antigen_hetatoms,
|
|
1014
|
+
antigen_sugars,
|
|
1015
|
+
)
|
|
958
1016
|
|
|
959
1017
|
# Have a very tight cutoff for MHCs that present het atoms (e.g. CD1 types)
|
|
960
1018
|
self._het_sugar_pass(
|
|
@@ -970,14 +1028,36 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
970
1028
|
self._protein_peptide_pass(
|
|
971
1029
|
model, mhc_complexes, mh_atoms, antigen_atoms, crystal_contacts
|
|
972
1030
|
)
|
|
1031
|
+
return (
|
|
1032
|
+
model,
|
|
1033
|
+
tcell_receptors,
|
|
1034
|
+
mhc_complexes,
|
|
1035
|
+
agchains,
|
|
1036
|
+
crystal_contacts,
|
|
1037
|
+
antigen_atoms,
|
|
1038
|
+
cdr_atoms,
|
|
1039
|
+
mh_atoms,
|
|
1040
|
+
antigen_hetatoms,
|
|
1041
|
+
antigen_sugars,
|
|
1042
|
+
)
|
|
973
1043
|
|
|
1044
|
+
def _pair_tcr_and_mhc(
|
|
1045
|
+
self,
|
|
1046
|
+
model,
|
|
1047
|
+
tcell_receptors,
|
|
1048
|
+
mhc_complexes,
|
|
1049
|
+
cdr_atoms,
|
|
1050
|
+
mh_atoms,
|
|
1051
|
+
crystal_contacts,
|
|
1052
|
+
already_paired_tr_mh=set(),
|
|
1053
|
+
):
|
|
974
1054
|
# Pair a TCR with an MHC and vice-versa; go through all possible combinations of TCR/MHC
|
|
975
1055
|
# We see if a CB/CA atom of the helix region of an MHC is within 8A of a TCR CDR loop's CB/CA atoms.
|
|
976
1056
|
# This is similar to the _protein_peptide_pass algorithm; we find the number of contacts between MHC and TCR,
|
|
977
1057
|
# and use the MHC with highest no. of contacts
|
|
978
1058
|
contact_freq = defaultdict(int)
|
|
979
|
-
tr_mh_pairs = list(product(tcell_receptors, mhc_complexes))
|
|
980
1059
|
|
|
1060
|
+
tr_mh_pairs = list(product(tcell_receptors, mhc_complexes))
|
|
981
1061
|
for tr, mh in tr_mh_pairs:
|
|
982
1062
|
ns = NeighborSearch(cdr_atoms[tr.id])
|
|
983
1063
|
for atom in mh_atoms[mh.id]:
|
|
@@ -990,24 +1070,115 @@ class TCRParser(PDBParser, MMCIFParser):
|
|
|
990
1070
|
sorted_contacts = sorted(
|
|
991
1071
|
list(contact_freq.items()), key=lambda z: z[1], reverse=True
|
|
992
1072
|
)
|
|
993
|
-
paired_tr_mh = set()
|
|
1073
|
+
paired_tr_mh = set() if not already_paired_tr_mh else already_paired_tr_mh
|
|
994
1074
|
for pair, contacts in sorted_contacts:
|
|
995
1075
|
tr, mh = pair
|
|
996
1076
|
# If the TCR has already been paired, or if we know that the TCR and MHC are forming crystal contacts, move on.
|
|
997
1077
|
if tr in paired_tr_mh or (tr, mh) in crystal_contacts:
|
|
998
1078
|
continue
|
|
1079
|
+
if mh not in model:
|
|
1080
|
+
model.add([mhc for mhc in mhc_complexes if mhc.id == mh][0])
|
|
999
1081
|
model[tr]._add_mhc(model[mh])
|
|
1000
1082
|
model[mh]._add_tcr(model[tr])
|
|
1001
1083
|
paired_tr_mh.add(tr)
|
|
1084
|
+
return model, paired_tr_mh
|
|
1085
|
+
|
|
1086
|
+
def _match_units(self, model, trchains, mhchains, agchains, crystal_contacts=[]):
|
|
1087
|
+
"""
|
|
1088
|
+
Match MHC+Peptide chains to TCR chains.
|
|
1089
|
+
model is the current model - extract the TCRs from it (paired chains have been removed)
|
|
1090
|
+
trchains contains those TCR chains that have been unable to be paired to form TCRs
|
|
1091
|
+
agchains contains non-TCR chains that are potential antigens.
|
|
1092
|
+
|
|
1093
|
+
Goal: Match TCR <-> MHC + peptide antigen.
|
|
1094
|
+
"""
|
|
1095
|
+
# Get all T-cell receptor-like objects (TCR, TCRchain), and MHC-like objects.
|
|
1096
|
+
tcell_receptors = [h for h in model if isinstance(h, TCR)] + trchains.child_list
|
|
1097
|
+
mhc_complexes = [h for h in model if isinstance(h, MHC)] + mhchains.child_list
|
|
1098
|
+
|
|
1099
|
+
(
|
|
1100
|
+
model,
|
|
1101
|
+
tcell_receptors,
|
|
1102
|
+
mhc_complexes,
|
|
1103
|
+
agchains,
|
|
1104
|
+
crystal_contacts,
|
|
1105
|
+
antigen_atoms,
|
|
1106
|
+
cdr_atoms,
|
|
1107
|
+
mh_atoms,
|
|
1108
|
+
antigen_hetatoms,
|
|
1109
|
+
antigen_sugars,
|
|
1110
|
+
) = self._prepare_tcrs_mhcs_and_antigens_for_pairing(
|
|
1111
|
+
model,
|
|
1112
|
+
tcell_receptors,
|
|
1113
|
+
mhc_complexes,
|
|
1114
|
+
agchains,
|
|
1115
|
+
crystal_contacts,
|
|
1116
|
+
)
|
|
1117
|
+
|
|
1118
|
+
model, paired_tr_mh = self._pair_tcr_and_mhc(
|
|
1119
|
+
model=model,
|
|
1120
|
+
tcell_receptors=tcell_receptors,
|
|
1121
|
+
mhc_complexes=mhc_complexes,
|
|
1122
|
+
cdr_atoms=cdr_atoms,
|
|
1123
|
+
mh_atoms=mh_atoms,
|
|
1124
|
+
crystal_contacts=crystal_contacts,
|
|
1125
|
+
)
|
|
1126
|
+
|
|
1127
|
+
if (
|
|
1128
|
+
self.include_symmetry_mates
|
|
1129
|
+
and len(paired_tr_mh) != len(tcell_receptors)
|
|
1130
|
+
and len(mhc_complexes) > 0
|
|
1131
|
+
): # check if all TCRs have been paired if MHC is present.
|
|
1132
|
+
# try searching for symmetry mates
|
|
1133
|
+
symmetry_mates = self._generate_symmetry_mates()
|
|
1134
|
+
mhc_complexes.extend([m for t in symmetry_mates for m in t.get_MHCs()])
|
|
1135
|
+
|
|
1136
|
+
(
|
|
1137
|
+
model,
|
|
1138
|
+
tcell_receptors,
|
|
1139
|
+
mhc_complexes,
|
|
1140
|
+
agchains,
|
|
1141
|
+
crystal_contacts,
|
|
1142
|
+
antigen_atoms,
|
|
1143
|
+
cdr_atoms,
|
|
1144
|
+
mh_atoms,
|
|
1145
|
+
antigen_hetatoms,
|
|
1146
|
+
antigen_sugars,
|
|
1147
|
+
) = self._prepare_tcrs_mhcs_and_antigens_for_pairing(
|
|
1148
|
+
model,
|
|
1149
|
+
tcell_receptors,
|
|
1150
|
+
mhc_complexes,
|
|
1151
|
+
agchains,
|
|
1152
|
+
crystal_contacts,
|
|
1153
|
+
)
|
|
1154
|
+
model, paired_tr_mh = self._pair_tcr_and_mhc(
|
|
1155
|
+
model,
|
|
1156
|
+
tcell_receptors,
|
|
1157
|
+
mhc_complexes,
|
|
1158
|
+
cdr_atoms,
|
|
1159
|
+
mh_atoms,
|
|
1160
|
+
crystal_contacts,
|
|
1161
|
+
already_paired_tr_mh=paired_tr_mh,
|
|
1162
|
+
)
|
|
1163
|
+
|
|
1164
|
+
def _generate_symmetry_mates(self):
|
|
1165
|
+
print("Generating symmetry mates to pair antigens.")
|
|
1166
|
+
from .utils.symmetry_mates import (
|
|
1167
|
+
get_symmetry_mates,
|
|
1168
|
+
) # import here to avoid circular import
|
|
1169
|
+
|
|
1170
|
+
return get_symmetry_mates(self.current_file)
|
|
1002
1171
|
|
|
1003
1172
|
def _protein_peptide_pass(
|
|
1004
1173
|
self, model, complexes, receptor_atoms, antigen_atoms, crystal_contacts=[]
|
|
1005
1174
|
):
|
|
1006
1175
|
"""
|
|
1007
1176
|
This is a generic method to process which proteins/peptides belong to a TCR or MHC. Needs testing.
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1177
|
+
|
|
1178
|
+
Args:
|
|
1179
|
+
complexes: list of TCR/TCRchain objects or MHC/MHCchain objects
|
|
1180
|
+
receptor_atoms: list of atom subset that will likely contact the antigen (e.g. cdr_atoms)
|
|
1181
|
+
antigen_atoms: list of atoms in the antigen.
|
|
1011
1182
|
"""
|
|
1012
1183
|
ns = NeighborSearch(
|
|
1013
1184
|
[atom for chain in receptor_atoms for atom in receptor_atoms[chain]]
|