stcrpy 1.0.0__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. stcrpy/__init__.py +1 -1
  2. stcrpy/tcr_formats/tcr_formats.py +20 -1
  3. stcrpy/tcr_geometry/TCRAngle.py +177 -0
  4. stcrpy/tcr_geometry/TCRDock.py +4 -1
  5. stcrpy/tcr_geometry/reference_data/Acoreset.txt +30 -0
  6. stcrpy/tcr_geometry/reference_data/Bcoreset.txt +30 -0
  7. stcrpy/tcr_geometry/reference_data/consensus_A.pdb +31 -0
  8. stcrpy/tcr_geometry/reference_data/consensus_B.pdb +31 -0
  9. stcrpy/tcr_geometry/reference_data/consensus_D.pdb +31 -0
  10. stcrpy/tcr_geometry/reference_data/consensus_G.pdb +31 -0
  11. stcrpy/tcr_geometry/reference_data/pcA.txt +3 -0
  12. stcrpy/tcr_geometry/reference_data/pcB.txt +3 -0
  13. stcrpy/tcr_interactions/TCRInteractionProfiler.py +1 -1
  14. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +21 -0
  15. stcrpy/tcr_methods/tcr_batch_operations.py +14 -10
  16. stcrpy/tcr_methods/tcr_methods.py +23 -22
  17. stcrpy/tcr_metrics/tcr_dockq.py +404 -0
  18. stcrpy/tcr_processing/Chemical_components.py +4 -4
  19. stcrpy/tcr_processing/Entity.py +15 -16
  20. stcrpy/tcr_processing/MHC.py +456 -4
  21. stcrpy/tcr_processing/TCR.py +462 -14
  22. stcrpy/tcr_processing/TCRParser.py +364 -193
  23. stcrpy/tcr_processing/annotate.py +35 -24
  24. stcrpy/tcr_processing/utils/common.py +3 -2
  25. stcrpy/tcr_processing/utils/constants.py +4 -3
  26. stcrpy/tcr_processing/utils/region_definitions.py +9 -0
  27. stcrpy/tcr_processing/utils/symmetry_mates.py +90 -0
  28. stcrpy-1.0.5.dist-info/METADATA +285 -0
  29. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/RECORD +33 -22
  30. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/WHEEL +1 -1
  31. stcrpy-1.0.0.dist-info/METADATA +0 -173
  32. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/licenses/LICENCE +0 -0
  33. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +0 -0
  34. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,14 @@ TCRParser object which is based on ABDB's AntibodyParser and BioPython's PDB par
8
8
  from itertools import combinations, product
9
9
  import sys
10
10
  import os
11
+ import tempfile
11
12
  from collections import defaultdict
13
+ import warnings
12
14
 
13
15
  from Bio.PDB.PDBParser import PDBParser
14
16
  from Bio.PDB.MMCIFParser import MMCIFParser
15
17
  from Bio.PDB import NeighborSearch
18
+ from Bio.PDB import PDBIO
16
19
 
17
20
  # TCRDB
18
21
  from .annotate import annotate, extract_sequence, align_numbering
@@ -22,7 +25,7 @@ from ..utils.error_stream import ErrorStream
22
25
  from .TCRStructure import TCRStructure
23
26
  from .Model import Model
24
27
  from .TCR import TCR, abTCR, gdTCR
25
- from .MHC import MHC, MH1, MH2, CD1, MR1, scMH1, scCD1
28
+ from .MHC import MHC, MH1, MH2, CD1, MR1, scMH1, scCD1, scMH2
26
29
  from .Holder import Holder
27
30
  from .TCRchain import TCRchain
28
31
  from .MHCchain import MHCchain
@@ -57,6 +60,10 @@ class TCRParser(PDBParser, MMCIFParser):
57
60
  self.numbering_scheme = "imgt"
58
61
  self.definition = "imgt"
59
62
 
63
+ self.current_file = (
64
+ None # the current file being processed, populated by get_tcr_structure
65
+ )
66
+
60
67
  def _create_chain(self, chain, new_chain_id, numbering, chain_type):
61
68
  """
62
69
  Create a new TCR or MHC chain.
@@ -193,19 +200,62 @@ class TCRParser(PDBParser, MMCIFParser):
193
200
 
194
201
  return newchain1, newchain2
195
202
 
196
- def get_tcr_structure(
197
- self, id, file, prenumbering=None, ali_dict={}, crystal_contacts=[]
198
- ):
199
- """
200
- Post processing of the TCRPDB.Bio.PDB structure object into a TCR context.
203
+ def _number_and_annotate_chain(self, chain, prenumbering=None, ali_dict={}):
204
+ # try to number the sequence found in the structure
205
+ if prenumbering and chain.id in prenumbering:
206
+ if len(prenumbering[chain.id]) == 2:
207
+ numbering = [{}, {}]
208
+ region_types = ["", ""]
201
209
 
202
- id: a string to identify the structure
203
- file: the path to the .pdb file
210
+ numbering[0], region_types[0] = self._prenumbered(
211
+ chain, prenumbering, ali_dict, n=0
212
+ )
213
+ numbering[1], region_types[1] = self._prenumbered(
214
+ chain, prenumbering, ali_dict, n=1
215
+ )
216
+ rtypes = sorted(region_types)
204
217
 
205
- optional:
206
- prenumbering: prenumbering for the chains in the structure.
207
- """
208
- self.warnings = ErrorStream()
218
+ # Check that we have a beta/alpha domain or gamma/delta domain
219
+ if rtypes == ["A", "B"] or rtypes == ["D", "G"]:
220
+ chain_type = "".join(region_types)
221
+ scTCR = True
222
+ # if not, just take the first region and warn the user
223
+ else:
224
+ chain_type = region_types[0]
225
+ numbering = numbering[0]
226
+ scTCR = False
227
+ print(
228
+ "Warning multiple variable regions of the same type (%s) found on chain %s.\nTaking the first variable region only."
229
+ % (chain_type, chain.id),
230
+ file=self.warnings,
231
+ )
232
+
233
+ elif prenumbering[chain.id][0][-1] not in ["B", "A", "D", "G"]:
234
+ numbering, chain_type, scTCR = annotate(chain)
235
+
236
+ else:
237
+ numbering, chain_type = self._prenumbered(
238
+ chain, prenumbering, ali_dict, n=0
239
+ )
240
+ scTCR = False
241
+
242
+ else:
243
+ numbering, chain_type, germline_info, scTCR = annotate(chain)
244
+
245
+ return numbering, chain_type, germline_info, scTCR
246
+
247
+ def _get_header_info(self, tcrstructure, chain, germline_info):
248
+ if chain.id in tcrstructure.header["chain_details"]: # clean this up!!!
249
+ engineered = tcrstructure.header["chain_details"][chain.id]["engineered"]
250
+ details = tcrstructure.header["chain_details"][chain.id]
251
+ else:
252
+ engineered = False
253
+ details = {"molecule": "unknown", "engineered": False}
254
+
255
+ details["genetic_origin"] = germline_info
256
+ return details, engineered
257
+
258
+ def _read_structure_file(self, file, id):
209
259
  # get a structure object from biopython.
210
260
  _, ext = os.path.splitext(file)
211
261
  if ext.lower() == ".pdb":
@@ -224,6 +274,45 @@ class TCRParser(PDBParser, MMCIFParser):
224
274
  # Set and analyse header information
225
275
  tcrstructure.set_header(structure.header)
226
276
  self._analyse_header(tcrstructure)
277
+ return structure, tcrstructure
278
+
279
+ def _initialise_model(self, model):
280
+ newmodel = Model(model.id)
281
+
282
+ # initialise holder objects for holding TCR, MHC and non-TCR/non-MHC (antigen) chains.
283
+ agchains = Holder("Antigen")
284
+ trchains = Holder("TCRchain")
285
+ mhchains = Holder("MHCchain")
286
+ newmodel.add(agchains)
287
+ newmodel.add(trchains)
288
+ newmodel.add(mhchains)
289
+ return newmodel, agchains, trchains, mhchains
290
+
291
+ def get_tcr_structure(
292
+ self,
293
+ id,
294
+ file,
295
+ prenumbering=None,
296
+ ali_dict={},
297
+ crystal_contacts=[],
298
+ include_symmetry_mates=True,
299
+ ):
300
+ """
301
+ Post processing of the TCRPDB.Bio.PDB structure object into a TCR context.
302
+
303
+ id: a string to identify the structure
304
+ file: the path to the .pdb file
305
+
306
+ optional:
307
+ prenumbering: prenumbering for the chains in the structure.
308
+ """
309
+ self.warnings = ErrorStream()
310
+ self.include_symmetry_mates = include_symmetry_mates
311
+ self.current_file = file
312
+
313
+ structure, tcrstructure = self._read_structure_file(
314
+ file, id
315
+ ) # structure: Bio.PDB.Structure from file; tcrstructure: initialised empty TCRStructure object to be populated
227
316
 
228
317
  # iterate over the models in the structure
229
318
  # iterate backwards through the model list - delete old structure as we go
@@ -232,70 +321,18 @@ class TCRParser(PDBParser, MMCIFParser):
232
321
  for mid in range(len(structure.child_list) - 1, -1, -1):
233
322
  # add a model to the TCR structure
234
323
  model = structure.child_list[mid]
235
- newmodel = Model(model.id)
324
+ newmodel, agchains, trchains, mhchains = self._initialise_model(model)
236
325
  tcrstructure.add(newmodel)
237
326
 
238
- # initialise holder objects for holding TCR, MHC and non-TCR/non-MHC (antigen) chains.
239
- agchains = Holder("Antigen")
240
- trchains = Holder("TCRchain")
241
- mhchains = Holder("MHCchain")
242
- newmodel.add(agchains)
243
- newmodel.add(trchains)
244
- newmodel.add(mhchains)
245
-
246
327
  # iterate over the chains in the model
247
328
  for chain in model.get_list():
248
- # try to number the sequence found in the structure
249
- if prenumbering and chain.id in prenumbering:
250
- if len(prenumbering[chain.id]) == 2:
251
- numbering = [{}, {}]
252
- region_types = ["", ""]
253
-
254
- numbering[0], region_types[0] = self._prenumbered(
255
- chain, prenumbering, ali_dict, n=0
256
- )
257
- numbering[1], region_types[1] = self._prenumbered(
258
- chain, prenumbering, ali_dict, n=1
259
- )
260
- rtypes = sorted(region_types)
261
-
262
- # Check that we have a beta/alpha domain or gamma/delta domain
263
- if rtypes == ["A", "B"] or rtypes == ["D", "G"]:
264
- chain_type = "".join(region_types)
265
- scTCR = True
266
- # if not, just take the first region and warn the user
267
- else:
268
- chain_type = region_types[0]
269
- numbering = numbering[0]
270
- scTCR = False
271
- print(
272
- "Warning multiple variable regions of the same type (%s) found on chain %s.\nTaking the first variable region only."
273
- % (chain_type, chain.id),
274
- file=self.warnings,
275
- )
276
-
277
- elif prenumbering[chain.id][0][-1] not in ["B", "A", "D", "G"]:
278
- numbering, chain_type, scTCR = annotate(chain)
279
-
280
- else:
281
- numbering, chain_type = self._prenumbered(
282
- chain, prenumbering, ali_dict, n=0
283
- )
284
- scTCR = False
285
-
286
- else:
287
- numbering, chain_type, germline_info, scTCR = annotate(chain)
288
-
289
- if chain.id in tcrstructure.header["chain_details"]: # clean this up!!!
290
- engineered = tcrstructure.header["chain_details"][chain.id][
291
- "engineered"
292
- ]
293
- details = tcrstructure.header["chain_details"][chain.id]
294
- else:
295
- engineered = False
296
- details = {"molecule": "unknown", "engineered": False}
329
+ numbering, chain_type, germline_info, scTCR = (
330
+ self._number_and_annotate_chain(chain, prenumbering, ali_dict)
331
+ )
297
332
 
298
- details["genetic_origin"] = germline_info
333
+ details, engineered = self._get_header_info(
334
+ tcrstructure, chain, germline_info
335
+ )
299
336
 
300
337
  if numbering and chain_type in ["G", "D", "B", "A"]:
301
338
  # create a new TCR chain
@@ -349,7 +386,9 @@ class TCRParser(PDBParser, MMCIFParser):
349
386
  elif not obs_chaintypes - set(["G", "D"]):
350
387
  tcr = gdTCR(chain1, chain2)
351
388
  elif not obs_chaintypes - set(["B", "D"]):
352
- tcr = abTCR(chain1, chain2) # initial way to deal with narci missclassification of alpha chains as delta chains
389
+ tcr = abTCR(
390
+ chain1, chain2
391
+ ) # initial way to deal with anarci missclassification of alpha chains as delta chains
353
392
  # tcr = dbTCR(chain1, chain2)
354
393
 
355
394
  tcr.scTCR = True #
@@ -461,6 +500,7 @@ class TCRParser(PDBParser, MMCIFParser):
461
500
  newmodel.add(mhc)
462
501
 
463
502
  # allow instantiation of single chain MH1 type MH class if the alpha helices forming chain has been observed
503
+ # allow instantiation of single chain MH2 type MH class if one of the GA or GB chain has been observed
464
504
  ids_to_detach = []
465
505
  for mhc_chain in mhchains:
466
506
  if mhc_chain.chain_type in ["MH1", "GA1", "GA2"]:
@@ -471,6 +511,13 @@ class TCRParser(PDBParser, MMCIFParser):
471
511
  ids_to_detach.append(mhc_chain.id)
472
512
  sc_mhc = scCD1(mhc_chain)
473
513
  newmodel.add(sc_mhc)
514
+ elif mhc_chain.chain_type in ["GA", "GB"]:
515
+ ids_to_detach.append(mhc_chain.id)
516
+ sc_mhc = scMH2(mhc_chain)
517
+ newmodel.add(sc_mhc)
518
+ warnings.warn(
519
+ f"Single chain MH class II instantiated with chain type {mhc_chain.chain_type}. It is possible the other MHC class II chain has not been identified."
520
+ )
474
521
 
475
522
  for mhc_chain_id in ids_to_detach:
476
523
  mhchains.detach_child(mhc_chain_id)
@@ -494,6 +541,7 @@ class TCRParser(PDBParser, MMCIFParser):
494
541
  sys.stderr.write("\n")
495
542
  tcrstructure.warnings = self.warnings
496
543
 
544
+ self.current_file = None # reset the current file
497
545
  return tcrstructure
498
546
 
499
547
  def _analyse_header(self, header):
@@ -778,19 +826,120 @@ class TCRParser(PDBParser, MMCIFParser):
778
826
 
779
827
  return hetatoms, sugars
780
828
 
781
- def _match_units(self, model, trchains, mhchains, agchains, crystal_contacts=[]):
782
- """
783
- Match MHC+Peptide chains to TCR chains.
784
- model is the current model - extract the TCRs from it (paired chains have been removed)
785
- trchains contains those TCR chains that have been unable to be paired to form TCRs
786
- agchains contains non-TCR chains that are potential antigens.
829
+ def _prepare_tcr(self, tr, cdr_atoms, antigen_hetatoms, antigen_sugars):
830
+ for cdr in tr.get_CDRs():
831
+ # Only get CDR3?
832
+ if "3" not in cdr.id:
833
+ continue
834
+ # only look at CA or CB atoms of the CDR; this is used later.
835
+ cdr_atoms[tr.id] += [
836
+ atom for atom in cdr.get_atoms() if atom.id == "CB" or atom.id == "CA"
837
+ ]
787
838
 
788
- Goal: Match TCR <-> MHC + peptide antigen.
789
- """
790
- # Get all T-cell receptor-like objects (TCR, TCRchain), and MHC-like objects.
791
- tcell_receptors = [h for h in model if isinstance(h, TCR)] + trchains.child_list
792
- mhc_complexes = [h for h in model if isinstance(h, MHC)] + mhchains.child_list
839
+ # get TCR type and get chain's hetatoms accordingly
840
+ if isinstance(tr, TCR) and tr.get_TCR_type() == "abTCR":
841
+ beta_chain = tr.get_VB()
842
+ alpha_chain = tr.get_VA()
843
+
844
+ antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = self._find_chain_hetatoms(
845
+ beta_chain
846
+ )
847
+ antigen_hetatoms[tr.VA], antigen_sugars[tr.VA] = self._find_chain_hetatoms(
848
+ alpha_chain
849
+ )
793
850
 
851
+ elif isinstance(tr, TCR) and tr.get_TCR_type() == "gdTCR":
852
+ delta_chain = tr.get_VD()
853
+ gamma_chain = tr.get_VG()
854
+ antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = self._find_chain_hetatoms(
855
+ delta_chain
856
+ )
857
+ antigen_hetatoms[tr.VG], antigen_sugars[tr.VG] = self._find_chain_hetatoms(
858
+ gamma_chain
859
+ )
860
+
861
+ elif isinstance(tr, TCR) and tr.get_TCR_type() == "dbTCR":
862
+ beta_chain = tr.get_VB()
863
+ delta_chain = tr.get_VD()
864
+ antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = self._find_chain_hetatoms(
865
+ beta_chain
866
+ )
867
+ antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = self._find_chain_hetatoms(
868
+ delta_chain
869
+ )
870
+
871
+ # Unpaired TCR chain
872
+ elif isinstance(tr, TCRchain):
873
+ antigen_hetatoms[tr.id], antigen_sugars[tr.id] = self._find_chain_hetatoms(
874
+ tr
875
+ )
876
+
877
+ def _prepare_mhc(self, mh, mh_atoms, antigen_hetatoms, antigen_sugars):
878
+ # Keep G domain atoms; Get the Helix region of MHC
879
+ mh_atoms[mh.id] = [
880
+ atom
881
+ for atom in mh.get_atoms()
882
+ if (atom.id == "CB" or atom.id == "CA") and atom.region == "Helix"
883
+ ]
884
+ if isinstance(mh, MHC) and mh.MHC_type == "MH1":
885
+ MH1, B2M = mh.get_MH1(), mh.get_B2M()
886
+ if MH1 is not None:
887
+ antigen_hetatoms[mh.MH1], antigen_sugars[mh.MH1] = (
888
+ self._find_chain_hetatoms(MH1)
889
+ )
890
+ else:
891
+ GA1 = mh.get_GA1()
892
+ antigen_hetatoms[mh.GA1], antigen_sugars[mh.GA1] = (
893
+ self._find_chain_hetatoms(GA1)
894
+ )
895
+ if B2M is not None: # handle single chain MH1 case
896
+ antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
897
+ self._find_chain_hetatoms(B2M)
898
+ )
899
+
900
+ elif isinstance(mh, MHC) and mh.MHC_type == "CD1":
901
+ CD1, B2M = mh.get_CD1(), mh.get_B2M()
902
+ if CD1 is not None:
903
+ antigen_hetatoms[mh.CD1], antigen_sugars[mh.CD1] = (
904
+ self._find_chain_hetatoms(CD1)
905
+ )
906
+ if B2M is not None:
907
+ antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
908
+ self._find_chain_hetatoms(B2M)
909
+ )
910
+
911
+ elif isinstance(mh, MHC) and mh.MHC_type == "MR1":
912
+ MR1, B2M = mh.get_MR1(), mh.get_B2M()
913
+ antigen_hetatoms[mh.MR1], antigen_sugars[mh.MR1] = (
914
+ self._find_chain_hetatoms(MR1)
915
+ )
916
+ antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
917
+ self._find_chain_hetatoms(B2M)
918
+ )
919
+
920
+ elif isinstance(mh, MHC) and mh.MHC_type == "MH2":
921
+ GA, GB = mh.get_GA(), mh.get_GB()
922
+ antigen_hetatoms[mh.GA], antigen_sugars[mh.GA] = self._find_chain_hetatoms(
923
+ GA
924
+ )
925
+ antigen_hetatoms[mh.GB], antigen_sugars[mh.GB] = self._find_chain_hetatoms(
926
+ GB
927
+ )
928
+
929
+ # Unpaired MHC chains -- if any, go here.
930
+ elif isinstance(mh, MHCchain):
931
+ antigen_hetatoms[mh.id], antigen_sugars[mh.id] = self._find_chain_hetatoms(
932
+ mh
933
+ )
934
+
935
+ def _prepare_tcrs_mhcs_and_antigens_for_pairing(
936
+ self,
937
+ model,
938
+ tcell_receptors,
939
+ mhc_complexes,
940
+ agchains,
941
+ crystal_contacts,
942
+ ):
794
943
  # Initialise 5 dictionaries which carries a list of atoms per chain ID.
795
944
  antigen_atoms, cdr_atoms, mh_atoms, antigen_hetatoms, antigen_sugars = (
796
945
  defaultdict(list),
@@ -802,113 +951,11 @@ class TCRParser(PDBParser, MMCIFParser):
802
951
 
803
952
  # Look through TCR and MHC and see if there are any weird hetatoms and sugars in the structure.
804
953
  for tr in tcell_receptors:
805
- for cdr in tr.get_CDRs():
806
- # Only get CDR3?
807
- if "3" not in cdr.id:
808
- continue
809
- # only look at CA or CB atoms of the CDR; this is used later.
810
- cdr_atoms[tr.id] += [
811
- atom
812
- for atom in cdr.get_atoms()
813
- if atom.id == "CB" or atom.id == "CA"
814
- ]
815
-
816
- # get TCR type and get chain's hetatoms accordingly
817
- if isinstance(tr, TCR) and tr.get_TCR_type() == "abTCR":
818
- beta_chain = tr.get_VB()
819
- alpha_chain = tr.get_VA()
820
-
821
- antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = (
822
- self._find_chain_hetatoms(beta_chain)
823
- )
824
- antigen_hetatoms[tr.VA], antigen_sugars[tr.VA] = (
825
- self._find_chain_hetatoms(alpha_chain)
826
- )
827
-
828
- elif isinstance(tr, TCR) and tr.get_TCR_type() == "gdTCR":
829
- delta_chain = tr.get_VD()
830
- gamma_chain = tr.get_VG()
831
- antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = (
832
- self._find_chain_hetatoms(delta_chain)
833
- )
834
- antigen_hetatoms[tr.VG], antigen_sugars[tr.VG] = (
835
- self._find_chain_hetatoms(gamma_chain)
836
- )
837
-
838
- elif isinstance(tr, TCR) and tr.get_TCR_type() == "dbTCR":
839
- beta_chain = tr.get_VB()
840
- delta_chain = tr.get_VD()
841
- antigen_hetatoms[tr.VB], antigen_sugars[tr.VB] = (
842
- self._find_chain_hetatoms(beta_chain)
843
- )
844
- antigen_hetatoms[tr.VD], antigen_sugars[tr.VD] = (
845
- self._find_chain_hetatoms(delta_chain)
846
- )
847
-
848
- # Unpaired TCR chain
849
- elif isinstance(tr, TCRchain):
850
- antigen_hetatoms[tr.id], antigen_sugars[tr.id] = (
851
- self._find_chain_hetatoms(tr)
852
- )
954
+ self._prepare_tcr(tr, cdr_atoms, antigen_hetatoms, antigen_sugars)
853
955
 
854
956
  # Do the same for MHC.
855
957
  for mh in mhc_complexes:
856
- # Keep G domain atoms; Get the Helix region of MHC
857
- mh_atoms[mh.id] = [
858
- atom
859
- for atom in mh.get_atoms()
860
- if (atom.id == "CB" or atom.id == "CA") and atom.region == "Helix"
861
- ]
862
- if isinstance(mh, MHC) and mh.MHC_type == "MH1":
863
- MH1, B2M = mh.get_MH1(), mh.get_B2M()
864
- if MH1 is not None:
865
- antigen_hetatoms[mh.MH1], antigen_sugars[mh.MH1] = (
866
- self._find_chain_hetatoms(MH1)
867
- )
868
- else:
869
- GA1 = mh.get_GA1()
870
- antigen_hetatoms[mh.GA1], antigen_sugars[mh.GA1] = (
871
- self._find_chain_hetatoms(GA1)
872
- )
873
- if B2M is not None: # handle single chain MH1 case
874
- antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
875
- self._find_chain_hetatoms(B2M)
876
- )
877
-
878
- elif isinstance(mh, MHC) and mh.MHC_type == "CD1":
879
- CD1, B2M = mh.get_CD1(), mh.get_B2M()
880
- if CD1 is not None:
881
- antigen_hetatoms[mh.CD1], antigen_sugars[mh.CD1] = (
882
- self._find_chain_hetatoms(CD1)
883
- )
884
- if B2M is not None:
885
- antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
886
- self._find_chain_hetatoms(B2M)
887
- )
888
-
889
- elif isinstance(mh, MHC) and mh.MHC_type == "MR1":
890
- MR1, B2M = mh.get_MR1(), mh.get_B2M()
891
- antigen_hetatoms[mh.MR1], antigen_sugars[mh.MR1] = (
892
- self._find_chain_hetatoms(MR1)
893
- )
894
- antigen_hetatoms[mh.B2M], antigen_sugars[mh.B2M] = (
895
- self._find_chain_hetatoms(B2M)
896
- )
897
-
898
- elif isinstance(mh, MHC) and mh.MHC_type == "MH2":
899
- GA, GB = mh.get_GA(), mh.get_GB()
900
- antigen_hetatoms[mh.GA], antigen_sugars[mh.GA] = (
901
- self._find_chain_hetatoms(GA)
902
- )
903
- antigen_hetatoms[mh.GB], antigen_sugars[mh.GB] = (
904
- self._find_chain_hetatoms(GB)
905
- )
906
-
907
- # Unpaired MHC chains -- if any, go here.
908
- elif isinstance(mh, MHCchain):
909
- antigen_hetatoms[mh.id], antigen_sugars[mh.id] = (
910
- self._find_chain_hetatoms(mh)
911
- )
958
+ self._prepare_mhc(mh, mh_atoms, antigen_hetatoms, antigen_sugars)
912
959
 
913
960
  for antigen in agchains:
914
961
  antigen_atoms[antigen.id] = [
@@ -954,7 +1001,18 @@ class TCRParser(PDBParser, MMCIFParser):
954
1001
 
955
1002
  # If a TCR does not have a detected MHC chain, then skip the remaining MHC-specific parsing bits.
956
1003
  if not mhc_complexes:
957
- return
1004
+ return (
1005
+ model,
1006
+ tcell_receptors,
1007
+ mhc_complexes,
1008
+ agchains,
1009
+ crystal_contacts,
1010
+ antigen_atoms,
1011
+ cdr_atoms,
1012
+ mh_atoms,
1013
+ antigen_hetatoms,
1014
+ antigen_sugars,
1015
+ )
958
1016
 
959
1017
  # Have a very tight cutoff for MHCs that present het atoms (e.g. CD1 types)
960
1018
  self._het_sugar_pass(
@@ -970,14 +1028,36 @@ class TCRParser(PDBParser, MMCIFParser):
970
1028
  self._protein_peptide_pass(
971
1029
  model, mhc_complexes, mh_atoms, antigen_atoms, crystal_contacts
972
1030
  )
1031
+ return (
1032
+ model,
1033
+ tcell_receptors,
1034
+ mhc_complexes,
1035
+ agchains,
1036
+ crystal_contacts,
1037
+ antigen_atoms,
1038
+ cdr_atoms,
1039
+ mh_atoms,
1040
+ antigen_hetatoms,
1041
+ antigen_sugars,
1042
+ )
973
1043
 
1044
+ def _pair_tcr_and_mhc(
1045
+ self,
1046
+ model,
1047
+ tcell_receptors,
1048
+ mhc_complexes,
1049
+ cdr_atoms,
1050
+ mh_atoms,
1051
+ crystal_contacts,
1052
+ already_paired_tr_mh=set(),
1053
+ ):
974
1054
  # Pair a TCR with an MHC and vice-versa; go through all possible combinations of TCR/MHC
975
1055
  # We see if a CB/CA atom of the helix region of an MHC is within 8A of a TCR CDR loop's CB/CA atoms.
976
1056
  # This is similar to the _protein_peptide_pass algorithm; we find the number of contacts between MHC and TCR,
977
1057
  # and use the MHC with highest no. of contacts
978
1058
  contact_freq = defaultdict(int)
979
- tr_mh_pairs = list(product(tcell_receptors, mhc_complexes))
980
1059
 
1060
+ tr_mh_pairs = list(product(tcell_receptors, mhc_complexes))
981
1061
  for tr, mh in tr_mh_pairs:
982
1062
  ns = NeighborSearch(cdr_atoms[tr.id])
983
1063
  for atom in mh_atoms[mh.id]:
@@ -990,24 +1070,115 @@ class TCRParser(PDBParser, MMCIFParser):
990
1070
  sorted_contacts = sorted(
991
1071
  list(contact_freq.items()), key=lambda z: z[1], reverse=True
992
1072
  )
993
- paired_tr_mh = set()
1073
+ paired_tr_mh = set() if not already_paired_tr_mh else already_paired_tr_mh
994
1074
  for pair, contacts in sorted_contacts:
995
1075
  tr, mh = pair
996
1076
  # If the TCR has already been paired, or if we know that the TCR and MHC are forming crystal contacts, move on.
997
1077
  if tr in paired_tr_mh or (tr, mh) in crystal_contacts:
998
1078
  continue
1079
+ if mh not in model:
1080
+ model.add([mhc for mhc in mhc_complexes if mhc.id == mh][0])
999
1081
  model[tr]._add_mhc(model[mh])
1000
1082
  model[mh]._add_tcr(model[tr])
1001
1083
  paired_tr_mh.add(tr)
1084
+ return model, paired_tr_mh
1085
+
1086
+ def _match_units(self, model, trchains, mhchains, agchains, crystal_contacts=[]):
1087
+ """
1088
+ Match MHC+Peptide chains to TCR chains.
1089
+ model is the current model - extract the TCRs from it (paired chains have been removed)
1090
+ trchains contains those TCR chains that have been unable to be paired to form TCRs
1091
+ agchains contains non-TCR chains that are potential antigens.
1092
+
1093
+ Goal: Match TCR <-> MHC + peptide antigen.
1094
+ """
1095
+ # Get all T-cell receptor-like objects (TCR, TCRchain), and MHC-like objects.
1096
+ tcell_receptors = [h for h in model if isinstance(h, TCR)] + trchains.child_list
1097
+ mhc_complexes = [h for h in model if isinstance(h, MHC)] + mhchains.child_list
1098
+
1099
+ (
1100
+ model,
1101
+ tcell_receptors,
1102
+ mhc_complexes,
1103
+ agchains,
1104
+ crystal_contacts,
1105
+ antigen_atoms,
1106
+ cdr_atoms,
1107
+ mh_atoms,
1108
+ antigen_hetatoms,
1109
+ antigen_sugars,
1110
+ ) = self._prepare_tcrs_mhcs_and_antigens_for_pairing(
1111
+ model,
1112
+ tcell_receptors,
1113
+ mhc_complexes,
1114
+ agchains,
1115
+ crystal_contacts,
1116
+ )
1117
+
1118
+ model, paired_tr_mh = self._pair_tcr_and_mhc(
1119
+ model=model,
1120
+ tcell_receptors=tcell_receptors,
1121
+ mhc_complexes=mhc_complexes,
1122
+ cdr_atoms=cdr_atoms,
1123
+ mh_atoms=mh_atoms,
1124
+ crystal_contacts=crystal_contacts,
1125
+ )
1126
+
1127
+ if (
1128
+ self.include_symmetry_mates
1129
+ and len(paired_tr_mh) != len(tcell_receptors)
1130
+ and len(mhc_complexes) > 0
1131
+ ): # check if all TCRs have been paired if MHC is present.
1132
+ # try searching for symmetry mates
1133
+ symmetry_mates = self._generate_symmetry_mates()
1134
+ mhc_complexes.extend([m for t in symmetry_mates for m in t.get_MHCs()])
1135
+
1136
+ (
1137
+ model,
1138
+ tcell_receptors,
1139
+ mhc_complexes,
1140
+ agchains,
1141
+ crystal_contacts,
1142
+ antigen_atoms,
1143
+ cdr_atoms,
1144
+ mh_atoms,
1145
+ antigen_hetatoms,
1146
+ antigen_sugars,
1147
+ ) = self._prepare_tcrs_mhcs_and_antigens_for_pairing(
1148
+ model,
1149
+ tcell_receptors,
1150
+ mhc_complexes,
1151
+ agchains,
1152
+ crystal_contacts,
1153
+ )
1154
+ model, paired_tr_mh = self._pair_tcr_and_mhc(
1155
+ model,
1156
+ tcell_receptors,
1157
+ mhc_complexes,
1158
+ cdr_atoms,
1159
+ mh_atoms,
1160
+ crystal_contacts,
1161
+ already_paired_tr_mh=paired_tr_mh,
1162
+ )
1163
+
1164
+ def _generate_symmetry_mates(self):
1165
+ print("Generating symmetry mates to pair antigens.")
1166
+ from .utils.symmetry_mates import (
1167
+ get_symmetry_mates,
1168
+ ) # import here to avoid circular import
1169
+
1170
+ return get_symmetry_mates(self.current_file)
1002
1171
 
1003
1172
  def _protein_peptide_pass(
1004
1173
  self, model, complexes, receptor_atoms, antigen_atoms, crystal_contacts=[]
1005
1174
  ):
1006
1175
  """
1007
1176
  This is a generic method to process which proteins/peptides belong to a TCR or MHC. Needs testing.
1008
- @param complexes: list of TCR/TCRchain objects or MHC/MHCchain objects
1009
- @param receptor_atoms: list of atom subset that will likely contact the antigen (e.g. cdr_atoms)
1010
- @param antigen_atoms: list of atoms in the antigen.
1177
+
1178
+ Args:
1179
+ complexes: list of TCR/TCRchain objects or MHC/MHCchain objects
1180
+ receptor_atoms: list of atom subset that will likely contact the antigen (e.g. cdr_atoms)
1181
+ antigen_atoms: list of atoms in the antigen.
1011
1182
  """
1012
1183
  ns = NeighborSearch(
1013
1184
  [atom for chain in receptor_atoms for atom in receptor_atoms[chain]]