stcrpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. examples/__init__.py +0 -0
  2. examples/egnn.py +425 -0
  3. stcrpy/__init__.py +5 -0
  4. stcrpy/tcr_datasets/__init__.py +0 -0
  5. stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  6. stcrpy/tcr_datasets/tcr_selector.py +0 -0
  7. stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  8. stcrpy/tcr_datasets/utils.py +350 -0
  9. stcrpy/tcr_formats/__init__.py +0 -0
  10. stcrpy/tcr_formats/tcr_formats.py +114 -0
  11. stcrpy/tcr_formats/tcr_haddock.py +556 -0
  12. stcrpy/tcr_geometry/TCRCoM.py +350 -0
  13. stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  14. stcrpy/tcr_geometry/TCRDock.py +261 -0
  15. stcrpy/tcr_geometry/TCRGeom.py +450 -0
  16. stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  17. stcrpy/tcr_geometry/__init__.py +0 -0
  18. stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  19. stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  20. stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  21. stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  22. stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  23. stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  24. stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  25. stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  26. stcrpy/tcr_interactions/PLIPParser.py +147 -0
  27. stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  28. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  29. stcrpy/tcr_interactions/__init__.py +0 -0
  30. stcrpy/tcr_interactions/utils.py +170 -0
  31. stcrpy/tcr_methods/__init__.py +0 -0
  32. stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  33. stcrpy/tcr_methods/tcr_methods.py +150 -0
  34. stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  35. stcrpy/tcr_metrics/__init__.py +2 -0
  36. stcrpy/tcr_metrics/constants.py +39 -0
  37. stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  38. stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  39. stcrpy/tcr_ml/__init__.py +0 -0
  40. stcrpy/tcr_ml/geometry_predictor.py +3 -0
  41. stcrpy/tcr_processing/AGchain.py +89 -0
  42. stcrpy/tcr_processing/Chemical_components.py +48915 -0
  43. stcrpy/tcr_processing/Entity.py +301 -0
  44. stcrpy/tcr_processing/Fragment.py +58 -0
  45. stcrpy/tcr_processing/Holder.py +24 -0
  46. stcrpy/tcr_processing/MHC.py +449 -0
  47. stcrpy/tcr_processing/MHCchain.py +149 -0
  48. stcrpy/tcr_processing/Model.py +37 -0
  49. stcrpy/tcr_processing/Select.py +145 -0
  50. stcrpy/tcr_processing/TCR.py +532 -0
  51. stcrpy/tcr_processing/TCRIO.py +47 -0
  52. stcrpy/tcr_processing/TCRParser.py +1230 -0
  53. stcrpy/tcr_processing/TCRStructure.py +148 -0
  54. stcrpy/tcr_processing/TCRchain.py +160 -0
  55. stcrpy/tcr_processing/__init__.py +3 -0
  56. stcrpy/tcr_processing/annotate.py +480 -0
  57. stcrpy/tcr_processing/utils/__init__.py +0 -0
  58. stcrpy/tcr_processing/utils/common.py +67 -0
  59. stcrpy/tcr_processing/utils/constants.py +367 -0
  60. stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  61. stcrpy/utils/__init__.py +0 -0
  62. stcrpy/utils/error_stream.py +12 -0
  63. stcrpy-1.0.0.dist-info/METADATA +173 -0
  64. stcrpy-1.0.0.dist-info/RECORD +68 -0
  65. stcrpy-1.0.0.dist-info/WHEEL +5 -0
  66. stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
  67. stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  68. stcrpy-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,556 @@
1
+ import os
2
+ import re
3
+ import warnings
4
+ import numpy as np
5
+
6
+ import Bio
7
+ from Bio.PDB.Superimposer import Superimposer
8
+
9
+ from .. import tcr_processing
10
+
11
+
12
class HADDOCKFormatter:
    """Write TCR and pMHC structures as PDB files renumbered for HADDOCK.

    For every structure written, three files are produced inside a
    sub-directory of ``save_dir`` named after the structure: the renumbered
    PDB file, a text file listing the active residues, and a text file mapping
    original residue IDs to the new residue IDs.
    """

    def __init__(self, save_dir: str = None):
        """Constructor HADDOCK formatting object.

        Args:
            save_dir (str, optional): Path to save formatted files to.
                Defaults to None, in which case the current directory is used.
        """
        self.save_dir = save_dir if save_dir is not None else "."

    def tcr_to_haddock(self, tcr: "TCR"):
        """Bound reformatting of TCR structure object to HADDOCK compatible PDB file.

        Files are written below ``self.save_dir``.

        Args:
            tcr (TCR): TCR structure object
        """
        self.write_TCR_pdb_file(tcr, self.save_dir)

    def pMHC_to_haddock(self, mhc: "MHC", antigen: list["Antigen"]):
        """Bound reformatting of MHC and antigen structures object to HADDOCK compatible PDB file.

        Files are written below ``self.save_dir``.

        Args:
            mhc (MHC): MHC structure object
            antigen (list[Antigen]): List of antigen chains.
        """
        self.write_antigen_pdb_file(mhc, antigen, self.save_dir)

    def write_TCR_pdb_file(self, tcr: "TCR", save_dir: str):
        """
        Writes TCR structure to a PDB file in a format HADDOCK can deal with.
        Generates a PDB file, a mapping from the old to the new numbering,
        and a list of active residues to restrain the HADDOCK simulation.

        Only residues numbered 1 to 129 are kept. Residue numbers of the
        chain at enumeration index ``i`` are shifted by ``200 * i``; residues
        carrying an insertion code are renumbered to
        ``10 * number + 200 * i + imgt_insertion_char_to_int(code)`` and
        their insertion code is cleared.

        Args:
            tcr (TCR): The TCR structure.
            save_dir (str): The directory below which the output directory is
                created. Missing parent directories are created as needed.

        Returns:
            str: The filename of the saved TCR PDB file.
        """
        tcr_id = f"{tcr.parent.parent.id}_{tcr.id}"
        new_tcr_structure = Bio.PDB.Model.Model(id=0)
        residue_conversion = {}
        for i, chain in enumerate(tcr.get_chains()):
            residue_conversion[chain.id] = {}
            new_chain = Bio.PDB.Chain.Chain(id=chain.id)
            for residue in Bio.PDB.Selection.unfold_entities(chain, "R"):
                # keep residues numbered 1..129 only
                if not 1 <= residue.id[1] < 130:
                    continue
                # handle insertion numbering for HADDOCK
                new_residue = residue.copy()
                if new_residue.id[-1] != " ":
                    new_residue.id = (
                        new_residue.id[0],
                        10 * new_residue.id[1]
                        + (200 * i)
                        + imgt_insertion_char_to_int(new_residue.id[-1]),
                        " ",
                    )
                else:
                    new_residue.id = (
                        new_residue.id[0],
                        new_residue.id[1] + (200 * i),
                        new_residue.id[-1],
                    )
                # only residues whose ID actually changed are recorded
                if new_residue.id != residue.id:
                    residue_conversion[chain.id][residue.id] = new_residue.id
                new_chain.add(new_residue)
            new_tcr_structure.add(new_chain)

        # makedirs also creates a missing save_dir and tolerates reruns
        os.makedirs(os.path.join(save_dir, tcr_id), exist_ok=True)

        # CDR residues, in their HADDOCK numbering, are the active residues
        active_residues = []
        for chain in tcr.get_chains():
            conversion = residue_conversion[chain.id]
            for cdr in chain.get_CDRs():
                for res in cdr.get_residues():
                    active_residues.append(conversion.get(res.id, res.id)[1])

        with open(
            os.path.join(save_dir, f"{tcr_id}/{tcr_id}_haddock_active_residues.txt"),
            "w",
        ) as f:
            f.write("TCR ACTIVE RESIDUES FOR HADDOCK\n")
            f.write(",".join([str(r) for r in active_residues]))
            f.write("\n")

        with open(
            os.path.join(save_dir, f"{tcr_id}/{tcr_id}_haddock_renumbering.txt"), "w"
        ) as f:
            f.write("TCR RESIDUE RENUMBERING FOR HADDOCK\n")
            for chain_id in residue_conversion:
                for res, new_res in residue_conversion[chain_id].items():
                    # the line layout is what parse_renumbered_line reads back
                    r_as_str = f"({chain_id},({res[0]},{res[1]},{res[2]}),({new_res[0]},{new_res[1]},{new_res[2]})\n"
                    f.write(r_as_str)

        pdb_io = Bio.PDB.PDBIO()
        pdb_io.set_structure(new_tcr_structure)
        filename = os.path.join(save_dir, f"{tcr_id}/{tcr_id}_tcr_for_docking.pdb")
        pdb_io.save(filename)
        return filename

    def write_antigen_pdb_file(
        self, mhc: "MHC", antigen: list["Antigen"], save_dir: str
    ):
        """
        Writes the antigen PDB file for docking with HADDOCK.
        Generates a PDB file, a file containing the renumbering mapping, and a list of active residues to restrict the simulation.

        MHC chains whose ``chain_type`` is "B2M" are left out. Residue numbers
        of the chain at enumeration index ``i`` (MHC chains first, then the
        antigen chains) are shifted by ``500 * i``.

        Args:
            mhc (MHC): MHC structure object.
            antigen (list[Antigen]): List containing antigen chain. Should be length 1.
            save_dir (str): The directory below which the output directory is
                created. Missing parent directories are created as needed.

        Returns:
            str: The filename of the saved antigen PDB file.
        """
        mhc_chains = [c for c in mhc.get_chains() if c.chain_type != "B2M"]
        antigen_chains = mhc_chains + antigen
        mhc_id = f"{mhc.parent.parent.id}_MHC_{''.join([c.id for c in antigen_chains])}"

        new_antigen_structure = Bio.PDB.Model.Model(id=0)
        residue_conversion = {}
        for i, chain in enumerate(antigen_chains):
            residue_conversion[chain.id] = {}
            new_chain = Bio.PDB.Chain.Chain(id=chain.id)
            for residue in chain.get_residues():
                # shift numbering per chain for HADDOCK
                new_residue = residue.copy()
                new_residue.id = (
                    new_residue.id[0],
                    new_residue.id[1] + (500 * i),
                    new_residue.id[-1],
                )
                # the first chain (i == 0) keeps its numbering and is not recorded
                if new_residue.id != residue.id:
                    residue_conversion[chain.id][residue.id] = new_residue.id
                new_chain.add(new_residue)
            new_antigen_structure.add(new_chain)

        # makedirs also creates a missing save_dir and tolerates reruns
        os.makedirs(os.path.join(save_dir, mhc_id), exist_ok=True)

        # get peptide numbering and select as active
        active_residues = []
        for chain in antigen:
            conversion = residue_conversion[chain.id]
            for res in chain.get_residues():
                active_residues.append(conversion.get(res.id, res.id)[1])

        # NOTE(review): both antigen text files are opened in append mode, so
        # running this twice for the same structure adds a second header and
        # duplicate entries to the existing files - confirm this is intended.
        with open(
            os.path.join(save_dir, f"{mhc_id}/{mhc_id}_haddock_active_residues.txt"),
            "a",
        ) as f:
            f.write("ANTIGEN ACTIVE RESIDUES FOR HADDOCK\n")
            f.write(",".join([str(r) for r in active_residues]))
            f.write("\n")

        with open(
            os.path.join(save_dir, f"{mhc_id}/{mhc_id}_haddock_renumbering.txt"), "a"
        ) as f:
            f.write("ANTIGEN RESIDUE RENUMBERING FOR HADDOCK\n")
            for chain in residue_conversion:
                for res, new_res in residue_conversion[chain].items():
                    # the line layout is what parse_renumbered_line reads back
                    r_as_str = f"({chain},({res[0]},{res[1]},{res[2]}),({new_res[0]},{new_res[1]},{new_res[2]})\n"
                    f.write(r_as_str)

        pdb_io = Bio.PDB.PDBIO()
        pdb_io.set_structure(new_antigen_structure)
        filename = os.path.join(save_dir, f"{mhc_id}/{mhc_id}_antigen_for_docking.pdb")
        pdb_io.save(filename)
        return filename
191
+
192
+
193
class HADDOCKResultsParser:
    """Parse HADDOCK docking results and restore the original residue numbering."""

    # header line that starts the antigen section of a renumbering file
    _ANTIGEN_RENUMBERING_HEADER = "ANTIGEN RESIDUE RENUMBERING FOR HADDOCK\n"

    def __init__(
        self,
        haddock_results_dir: str,
        tcr_renumbering_file: str = None,
        pmhc_renumbering_file: str = None,
    ):
        """Parser for results from HADDOCK simulations. Renumbers TCR, MHC and Antigen using renumbering files, and parses result metrics.

        A warning is issued when ``haddock_results_dir`` ends with ".tgz".

        Args:
            haddock_results_dir (str): path to HADDOCK simulation results.
            tcr_renumbering_file (str, optional): path to text file containing TCR renumbering to restore from HADDOCK compatible numbering. Defaults to None.
            pmhc_renumbering_file (str, optional): path to text file containing MHC and antigen renumbering to restore from HADDOCK compatible numbering. Defaults to None.
        """

        self.haddock_results_dir = haddock_results_dir
        self.tcr_renumbering_file = tcr_renumbering_file
        self.pmhc_renumbering_file = pmhc_renumbering_file

        if self.haddock_results_dir.endswith(".tgz"):
            warnings.warn(
                "HADDOCK results are compressed. Decompress results before proceeding."
            )

    @staticmethod
    def _renumbered_output_path(docked_prediction_file: str) -> str:
        """Return the path of the renumbered copy, next to the input file.

        The file name gets a "renumbered_" prefix; the directory part is kept
        as given, so absolute paths stay absolute.
        """
        directory, basename = os.path.split(docked_prediction_file)
        return os.path.join(directory, "renumbered_" + basename)

    def renumber_all_haddock_predictions(self):
        """Renumber all haddock predictions contained in results folder. Requires standard HADDOCK output directory format.

        Every file in ``<haddock_results_dir>/structures/it1/`` whose name
        starts with a match of ``complex_.*\\.pdb`` is renumbered.
        """
        path = os.path.join(self.haddock_results_dir, "structures/it1/")
        pattern = re.compile(r"complex_.*\.pdb")

        for filename in os.listdir(path):
            if pattern.match(filename):
                file_path = os.path.join(path, filename)
                self.renumber_haddock_prediction(
                    file_path,
                    self.tcr_renumbering_file,
                    self.pmhc_renumbering_file,
                )

    def renumber_haddock_prediction(
        self,
        docked_prediction_file: str,
        haddock_renumbering_file: str,
        antigen_renumbering_file: str = None,
    ) -> "Bio.PDB.Model.Model":
        """
        Renumber the HADDOCK prediction based on the renumbering files.

        The renumbered complex is saved next to ``docked_prediction_file``
        with a "renumbered_" prefix on the file name.

        Args:
            docked_prediction_file (str): Path to the docked prediction file.
            haddock_renumbering_file (str): Path to the HADDOCK renumbering file.
            antigen_renumbering_file (str, optional): Path to the antigen renumbering file.
                Needed for TCR only PDBs with no antigen. Defaults to None.

        Returns:
            Bio.PDB.Model.Model: The renumbered HADDOCK prediction.

        Raises:
            ValueError: If the renumbering index is not found in the renumbering file.

        """

        # initialise file parsers
        tcr_parser = tcr_processing.TCRParser.TCRParser()
        bio_parser = Bio.PDB.PDBParser()

        # find chain ID of TCR to distinguish TCR from antigen
        tcr_chain_id = list(
            tcr_parser.get_tcr_structure("tmp", docked_prediction_file).get_TCRchains()
        )[0].get_id()
        docked_prediction = bio_parser.get_structure("docked", docked_prediction_file)

        # get chains of HADDOCK dock
        merged_tcr_chain = docked_prediction[0][tcr_chain_id]
        merged_antigen_chain = [
            chain
            for chain in docked_prediction.get_chains()
            if chain.id != merged_tcr_chain.id
        ][0]

        # get renumbering
        with open(haddock_renumbering_file, "r") as f:
            lines = f.readlines()

        try:
            antigen_renumbering_index = lines.index(self._ANTIGEN_RENUMBERING_HEADER)
            antigen_renumber_indices = (
                antigen_renumbering_index + 1,
                -1,
            )
        except ValueError:
            # NOTE(review): with no antigen section and no separate antigen
            # file, the slices below treat the last line of the file as the
            # antigen renumbering - confirm this fallback is intended.
            antigen_renumbering_index = -1
            antigen_renumber_indices = (
                -1,
                -1,
            )
        # line 0 is the TCR header; TCR entries run up to the antigen header
        tcr_renumber_indices = (1, antigen_renumbering_index)

        # if antigen renumbering file is provided, get antigen renumbering from there
        if antigen_renumbering_file is not None:
            lines = (
                lines[: antigen_renumber_indices[0] - 1]
                if antigen_renumber_indices[0] != -1
                else lines
            )
            # slice ends are exclusive: len(lines) keeps the last TCR entry
            tcr_renumber_indices = (1, len(lines))
            antigen_renumber_indices = (len(lines) + 1, -1)
            with open(antigen_renumbering_file, "r") as f:
                antigen_xtal_lines = f.readlines()
            antigen_renumbering_index = antigen_xtal_lines.index(
                self._ANTIGEN_RENUMBERING_HEADER
            )
            lines.extend(antigen_xtal_lines[antigen_renumbering_index:])

        # renumber TCR by creating new PDB model and populating with residues
        tcr_parsed_lines = list(
            map(
                parse_renumbered_line,
                lines[tcr_renumber_indices[0] : tcr_renumber_indices[1]],
            )
        )
        changed_tcr_chain_ids, _, _ = list(zip(*tcr_parsed_lines))

        tcr = Bio.PDB.Model.Model(id=0)

        # the chain with the fewest renumbering entries is taken as the one
        # whose numbering was (mostly) conserved
        if len(set(changed_tcr_chain_ids)) > 1:
            id_for_conserved_numbered_chain = min(
                set(changed_tcr_chain_ids), key=changed_tcr_chain_ids.count
            )
        else:
            id_for_conserved_numbered_chain = merged_tcr_chain.id
        tcr.add(Bio.PDB.Chain.Chain(id=id_for_conserved_numbered_chain))
        try:
            tcr.add(
                Bio.PDB.Chain.Chain(
                    id=max(set(changed_tcr_chain_ids), key=changed_tcr_chain_ids.count)
                )
            )
            second_tcr_chain_id = None
        except Bio.PDB.PDBExceptions.PDBConstructionException:
            # the chain ID is already in the model: fall back to the first free letter
            for id_to_try in "ABCDEFGH":
                try:
                    tcr.add(Bio.PDB.Chain.Chain(id=id_to_try))
                    second_tcr_chain_id = id_to_try
                    break
                except Bio.PDB.PDBExceptions.PDBConstructionException:
                    continue

        renumbered_residues = {}
        for renumbering in tcr_parsed_lines:
            try:
                # renumbering is (chain ID, original residue ID, HADDOCK residue ID)
                residue = merged_tcr_chain[renumbering[-1]]
                merged_tcr_chain.detach_child(renumbering[-1])
                residue.id = renumbering[1]
                if second_tcr_chain_id is None:
                    if renumbering[0] not in renumbered_residues:
                        renumbered_residues[renumbering[0]] = []
                    renumbered_residues[renumbering[0]].append(residue)
                else:
                    if second_tcr_chain_id not in renumbered_residues:
                        renumbered_residues[second_tcr_chain_id] = []
                    renumbered_residues[second_tcr_chain_id].append(residue)
            except KeyError as e:
                warnings.warn(
                    f"""Renumbering {renumbering} failed with Key Error {e}"""
                )
        # residues left in the merged chain were not renumbered
        for residue in merged_tcr_chain.get_residues():
            if id_for_conserved_numbered_chain not in renumbered_residues:
                renumbered_residues[id_for_conserved_numbered_chain] = []
            renumbered_residues[id_for_conserved_numbered_chain].append(residue)

        # sort the residues
        for chain_id in renumbered_residues:
            sorted_residues = sort_residues_by_imgt_numbering(
                renumbered_residues[chain_id]
            )
            for res in sorted_residues:
                tcr[chain_id].add(res)

        # renumber antigen
        antigen_parsed_lines = list(
            map(
                parse_renumbered_line,
                lines[antigen_renumber_indices[0] :],
            )
        )
        changed_antigen_chain_ids, _, _ = list(zip(*antigen_parsed_lines))
        try:
            tcr.add(Bio.PDB.Chain.Chain(id=merged_antigen_chain.id))
        except Bio.PDB.PDBExceptions.PDBConstructionException:
            # chain ID clashes with a TCR chain: rename to the first free letter
            for id_to_try in "ABCDEFGH":
                if id_to_try == set(changed_antigen_chain_ids).pop():
                    continue
                try:
                    tcr.add(Bio.PDB.Chain.Chain(id=id_to_try))
                    merged_antigen_chain.id = id_to_try
                    break
                except Bio.PDB.PDBExceptions.PDBConstructionException:
                    continue
        assert (
            len(set(changed_antigen_chain_ids)) == 1
        ), "More than one chain renumbered in renumbering file"
        try:
            tcr.add(Bio.PDB.Chain.Chain(id=set(changed_antigen_chain_ids).pop()))
            new_antigen_chain_id = None
        except Bio.PDB.PDBExceptions.PDBConstructionException:
            for id_to_try in "ABCDEFGH":
                if id_to_try == set(changed_antigen_chain_ids).pop():
                    continue
                try:
                    tcr.add(Bio.PDB.Chain.Chain(id=id_to_try))
                    new_antigen_chain_id = id_to_try
                    break
                except Bio.PDB.PDBExceptions.PDBConstructionException:
                    continue

        for renumbering in antigen_parsed_lines:
            try:
                residue = merged_antigen_chain[renumbering[-1]]
                merged_antigen_chain.detach_child(renumbering[-1])
                residue.id = renumbering[1]
                if new_antigen_chain_id is None:
                    tcr[renumbering[0]].add(residue)
                else:
                    tcr[new_antigen_chain_id].add(residue)
            except KeyError as e:
                warnings.warn(
                    f"""Renumbering {renumbering} failed with Key Error {e}"""
                )
        # residues left in the merged chain were not renumbered
        for residue in merged_antigen_chain.get_residues():
            tcr[merged_antigen_chain.id].add(residue)

        # create structure object and save
        tcr_struct = Bio.PDB.Structure.Structure(id=0)
        tcr_struct.add(tcr)

        pdb_io = Bio.PDB.PDBIO()
        pdb_io.set_structure(tcr_struct)
        pdb_io.save(self._renumbered_output_path(docked_prediction_file))
        return tcr

    def get_haddock_scores(self) -> "pandas.DataFrame":
        """Retrieve HADDOCK energy scores and RMSD evaluations from simulation output.

        Reads ``complex_HS_irmsd_lrmsd_fnat.list`` from the results directory
        as a space-separated table with the columns:
        "haddock_score", "interface_rmsd", "ligand_rmsd",
        "frac_common_contacts", "E_vdw", "E_elec", "E_air", "E_desolv",
        "ligand_rmsd_2", "cluster_id".

        Raises:
            FileNotFoundError: HADDOCK file containing scores not found.

        Returns:
            pandas.DataFrame: DataFrame with HADDOCK simulation metrics.
        """
        # imported here so pandas is only needed when scores are requested
        import pandas as pd

        haddock_columns = [
            "haddock_score",
            "interface_rmsd",
            "ligand_rmsd",
            "frac_common_contacts",
            "E_vdw",
            "E_elec",
            "E_air",
            "E_desolv",
            "ligand_rmsd_2",
            "cluster_id",
        ]
        haddock_scores_file = "complex_HS_irmsd_lrmsd_fnat.list"
        try:
            return pd.read_csv(
                os.path.join(self.haddock_results_dir, haddock_scores_file),
                sep=" ",
                names=haddock_columns,
            )
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"File: complex_HS_irmsd_lrmsd_fnat.list containing HADDOCK docking metrics not found in {self.haddock_results_dir}"
            ) from err
487
+
488
+
489
def imgt_insertion_char_to_int(char: str) -> int:
    """
    Map an IMGT insertion character to its 1-based offset from "A".

    Args:
        char (str): A single insertion character, e.g. "A".

    Returns:
        int: ``ord(char)`` relative to ``ord("A")`` plus one, so "A" gives 1
            and "B" gives 2.
    """
    offset_from_a = ord(char) - ord("A")
    return offset_from_a + 1
500
+
501
+
502
def parse_renumbered_line(line: str) -> tuple:
    """
    Parses a renumbered line from a file and extracts the chain ID, original numbering, and HADDOCK numbering.

    The expected layout is ``(<chain>,(<a>,<b>,<c>),(<a>,<b>,<c>)`` with an
    optional trailing newline. Fields that are (optionally signed) integers
    are converted to ``int``; empty or blank fields become a single space.

    Args:
        line (str): The renumbered line to parse.

    Returns:
        tuple: A tuple containing the chain ID, original numbering, and HADDOCK numbering.

    Raises:
        ValueError: If the line does not follow the expected layout.

    Example:
        line = "(O,( ,3, ),( ,203, )"
        result = parse_renumbered_line(line)
        # Output: ('O', (' ', 3, ' '), (' ', 203, ' '))
    """

    def parse_residue_id(text):
        # convert each comma-separated field; blank fields map to " "
        fields = []
        for raw_field in text.split(","):
            field = raw_field.strip()
            if re.fullmatch(r"[+-]?\d+", field):
                # signed match so negative residue numbers become ints too
                fields.append(int(field))
            else:
                fields.append(field if field != "" else " ")
        return tuple(fields)

    match = re.match(
        r"\((?P<chain>[^,(]*),\((?P<original>[^()]*)\),\((?P<haddock>[^()]*)\)",
        line,
    )
    if match is None:
        raise ValueError(f"Cannot parse renumbering line: {line!r}")

    return (
        match.group("chain"),
        parse_residue_id(match.group("original")),
        parse_residue_id(match.group("haddock")),
    )
532
+
533
+
534
def sort_residues_by_imgt_numbering(
    residues: "list[Bio.PDB.Residue]",
) -> "list[Bio.PDB.Residue]":
    """Return the residues ordered by IMGT numbering.

    Residues are ordered by residue number and then insertion code, except
    that the residues numbered 112 are placed in descending insertion-code
    order.

    Args:
        residues (list[Bio.PDB.Residue]): List of IMGT numbered residues.

    Returns:
        list[Bio.PDB.Residue]: Sorted list of IMGT numbered residues.
    """
    ordered = sorted(residues, key=lambda res: (res.id[1], res.id[2]))

    # positions currently occupied by residues numbered 112
    positions = [pos for pos, res in enumerate(ordered) if res.id[1] == 112]
    if positions:
        descending = sorted(
            (ordered[pos] for pos in positions),
            key=lambda res: res.id[2],
            reverse=True,
        )
        # write the reversed run back into the same slots
        for pos, res in zip(positions, descending):
            ordered[pos] = res
    return ordered