stcrpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. examples/__init__.py +0 -0
  2. examples/egnn.py +425 -0
  3. stcrpy/__init__.py +5 -0
  4. stcrpy/tcr_datasets/__init__.py +0 -0
  5. stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  6. stcrpy/tcr_datasets/tcr_selector.py +0 -0
  7. stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  8. stcrpy/tcr_datasets/utils.py +350 -0
  9. stcrpy/tcr_formats/__init__.py +0 -0
  10. stcrpy/tcr_formats/tcr_formats.py +114 -0
  11. stcrpy/tcr_formats/tcr_haddock.py +556 -0
  12. stcrpy/tcr_geometry/TCRCoM.py +350 -0
  13. stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  14. stcrpy/tcr_geometry/TCRDock.py +261 -0
  15. stcrpy/tcr_geometry/TCRGeom.py +450 -0
  16. stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  17. stcrpy/tcr_geometry/__init__.py +0 -0
  18. stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  19. stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  20. stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  21. stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  22. stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  23. stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  24. stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  25. stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  26. stcrpy/tcr_interactions/PLIPParser.py +147 -0
  27. stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  28. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  29. stcrpy/tcr_interactions/__init__.py +0 -0
  30. stcrpy/tcr_interactions/utils.py +170 -0
  31. stcrpy/tcr_methods/__init__.py +0 -0
  32. stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  33. stcrpy/tcr_methods/tcr_methods.py +150 -0
  34. stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  35. stcrpy/tcr_metrics/__init__.py +2 -0
  36. stcrpy/tcr_metrics/constants.py +39 -0
  37. stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  38. stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  39. stcrpy/tcr_ml/__init__.py +0 -0
  40. stcrpy/tcr_ml/geometry_predictor.py +3 -0
  41. stcrpy/tcr_processing/AGchain.py +89 -0
  42. stcrpy/tcr_processing/Chemical_components.py +48915 -0
  43. stcrpy/tcr_processing/Entity.py +301 -0
  44. stcrpy/tcr_processing/Fragment.py +58 -0
  45. stcrpy/tcr_processing/Holder.py +24 -0
  46. stcrpy/tcr_processing/MHC.py +449 -0
  47. stcrpy/tcr_processing/MHCchain.py +149 -0
  48. stcrpy/tcr_processing/Model.py +37 -0
  49. stcrpy/tcr_processing/Select.py +145 -0
  50. stcrpy/tcr_processing/TCR.py +532 -0
  51. stcrpy/tcr_processing/TCRIO.py +47 -0
  52. stcrpy/tcr_processing/TCRParser.py +1230 -0
  53. stcrpy/tcr_processing/TCRStructure.py +148 -0
  54. stcrpy/tcr_processing/TCRchain.py +160 -0
  55. stcrpy/tcr_processing/__init__.py +3 -0
  56. stcrpy/tcr_processing/annotate.py +480 -0
  57. stcrpy/tcr_processing/utils/__init__.py +0 -0
  58. stcrpy/tcr_processing/utils/common.py +67 -0
  59. stcrpy/tcr_processing/utils/constants.py +367 -0
  60. stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  61. stcrpy/utils/__init__.py +0 -0
  62. stcrpy/utils/error_stream.py +12 -0
  63. stcrpy-1.0.0.dist-info/METADATA +173 -0
  64. stcrpy-1.0.0.dist-info/RECORD +68 -0
  65. stcrpy-1.0.0.dist-info/WHEEL +5 -0
  66. stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
  67. stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  68. stcrpy-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,170 @@
1
+ import warnings
2
+
3
+ try:
4
+ from plip.structure.preparation import PDBComplex
5
+ except ModuleNotFoundError:
6
+ warnings.warn(
7
+ """\n\nPLIP package not found. \nProfiling interactions will not be possible \nTo enable interaction profiling, install PLIP with:
8
+ \npip install plip --no-deps\n\n"""
9
+ )
10
+ from rdkit import Chem
11
+
12
+
13
def return_interactions(
    protein_file=None, ligand_file=None, complex_file=None, pymol_visualization=False
):
    """Build a protein-ligand complex in memory and analyse it with PLIP.

    The ``ATOM`` records of ``protein_file`` are concatenated with a PDB block
    generated by RDKit from ``ligand_file``; the combined text is loaded into a
    ``PDBComplex`` and analysed.

    Parameters:
        protein_file: Path of the text file whose ``ATOM`` lines are kept.
        ligand_file: Path handed to ``Chem.MolFromMolFile``.
        complex_file: Not used by this function.
        pymol_visualization: Not used by this function.

    Returns:
        The ``PDBComplex`` instance after ``analyze()`` has been called.
    """
    # Only lines starting with "ATOM" are kept; every other record is dropped.
    with open(protein_file, "r") as handle:
        atom_records = [
            record.rstrip("\n") for record in handle if record.startswith("ATOM")
        ]

    ligand_mol = Chem.MolFromMolFile(ligand_file)
    ligand_block = Chem.MolToPDBBlock(ligand_mol)
    combined_block = "\n".join(atom_records) + "\n" + ligand_block

    plip_complex = PDBComplex()
    plip_complex.load_pdb(combined_block, as_string=True)
    plip_complex.analyze()
    return plip_complex
27
+
28
+
29
class Interaction:
    """Plain record describing one parsed protein-ligand interaction.

    All constructor arguments are stored unchanged as attributes of the same
    name; ``to_tuple`` returns them in constructor order.
    """

    def __init__(
        self,
        type,
        protein_atom,
        protein_chain,
        protein_residue,
        protein_number,
        ligand_atom,
        distance,
        angle,
        plip_id,
    ) -> None:
        # Label of the interaction kind (first positional argument).
        self.type = type
        # Protein-side description: atom data, chain, residue and number.
        self.protein_atom = protein_atom
        self.protein_chain = protein_chain
        self.protein_residue = protein_residue
        self.protein_number = protein_number
        # Ligand-side atom data.
        self.ligand_atom = ligand_atom
        # Geometric values and identifier, stored as given (may be None).
        self.distance = distance
        self.angle = angle
        self.plip_id = plip_id

    def to_tuple(self):
        """Return every stored field as a 9-tuple in constructor order."""
        field_order = (
            "type",
            "protein_atom",
            "protein_chain",
            "protein_residue",
            "protein_number",
            "ligand_atom",
            "distance",
            "angle",
            "plip_id",
        )
        return tuple(getattr(self, field) for field in field_order)
65
+
66
+
67
+ def parse_interaction(interaction) -> Interaction:
68
+ if "saltbridge" in str(type(interaction)):
69
+ return Interaction("saltbridge", *process_saltbridge(interaction))
70
+ elif "hydroph" in str(type(interaction)):
71
+ return Interaction("hydrophobic", *process_hydrophobic(interaction))
72
+ elif "hbond" in str(type(interaction)):
73
+ return Interaction("hbond", *process_hbond(interaction))
74
+ elif "pistack" in str(type(interaction)):
75
+ return Interaction("pistack", *process_pi_stack(interaction))
76
+ else:
77
+ raise NotImplementedError(f"Parsing not implemented for {type(interaction)}")
78
+
79
+
80
def process_pi_stack(interaction):
    """Extract the fields of a pi-stacking interaction.

    Parameters:
        interaction: Object exposing ``proteinring.atoms``, ``ligandring.atoms``,
            ``reschain``, ``restype``, ``resnr``, ``distance`` and ``angle``.

    Returns:
        An 8-tuple: protein ring atoms, chain, residue type, residue number,
        ligand ring atoms, distance, angle and ``None`` (no PLIP id). Each atom
        list holds ``(coords, atomicnum)`` pairs.
    """

    def describe(ring):
        # One (coords, atomicnum) pair per atom of the ring.
        return [(atom.coords, atom.atomicnum) for atom in ring.atoms]

    return (
        describe(interaction.proteinring),
        interaction.reschain,
        interaction.restype,
        interaction.resnr,
        describe(interaction.ligandring),
        interaction.distance,
        interaction.angle,
        None,
    )
101
+
102
+
103
def process_hydrophobic(interaction):
    """Extract the fields of a hydrophobic interaction.

    Parameters:
        interaction: Object exposing ``bsatom``, ``ligatom``, ``reschain``,
            ``restype``, ``resnr`` and ``distance``.

    Returns:
        An 8-tuple: protein atom, chain, residue type, residue number, ligand
        atom, distance, ``None`` (no angle) and ``None`` (no PLIP id). Each atom
        entry is a one-element list holding a ``(coords, atomicnum)`` pair.
    """

    def describe(atom):
        # Single-atom interactions are still reported as a list of pairs.
        return [(atom.coords, atom.atomicnum)]

    return (
        describe(interaction.bsatom),
        interaction.reschain,
        interaction.restype,
        interaction.resnr,
        describe(interaction.ligatom),
        interaction.distance,
        None,
        None,
    )
121
+
122
+
123
def process_hbond(interaction):
    """Extract the fields of a hydrogen-bond interaction.

    When ``interaction.protisdon`` is truthy the ``d`` atom is reported as the
    protein atom and ``a`` as the ligand atom; otherwise the roles are swapped.

    Parameters:
        interaction: Object exposing ``protisdon``, ``d``, ``a``, ``reschain``,
            ``restype``, ``resnr``, ``distance_ad`` and ``angle``.

    Returns:
        An 8-tuple: protein atom, chain, residue type, residue number, ligand
        atom, ``distance_ad``, angle and ``None`` (no PLIP id). Each atom entry
        is a one-element list holding a ``(coords, atomicnum)`` pair.
    """
    if interaction.protisdon:
        protein_side, ligand_side = interaction.d, interaction.a
    else:
        protein_side, ligand_side = interaction.a, interaction.d

    return (
        [(protein_side.coords, protein_side.atomicnum)],
        interaction.reschain,
        interaction.restype,
        interaction.resnr,
        [(ligand_side.coords, ligand_side.atomicnum)],
        interaction.distance_ad,
        interaction.angle,
        None,
    )
147
+
148
+
149
def process_saltbridge(interaction):
    """Extract the fields of a salt-bridge interaction.

    When ``interaction.protispos`` is truthy the atoms of ``positive`` are
    reported as the protein atoms and those of ``negative`` as the ligand
    atoms; otherwise the roles are swapped.

    Parameters:
        interaction: Object exposing ``protispos``, ``positive.atoms``,
            ``negative.atoms``, ``reschain``, ``restype``, ``resnr`` and
            ``distance``.

    Returns:
        An 8-tuple: protein atoms, chain, residue type, residue number, ligand
        atoms, distance, ``None`` (no angle) and ``None`` (no PLIP id). Each
        atom list holds ``(coords, atomicnum)`` pairs.
    """
    if interaction.protispos:
        protein_group, ligand_group = interaction.positive, interaction.negative
    else:
        protein_group, ligand_group = interaction.negative, interaction.positive

    return (
        [(atom.coords, atom.atomicnum) for atom in protein_group.atoms],
        interaction.reschain,
        interaction.restype,
        interaction.resnr,
        [(atom.coords, atom.atomicnum) for atom in ligand_group.atoms],
        interaction.distance,
        None,
        None,
    )
File without changes
@@ -0,0 +1,223 @@
1
+ import warnings
2
+ import os
3
+ import pandas as pd
4
+
5
+ from ..tcr_processing.TCRParser import TCRParser
6
+ from ..tcr_interactions.TCRInteractionProfiler import TCRInteractionProfiler
7
+ from ..tcr_geometry.TCRGeom import TCRGeom
8
+ from ..tcr_geometry.TCRGeomFiltering import DockingGeometryFilter
9
+ from ..tcr_formats.tcr_formats import get_sequences
10
+
11
+
12
class TCRBatchOperator:
    """Parse batches of TCR structure files and run per-TCR analyses on them.

    The ``tcrs_from_*`` generators stream parsed TCR objects. The ``get_*``
    methods and ``full_analysis`` consume such a stream and collect the results
    into pandas DataFrames keyed by TCR ID.

    Items of a stream may be a bare TCR object, a ``(tcr_id, tcr)`` tuple, or
    ``None`` (a file that failed to parse); ``None`` items are skipped.
    """

    def __init__(self):
        self._tcr_parser = TCRParser()

    def _load_geometry_calculator(self):
        """Create a ``TCRGeom`` instance and store it on this operator."""
        self._geometry_calculator = TCRGeom()

    def _load_geometry_filter(self):
        """Create a ``DockingGeometryFilter`` and store it on this operator."""
        self._geometry_filter = DockingGeometryFilter()

    @staticmethod
    def _split_id_and_tcr(item):
        """Return ``(tcr_id, tcr)`` for a bare TCR or a ``(tcr_id, tcr)`` tuple.

        The tuple check must come first: a tuple has no ``parent`` attribute, so
        deriving the default ID from it would raise AttributeError.
        """
        if isinstance(item, tuple):
            tcr_id, tcr = item
            return tcr_id, tcr
        return f"{item.parent.parent.id}_{item.id}", item

    def tcrs_from_file_list(self, file_list):
        """Yield every TCR parsed from each file in ``file_list``.

        The TCR ID of a file is its base name up to the first ``.``. A file
        that fails to load triggers a warning and yields ``None`` instead.
        """
        for file in file_list:
            tcr_id = file.split("/")[-1].split(".")[0]
            try:
                for tcr in self._tcr_parser.get_tcr_structure(tcr_id, file).get_TCRs():
                    yield tcr
            except Exception as e:
                warnings.warn(f"Loading {file} failed with error {str(e)}")
                yield None

    def tcrs_from_file_dict(self, file_dict):
        """Yield ``(tcr_id, tcr)`` for every TCR parsed from ``file_dict``.

        ``file_dict`` maps TCR IDs to file paths. A file that fails to load
        triggers a warning and yields a bare ``None`` (not a tuple).
        """
        for tcr_id, file in file_dict.items():
            try:
                for tcr in self._tcr_parser.get_tcr_structure(tcr_id, file).get_TCRs():
                    yield tcr_id, tcr
            except Exception as e:
                warnings.warn(f"Loading {tcr_id}: {file} failed with error {str(e)}")
                yield None

    def get_TCR_pMHC_interactions(self, tcr_generator, renumber=True, save_as_csv=None):
        """Profile peptide interactions for every TCR in ``tcr_generator``.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or ``None``.
            renumber: Accepted for interface compatibility; not used here.
            save_as_csv: Optional path; when given the result is written as CSV.

        Returns:
            DataFrame of the per-TCR interaction frames concatenated under their
            TCR IDs, or an empty DataFrame when no TCR was profiled.
        """
        interaction_analysis_dict = {}
        for item in tcr_generator:
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            try:
                interaction_analysis_dict[tcr_id] = tcr.profile_peptide_interactions()
            except Exception as e:
                warnings.warn(
                    f"Interactions profile failed for {tcr} with error {str(e)}"
                )

        # pd.concat raises ValueError on an empty collection, so guard it.
        if interaction_analysis_dict:
            interactions_df = pd.concat(
                interaction_analysis_dict.values(),
                keys=interaction_analysis_dict.keys(),
                axis=0,
            )
        else:
            interactions_df = pd.DataFrame()

        if save_as_csv is not None:
            interactions_df.to_csv(save_as_csv)

        return interactions_df

    def get_TCR_geometry(self, tcr_generator, mode="rudolph", save_as_csv=None):
        """Calculate the docking geometry of every TCR in ``tcr_generator``.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or ``None``.
            mode: Passed through to ``tcr.calculate_docking_geometry``.
            save_as_csv: Optional path; when given the result is written as CSV.

        Returns:
            DataFrame of the per-TCR geometry frames indexed by TCR ID, or an
            empty DataFrame when no geometry was calculated.
        """
        geometries_dict = {}
        for item in tcr_generator:
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            try:
                geometries_dict[tcr_id] = tcr.calculate_docking_geometry(
                    mode=mode, as_df=True
                )
            except Exception as e:
                warnings.warn(
                    f"Geometry calculation failed for {tcr} with error {str(e)}"
                )

        # pd.concat raises ValueError on an empty collection, so guard it.
        if geometries_dict:
            geometries_df = pd.concat(geometries_dict).droplevel(1)
        else:
            geometries_df = pd.DataFrame()

        if save_as_csv is not None:
            geometries_df.to_csv(save_as_csv)

        return geometries_df

    def get_germlines_and_alleles(self, tcr_generator, save_as_csv=None):
        """Collect germlines and alleles of every TCR in ``tcr_generator``.

        Errors raised by ``tcr.get_germlines_and_alleles`` are not caught here.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or ``None``.
            save_as_csv: Optional path; when given the result is written as CSV.

        Returns:
            DataFrame with one row per TCR ID.
        """
        germlines_and_alleles_dict = {}
        for item in tcr_generator:
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            germlines_and_alleles_dict[tcr_id] = tcr.get_germlines_and_alleles()

        germlines_and_alleles_df = pd.DataFrame(germlines_and_alleles_dict).T

        if save_as_csv is not None:
            germlines_and_alleles_df.to_csv(save_as_csv)

        return germlines_and_alleles_df

    def full_analysis(self, tcr_generator, geometry_mode="rudolph", save_dir=None):
        """Run germline, geometry and interaction analyses in a single pass.

        Each analysis is attempted independently per TCR; a failure triggers a
        warning and the remaining analyses still run.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or ``None``.
            geometry_mode: Passed as ``mode`` to ``tcr.calculate_docking_geometry``.
            save_dir: Optional directory; when given, ``geometries.csv``,
                ``germlines_and_alleles.csv`` and ``interactions.csv`` are
                written into it.

        Returns:
            Tuple ``(germlines_and_alleles_df, geometries_df, interactions_df)``.
            A frame is empty when its analysis produced no result.
        """
        from tqdm import tqdm

        germlines_and_alleles_dict = {}
        geometries_dict = {}
        interaction_analysis_dict = {}

        for item in tqdm(tcr_generator):
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            try:
                germlines_and_alleles_dict[tcr_id] = tcr.get_germlines_and_alleles()
            except Exception as e:
                warnings.warn(
                    f"Germline and allele retrieval failed for {tcr} with error {str(e)}"
                )
            try:
                geometries_dict[tcr_id] = tcr.calculate_docking_geometry(
                    mode=geometry_mode, as_df=True
                )
            except Exception as e:
                warnings.warn(
                    f"Geometry calculation failed for {tcr} with error {str(e)}"
                )
            try:
                interaction_analysis_dict[tcr_id] = tcr.profile_peptide_interactions()
            except Exception as e:
                warnings.warn(
                    f"Interaction profiling failed for {tcr} with error {str(e)}"
                )

        germlines_and_alleles_df = pd.DataFrame(germlines_and_alleles_dict).T

        # An analysis that failed for every TCR must not discard the results of
        # the others: pd.concat raises ValueError on an empty collection.
        if geometries_dict:
            geometries_df = pd.concat(geometries_dict).droplevel(1)
        else:
            geometries_df = pd.DataFrame()

        if interaction_analysis_dict:
            interactions_df = pd.concat(
                interaction_analysis_dict.values(),
                keys=interaction_analysis_dict.keys(),
                axis=0,
            )
        else:
            interactions_df = pd.DataFrame()

        if save_dir is not None:
            geometries_df.to_csv(os.path.join(save_dir, "geometries.csv"))
            germlines_and_alleles_df.to_csv(
                os.path.join(save_dir, "germlines_and_alleles.csv")
            )
            interactions_df.to_csv(os.path.join(save_dir, "interactions.csv"))

        return germlines_and_alleles_df, geometries_df, interactions_df
168
+
169
+
170
def batch_load_TCRs(tcr_files):
    """Load all TCRs from ``tcr_files`` into memory.

    Parameters:
        tcr_files: Either a dict mapping TCR IDs to file paths, or an iterable
            of file paths.

    Returns:
        For a dict: a dict mapping each TCR ID to a parsed TCR. Files that
        failed to load are left out, and when one file holds several TCRs only
        the last one is kept because they share the same key.
        Otherwise: a list of parsed TCRs, with ``None`` entries for files that
        failed to load.
    """
    batch_ops = TCRBatchOperator()
    if isinstance(tcr_files, dict):
        # The generator yields a bare None for a failed file; dict() cannot
        # consume that as a key/value pair, so such entries are dropped.
        return dict(
            entry
            for entry in batch_ops.tcrs_from_file_dict(tcr_files)
            if entry is not None
        )
    return list(batch_ops.tcrs_from_file_list(tcr_files))
175
+
176
+
177
def batch_yield_TCRs(tcr_files):
    """Return a generator over the TCRs parsed from ``tcr_files``.

    A dict input is treated as a mapping of TCR IDs to file paths and handled
    by ``tcrs_from_file_dict``; any other input is handled by
    ``tcrs_from_file_list``.
    """
    operator = TCRBatchOperator()
    make_generator = (
        operator.tcrs_from_file_dict
        if isinstance(tcr_files, dict)
        else operator.tcrs_from_file_list
    )
    return make_generator(tcr_files)
182
+
183
+
184
def get_TCR_interactions(tcr_files, renumber=True, save_as_csv=None):
    """Profile TCR-pMHC interactions for a batch of structure files.

    Parameters:
        tcr_files: Dict mapping TCR IDs to file paths, or an iterable of file
            paths (list, tuple, ...).
        renumber: Forwarded to ``TCRBatchOperator.get_TCR_pMHC_interactions``.
        save_as_csv: Optional path; forwarded so the result is written as CSV.

    Returns:
        Whatever ``TCRBatchOperator.get_TCR_pMHC_interactions`` returns.
    """
    batch_ops = TCRBatchOperator()
    # Anything that is not a dict is treated as a sequence of paths, so the
    # generator is always bound (tuples previously fell through both checks).
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.get_TCR_pMHC_interactions(
        tcr_generator, renumber=renumber, save_as_csv=save_as_csv
    )
194
+
195
+
196
def get_TCR_geometry(tcr_files, mode="rudolph", save_as_csv=None):
    """Calculate docking geometries for a batch of structure files.

    Parameters:
        tcr_files: Dict mapping TCR IDs to file paths, or an iterable of file
            paths (list, tuple, ...).
        mode: Forwarded to ``TCRBatchOperator.get_TCR_geometry``.
        save_as_csv: Optional path; forwarded so the result is written as CSV.

    Returns:
        Whatever ``TCRBatchOperator.get_TCR_geometry`` returns.
    """
    batch_ops = TCRBatchOperator()
    # Anything that is not a dict is treated as a sequence of paths, so the
    # generator is always bound (tuples previously fell through both checks).
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.get_TCR_geometry(tcr_generator, mode=mode, save_as_csv=save_as_csv)
204
+
205
+
206
def get_germlines_and_alleles(tcr_files, save_as_csv=None):
    """Collect germlines and alleles for a batch of structure files.

    Parameters:
        tcr_files: Dict mapping TCR IDs to file paths, or an iterable of file
            paths (list, tuple, ...).
        save_as_csv: Optional path; forwarded so the result is written as CSV.

    Returns:
        Whatever ``TCRBatchOperator.get_germlines_and_alleles`` returns.
    """
    batch_ops = TCRBatchOperator()
    # Anything that is not a dict is treated as a sequence of paths, so the
    # generator is always bound (tuples previously fell through both checks).
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.get_germlines_and_alleles(tcr_generator, save_as_csv=save_as_csv)
214
+
215
+
216
def analyse_tcrs(tcr_files, save_dir=None):
    """Run the full analysis over a batch of structure files.

    Parameters:
        tcr_files: Dict mapping TCR IDs to file paths, or an iterable of file
            paths (list, tuple, ...).
        save_dir: Optional directory forwarded to
            ``TCRBatchOperator.full_analysis`` for writing CSV files.

    Returns:
        Whatever ``TCRBatchOperator.full_analysis`` returns.
    """
    batch_ops = TCRBatchOperator()
    # Anything that is not a dict is treated as a sequence of paths, so the
    # generator is always bound (tuples previously fell through both checks).
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.full_analysis(tcr_generator, save_dir=save_dir)
@@ -0,0 +1,150 @@
1
+ import warnings
2
+ import requests
3
+ import os
4
+
5
+ from ..tcr_processing.TCRParser import TCRParser
6
+ from .tcr_batch_operations import batch_load_TCRs, batch_yield_TCRs
7
+
8
+
9
def load_TCR(tcr_structure_file, tcr_id=None):
    """Parse one structure file and return the TCR(s) it contains.

    Parameters:
        tcr_structure_file: Path string of the structure file.
        tcr_id: Optional ID; defaults to the file's base name up to the first
            ``.``.

    Returns:
        The single TCR when exactly one is found, otherwise the list of TCRs
        (which may be empty).
    """
    parser = TCRParser()
    if tcr_id is None:
        # Text after the last "/" and before the first "." of that name.
        base_name = tcr_structure_file.rpartition("/")[2]
        tcr_id = base_name.partition(".")[0]

    structure = parser.get_tcr_structure(tcr_id, tcr_structure_file)
    found = list(structure.get_TCRs())
    return found[0] if len(found) == 1 else found
19
+
20
+
21
def load_TCRs(tcr_structure_files, tcr_ids=None):
    """Load the TCRs of one or many structure files into memory.

    Parameters:
        tcr_structure_files: A single path string, or a sized collection of
            path strings.
        tcr_ids: For a single file, the ID to use (a warning is issued when it
            is not a ``str``). For several files, a collection of IDs of the
            same length; on a length mismatch a warning is issued and the IDs
            are ignored.

    Returns:
        For a single path: the list of TCRs in that file. Otherwise the result
        of ``batch_load_TCRs`` (called with an ID-to-file dict when matching
        IDs were given, else with the files themselves).
    """
    parser = TCRParser()

    if isinstance(tcr_structure_files, str):  # a single file was passed
        # Default ID: the file name without its extension.
        tcr_id = tcr_structure_files.rpartition("/")[2].partition(".")[0]
        if tcr_ids is not None:
            if not isinstance(tcr_ids, str):
                warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
            tcr_id = tcr_ids
        structure = parser.get_tcr_structure(tcr_id, tcr_structure_files)
        return list(structure.get_TCRs())

    if len(tcr_structure_files) > 10:
        warnings.warn(
            "Loading more than 10 TCR structure objects into memory. Consider applying generator methods to reduce memory load."
        )

    if tcr_ids is None:
        return batch_load_TCRs(tcr_structure_files)

    if len(tcr_structure_files) == len(tcr_ids):
        id_to_file = dict(zip(tcr_ids, tcr_structure_files))
        return batch_load_TCRs(id_to_file)

    warnings.warn(
        f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
    )
    return batch_load_TCRs(tcr_structure_files)
48
+
49
+
50
def yield_TCRs(tcr_structure_files, tcr_ids=None):
    """Provide the TCRs of one or many structure files.

    Parameters:
        tcr_structure_files: A single path string, or a sized collection of
            path strings.
        tcr_ids: For a single file, the ID to use (a warning is issued when it
            is not a ``str``). For several files, a collection of IDs of the
            same length; on a length mismatch a warning is issued and the IDs
            are ignored.

    Returns:
        For a single path: a list (not a generator) of the TCRs in that file.
        Otherwise the result of ``batch_yield_TCRs`` (called with an ID-to-file
        dict when matching IDs were given, else with the files themselves).
    """
    parser = TCRParser()

    if isinstance(tcr_structure_files, str):  # a single file was passed
        # Default ID: the file name without its extension.
        tcr_id = tcr_structure_files.rpartition("/")[2].partition(".")[0]
        if tcr_ids is not None:
            if not isinstance(tcr_ids, str):
                warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
            tcr_id = tcr_ids
        structure = parser.get_tcr_structure(tcr_id, tcr_structure_files)
        return list(structure.get_TCRs())

    if tcr_ids is None:
        return batch_yield_TCRs(tcr_structure_files)

    if len(tcr_structure_files) == len(tcr_ids):
        id_to_file = dict(zip(tcr_ids, tcr_structure_files))
        return batch_yield_TCRs(id_to_file)

    warnings.warn(
        f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
    )
    return batch_yield_TCRs(tcr_structure_files)
72
+
73
+
74
def fetch_TCR(pdb_id: str):
    """
    Fetches and parses a T-cell receptor (TCR) structure from the STCRDab or RCSB PDB databases.

    The function first attempts to download a PDB file from the STCRDab database.
    If the PDB file is not found, it falls back to downloading a CIF file from RCSB PDB.
    The downloaded file is then parsed using `TCRParser` to extract TCR structures.

    Parameters:
        pdb_id (str): The PDB identifier of the structure to be fetched.

    Returns:
        - A single TCR structure if exactly one is found.
        - A list of TCR structures if multiple are found.
        - None if no TCRs are identified (with a `UserWarning` issued).
        - None if the file could not be downloaded from either database.

    Raises:
        - A warning if no TCR structures are found in the downloaded file.
        - A warning if the STCRDab request times out (RCSB is tried next).
        - Prints an error message if the file cannot be downloaded.
        - Errors of the RCSB request itself and parser errors are propagated.

    Notes:
        - STCRDab returns an error message if the requested PDB ID does not exist.
        - The download is stored in a private temporary directory that is removed
          when the function returns or raises, so files in the current working
          directory are never overwritten or deleted.

    Example:
        tcr = fetch_TCR("6eqa")

    """
    import tempfile

    stcrdab_base_url = "https://opig.stats.ox.ac.uk/webapps/stcrdab-stcrpred/pdb/"
    pdb_base_url = "https://files.rcsb.org/download/"

    with tempfile.TemporaryDirectory() as download_dir:
        # The file keeps its <ID>.<ext> name inside the temporary directory.
        filename = os.path.join(download_dir, f"{pdb_id.upper()}.pdb")
        url = stcrdab_base_url + pdb_id.lower()
        tcr_found = False

        try:
            # The context manager releases the streamed connection.
            with requests.get(url, stream=True, timeout=10) as response:
                if response.status_code == 200:
                    with open(filename, "wb") as file:
                        for chunk in response.iter_content(chunk_size=1024):
                            file.write(chunk)
                            # STCRDab returns '$PDB does not exist for downloading'
                            # if the PDB code is not found in the database.
                            if b"does not exist" not in chunk:
                                tcr_found = True
        except requests.exceptions.Timeout:
            warnings.warn(f"Request to STCRDab ({url}) timed out. Trying RCSB.")
            # A timeout mid-stream leaves a partial file; do not parse it.
            tcr_found = False

        if not tcr_found:
            # Request from the RCSB database instead.
            rcsb_name = f"{pdb_id.upper()}.cif"
            filename = os.path.join(download_dir, rcsb_name)
            url = pdb_base_url + rcsb_name
            with requests.get(url, stream=True, timeout=10) as response:
                if response.status_code != 200:
                    print("Failed to download file")
                    # Nothing was downloaded, so there is nothing to parse.
                    return None
                with open(filename, "wb") as file:
                    for chunk in response.iter_content(chunk_size=1024):
                        file.write(chunk)

        # Parse and materialise the TCRs before the temporary directory is removed.
        tcr_parser = TCRParser()
        tcr = list(tcr_parser.get_tcr_structure(pdb_id, filename).get_TCRs())

    if len(tcr) == 1:
        return tcr[0]
    if len(tcr) == 0:
        warnings.warn(f"No TCRs identified in {pdb_id}")
        return None
    return tcr
@@ -0,0 +1,18 @@
1
def tcrs_to_AF3_json(tcrs, path=None, **kwargs):
    """Write a multi-job AF3 JSON file for a collection of TCRs.

    Parameters:
        tcrs: Indexable collection of either file-path strings (loaded with
            ``load_TCRs``) or ``TCR`` objects; the first element decides which.
        path: Output file path; defaults to ``"stcrpy_AF3_TCRs.json"``.
        **kwargs: Forwarded to ``to_AF3_json`` for every TCR.

    Returns:
        The list of per-TCR job objects that was written to ``path``.
    """
    from ..tcr_formats.tcr_formats import to_AF3_json
    import json

    first_entry = tcrs[0]
    if isinstance(first_entry, str):
        from .tcr_methods import load_TCRs

        tcrs = load_TCRs(tcrs)
    else:
        from ..tcr_processing.TCR import TCR

        assert isinstance(first_entry, TCR)

    multiple_job_json = []
    for tcr in tcrs:
        multiple_job_json.append(to_AF3_json(tcr, save=False, **kwargs))

    if path is None:
        path = "stcrpy_AF3_TCRs.json"
    with open(path, "w") as f:
        json.dump(multiple_job_json, f)

    print(f"{len(tcrs)} saved as AF3 json job: {path}")
    return multiple_job_json
@@ -0,0 +1,2 @@
1
+ from .tcr_rmsd import RMSD
2
+ from .tcr_interface_rmsd import InterfaceRMSD
@@ -0,0 +1,39 @@
1
# Ordered list of atom-name strings. The order is part of the data and is kept
# exactly as originally declared.
# NOTE(review): presumably consumers rely on index positions - confirm before
# reordering or extending.
# fmt: off
ATOM_TYPES = [
    "N", "CA", "C", "CB", "O",
    "CG", "CG1", "CG2",
    "OG", "OG1", "SG",
    "CD", "CD1", "CD2",
    "ND1", "ND2",
    "OD1", "OD2",
    "SD",
    "CE", "CE1", "CE2", "CE3",
    "NE", "NE1", "NE2",
    "OE1", "OE2",
    "CH2",
    "NH1", "NH2",
    "OH",
    "CZ", "CZ2", "CZ3",
    "NZ",
    "OXT",
]
# fmt: on