stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from plip.structure.preparation import PDBComplex
|
|
5
|
+
except ModuleNotFoundError:
|
|
6
|
+
warnings.warn(
|
|
7
|
+
"""\n\nPLIP package not found. \nProfiling interactions will not be possible \nTo enable interaction profiling, install PLIP with:
|
|
8
|
+
\npip install plip --no-deps\n\n"""
|
|
9
|
+
)
|
|
10
|
+
from rdkit import Chem
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def return_interactions(
    protein_file=None, ligand_file=None, complex_file=None, pymol_visualization=False
):
    """Build a protein-ligand complex and analyse it with PLIP.

    The ``ATOM`` records of ``protein_file`` are concatenated with a PDB block
    generated by RDKit from ``ligand_file``; the combined text is loaded into
    a PLIP ``PDBComplex`` and analysed.

    Parameters:
        protein_file: Path to a PDB-format text file; only lines starting with
            ``ATOM`` are kept.
        ligand_file: Path to a MOL file readable by ``Chem.MolFromMolFile``.
        complex_file: Accepted for interface compatibility; not used here.
        pymol_visualization: Accepted for interface compatibility; not used here.

    Returns:
        The analysed ``PDBComplex`` instance.

    Raises:
        ValueError: If RDKit cannot parse ``ligand_file``.
    """
    with open(protein_file, "r") as f:
        protein = [line for line in f.read().split("\n") if line.startswith("ATOM")]

    ligand = Chem.MolFromMolFile(ligand_file)
    if ligand is None:
        # RDKit signals a parse failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside MolToPDBBlock.
        raise ValueError(f"RDKit could not parse ligand file: {ligand_file}")
    ligand_pdb_block = Chem.MolToPDBBlock(ligand)

    complex_pdb_block = "\n".join(protein) + "\n" + ligand_pdb_block
    my_mol = PDBComplex()
    my_mol.load_pdb(complex_pdb_block, as_string=True)
    my_mol.analyze()
    return my_mol
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Interaction:
    """Plain record of one parsed protein-ligand interaction.

    Instances only store the values passed to the constructor; ``to_tuple``
    returns them in constructor order.
    """

    def __init__(
        self,
        type,
        protein_atom,
        protein_chain,
        protein_residue,
        protein_number,
        ligand_atom,
        distance,
        angle,
        plip_id,
    ) -> None:
        # Interaction label (e.g. the string chosen by the parser).
        self.type = type
        # Protein-side description: atom(s) plus residue location.
        self.protein_atom, self.protein_chain = protein_atom, protein_chain
        self.protein_residue, self.protein_number = protein_residue, protein_number
        # Ligand-side atom(s).
        self.ligand_atom = ligand_atom
        # Geometric measurements; either may be None when not provided.
        self.distance, self.angle = distance, angle
        self.plip_id = plip_id

    def to_tuple(self):
        """Return all stored fields as a 9-tuple in constructor order."""
        protein_side = (
            self.protein_atom,
            self.protein_chain,
            self.protein_residue,
            self.protein_number,
        )
        measurements = (self.distance, self.angle)
        return (self.type, *protein_side, self.ligand_atom, *measurements, self.plip_id)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def parse_interaction(interaction) -> Interaction:
    """Convert an interaction object into an ``Interaction`` record.

    The handler is chosen by substring-matching the string form of the
    object's type; the first matching entry wins.

    Raises:
        NotImplementedError: If no known substring occurs in the type name.
    """
    type_repr = str(type(interaction))
    # (substring searched in the type name, label stored on the record, handler).
    # Kept in the order the checks must be applied.
    dispatch = (
        ("saltbridge", "saltbridge", process_saltbridge),
        ("hydroph", "hydrophobic", process_hydrophobic),
        ("hbond", "hbond", process_hbond),
        ("pistack", "pistack", process_pi_stack),
    )
    for needle, label, handler in dispatch:
        if needle in type_repr:
            return Interaction(label, *handler(interaction))
    raise NotImplementedError(f"Parsing not implemented for {type(interaction)}")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def process_pi_stack(interaction):
    """Flatten a pi-stacking interaction into the ``Interaction`` field tuple.

    Returns:
        ``(protein_ring_atoms, chain, residue, number, ligand_ring_atoms,
        distance, angle, plip_id)`` where each atom list holds
        ``(coords, atomicnum)`` pairs and ``plip_id`` is always ``None``.
    """

    def ring_atoms(ring):
        # One (coords, atomicnum) pair per atom of the ring.
        return [(atom.coords, atom.atomicnum) for atom in ring.atoms]

    return (
        ring_atoms(interaction.proteinring),
        interaction.reschain,
        interaction.restype,
        interaction.resnr,
        ring_atoms(interaction.ligandring),
        interaction.distance,
        interaction.angle,
        None,
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def process_hydrophobic(interaction):
    """Flatten a hydrophobic contact into the ``Interaction`` field tuple.

    Returns:
        ``(protein_atom, chain, residue, number, ligand_atom, distance, None,
        None)``; the atom entries are one-element lists of
        ``(coords, atomicnum)``, and both the angle and ``plip_id`` slots are
        ``None``.
    """
    binding_site_atom = interaction.bsatom
    protein_side = [(binding_site_atom.coords, binding_site_atom.atomicnum)]
    residue_info = (interaction.reschain, interaction.restype, interaction.resnr)
    ligand_partner = interaction.ligatom
    ligand_side = [(ligand_partner.coords, ligand_partner.atomicnum)]
    # No angle is taken from this interaction type, hence the first None.
    return (protein_side, *residue_info, ligand_side, interaction.distance, None, None)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def process_hbond(interaction):
    """Flatten a hydrogen bond into the ``Interaction`` field tuple.

    ``interaction.protisdon`` decides which of the donor (``d``) and acceptor
    (``a``) atoms is reported on the protein side.

    Returns:
        ``(protein_atom, chain, residue, number, ligand_atom, distance_ad,
        angle, None)`` with one-element ``(coords, atomicnum)`` atom lists.
    """

    def as_entry(atom):
        return [(atom.coords, atom.atomicnum)]

    if interaction.protisdon:
        protein_partner, ligand_partner = interaction.d, interaction.a
    else:
        protein_partner, ligand_partner = interaction.a, interaction.d

    return (
        as_entry(protein_partner),
        interaction.reschain,
        interaction.restype,
        interaction.resnr,
        as_entry(ligand_partner),
        interaction.distance_ad,
        interaction.angle,
        None,
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def process_saltbridge(interaction):
    """Flatten a salt bridge into the ``Interaction`` field tuple.

    ``interaction.protispos`` decides whether the ``positive`` or the
    ``negative`` group is reported on the protein side.

    Returns:
        ``(protein_atoms, chain, residue, number, ligand_atoms, distance,
        None, None)`` where each atom list holds ``(coords, atomicnum)`` pairs.
    """

    def group_atoms(group):
        return [(atom.coords, atom.atomicnum) for atom in group.atoms]

    if interaction.protispos:
        protein_group, ligand_group = interaction.positive, interaction.negative
    else:
        protein_group, ligand_group = interaction.negative, interaction.positive

    return (
        group_atoms(protein_group),
        interaction.reschain,
        interaction.restype,
        interaction.resnr,
        group_atoms(ligand_group),
        interaction.distance,
        None,  # no angle is taken from this interaction type
        None,
    )
|
|
File without changes
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
import os
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from ..tcr_processing.TCRParser import TCRParser
|
|
6
|
+
from ..tcr_interactions.TCRInteractionProfiler import TCRInteractionProfiler
|
|
7
|
+
from ..tcr_geometry.TCRGeom import TCRGeom
|
|
8
|
+
from ..tcr_geometry.TCRGeomFiltering import DockingGeometryFilter
|
|
9
|
+
from ..tcr_formats.tcr_formats import get_sequences
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TCRBatchOperator:
    """Parse many TCR structure files and aggregate per-TCR analyses.

    The ``tcrs_from_*`` methods are generators over parsed TCR objects. The
    ``get_*`` methods and ``full_analysis`` consume such a generator and
    collect the per-TCR results into pandas DataFrames. Generator items may be
    a TCR object, a ``(tcr_id, tcr)`` tuple, or ``None`` (a file that failed
    to load); ``None`` items are skipped.
    """

    def __init__(self):
        # One parser instance is reused for every file loaded via this operator.
        self._tcr_parser = TCRParser()

    def _load_geometry_calculator(self):
        """Instantiate ``TCRGeom`` and keep it on the operator."""
        self._geometry_calculator = TCRGeom()

    def _load_geometry_filter(self):
        """Instantiate ``DockingGeometryFilter`` and keep it on the operator."""
        self._geometry_filter = DockingGeometryFilter()

    @staticmethod
    def _split_id_and_tcr(item):
        """Return ``(tcr_id, tcr)`` for one non-None generator item.

        Tuples are taken as ``(tcr_id, tcr)``. Otherwise the ID is built from
        the TCR's grandparent ID and its own ID. The tuple check must come
        first: a tuple has no ``parent`` attribute.
        """
        if isinstance(item, tuple):
            tcr_id, tcr = item
            return tcr_id, tcr
        return f"{item.parent.parent.id}_{item.id}", item

    def tcrs_from_file_list(self, file_list):
        """Yield every TCR parsed from each path in ``file_list``.

        The structure ID is the file name up to its first ``.``. If loading a
        file raises, a warning is issued and a single ``None`` is yielded.
        """
        for file in file_list:
            tcr_id = file.split("/")[-1].split(".")[0]
            try:
                yield from self._tcr_parser.get_tcr_structure(tcr_id, file).get_TCRs()
            except Exception as e:
                warnings.warn(f"Loading {file} failed with error {str(e)}")
                yield None

    def tcrs_from_file_dict(self, file_dict):
        """Yield ``(tcr_id, tcr)`` for every TCR parsed from ``file_dict``.

        ``file_dict`` maps structure IDs to file paths. If loading a file
        raises, a warning is issued and a bare ``None`` (not a tuple) is
        yielded.
        """
        for tcr_id, file in file_dict.items():
            try:
                for tcr in self._tcr_parser.get_tcr_structure(tcr_id, file).get_TCRs():
                    yield tcr_id, tcr
            except Exception as e:
                warnings.warn(f"Loading {tcr_id}: {file} failed with error {str(e)}")
                yield None

    def get_TCR_pMHC_interactions(self, tcr_generator, renumber=True, save_as_csv=None):
        """Profile peptide interactions for every TCR in ``tcr_generator``.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or None.
            renumber: Accepted for interface compatibility; not used here.
            save_as_csv: Optional path; when given the result is written there.

        Returns:
            The per-TCR interaction frames concatenated row-wise, keyed by
            TCR ID. TCRs whose profiling raises are skipped with a warning.

        Raises:
            ValueError: From ``pd.concat`` if no TCR produced a result.
        """
        interaction_analysis_dict = {}
        for item in tcr_generator:
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            try:
                interaction_analysis_dict[tcr_id] = tcr.profile_peptide_interactions()
            except Exception as e:
                warnings.warn(
                    f"Interactions profile failed for {tcr} with error {str(e)}"
                )
        interactions_df = pd.concat(
            interaction_analysis_dict.values(),
            keys=interaction_analysis_dict.keys(),
            axis=0,
        )

        if save_as_csv is not None:
            interactions_df.to_csv(save_as_csv)

        return interactions_df

    def get_TCR_geometry(self, tcr_generator, mode="rudolph", save_as_csv=None):
        """Calculate the docking geometry for every TCR in ``tcr_generator``.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or None.
            mode: Forwarded to ``tcr.calculate_docking_geometry``.
            save_as_csv: Optional path; when given the result is written there.

        Returns:
            The per-TCR geometry frames concatenated and indexed by TCR ID.
            TCRs whose calculation raises are skipped with a warning.

        Raises:
            ValueError: From ``pd.concat`` if no TCR produced a result.
        """
        geometries_dict = {}
        for item in tcr_generator:
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            try:
                geometries_dict[tcr_id] = tcr.calculate_docking_geometry(
                    mode=mode, as_df=True
                )
            except Exception as e:
                warnings.warn(
                    f"Geometry calculation failed for {tcr} with error {str(e)}"
                )
        # Drop the inner index level so rows are indexed by TCR ID only.
        geometries_df = pd.concat(geometries_dict).droplevel(1)

        if save_as_csv is not None:
            geometries_df.to_csv(save_as_csv)

        return geometries_df

    def get_germlines_and_alleles(self, tcr_generator, save_as_csv=None):
        """Collect germlines and alleles for every TCR in ``tcr_generator``.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or None.
            save_as_csv: Optional path; when given the result is written there.

        Returns:
            A DataFrame with one row per TCR ID. TCRs whose retrieval raises
            are skipped with a warning, as in the other batch methods.
        """
        germlines_and_alleles_dict = {}
        for item in tcr_generator:
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            try:
                germlines_and_alleles_dict[tcr_id] = tcr.get_germlines_and_alleles()
            except Exception as e:
                warnings.warn(
                    f"Germline and allele retrieval failed for {tcr} with error {str(e)}"
                )

        germlines_and_alleles_df = pd.DataFrame(germlines_and_alleles_dict).T

        if save_as_csv is not None:
            germlines_and_alleles_df.to_csv(save_as_csv)

        return germlines_and_alleles_df

    def full_analysis(self, tcr_generator, geometry_mode="rudolph", save_dir=None):
        """Run germline, geometry and interaction analysis in a single pass.

        Each analysis is attempted independently per TCR; a failure emits a
        warning and leaves that TCR out of the corresponding table only.

        Parameters:
            tcr_generator: Iterable of TCRs, ``(tcr_id, tcr)`` tuples or None.
            geometry_mode: Forwarded to ``tcr.calculate_docking_geometry``.
            save_dir: Optional directory; when given, ``geometries.csv``,
                ``germlines_and_alleles.csv`` and ``interactions.csv`` are
                written into it.

        Returns:
            ``(germlines_and_alleles_df, geometries_df, interactions_df)``.

        Raises:
            ValueError: From ``pd.concat`` if no geometry or no interaction
                result was produced.
        """
        from tqdm import tqdm

        germlines_and_alleles_dict = {}
        geometries_dict = {}
        interaction_analysis_dict = {}

        for item in tqdm(tcr_generator):
            if item is None:  # file could not be parsed in the generator
                continue
            tcr_id, tcr = self._split_id_and_tcr(item)
            try:
                germlines_and_alleles_dict[tcr_id] = tcr.get_germlines_and_alleles()
            except Exception as e:
                warnings.warn(
                    f"Germline and allele retrieval failed for {tcr} with error {str(e)}"
                )
            try:
                geometries_dict[tcr_id] = tcr.calculate_docking_geometry(
                    mode=geometry_mode, as_df=True
                )
            except Exception as e:
                warnings.warn(
                    f"Geometry calculation failed for {tcr} with error {str(e)}"
                )
            try:
                interaction_analysis_dict[tcr_id] = tcr.profile_peptide_interactions()
            except Exception as e:
                warnings.warn(
                    f"Interaction profiling failed for {tcr} with error {str(e)}"
                )
        germlines_and_alleles_df = pd.DataFrame(germlines_and_alleles_dict).T

        geometries_df = pd.concat(geometries_dict).droplevel(1)

        interactions_df = pd.concat(
            interaction_analysis_dict.values(),
            keys=interaction_analysis_dict.keys(),
            axis=0,
        )

        if save_dir is not None:
            geometries_df.to_csv(os.path.join(save_dir, "geometries.csv"))
            germlines_and_alleles_df.to_csv(
                os.path.join(save_dir, "germlines_and_alleles.csv")
            )
            interactions_df.to_csv(os.path.join(save_dir, "interactions.csv"))

        return germlines_and_alleles_df, geometries_df, interactions_df
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def batch_load_TCRs(tcr_files):
    """Eagerly load TCRs from a collection of structure files.

    Parameters:
        tcr_files: Either a dict mapping TCR IDs to file paths, or an
            iterable of file paths.

    Returns:
        - For a dict: a dict mapping each TCR ID to a parsed TCR. Files that
          failed to load are left out. If one file yields several TCRs under
          the same ID, the last one wins.
        - Otherwise: a list of the items produced by
          ``TCRBatchOperator.tcrs_from_file_list`` (this may contain ``None``
          for files that failed to load).
    """
    if isinstance(tcr_files, dict):
        # The dict generator yields a bare None for a file that failed to
        # load; dict() cannot consume that, so failed files are skipped here.
        return {
            entry[0]: entry[1]
            for entry in TCRBatchOperator().tcrs_from_file_dict(tcr_files)
            if entry is not None
        }
    return list(TCRBatchOperator().tcrs_from_file_list(tcr_files))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def batch_yield_TCRs(tcr_files):
    """Return a lazy generator over the TCRs in ``tcr_files``.

    A dict (TCR ID -> file path) is handled by
    ``TCRBatchOperator.tcrs_from_file_dict``; anything else is passed to
    ``TCRBatchOperator.tcrs_from_file_list``.
    """
    operator = TCRBatchOperator()
    if not isinstance(tcr_files, dict):
        return operator.tcrs_from_file_list(tcr_files)
    return operator.tcrs_from_file_dict(tcr_files)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def get_TCR_interactions(tcr_files, renumber=True, save_as_csv=None):
    """Profile TCR-pMHC interactions for a batch of structure files.

    Parameters:
        tcr_files: A dict mapping TCR IDs to file paths, or an iterable of
            file paths.
        renumber: Forwarded to ``TCRBatchOperator.get_TCR_pMHC_interactions``.
        save_as_csv: Optional path; when given the result is written there.

    Returns:
        The DataFrame produced by
        ``TCRBatchOperator.get_TCR_pMHC_interactions``.
    """
    batch_ops = TCRBatchOperator()
    # dict / everything-else dispatch, so tuples and other iterables of paths
    # work instead of leaving tcr_generator unbound.
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.get_TCR_pMHC_interactions(
        tcr_generator, renumber=renumber, save_as_csv=save_as_csv
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def get_TCR_geometry(tcr_files, mode="rudolph", save_as_csv=None):
    """Calculate docking geometries for a batch of structure files.

    Parameters:
        tcr_files: A dict mapping TCR IDs to file paths, or an iterable of
            file paths.
        mode: Forwarded to ``TCRBatchOperator.get_TCR_geometry``.
        save_as_csv: Optional path; when given the result is written there.

    Returns:
        The DataFrame produced by ``TCRBatchOperator.get_TCR_geometry``.
    """
    batch_ops = TCRBatchOperator()
    # dict / everything-else dispatch, so tuples and other iterables of paths
    # work instead of leaving tcr_generator unbound.
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.get_TCR_geometry(tcr_generator, mode=mode, save_as_csv=save_as_csv)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def get_germlines_and_alleles(tcr_files, save_as_csv=None):
    """Collect germlines and alleles for a batch of structure files.

    Parameters:
        tcr_files: A dict mapping TCR IDs to file paths, or an iterable of
            file paths.
        save_as_csv: Optional path; when given the result is written there.

    Returns:
        The DataFrame produced by
        ``TCRBatchOperator.get_germlines_and_alleles``.
    """
    batch_ops = TCRBatchOperator()
    # dict / everything-else dispatch, so tuples and other iterables of paths
    # work instead of leaving tcr_generator unbound.
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.get_germlines_and_alleles(tcr_generator, save_as_csv=save_as_csv)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def analyse_tcrs(tcr_files, save_dir=None):
    """Run the full batch analysis on a collection of structure files.

    Parameters:
        tcr_files: A dict mapping TCR IDs to file paths, or an iterable of
            file paths.
        save_dir: Optional directory forwarded to
            ``TCRBatchOperator.full_analysis`` for CSV output.

    Returns:
        The tuple returned by ``TCRBatchOperator.full_analysis``.
    """
    batch_ops = TCRBatchOperator()
    # dict / everything-else dispatch, so tuples and other iterables of paths
    # work instead of leaving tcr_generator unbound.
    if isinstance(tcr_files, dict):
        tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
    else:
        tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)

    return batch_ops.full_analysis(tcr_generator, save_dir=save_dir)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
import requests
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from ..tcr_processing.TCRParser import TCRParser
|
|
6
|
+
from .tcr_batch_operations import batch_load_TCRs, batch_yield_TCRs
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_TCR(tcr_structure_file, tcr_id=None):
    """Parse one structure file and return the TCR(s) it contains.

    Parameters:
        tcr_structure_file: Path of the structure file, as a string.
        tcr_id: Optional ID for the structure; defaults to the file name up
            to its first ``.``.

    Returns:
        The single TCR when exactly one is found, otherwise the list of all
        TCRs found (which may be empty).
    """
    parser = TCRParser()
    if tcr_id is None:
        file_name = tcr_structure_file.split("/")[-1]
        tcr_id = file_name.split(".")[0]
    structure = parser.get_tcr_structure(tcr_id, tcr_structure_file)
    found = list(structure.get_TCRs())
    return found[0] if len(found) == 1 else found
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def load_TCRs(tcr_structure_files, tcr_ids=None):
    """Load TCRs from a single file path or from a collection of paths.

    Parameters:
        tcr_structure_files: One path (str) or a sized collection of paths.
        tcr_ids: Optional ID (single file) or IDs (collection). For a
            collection the IDs are only used when their count matches the
            number of files; otherwise a warning is issued and default IDs
            are used.

    Returns:
        - Single path: a list of the TCRs parsed from that file.
        - Collection: the result of ``batch_load_TCRs`` (a dict when IDs are
          used, a list otherwise).
    """
    parser = TCRParser()
    ids_given = tcr_ids is not None

    if isinstance(tcr_structure_files, str):  # single file
        if ids_given:
            if not isinstance(tcr_ids, str):
                warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
            single_id = tcr_ids
        else:
            # default ID: file name without extension
            single_id = tcr_structure_files.split("/")[-1].split(".")[0]
        structure = parser.get_tcr_structure(single_id, tcr_structure_files)
        return list(structure.get_TCRs())

    if len(tcr_structure_files) > 10:
        warnings.warn(
            "Loading more than 10 TCR structure objects into memory. Consider applying generator methods to reduce memory load."
        )

    if ids_given:
        if len(tcr_structure_files) == len(tcr_ids):
            id_to_file = dict(zip(tcr_ids, tcr_structure_files))
            return batch_load_TCRs(id_to_file)
        warnings.warn(
            f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
        )
    return batch_load_TCRs(tcr_structure_files)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def yield_TCRs(tcr_structure_files, tcr_ids=None):
    """Lazily load TCRs from a collection of paths (or eagerly from one path).

    Parameters:
        tcr_structure_files: One path (str) or a sized collection of paths.
        tcr_ids: Optional ID (single file) or IDs (collection). For a
            collection the IDs are only used when their count matches the
            number of files; otherwise a warning is issued and default IDs
            are used.

    Returns:
        - Single path: a list of the TCRs parsed from that file.
        - Collection: the generator returned by ``batch_yield_TCRs``.
    """
    parser = TCRParser()
    ids_given = tcr_ids is not None

    if isinstance(tcr_structure_files, str):  # single file
        if ids_given:
            if not isinstance(tcr_ids, str):
                warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
            single_id = tcr_ids
        else:
            # default ID: file name without extension
            single_id = tcr_structure_files.split("/")[-1].split(".")[0]
        structure = parser.get_tcr_structure(single_id, tcr_structure_files)
        return list(structure.get_TCRs())

    if ids_given:
        if len(tcr_structure_files) == len(tcr_ids):
            id_to_file = dict(zip(tcr_ids, tcr_structure_files))
            return batch_yield_TCRs(id_to_file)
        warnings.warn(
            f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
        )
    return batch_yield_TCRs(tcr_structure_files)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def fetch_TCR(pdb_id: str):
    """
    Fetches and parses a T-cell receptor (TCR) structure from the STCRDab or RCSB PDB databases.

    The function first attempts to download a PDB file from the STCRDab database.
    If that fails (request error, non-200 status, or STCRDab reporting that the
    entry does not exist), it falls back to downloading a CIF file from RCSB PDB.
    The downloaded file is then parsed using `TCRParser` to extract TCR structures.

    Parameters:
        pdb_id (str): The PDB identifier of the structure to be fetched.

    Returns:
        - A single TCR structure if exactly one is found.
        - A list of TCR structures if multiple are found.
        - None if no TCRs are identified, or if the file could not be
          downloaded from either source (with a `UserWarning` issued).

    Raises:
        - requests.exceptions.RequestException if the RCSB request itself fails
          (e.g. timeout or connection error).

    Notes:
        - STCRDab answers with a '... does not exist for downloading' message
          if the requested PDB ID is not in the database.
        - The download is stored in a temporary directory that is removed when
          the function returns, so no file in the working directory is
          created, overwritten or deleted.

    Example:
        tcr = fetch_TCR("6eqa")

    """
    import tempfile  # local import: only this function needs a scratch directory

    stcrdab_base_url = "https://opig.stats.ox.ac.uk/webapps/stcrdab-stcrpred/pdb/"
    pdb_base_url = "https://files.rcsb.org/download/"
    not_found_marker = b"does not exist"

    def _download(url, destination):
        """Stream ``url`` into ``destination``; return True only on HTTP 200."""
        with requests.get(url, stream=True, timeout=10) as response:
            if response.status_code != 200:
                return False
            with open(destination, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    file.write(chunk)
        return True

    with tempfile.TemporaryDirectory() as scratch_dir:
        # The file extension is kept so the parser sees the same name as before.
        filename = os.path.join(scratch_dir, f"{pdb_id.upper()}.pdb")
        url = stcrdab_base_url + pdb_id.lower()
        tcr_found = False

        try:
            if _download(url, filename):
                # Inspect the start of the file once, instead of testing every
                # chunk, so a later chunk cannot mask the STCRDab error message.
                with open(filename, "rb") as file:
                    head = file.read(1024)
                tcr_found = bool(head) and not_found_marker not in head
        except requests.exceptions.Timeout:
            warnings.warn(f"Request to STCRDab ({url}) timed out. Trying RCSB.")
        except requests.exceptions.RequestException as e:
            warnings.warn(
                f"Request to STCRDab ({url}) failed with error {e}. Trying RCSB."
            )

        if not tcr_found:
            # Request from RCSB data base
            cif_name = f"{pdb_id.upper()}.cif"
            filename = os.path.join(scratch_dir, cif_name)
            url = pdb_base_url + cif_name
            if not _download(url, filename):
                # Nothing to parse: report and stop instead of handing a
                # missing file to the parser.
                warnings.warn(f"Failed to download file for {pdb_id} from {url}")
                return None

        tcr_parser = TCRParser()
        tcr = list(tcr_parser.get_tcr_structure(pdb_id, filename).get_TCRs())

    if len(tcr) == 1:
        return tcr[0]
    if not tcr:
        warnings.warn(f"No TCRs identified in {pdb_id}")
        return None
    return tcr
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
def tcrs_to_AF3_json(tcrs, path=None, **kwargs):
    """Write several TCRs as one multi-job AF3 JSON file.

    Parameters:
        tcrs: Non-empty sequence of either file paths (str), which are loaded
            with ``load_TCRs``, or ``TCR`` objects. Only the first element is
            inspected to decide which.
        path: Output file; defaults to ``stcrpy_AF3_TCRs.json``.
        **kwargs: Forwarded to ``to_AF3_json`` for every TCR.

    Returns:
        The list of per-TCR job objects that was written to ``path``.
    """
    import json

    from ..tcr_formats.tcr_formats import to_AF3_json

    first = tcrs[0]
    if not isinstance(first, str):
        from ..tcr_processing.TCR import TCR

        assert isinstance(first, TCR)
    else:
        from .tcr_methods import load_TCRs

        tcrs = load_TCRs(tcrs)

    jobs = []
    for tcr in tcrs:
        # save=False: the individual jobs are only collected here and written
        # once, together, below.
        jobs.append(to_AF3_json(tcr, save=False, **kwargs))

    if path is None:
        path = "stcrpy_AF3_TCRs.json"
    with open(path, "w") as handle:
        json.dump(jobs, handle)
    print(f"{len(tcrs)} saved as AF3 json job: {path}")
    return jobs
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Atom-name vocabulary; the order of entries is significant for any code that
# indexes into this list.
# NOTE(review): these look like PDB heavy-atom names of the standard amino
# acids — confirm against the code that consumes this list.
ATOM_TYPES = [
    "N", "CA", "C", "CB", "O",
    "CG", "CG1", "CG2", "OG", "OG1", "SG",
    "CD", "CD1", "CD2", "ND1", "ND2", "OD1", "OD2", "SD",
    "CE", "CE1", "CE2", "CE3", "NE", "NE1", "NE2", "OE1", "OE2",
    "CH2", "NH1", "NH2", "OH",
    "CZ", "CZ2", "CZ3", "NZ",
    "OXT",
]
|