stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
import Bio
|
|
3
|
+
from Bio.PDB.Superimposer import Superimposer
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
class InterfaceRMSD:
    """Interface RMSD between a docked TCR complex and a reference complex.

    The workflow implemented by :meth:`get_interface_rmsd` is:

    1. map every dock chain onto a reference chain by comparing residue names,
    2. find the reference residues whose C-alpha atoms lie within a distance
       cutoff of the partner (TCR vs. antigen),
    3. superimpose the dock onto the reference using MHC backbone atoms,
    4. compute the RMSD over the atoms of the interface residues.
    """

    # Elements of the atoms that contribute to the interface RMSD.
    _INTERFACE_ELEMENTS = ("N", "O", "C", "S")
    # Backbone atom names used to superimpose the MHC.
    _MHC_ALIGNMENT_ATOMS = ("N", "C", "O", "CA")

    def __init__(self):
        return

    def get_interface_rmsd(self, dock: "abTCR", reference: "abTCR") -> "float | None":
        """
        Calculates the root-mean-square deviation (RMSD) between the interface residues of a
        docked TCR structure and a reference TCR structure.

        Note that the coordinates of ``dock.parent`` are transformed in place by the MHC
        superposition performed here (see :meth:`align_by_mhc`).

        Args:
            dock (abTCR): The docked TCR structure.
            reference (abTCR): The reference TCR structure.

        Returns:
            float: The RMSD value between the interface residues of the docked TCR structure and
            the reference TCR structure. ``None`` is returned (with a warning) when a reference
            interface residue cannot be found in the dock, and NaN is returned (with a warning)
            when the two interfaces share no atoms.
        """

        # check all residues in dock can be mapped to reference
        dock_to_ref_chain_mapping = self.check_residue_mapping(dock, reference)

        # get the interface of the reference
        tcr_interface, antigen_interface = self.get_interface_residues(reference)

        # align the dock by MHC
        self.align_by_mhc(dock, reference, dock_to_ref_chain_mapping)

        # get the docked residues found in the reference interface
        ref_to_dock_chain_mapping = {v: k for k, v in dock_to_ref_chain_mapping.items()}

        try:
            docked_tcr_interface = [
                dock[ref_to_dock_chain_mapping[res.parent.id]][res.id]
                for res in tcr_interface
            ]
            docked_antigen_interface = [
                dock.parent[ref_to_dock_chain_mapping[res.parent.id]][res.id]
                for res in antigen_interface
            ]
        except KeyError as e:
            dock_name = " ".join(str(i) for i in dock.full_id)
            reference_name = " ".join(str(i) for i in reference.full_id)
            warnings.warn(
                f"Key error {str(e)} matching dock chains to reference chains for dock: "
                f"{dock_name} to reference {reference_name}. "
                "Interface RMSD could not be calculated."
            )
            return None

        # Extract coordinates from the interfaces. Atoms are paired by name
        # within each residue pair, so the two coordinate arrays always have
        # the same length and row k of one corresponds to row k of the other
        # even if the two files list the atoms of a residue in different order.
        reference_coordinates = []
        docked_coordinates = []
        residue_pairs = zip(
            tcr_interface + antigen_interface,
            docked_tcr_interface + docked_antigen_interface,
        )
        for ref_res, dock_res in residue_pairs:
            dock_atoms = {a.id: a for a in dock_res.get_atoms()}
            for ref_atom in ref_res:
                dock_atom = dock_atoms.get(ref_atom.id)
                if dock_atom is None:
                    continue
                if (
                    ref_atom.element not in self._INTERFACE_ELEMENTS
                    or dock_atom.element not in self._INTERFACE_ELEMENTS
                ):
                    continue
                reference_coordinates.append(ref_atom.get_coord())
                docked_coordinates.append(dock_atom.get_coord())

        if not docked_coordinates:
            # Make the degenerate case explicit instead of dividing 0 by 0.
            warnings.warn(
                "No atoms shared between the docked and reference interfaces. "
                "Interface RMSD is undefined."
            )
            return np.nan

        docked_coordinates = np.asarray(docked_coordinates)
        reference_coordinates = np.asarray(reference_coordinates)

        # calculate rmsd: squared deviations summed over x, y, z and averaged per atom
        rmsd = np.sqrt(
            ((docked_coordinates - reference_coordinates) ** 2).sum()
            / len(docked_coordinates)
        )
        return rmsd

    @staticmethod
    def _require_matching_residues(residues, reference_chain, message):
        """Raise AssertionError(message) if any residue name differs from the
        residue with the same id in reference_chain.

        A KeyError from ``reference_chain[res.id]`` is deliberately not caught
        here; the caller decides whether it triggers the chain-swap retry.
        """
        for res in residues:
            if res.resname != reference_chain[res.id].resname:
                # Explicit raise (not ``assert``) so the check survives ``python -O``.
                raise AssertionError(message)

    def check_residue_mapping(self, dock: "abTCR", reference: "abTCR") -> dict:
        """
        Maps each chain of the dock onto a chain of the reference by comparing residue names.

        The i-th dock chain is first compared against the i-th reference chain. If a residue
        name differs or a residue id is missing, the chain at index ``1 - i`` is tried instead
        (i.e. the two chains are assumed to be swapped). The same is done for antigen chains.

        Args:
            dock (abTCR): The docked TCR structure.
            reference (abTCR): The reference TCR structure.

        Returns:
            dict: Mapping of dock chain id -> reference chain id for TCR and antigen chains.

        Raises:
            AssertionError: If residue names still differ after the chain swap.
            KeyError: If a residue id is still missing after the chain swap.
        """
        chain_mapping = {}

        reference_chains = list(reference.get_chains())
        for i, tcr_chain in enumerate(dock.get_chains()):
            # avoids small mismatches at beginnings and ends of sequences
            upper = len(tcr_chain) - 2
            core_residues = [
                res
                for j, res in enumerate(tcr_chain.get_residues())
                if 3 < j < upper
            ]
            try:
                candidate = reference_chains[i]
                self._require_matching_residues(
                    core_residues,
                    candidate,
                    f"\nTCR chain mapping {tcr_chain.id} -> {candidate} failed. Trying chain swap.",
                )
            except (AssertionError, KeyError):
                candidate = reference_chains[1 - i]
                self._require_matching_residues(
                    core_residues,
                    candidate,
                    f"\nTCR chain mapping {tcr_chain.id} -> {candidate} failed. Residue mapping failed",
                )
            chain_mapping[tcr_chain.id] = candidate.id

        for i, antigen_chain in enumerate(dock.get_antigen()):
            reference_antigens = reference.get_antigen()
            antigen_residues = list(antigen_chain.get_residues())
            try:
                candidate = reference_antigens[i]
                self._require_matching_residues(
                    antigen_residues,
                    candidate,
                    f"\nantigen chain mapping {antigen_chain.id} -> {candidate} failed. Trying chain swap.",
                )
            except (AssertionError, KeyError):
                candidate = reference_antigens[1 - i]
                self._require_matching_residues(
                    antigen_residues,
                    candidate,
                    f"\nantigen chain mapping {antigen_chain.id} -> {candidate} failed. Residue mapping failed",
                )
            chain_mapping[antigen_chain.id] = candidate.id
        return chain_mapping

    def get_interface_residues(
        self, tcr: "abTCR", angstrom_cutoff: float = 8.0
    ) -> tuple:
        """
        Retrieves the interface residues between a TCR and its antigen based on a distance cutoff.

        A TCR residue and an antigen residue are in contact when the distance between their
        C-alpha atoms is at most ``angstrom_cutoff``.

        Args:
            tcr (abTCR): The TCR object.
            angstrom_cutoff (float, optional): The distance cutoff in angstroms. Defaults to 8.0.

        Returns:
            tuple: A tuple containing two lists: the interface residues of the TCR and the
            interface residues of the antigen, each in the order in which their C-alpha
            atoms are yielded. Both lists are empty if either side has no C-alpha atoms.
        """
        tcr_c_alphas = [atom for atom in tcr.get_atoms() if atom.id == "CA"]
        antigen_c_alphas = [
            atom
            for chain in tcr.get_antigen()
            for atom in chain.get_atoms()
            if atom.id == "CA"
        ]

        # reshape(-1, 3) keeps a well-formed (0, 3) array when a side is empty,
        # so the pairwise computation below yields "no contacts" instead of
        # failing to broadcast.
        tcr_c_coords = np.asarray([x.get_coord() for x in tcr_c_alphas]).reshape(-1, 3)
        antigen_c_coords = np.asarray(
            [x.get_coord() for x in antigen_c_alphas]
        ).reshape(-1, 3)

        # (n_tcr, 1, 3) - (1, n_antigen, 3) -> (n_tcr, n_antigen, 3)
        deltas = tcr_c_coords[:, None, :] - antigen_c_coords[None, :, :]
        pairwise_distances = np.sqrt((deltas**2).sum(-1))

        contacts = np.argwhere(pairwise_distances <= angstrom_cutoff)
        # np.unique de-duplicates and sorts, giving a stable residue order.
        tcr_interface_idx = np.unique(contacts[:, 0])
        antigen_interface_idx = np.unique(contacts[:, 1])
        tcr_interface = [tcr_c_alphas[idx].parent for idx in tcr_interface_idx]
        antigen_interface = [
            antigen_c_alphas[idx].parent for idx in antigen_interface_idx
        ]

        return tcr_interface, antigen_interface

    @staticmethod
    def _index_residues(chains) -> dict:
        """Return {(position of chain in ``chains``, residue id): residue}.

        The chain position is part of the key (rather than the chain id) so
        that equivalent chains of two structures share keys even when their
        chain ids differ.
        """
        return {
            (position, res.id): res
            for position, chain in enumerate(chains)
            for res in chain.get_residues()
        }

    def align_by_mhc(
        self, dock: "abTCR", reference: "abTCR", chain_mapping: dict
    ) -> None:
        """
        Aligns the docked TCR structure to the reference TCR structure by aligning the MHC.

        The superposition is computed on the N, C, O and CA atoms of the MHC residues that are
        present (by residue id) in both structures, and is then applied in place to every atom
        of ``dock.parent``.

        Args:
            dock (abTCR): The docked TCR structure.
            reference (abTCR): The reference TCR structure.
            chain_mapping (dict): A dictionary mapping the chain IDs of the docked TCR structure to the chain IDs of the
                reference TCR structure. Currently not used by the alignment.

        Returns:
            None

        Raises:
            AssertionError: If the dock has no MHC.
        """
        dock_mhcs = dock.get_MHC()
        if len(dock_mhcs) < 1:
            # Explicit raise (not ``assert``) so the check survives ``python -O``.
            raise AssertionError("No MHC chains found")
        dock_mhc = dock_mhcs[0]

        if hasattr(dock_mhc, "get_MH1"):
            # Class I MHC: the helices sit on a single chain.
            reference_mhc = reference.get_MHC()[0]
            dock_chains = [dock_mhc.get_MH1()]
            reference_chains = [reference_mhc.get_MH1()]
        else:
            # Class II MHC: use the GA and GB chains together. The residues
            # are indexed directly instead of wrapping the chains in a new
            # Bio.PDB Entity, which needs an id, would re-parent the chains,
            # and cannot be indexed by residue id.
            # NOTE(review): assumes get_GA()/get_GB() return chain-like
            # objects exposing get_residues() - confirm against MHC.py.
            reference_mhc = reference.get_MHC()[0]
            dock_chains = [dock_mhc.get_GA(), dock_mhc.get_GB()]
            reference_chains = [reference_mhc.get_GA(), reference_mhc.get_GB()]

        dock_residues = self._index_residues(dock_chains)
        reference_residues = self._index_residues(reference_chains)

        # Pair atoms by name within each shared residue so both lists always
        # have equal length, even when a backbone atom is missing on one side.
        reference_atoms = []
        docked_atoms = []
        for key, ref_res in reference_residues.items():
            dock_res = dock_residues.get(key)
            if dock_res is None:
                continue
            ref_by_name = {a.id: a for a in ref_res.get_atoms()}
            dock_by_name = {a.id: a for a in dock_res.get_atoms()}
            for atom_name in self._MHC_ALIGNMENT_ATOMS:
                if atom_name in ref_by_name and atom_name in dock_by_name:
                    reference_atoms.append(ref_by_name[atom_name])
                    docked_atoms.append(dock_by_name[atom_name])

        superimposer = Superimposer()
        superimposer.set_atoms(reference_atoms, docked_atoms)
        superimposer.apply(dock.parent.get_atoms())
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from Bio.PDB.Superimposer import Superimposer
|
|
6
|
+
|
|
7
|
+
from . import constants
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RMSD:
    """Backbone RMSD between two TCR structures, per chain and per region.

    For each of the two chain types the variable domain to align is
    superimposed onto the reference on shared backbone atoms; RMSDs are then
    reported for the whole chain, each CDR loop and the framework.
    """

    def __init__(self):
        return

    @staticmethod
    def _retrieve_chain(tcr, chain_type):
        """Return the chain of ``tcr`` assigned to variable domain ``chain_type``.

        If the TCR has no ``V{chain_type}`` domain, "A" falls back to "G" and
        "B" falls back to "D".

        Args:
            tcr: TCR object providing ``get_domain_assignment()`` and lookup
                of chains by id.
            chain_type (str): One of "A", "B", "G" or "D".

        Returns:
            The chain object looked up on ``tcr``.

        Raises:
            AssertionError: If ``chain_type`` is not a recognised chain type.
            KeyError: If neither the requested domain nor its fallback exists.
        """
        if chain_type not in ("A", "B", "G", "D"):
            # Explicit raise (not ``assert``) so the check survives ``python -O``.
            raise AssertionError("TCR chain type not recognised")
        try:
            return tcr[tcr.get_domain_assignment()[f"V{chain_type}"]]
        except KeyError:
            # map chain type A to G and B to D
            fallback = {"A": "G", "B": "D"}.get(chain_type)
            if fallback is None:
                # "G"/"D" have no fallback: surface the original lookup error
                # rather than an unrelated KeyError from the fallback table.
                raise
            return tcr[tcr.get_domain_assignment()[f"V{fallback}"]]

    @staticmethod
    def _rmsd(x1, x2):
        """Return the RMSD between two equally shaped arrays of 3D coordinates.

        Args:
            x1, x2 (np.ndarray): Arrays of identical shape whose last axis has size 3.

        Returns:
            The root of the mean squared distance between corresponding points.
        """
        assert x1.shape == x2.shape
        assert x1.shape[-1] == 3
        # np.mean averages over every element (3 per point), so multiplying by
        # 3 converts it into the mean squared distance per point.
        return np.sqrt(np.mean(3 * (x1 - x2) ** 2))

    def _region_rmsd(self, ref_residues, aligned_residues, shared_ids, atom_names, label):
        """RMSD over the atoms in ``atom_names`` of residues present in both regions.

        Atoms are paired by residue id and atom name and are only used when
        present on both sides, so the two coordinate arrays always match.

        Args:
            ref_residues: Iterable of reference residues.
            aligned_residues: Iterable of residues of the aligned structure.
            shared_ids (set): Residue ids eligible for the comparison.
            atom_names: Names of the atoms to compare.
            label (str): Region name used in the warning message.

        Returns:
            The RMSD, or NaN (with a warning) if no atom is shared.
        """
        aligned_by_id = {r.id: r for r in aligned_residues}
        ref_coords = []
        aligned_coords = []
        for ref_res in ref_residues:
            if ref_res.id not in shared_ids:
                continue
            aligned_res = aligned_by_id.get(ref_res.id)
            if aligned_res is None:
                continue
            for atom_name in atom_names:
                if atom_name in ref_res and atom_name in aligned_res:
                    ref_coords.append(ref_res[atom_name].get_coord())
                    aligned_coords.append(aligned_res[atom_name].get_coord())

        if not ref_coords:
            warnings.warn(
                f"No shared backbone atoms found for {label}; RMSD set to NaN."
            )
            return float("nan")
        return self._rmsd(np.asarray(ref_coords), np.asarray(aligned_coords))

    def calculate_rmsd(self, tcr_to_align, tcr_ref, save_alignment=False):
        """Calculate chain, CDR and framework RMSDs of a TCR against a reference.

        For each chain type ("A" then "B", falling back to "G"/"D" as done by
        ``_retrieve_chain``) the chain of ``tcr_to_align`` is superimposed onto
        the reference chain using the first four entries of
        ``constants.ATOM_TYPES`` of all residues that are present in both
        chains and contain no disordered atoms. The transformation is applied
        in place to all atoms of ``tcr_to_align``.

        Args:
            tcr_to_align: TCR whose coordinates are superimposed onto the reference.
            tcr_ref: Reference TCR.
            save_alignment (bool): If True, save both TCRs to PDB files after
                each chain alignment.

        Returns:
            dict: Keys are the chain type ("A", "B"), "CDR{chain}{n}" for each
            CDR loop and "FW{chain}" for the framework; values are RMSDs.
        """
        rmsds = {}
        backbone_atoms = constants.ATOM_TYPES[:4]
        for chain_type in ["A", "B"]:
            chain_to_align = self._retrieve_chain(tcr_to_align, chain_type)
            ref_chain = self._retrieve_chain(tcr_ref, chain_type)

            # Residue ids of reference residues without disordered atoms.
            ref_residue_numbering = {
                x.id
                for x in ref_chain.get_residues()
                if not any(a.is_disordered() for a in x.get_atoms())
            }
            # Ordered list for alignment, plus a set for fast membership tests.
            residue_numbering_intersection = [
                x.id
                for x in chain_to_align.get_residues()
                if x.id in ref_residue_numbering
                and not any(a.is_disordered() for a in x.get_atoms())
            ]
            shared_ids = set(residue_numbering_intersection)

            # Get residues to align
            residue_pairs = [
                (ref_chain[x], chain_to_align[x])
                for x in residue_numbering_intersection
                if (x in chain_to_align) and (x in ref_chain)
            ]

            # Get backbone atoms to align
            fixed = []
            moved = []
            for ref_residue, residue_to_align in residue_pairs:
                for atom in backbone_atoms:
                    if (atom in residue_to_align) and (atom in ref_residue):
                        fixed.append(ref_residue[atom])
                        moved.append(residue_to_align[atom])

            # Calculate superimposer and align
            imposer = Superimposer()
            imposer.set_atoms(fixed, moved)
            imposer.apply(tcr_to_align.get_atoms())

            # whole chain RMSD after alignment, over the backbone atoms used
            # for the superposition
            rmsds[chain_type] = imposer.rms

            if save_alignment:
                tcr_ref.save(
                    save_as=f"{tcr_ref.parent.parent.id}_RMSD_reference_alignment_{chain_type}.pdb",
                    tcr_only=True,
                )
                tcr_to_align.save(
                    save_as=f"{tcr_to_align.parent.parent.id}_RMSD_aligned_to_{tcr_ref.id}_{chain_type}.pdb",
                    tcr_only=True,
                )

            # calculate CDR RMSD
            for CDR_loop_nr, (ref_CDR, aligned_CDR) in enumerate(
                zip(ref_chain.get_CDRs(), chain_to_align.get_CDRs())
            ):
                region = f"CDR{chain_type}{CDR_loop_nr + 1}"
                rmsds[region] = self._region_rmsd(
                    ref_CDR, aligned_CDR, shared_ids, backbone_atoms, region
                )

            # calculate framework RMSD
            ref_framework_residues = {
                r.id: r
                for fw in ref_chain.get_frameworks()
                for r in fw.get_residues()
                if r.id in shared_ids
            }
            aligned_framework_residues = {
                r.id: r
                for fw in chain_to_align.get_frameworks()
                for r in fw.get_residues()
                if r.id in shared_ids
            }

            region = f"FW{chain_type}"
            rmsds[region] = self._region_rmsd(
                ref_framework_residues.values(),
                aligned_framework_residues.values(),
                shared_ids,
                backbone_atoms,
                region,
            )
        return rmsds

    def rmsd_from_files(self, pred_and_target_files: list) -> pd.DataFrame:
        """Calculates the RMSD between TCR structures from a list of files.

        Args:
            pred_and_target_files (list of tuples): List of tuples, where each tuple contains
                the path to the predicted PDB at index 0 and the path to the target PDB at index 1.

        Returns:
            pandas.DataFrame: Pandas dataframe indexed by the TCR ID of the predicted structure, with columns
                containing the RMSD of the whole alpha and beta chain, and all framework and CDR regions.
        """
        # Imported here rather than at module level, as in the original code.
        from ..tcr_methods.tcr_methods import load_TCRs

        all_rmsds = {}
        for pred_tcr_file, target_tcr_file in pred_and_target_files:
            pred_tcr, target_tcr = load_TCRs([pred_tcr_file, target_tcr_file])
            all_rmsds[pred_tcr.parent.parent.id] = self.calculate_rmsd(
                pred_tcr, target_tcr
            )
        return pd.DataFrame(all_rmsds).transpose()
|
|
File without changes
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Created on 10 May 2017
|
|
3
|
+
@author: leem
|
|
4
|
+
|
|
5
|
+
Based on the AGchain class from ABDB.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from Bio.PDB.Chain import Chain
|
|
11
|
+
from .Chemical_components import get_res_type
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AGchain(Chain):
    """
    Chain class for everything that is neither a TCR nor an MHC chain
    (for example the peptide).
    """

    def __init__(self, identifier):
        Chain.__init__(self, identifier)
        self.level = "C"
        # Filled in by set_type(); empty until then.
        self.type = ""
        self.engineered = False

    def set_type(self):
        """
        Classify the chain from the residue types reported by get_res_type
        for each child residue, and store the result in ``self.type``.

        The possible values are "peptide" (fewer than 30 peptide residues),
        "protein", "nucleic-acid", "carbohydrate", "non-polymer" and
        "unknown". A warning is printed to stderr for "unknown".
        """
        # Key order matters: max() below returns the first key among ties.
        counts = dict.fromkeys(
            ("peptide", "nucleic-acid", "non-polymer", "saccharide", None), 0
        )
        for residue in self.child_list:
            counts[get_res_type(residue)] += 1

        # As soon as any polymer residue is present, non-polymer and
        # unclassified residues no longer take part in the vote.
        polymer_kinds = ("nucleic-acid", "peptide", "saccharide")
        if any(counts[kind] for kind in polymer_kinds):
            counts["non-polymer"] = 0
            counts[None] = 0

        dominant = max(counts, key=counts.get)

        if dominant == "peptide":
            # Short peptide chains are "peptide", longer ones "protein".
            self.type = "peptide" if counts["peptide"] < 30 else "protein"
            return

        type_names = {
            "nucleic-acid": "nucleic-acid",
            "saccharide": "carbohydrate",
            "non-polymer": "non-polymer",
        }
        if dominant in type_names:
            self.type = type_names[dominant]
            return

        # Remaining case: unclassified residues dominate.
        print("Warning: Unknown antigen type for chain %s" % self.id, file=sys.stderr)
        self.type = "unknown"

    def get_type(self):
        """Return the chain type stored by set_type() ("" if never set)."""
        return self.type

    def set_engineered(self, engineered):
        """Store the truthiness of ``engineered`` as the engineered flag."""
        self.engineered = True if engineered else False

    def is_engineered(self):
        """Return the engineered flag."""
        return self.engineered
|