stcrpy 1.0.3__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stcrpy/__init__.py +1 -1
- stcrpy/tcr_formats/tcr_formats.py +20 -1
- stcrpy/tcr_geometry/TCRAngle.py +177 -0
- stcrpy/tcr_geometry/reference_data/Acoreset.txt +30 -0
- stcrpy/tcr_geometry/reference_data/Bcoreset.txt +30 -0
- stcrpy/tcr_geometry/reference_data/consensus_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/consensus_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/consensus_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/consensus_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/pcA.txt +3 -0
- stcrpy/tcr_geometry/reference_data/pcB.txt +3 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +8 -1
- stcrpy/tcr_methods/tcr_batch_operations.py +14 -10
- stcrpy/tcr_methods/tcr_methods.py +23 -22
- stcrpy/tcr_metrics/tcr_dockq.py +404 -0
- stcrpy/tcr_processing/MHC.py +389 -4
- stcrpy/tcr_processing/TCR.py +252 -0
- stcrpy/tcr_processing/TCRParser.py +351 -189
- stcrpy/tcr_processing/annotate.py +6 -1
- stcrpy/tcr_processing/utils/region_definitions.py +9 -0
- stcrpy/tcr_processing/utils/symmetry_mates.py +96 -0
- stcrpy-1.0.6.dist-info/METADATA +286 -0
- {stcrpy-1.0.3.dist-info → stcrpy-1.0.6.dist-info}/RECORD +27 -16
- {stcrpy-1.0.3.dist-info → stcrpy-1.0.6.dist-info}/WHEEL +1 -1
- stcrpy-1.0.3.dist-info/METADATA +0 -173
- {stcrpy-1.0.3.dist-info → stcrpy-1.0.6.dist-info}/licenses/LICENCE +0 -0
- {stcrpy-1.0.3.dist-info → stcrpy-1.0.6.dist-info}/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +0 -0
- {stcrpy-1.0.3.dist-info → stcrpy-1.0.6.dist-info}/top_level.txt +0 -0
stcrpy/__init__.py
CHANGED
|
@@ -2,4 +2,4 @@ from .tcr_processing.TCRParser import TCRParser
|
|
|
2
2
|
from .tcr_processing.TCRIO import TCRIO
|
|
3
3
|
from .tcr_geometry.TCRDock import TCRDock
|
|
4
4
|
from .tcr_geometry.TCRGeom import TCRGeom
|
|
5
|
-
from .tcr_methods.tcr_methods import load_TCRs,
|
|
5
|
+
from .tcr_methods.tcr_methods import load_TCRs, fetch_TCRs, yield_TCRs, load_TCR
|
|
@@ -99,7 +99,7 @@ def get_sequences(
|
|
|
99
99
|
for chain in entity.get_chains()
|
|
100
100
|
}
|
|
101
101
|
except AttributeError as e:
|
|
102
|
-
if entity.level == "C":
|
|
102
|
+
if entity.level == "C" or entity.level == "F": # covers chains and fragments
|
|
103
103
|
sequences = {
|
|
104
104
|
entity.id: seq1(
|
|
105
105
|
"".join(
|
|
@@ -112,3 +112,22 @@ def get_sequences(
|
|
|
112
112
|
if amino_acids_only:
|
|
113
113
|
sequences = {k: seq.replace("X", "") for k, seq in sequences.items()}
|
|
114
114
|
return sequences
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def merge_chains(chains, new_chain_id=None):
    """Merge several chains into one new chain with sequential residue numbers.

    Every residue of every input chain is copied (the inputs are left
    untouched) and appended to a fresh ``Bio.PDB.Chain.Chain``.  Copied
    residues are renumbered 1, 2, 3, ... in iteration order and receive the
    id ``(" ", n, " ")``, i.e. any hetero flag or insertion code of the
    source residue is discarded.

    Args:
        chains: Sequence of chain-like objects exposing ``id`` and
            ``get_residues()``.
        new_chain_id: Id of the merged chain.  When ``None`` the id is built
            from the ids of the first two chains joined by ``"_"`` (for
            example ``"A_B"``); a single input chain keeps its own id.

    Returns:
        The newly created chain holding the renumbered residue copies.

    Raises:
        ValueError: If ``chains`` is empty and no ``new_chain_id`` is given,
            because no default id can be derived.
    """
    if new_chain_id is None:
        if len(chains) == 0:
            raise ValueError(
                "merge_chains needs at least one chain to derive a default chain id"
            )
        # Only the first two ids are used so the two-chain default stays
        # "<id0>_<id1>" exactly as before; a lone chain no longer raises
        # IndexError on chains[1].
        new_chain_id = "_".join(str(chain.id) for chain in chains[:2])

    from Bio import PDB

    new_chain = PDB.Chain.Chain(new_chain_id)
    new_res_id = 1

    for chain in chains:
        for residue in chain.get_residues():
            new_residue = residue.copy()
            new_residue.id = (" ", new_res_id, " ")

            new_chain.add(new_residue)
            new_res_id += 1

    return new_chain
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import math
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from Bio.SVDSuperimposer import SVDSuperimposer
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def superimpose(coordsA, coordsB):
    """Run an SVD superimposition of two coordinate sets.

    Args:
        coordsA: Coordinate array handed to ``SVDSuperimposer.set`` as the
            first (reference) argument.
        coordsB: Coordinate array handed over as the second argument, i.e.
            the set that is fitted onto ``coordsA``.

    Returns:
        The ``SVDSuperimposer`` instance after ``run()``; callers read the
        fit from it, e.g. via ``get_rotran()``.
    """
    fitter = SVDSuperimposer()
    fitter.set(coordsA, coordsB)
    fitter.run()
    return fitter
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TCRAngle:
    """Calculate the relative orientation of the two variable domains of a TCR.

    Each variable domain of the query TCR is fitted to a consensus set of CA
    atoms (the "coreset" positions).  Reference points read from the ``pc``
    files are carried into the query's coordinate frame with the fitted
    transforms, and five angles plus one distance are derived from them.
    All reference data is read from the ``reference_data`` directory located
    next to this module.
    """

    def __init__(self, tcr_type="abTCR"):
        """Unified class for calculating TCR orientation angles for abTCRs and gdTCRs.

        Args:
            tcr_type: ``"abTCR"`` or ``"gdTCR"``; selects the reference data
                that is loaded up front.  ``calculate_angles`` reloads the
                data on its own when it is handed the other TCR type.

        Raises:
            ValueError: If ``tcr_type`` is not a key of ``chain_configs``.
        """

        self.dat_path = os.path.join(os.path.dirname(__file__), "reference_data")

        from stcrpy.tcr_processing.TCRParser import TCRParser  # avoids circular import

        self.sparser = TCRParser(QUIET=True)

        # Per TCR type: the chain letters (first chain, second chain), the
        # reference files for each of the two chains, and the output labels.
        self.chain_configs = {
            "abTCR": {
                "chains": ("A", "B"),
                "consensus": ("consensus_A.pdb", "consensus_B.pdb"),
                "pc": ("pcA.txt", "pcB.txt"),
                "coreset": ("Acoreset.txt", "Bcoreset.txt"),
                "angle_labels": ["BA", "BC1", "AC1", "BC2", "AC2", "dc"],
            },
            # The pc and coreset files are shared with the abTCR config.
            # NOTE(review): the shipped consensus_G.pdb carries the same
            # coordinates and residue numbers as consensus_B.pdb (and
            # consensus_D.pdb those of consensus_A.pdb), yet chain G is paired
            # here with pcA/Acoreset -- confirm the intended chain/reference
            # mapping for gdTCRs.
            "gdTCR": {
                "chains": ("G", "D"),
                "consensus": ("consensus_G.pdb", "consensus_D.pdb"),
                "pc": ("pcA.txt", "pcB.txt"),
                "coreset": ("Acoreset.txt", "Bcoreset.txt"),
                "angle_labels": ["DG", "DC1", "GC1", "DC2", "GC2", "dc"],
            },
        }

        self._init_tcr_type_specific_reference_data(tcr_type)

    def _init_tcr_type_specific_reference_data(self, tcr_type):
        """Select the config for ``tcr_type`` and (re)load its reference data.

        ``self.tcr_type`` and ``self.cfg`` are only updated after the type
        has been validated.

        Raises:
            ValueError: If ``tcr_type`` is not a key of ``chain_configs``.
        """
        if tcr_type not in self.chain_configs:
            raise ValueError("tcr_type must be 'abTCR' or 'gdTCR'")
        self.tcr_type = tcr_type
        self.cfg = self.chain_configs[tcr_type]
        self._read_consensus()
        self._read_pc()
        self._read_coreset()

    def _normalise(self, v):
        """Return ``v`` as a NumPy array scaled to unit Euclidean length.

        A zero-length vector is not special-cased; NumPy's division then
        yields NaNs rather than raising.
        """
        a = np.array(v)
        return a / np.linalg.norm(a)

    @staticmethod
    def _angle_degrees(u, v):
        """Return the angle in degrees between two unit vectors.

        The dot product is clamped to [-1, 1] first: floating point rounding
        can push the cosine of (anti)parallel unit vectors marginally outside
        that interval, which would make ``math.acos`` raise
        ``ValueError: math domain error``.
        """
        cosine = float(np.clip(np.dot(u, v), -1.0, 1.0))
        return math.degrees(math.acos(cosine))

    @staticmethod
    def _read_float_rows(path):
        """Read a whitespace-separated text file into a list of float rows.

        Blank lines are skipped; the file is closed before returning.
        """
        with open(path) as handle:
            return [list(map(float, line.split())) for line in handle if line.strip()]

    @staticmethod
    def _read_residue_numbers(path):
        """Read one residue label per line and return the residue numbers.

        The first character of every label is dropped and the remainder is
        parsed as an integer (``"A16"`` -> ``16``).  Blank lines are skipped;
        the file is closed before returning.
        """
        with open(path) as handle:
            return [int(line.strip()[1:]) for line in handle if line.strip()]

    def _read_consensus(self):
        """Parse both consensus structures and cache their atoms.

        The atoms of each consensus structure are stored sorted by the
        residue number of their parent residue so they line up with the
        equally sorted coreset atoms gathered in ``calculate_angles``.
        """
        c1, c2 = self.cfg["chains"]
        f1, f2 = self.cfg["consensus"]
        self.consensus_1 = self.sparser.get_tcr_structure(
            c1, os.path.join(self.dat_path, f1)
        )
        self.consensus_2 = self.sparser.get_tcr_structure(
            c2, os.path.join(self.dat_path, f2)
        )

        self.consensus_1_atoms = sorted(
            list(self.consensus_1.get_atoms()), key=lambda x: x.parent.id[1]
        )
        self.consensus_2_atoms = sorted(
            list(self.consensus_2.get_atoms()), key=lambda x: x.parent.id[1]
        )

    def _read_pc(self):
        """Load the ``pc`` files and derive the reference points of both chains.

        For each chain the file rows are stored as ``pos1`` / ``pos2``.  A
        centre point is built as a fixed linear combination of rows 0 and 1
        plus row 2 (``c1`` / ``c2``), and two further points are obtained by
        adding row 0 respectively row 1 to that centre (``p1a``/``p1b`` and
        ``p2a``/``p2b``).
        """
        f1, f2 = self.cfg["pc"]
        self.pos1 = self._read_float_rows(os.path.join(self.dat_path, f1))
        self.pos2 = self._read_float_rows(os.path.join(self.dat_path, f2))
        # The coefficients differ between the two chains; they are kept
        # exactly as published in the reference implementation of this class.
        self.c1 = [
            6 * 0.5 * self.pos1[0][i] - 2 * 0.5 * self.pos1[1][i] + self.pos1[2][i]
            for i in range(3)
        ]
        self.c2 = [
            -10 * 0.5 * self.pos2[0][i] + 1 * 0.5 * self.pos2[1][i] + self.pos2[2][i]
            for i in range(3)
        ]
        self.p1a = [self.c1[i] + self.pos1[0][i] for i in range(3)]
        self.p1b = [self.c1[i] + self.pos1[1][i] for i in range(3)]
        self.p2a = [self.c2[i] + self.pos2[0][i] for i in range(3)]
        self.p2b = [self.c2[i] + self.pos2[1][i] for i in range(3)]

    def _read_coreset(self):
        """Load the coreset residue numbers of both chains as lists of ints."""
        f1, f2 = self.cfg["coreset"]
        self.coreset1 = self._read_residue_numbers(os.path.join(self.dat_path, f1))
        self.coreset2 = self._read_residue_numbers(os.path.join(self.dat_path, f2))

    def _coreset_ca_atoms(self, tcr, domain, coreset, consensus_atoms):
        """Collect the CA atoms of ``tcr``'s ``domain`` at the coreset positions.

        Atoms are returned sorted by residue number.  Residues are matched on
        the residue number only (``res.id[1]``).

        Raises:
            ValueError: If the number of atoms found differs from the number
                of consensus atoms, since the two sets could not be
                superimposed one-to-one.
        """
        chain_id = tcr.get_domain_assignment()[domain]
        atoms = sorted(
            (res["CA"] for res in tcr[chain_id].get_residues() if res.id[1] in coreset),
            key=lambda atom: atom.parent.id[1],
        )
        if len(atoms) != len(consensus_atoms):
            raise ValueError(
                f"Found {len(atoms)} coreset CA atoms in domain {domain} but the "
                f"consensus structure has {len(consensus_atoms)}; cannot superimpose."
            )
        return atoms

    def calculate_angles(self, tcr):
        """Compute the orientation angles and centre distance for ``tcr``.

        If the class of ``tcr`` does not contain the currently loaded
        ``tcr_type`` in its name, the reference data for the class name of
        ``tcr`` is loaded first.

        Args:
            tcr: TCR object providing ``get_domain_assignment()`` and access
                to its chains by id.

        Returns:
            dict mapping the configured ``angle_labels`` to, in order: the
            signed angle between the two chains' first axes projected into
            the plane perpendicular to the centre-to-centre axis, four
            angles between the chain axes and that axis (all in degrees),
            and the distance between the two centre points.

        Raises:
            ValueError: If the class name of ``tcr`` is not a supported TCR
                type, or if a domain does not provide exactly one CA atom per
                consensus atom.
        """
        if self.tcr_type not in str(type(tcr)):
            # Hand the candidate name to the validating initialiser instead of
            # assigning self.tcr_type first: an unsupported type must not
            # leave this object with a tcr_type that disagrees with self.cfg.
            self._init_tcr_type_specific_reference_data(type(tcr).__name__)

        c1, c2 = self.cfg["chains"]
        tcr_chain_1_coreset_atoms = self._coreset_ca_atoms(
            tcr, "V" + c1, self.coreset1, self.consensus_1_atoms
        )
        tcr_chain_2_coreset_atoms = self._coreset_ca_atoms(
            tcr, "V" + c2, self.coreset2, self.consensus_2_atoms
        )

        # The query atoms are the fixed reference of each fit, so the returned
        # rotation/translation carry consensus-frame points into the query frame.
        rot1, tran1 = superimpose(
            np.asarray([a.coord for a in tcr_chain_1_coreset_atoms]),
            np.asarray([a.coord for a in self.consensus_1_atoms]),
        ).get_rotran()
        rot2, tran2 = superimpose(
            np.asarray([a.coord for a in tcr_chain_2_coreset_atoms]),
            np.asarray([a.coord for a in self.consensus_2_atoms]),
        ).get_rotran()

        points1 = [
            np.dot(self.c1, rot1) + tran1,
            np.dot(self.p1a, rot1) + tran1,
            np.dot(self.p1b, rot1) + tran1,
        ]
        points2 = [
            np.dot(self.c2, rot2) + tran2,
            np.dot(self.p2a, rot2) + tran2,
            np.dot(self.p2b, rot2) + tran2,
        ]

        # Unit vector from the first chain's centre to the second chain's
        # centre, and its opposite.
        C = self._normalise(points2[0] - points1[0])
        Cminus = -C

        # Unit axes of each chain, anchored at the chain's centre point.
        v1a = self._normalise(points1[1] - points1[0])
        v1b = self._normalise(points1[2] - points1[0])
        v2a = self._normalise(points2[1] - points2[0])
        v2b = self._normalise(points2[2] - points2[0])

        dc = np.linalg.norm(points2[0] - points1[0])

        # Project both first axes into the plane perpendicular to C and
        # measure the angle between the projections.
        n_x = np.cross(v1a, C)
        n_y = np.cross(C, n_x)
        tmp1 = self._normalise([0, np.dot(v1a, n_x), np.dot(v1a, n_y)])
        tmp2 = self._normalise([0, np.dot(v2a, n_x), np.dot(v2a, n_y)])

        angle = self._angle_degrees(tmp1, tmp2)
        if np.dot(np.cross(tmp1, tmp2), [1, 0, 0]) < 0:
            angle = -angle

        # NOTE(review): entries 1 and 3 are computed from the first chain in
        # cfg["chains"] (A / G) but are labelled "BC1"/"BC2" ("DC1"/"DC2") by
        # angle_labels -- confirm the label order is intended.
        results = [
            angle,
            self._angle_degrees(v1a, C),
            self._angle_degrees(v2a, Cminus),
            self._angle_degrees(v1b, C),
            self._angle_degrees(v2b, Cminus),
            dc,
        ]

        return dict(zip(self.cfg["angle_labels"], results))
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA GLY A 16 -12.766 -6.146 4.154 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLU A 17 -12.984 -3.096 1.985 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA ASN A 18 -10.777 -2.216 -0.940 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA THR A 20 -6.622 2.515 -3.475 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA ASN A 22 -2.375 6.893 -5.615 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA ASN A 38 10.871 1.660 -12.510 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA LEU A 39 8.338 2.153 -9.768 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA GLN A 40 8.338 0.459 -6.382 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA TRP A 41 5.923 0.253 -3.477 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA TYR A 42 7.139 -0.195 0.076 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA ARG A 43 5.258 -0.682 3.297 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA GLN A 44 6.468 0.441 6.681 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ASN A 45 4.955 -0.914 9.806 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA HIS A 52 9.234 -4.199 -0.523 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA LYS A 85 2.672 9.293 -15.022 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER A 86 0.797 7.515 -12.294 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA SER A 88 -1.761 3.077 -8.014 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA LEU A 89 -1.639 0.408 -5.358 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA LEU A 90 -4.860 -1.486 -4.764 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA SER A 94 -8.077 -5.060 3.718 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG A 95 -7.018 -6.656 6.969 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA ALA A 96 -5.916 -5.018 10.209 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ALA A 97 -2.373 -6.175 9.542 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP A 98 -2.346 -3.975 6.457 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR A 99 -2.284 -0.874 8.638 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA ALA A 100 0.968 0.902 7.897 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA SER A 101 2.587 3.693 5.962 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR A 102 2.875 3.032 2.230 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE A 103 5.639 4.573 0.152 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS A 104 5.899 4.893 -3.569 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA THR B 5 7.680 4.957 6.318 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLN B 6 4.157 4.013 5.323 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA PRO B 8 -1.597 4.163 7.655 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA LYS B 9 -0.563 0.753 8.980 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA PHE B 10 -4.125 -0.534 8.872 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA GLN B 11 -7.228 0.591 7.033 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA VAL B 12 -10.787 -0.634 6.761 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA LYS B 14 -16.366 0.265 2.522 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA THR B 15 -16.732 -0.673 -1.121 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA GLY B 16 -16.733 2.365 -3.364 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA GLN B 22 1.267 7.341 1.834 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA CYS B 23 4.911 6.380 1.563 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ALA B 24 7.861 8.498 2.594 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA TYR B 38 13.247 5.051 -7.303 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA MET B 39 10.086 4.125 -5.461 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER B 40 7.830 1.118 -5.919 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA TRP B 41 4.421 -0.118 -4.828 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA TYR B 42 3.741 -3.827 -4.381 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA ARG B 43 0.720 -5.770 -3.219 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA GLN B 44 0.883 -9.078 -1.429 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG B 51 3.180 -8.202 -7.285 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA LEU B 52 4.411 -4.907 -8.681 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ILE B 53 1.744 -2.252 -9.110 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP B 86 6.783 11.037 -1.067 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR B 99 -8.080 -6.335 0.175 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA SER B 100 -4.378 -6.994 0.577 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA VAL B 101 -1.115 -5.893 2.125 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR B 102 0.511 -2.993 0.305 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE B 103 4.257 -2.495 0.489 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS B 104 6.223 0.500 -0.633 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA GLY D 16 -12.766 -6.146 4.154 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLU D 17 -12.984 -3.096 1.985 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA ASN D 18 -10.777 -2.216 -0.940 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA THR D 20 -6.622 2.515 -3.475 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA ASN D 22 -2.375 6.893 -5.615 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA ASN D 38 10.871 1.660 -12.510 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA LEU D 39 8.338 2.153 -9.768 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA GLN D 40 8.338 0.459 -6.382 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA TRP D 41 5.923 0.253 -3.477 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA TYR D 42 7.139 -0.195 0.076 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA ARG D 43 5.258 -0.682 3.297 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA GLN D 44 6.468 0.441 6.681 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ASN D 45 4.955 -0.914 9.806 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA HIS D 52 9.234 -4.199 -0.523 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA LYS D 85 2.672 9.293 -15.022 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER D 86 0.797 7.515 -12.294 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA SER D 88 -1.761 3.077 -8.014 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA LEU D 89 -1.639 0.408 -5.358 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA LEU D 90 -4.860 -1.486 -4.764 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA SER D 94 -8.077 -5.060 3.718 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG D 95 -7.018 -6.656 6.969 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA ALA D 96 -5.916 -5.018 10.209 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ALA D 97 -2.373 -6.175 9.542 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP D 98 -2.346 -3.975 6.457 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR D 99 -2.284 -0.874 8.638 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA ALA D 100 0.968 0.902 7.897 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA SER D 101 2.587 3.693 5.962 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR D 102 2.875 3.032 2.230 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE D 103 5.639 4.573 0.152 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS D 104 5.899 4.893 -3.569 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA THR G 5 7.680 4.957 6.318 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLN G 6 4.157 4.013 5.323 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA PRO G 8 -1.597 4.163 7.655 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA LYS G 9 -0.563 0.753 8.980 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA PHE G 10 -4.125 -0.534 8.872 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA GLN G 11 -7.228 0.591 7.033 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA VAL G 12 -10.787 -0.634 6.761 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA LYS G 14 -16.366 0.265 2.522 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA THR G 15 -16.732 -0.673 -1.121 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA GLY G 16 -16.733 2.365 -3.364 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA GLN G 22 1.267 7.341 1.834 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA CYS G 23 4.911 6.380 1.563 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ALA G 24 7.861 8.498 2.594 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA TYR G 38 13.247 5.051 -7.303 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA MET G 39 10.086 4.125 -5.461 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER G 40 7.830 1.118 -5.919 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA TRP G 41 4.421 -0.118 -4.828 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA TYR G 42 3.741 -3.827 -4.381 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA ARG G 43 0.720 -5.770 -3.219 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA GLN G 44 0.883 -9.078 -1.429 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG G 51 3.180 -8.202 -7.285 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA LEU G 52 4.411 -4.907 -8.681 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ILE G 53 1.744 -2.252 -9.110 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP G 86 6.783 11.037 -1.067 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR G 99 -8.080 -6.335 0.175 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA SER G 100 -4.378 -6.994 0.577 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA VAL G 101 -1.115 -5.893 2.125 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR G 102 0.511 -2.993 0.305 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE G 103 4.257 -2.495 0.489 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS G 104 6.223 0.500 -0.633 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -9,6 +9,7 @@ try:
|
|
|
9
9
|
import plip
|
|
10
10
|
from plip.basic.remote import VisualizerData
|
|
11
11
|
from plip.visualization.visualize import visualize_in_pymol
|
|
12
|
+
|
|
12
13
|
except ModuleNotFoundError as e:
|
|
13
14
|
if "pymol" in str(e):
|
|
14
15
|
warnings.warn(
|
|
@@ -413,7 +414,8 @@ class TCRInteractionProfiler:
|
|
|
413
414
|
heatmap_b[pair[0][1], ligand_number_mapping[int(pair[1][1])]] = (
|
|
414
415
|
heatmap_b[pair[0][1], ligand_number_mapping[int(pair[1][1])]] + 1
|
|
415
416
|
)
|
|
416
|
-
|
|
417
|
+
|
|
418
|
+
im_beta = ax_beta.imshow(heatmap_b.T, cmap="PuRd")
|
|
417
419
|
for i in plot_index:
|
|
418
420
|
ax_beta.axhline(y=i - 0.5, color="blue", linewidth=1)
|
|
419
421
|
ax_beta.axhline(y=i + 0.5, color="blue", linewidth=1)
|
|
@@ -427,6 +429,11 @@ class TCRInteractionProfiler:
|
|
|
427
429
|
ax_beta.set_xticks([], [], rotation=90)
|
|
428
430
|
ax_beta.set_yticks([], [])
|
|
429
431
|
|
|
432
|
+
cbar = fig.colorbar(
|
|
433
|
+
im_beta, ax=[ax_alpha, ax_beta], orientation="vertical", shrink=0.8
|
|
434
|
+
)
|
|
435
|
+
cbar.set_label("Interaction count", rotation=270, labelpad=15)
|
|
436
|
+
|
|
430
437
|
if save_as is not None:
|
|
431
438
|
fig.savefig(save_as, bbox_inches="tight", dpi=200)
|
|
432
439
|
|
|
@@ -19,20 +19,24 @@ class TCRBatchOperator:
|
|
|
19
19
|
def _load_geometry_filter(self):
|
|
20
20
|
self._geometry_filter = DockingGeometryFilter()
|
|
21
21
|
|
|
22
|
-
def tcrs_from_file_list(self, file_list):
|
|
22
|
+
def tcrs_from_file_list(self, file_list, **kwargs):
    """Lazily yield the TCRs parsed from every file in ``file_list``.

    The TCR id of a file is its name after the last ``/`` and up to the
    first ``.``.  Extra keyword arguments are forwarded to the parser's
    ``get_tcr_structure``.

    Loading is best effort: any exception raised while parsing a file or
    iterating its TCRs is reported as a warning and a single ``None`` is
    yielded in its place, after which the next file is processed.
    """
    for path in file_list:
        base_name = path.split("/")[-1]
        tcr_id = base_name.split(".")[0]
        try:
            structure = self._tcr_parser.get_tcr_structure(tcr_id, path, **kwargs)
            for parsed_tcr in structure.get_TCRs():
                yield parsed_tcr
        except Exception as e:
            warnings.warn(f"Loading {path} failed with error {str(e)}")
            yield None
|
|
31
33
|
|
|
32
|
-
def tcrs_from_file_dict(self, file_dict):
|
|
34
|
+
def tcrs_from_file_dict(self, file_dict, **kwargs):
|
|
33
35
|
for tcr_id, file in file_dict.items():
|
|
34
36
|
try:
|
|
35
|
-
for tcr in self._tcr_parser.get_tcr_structure(
|
|
37
|
+
for tcr in self._tcr_parser.get_tcr_structure(
|
|
38
|
+
tcr_id, file, **kwargs
|
|
39
|
+
).get_TCRs():
|
|
36
40
|
yield tcr_id, tcr
|
|
37
41
|
except Exception as e:
|
|
38
42
|
warnings.warn(f"Loading {tcr_id}: {file} failed with error {str(e)}")
|
|
@@ -167,18 +171,18 @@ class TCRBatchOperator:
|
|
|
167
171
|
return germlines_and_alleles_df, geometries_df, interactions_df
|
|
168
172
|
|
|
169
173
|
|
|
170
|
-
def batch_load_TCRs(tcr_files):
|
|
174
|
+
def batch_load_TCRs(tcr_files, **kwargs):
    """Eagerly load the TCRs of several structure files.

    Args:
        tcr_files: Either a dict mapping TCR ids to file paths, or any other
            iterable of file paths.
        **kwargs: Parser options, forwarded unchanged to
            ``TCRBatchOperator.tcrs_from_file_dict`` /
            ``TCRBatchOperator.tcrs_from_file_list``.

    Returns:
        For dict input, a ``dict`` built from the ``(tcr_id, tcr)`` pairs the
        operator yields; otherwise a ``list`` of everything the operator
        yields for the files.
    """
    # The keyword arguments belong to the loader call, not to the dict()/list()
    # constructors: dict(..., **kwargs) would store the options as bogus
    # entries of the result and list(..., **kwargs) raises TypeError.
    if isinstance(tcr_files, dict):
        return dict(TCRBatchOperator().tcrs_from_file_dict(tcr_files, **kwargs))
    else:
        return list(TCRBatchOperator().tcrs_from_file_list(tcr_files, **kwargs))
|
|
175
179
|
|
|
176
180
|
|
|
177
|
-
def batch_yield_TCRs(tcr_files):
|
|
181
|
+
def batch_yield_TCRs(tcr_files, **kwargs):
    """Return a lazy generator over the TCRs of several structure files.

    A dict input (TCR id -> file path) is dispatched to
    ``TCRBatchOperator.tcrs_from_file_dict``; any other input is treated as
    an iterable of file paths and dispatched to
    ``TCRBatchOperator.tcrs_from_file_list``.  Keyword arguments are passed
    through to the chosen method.
    """
    operator = TCRBatchOperator()
    if isinstance(tcr_files, dict):
        loader = operator.tcrs_from_file_dict
    else:
        loader = operator.tcrs_from_file_list
    return loader(tcr_files, **kwargs)
|
|
182
186
|
|
|
183
187
|
|
|
184
188
|
def get_TCR_interactions(tcr_files, renumber=True, save_as_csv=None):
|
|
@@ -2,23 +2,24 @@ import warnings
|
|
|
2
2
|
import requests
|
|
3
3
|
import os
|
|
4
4
|
|
|
5
|
+
from ..tcr_processing import TCR
|
|
5
6
|
from ..tcr_processing.TCRParser import TCRParser
|
|
6
7
|
from .tcr_batch_operations import batch_load_TCRs, batch_yield_TCRs
|
|
7
8
|
|
|
8
9
|
|
|
9
|
-
def load_TCR(tcr_structure_file, tcr_id=None):
|
|
10
|
+
def load_TCR(tcr_structure_file, tcr_id=None, **kwargs):
    """Parse one structure file and return the TCR(s) found in it.

    Args:
        tcr_structure_file: Path of the structure file, as a string.
        tcr_id: Identifier for the parsed structure.  Defaults to the file
            name after the last ``/`` and up to its first ``.``.
        **kwargs: Forwarded to ``TCRParser.get_tcr_structure``.

    Returns:
        The TCR itself when exactly one is found; otherwise the list of all
        TCRs found (which is empty when there are none).
    """
    parser = TCRParser()
    if tcr_id is None:
        file_name = tcr_structure_file.split("/")[-1]
        tcr_id = file_name.split(".")[0]
    parsed = parser.get_tcr_structure(tcr_id, tcr_structure_file, **kwargs)
    tcrs = list(parsed.get_TCRs())
    return tcrs[0] if len(tcrs) == 1 else tcrs
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
def load_TCRs(tcr_structure_files, tcr_ids=None):
|
|
22
|
+
def load_TCRs(tcr_structure_files, tcr_ids=None, **kwargs):
|
|
22
23
|
tcr_parser = TCRParser()
|
|
23
24
|
if isinstance(tcr_structure_files, str): # loading single file
|
|
24
25
|
tcr_id = tcr_structure_files.split("/")[-1].split(".")[
|
|
@@ -29,7 +30,9 @@ def load_TCRs(tcr_structure_files, tcr_ids=None):
|
|
|
29
30
|
warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
|
|
30
31
|
tcr_id = tcr_ids
|
|
31
32
|
|
|
32
|
-
tcr_structure = tcr_parser.get_tcr_structure(
|
|
33
|
+
tcr_structure = tcr_parser.get_tcr_structure(
|
|
34
|
+
tcr_id, tcr_structure_files, **kwargs
|
|
35
|
+
)
|
|
33
36
|
return list(tcr_structure.get_TCRs())
|
|
34
37
|
|
|
35
38
|
if len(tcr_structure_files) > 10:
|
|
@@ -39,15 +42,15 @@ def load_TCRs(tcr_structure_files, tcr_ids=None):
|
|
|
39
42
|
|
|
40
43
|
if tcr_ids is not None:
|
|
41
44
|
if len(tcr_structure_files) == len(tcr_ids):
|
|
42
|
-
return batch_load_TCRs(dict(zip(tcr_ids, tcr_structure_files)))
|
|
45
|
+
return batch_load_TCRs(dict(zip(tcr_ids, tcr_structure_files)), **kwargs)
|
|
43
46
|
else:
|
|
44
47
|
warnings.warn(
|
|
45
48
|
f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
|
|
46
49
|
)
|
|
47
|
-
return batch_load_TCRs(tcr_structure_files)
|
|
50
|
+
return batch_load_TCRs(tcr_structure_files, **kwargs)
|
|
48
51
|
|
|
49
52
|
|
|
50
|
-
def yield_TCRs(tcr_structure_files, tcr_ids=None):
|
|
53
|
+
def yield_TCRs(tcr_structure_files, tcr_ids=None, **kwargs):
|
|
51
54
|
tcr_parser = TCRParser()
|
|
52
55
|
if isinstance(tcr_structure_files, str): # loading single file
|
|
53
56
|
tcr_id = tcr_structure_files.split("/")[-1].split(".")[
|
|
@@ -58,20 +61,22 @@ def yield_TCRs(tcr_structure_files, tcr_ids=None):
|
|
|
58
61
|
warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
|
|
59
62
|
tcr_id = tcr_ids
|
|
60
63
|
|
|
61
|
-
tcr_structure = tcr_parser.get_tcr_structure(
|
|
64
|
+
tcr_structure = tcr_parser.get_tcr_structure(
|
|
65
|
+
tcr_id, tcr_structure_files, **kwargs
|
|
66
|
+
)
|
|
62
67
|
return list(tcr_structure.get_TCRs())
|
|
63
68
|
|
|
64
69
|
if tcr_ids is not None:
|
|
65
70
|
if len(tcr_structure_files) == len(tcr_ids):
|
|
66
|
-
return batch_yield_TCRs(dict(zip(tcr_ids, tcr_structure_files)))
|
|
71
|
+
return batch_yield_TCRs(dict(zip(tcr_ids, tcr_structure_files)), **kwargs)
|
|
67
72
|
else:
|
|
68
73
|
warnings.warn(
|
|
69
74
|
f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
|
|
70
75
|
)
|
|
71
|
-
return batch_yield_TCRs(tcr_structure_files)
|
|
76
|
+
return batch_yield_TCRs(tcr_structure_files, **kwargs)
|
|
72
77
|
|
|
73
78
|
|
|
74
|
-
def
|
|
79
|
+
def fetch_TCRs(pdb_id: str, **kwargs) -> list[TCR]:
|
|
75
80
|
"""
|
|
76
81
|
Fetches and parses a T-cell receptor (TCR) structure from the STCRDab or RCSB PDB databases.
|
|
77
82
|
|
|
@@ -83,9 +88,7 @@ def fetch_TCR(pdb_id: str):
|
|
|
83
88
|
pdb_id (str): The PDB identifier of the structure to be fetched.
|
|
84
89
|
|
|
85
90
|
Returns:
|
|
86
|
-
|
|
87
|
-
- A list of TCR structures if multiple are found.
|
|
88
|
-
- None if no TCRs are identified (with a `UserWarning` issued).
|
|
91
|
+
A list of the TCR structures found in the fetched PDB file
|
|
89
92
|
|
|
90
93
|
Raises:
|
|
91
94
|
- A warning if no TCR structures are found in the downloaded file.
|
|
@@ -96,7 +99,7 @@ def fetch_TCR(pdb_id: str):
|
|
|
96
99
|
- The function temporarily saves the downloaded file and deletes it after parsing.
|
|
97
100
|
|
|
98
101
|
Example:
|
|
99
|
-
tcr =
|
|
102
|
+
tcr = fetch_TCRs("6eqa")
|
|
100
103
|
|
|
101
104
|
"""
|
|
102
105
|
|
|
@@ -139,12 +142,10 @@ def fetch_TCR(pdb_id: str):
|
|
|
139
142
|
print("Failed to download file")
|
|
140
143
|
|
|
141
144
|
tcr_parser = TCRParser()
|
|
142
|
-
|
|
145
|
+
tcrs = list(tcr_parser.get_tcr_structure(pdb_id, filename, **kwargs).get_TCRs())
|
|
143
146
|
os.remove(filename)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
elif len(tcr) == 0:
|
|
147
|
+
|
|
148
|
+
if len(tcrs) == 0:
|
|
147
149
|
warnings.warn(f"No TCRs identified in {pdb_id}")
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
return tcr
|
|
150
|
+
|
|
151
|
+
return tcrs
|