stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Created on 9 May 2017
|
|
3
|
+
@author: leem
|
|
4
|
+
|
|
5
|
+
A modified Entity class based on SAbDab's ABDB.AbPDB and Bio.PDB's entity
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
import io
|
|
11
|
+
from copy import copy
|
|
12
|
+
from textwrap import wrap
|
|
13
|
+
|
|
14
|
+
import Bio
|
|
15
|
+
import Bio.PDB
|
|
16
|
+
from Bio.Data.IUPACData import atom_weights
|
|
17
|
+
from .Select import select_all
|
|
18
|
+
|
|
19
|
+
# Template for one PDB ATOM/HETATM record.  PDB coordinate records are
# fixed-column, so the blank runs are significant: three blanks (columns
# 28-30) precede the coordinates and six blanks (columns 67-72) precede the
# segment id.  The fields, in order, are: record name, serial, atom name,
# altloc, residue name, chain id, residue number, insertion code, x, y, z,
# occupancy, B-factor, segid, element, charge.
_ATOM_FORMAT_STRING = (
    "%s%5i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %4s%2s%2s\n"
)


class Entity(Bio.PDB.Entity.Entity):
    """
    A modified entity object that allows for direct writing of coordinates.

    On top of Bio.PDB's Entity it provides save() (PDB text output with
    optional REMARK lines), transform() and an order-preserving copy().
    """

    def _get_atom_line(self, atom, atom_number=None, charge=" "):
        """
        Format one atom as a PDB ATOM/HETATM line (modified from Bio.PDB.PDBIO).

        @param atom: The atom to format. Its parent is read as the residue and
            its grandparent as the chain.
        @param atom_number: Serial number to write. If None the atom's own
            serial_number is used.
        @param charge: Text for the charge columns (padded to width 2).
        @return: The formatted line, terminated by a newline.
        @raise ValueError: If the atom's element is not a key of
            Bio.Data.IUPACData.atom_weights.
        """
        residue = atom.parent
        hetfield, resseq, icode = residue.get_id()
        resname = residue.get_resname()
        segid = residue.get_segid()

        if atom_number is None:
            atom_number = atom.serial_number

        chain_id = residue.parent.get_id()

        # The record name occupies columns 1-6, hence the padded "ATOM  ".
        record_type = "HETATM" if hetfield != " " else "ATOM  "

        if atom.element:
            element = atom.element.strip().upper()
            if element.capitalize() not in atom_weights:
                raise ValueError("Unrecognised element %r" % atom.element)
            element = element.rjust(2)
        else:
            # The "%2s" conversion pads this to the two element columns.
            element = " "

        x, y, z = atom.get_coord()

        return _ATOM_FORMAT_STRING % (
            record_type,
            atom_number,
            atom.get_fullname(),
            atom.get_altloc(),
            resname,
            chain_id,
            resseq,
            icode,
            x,
            y,
            z,
            atom.get_occupancy(),
            atom.get_bfactor(),
            segid,
            element,
            charge,
        )

    def _get_output_string(self, selection, n):
        """
        Method to get the atom lines of the entity's children.

        Children rejected by the selection are skipped together with
        everything below them.

        @param selection: Selector object from TcrPDB.Select or inherited class.
        @param n: An integer value to number the current atom with.
            If this is None or False the original numbering from the pdb file
            is used.
        @return: A tuple (text, n) where n is the number the next atom would
            receive (or the value passed in when not renumbering).
        """
        keep_original = n is None or n is False
        pieces = []

        for child in self.get_list():
            if not selection.accept(child):
                continue
            if child.level == "A":
                pieces.append(
                    self._get_atom_line(
                        child, atom_number=None if keep_original else n
                    )
                )
                if not keep_original:
                    n += 1
            else:
                child_text, n = child._get_output_string(selection, n)
                pieces.append(child_text)

        return "".join(pieces), n

    @staticmethod
    def _ag_chain_and_type(ags):
        """
        Summarise antigens as two ';'-joined strings: chain ids and types.

        @param ags: Iterable of antigen entities, or a falsy value.
        @return: (chains, types), or (None, None) when ags is falsy.
        """
        if not ags:
            return None, None

        chains, types = [], []
        for ag in ags:
            if ag.level == "C":  # peptide/protein/nucleic acid.
                chains.append(ag.id)
                types.append(ag.type)
            elif ag.level == "R":  # hapten / single carb
                chains.append(ag.parent.id)
                types.append(ag.type)
            elif ag.level == "F":  # carbs
                try:
                    chains.append(ag.child_list[0].parent.id)
                    types.append(ag.type)
                except IndexError:
                    # A fragment without children has no chain to report.
                    chains.append("UNKNOWN")
                    types.append("UNKNOWN")
            else:
                chains.append("UNKNOWN")
                types.append("UNKNOWN")
        return ";".join(chains), ";".join(types)

    def _get_multi_model_string(self, selection, renumber):
        """
        Build the MODEL/ENDMDL delimited text for a multi-model structure.

        Sorts self.child_list by model id in place before writing.

        @param selection: Selector object deciding which children are written.
        @param renumber: If true, atoms are numbered consecutively across the
            models; otherwise the original serial numbers are kept.
        @return: The concatenated text of all models.
        """
        # sort models by model id
        self.child_list = sorted(self.child_list, key=lambda z: z.id)

        pieces = []
        next_serial = 1
        for mod in self.child_list:
            number = ("%d" % mod.id).rjust(9)  # space by 9 for models
            pieces.append("MODEL%s\n" % number)
            if renumber:
                text, following = mod._get_output_string(selection, next_serial)
                # One serial number is left unused between consecutive models;
                # this keeps the numbering this method has always produced.
                next_serial = following + 1
            else:
                text, _ = mod._get_output_string(selection, None)
            pieces.append(text)
            pieces.append("ENDMDL\n")
        return "".join(pieces)

    def _get_structure_remarks(self, top):
        """
        Build the REMARK lines describing the pairings and warnings of `top`.

        @param top: The ancestor carrying a `warnings` attribute; it must also
            provide get_TCRs() and get_unpaired_TCRchains().
        @return: The REMARK lines as one string.
        """
        pieces = []

        for tcr in top.get_TCRs():
            antigen_chain, antigen_type = self._ag_chain_and_type(tcr.antigen)
            mh_chains = tcr.MHC[0].get_id() if tcr.MHC else ""
            # Unpacking requires exactly two chains per paired TCR.
            ch0, ch1 = [(c.chain_type, c.id) for c in tcr.get_chains()]
            pieces.append(
                "REMARK 5 PAIRED_%s %sCHAIN=%s %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                % (
                    tcr.get_TCR_type(),
                    ch0[0],
                    ch0[1],
                    ch1[0],
                    ch1[1],
                    mh_chains,
                    antigen_chain,
                    antigen_type,
                )
            )

        for tr_chain in top.get_unpaired_TCRchains():
            antigen_chain, antigen_type = self._ag_chain_and_type(tr_chain.antigen)
            mhc = tr_chain.get_MHC()
            mh_chains = mhc[0].get_id() if mhc else ""
            pieces.append(
                "REMARK 5 SINGLE %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                % (
                    tr_chain.chain_type,
                    tr_chain.id,
                    mh_chains,
                    antigen_chain,
                    antigen_type,
                )
            )

        for warning in str(top.warnings).split("\n"):
            if warning:
                # Long warnings are wrapped so each REMARK line stays short.
                pieces.append(
                    "\n".join(
                        ["REMARK 5 " + w.upper() for w in wrap(warning, width=60)]
                    )
                    + "\n"
                )

        return "".join(pieces)

    def _get_remark_string(self):
        """
        Build the REMARK header written in front of the coordinates.

        Walks up at most six levels from this entity looking for an ancestor
        with a `warnings` attribute. If the chain of parents ends before one
        is found, no remarks are written at all.

        @return: The REMARK lines (not yet upper-cased), possibly empty.
        """
        # NOTE(review): the PDB format places the remark number in columns
        # 8-10 ("REMARK   5"). The literals are kept exactly as they appear
        # here because readers of these files may rely on them - confirm.
        header = (
            "REMARK 5 IMGT RENUMBERED STRUCTURE %s GENERATED BY STCRDAB\n"
            % str(self.id).upper()
        )
        header += "REMARK 5 TCR CHAINS ARE RENUMBERED IN THE VARIABLE REGIONS ONLY\n"
        header += "REMARK 5 MHC CHAINS ARE RENUMBERED IN THE G DOMAINS OR FOR B2M-GLOBULIN\n"
        header += "REMARK 5 NON-TCR and NON-MHC CHAINS ARE LEFT WITH RESIDUE IDS AS IN PDB\n"

        p = self
        for _ in range(6):  # only try and go up residue, chain, holder, model, structure
            if hasattr(p, "warnings"):
                # the entity is the top structure
                return header + self._get_structure_remarks(p)
            # Compare with None explicitly: an entity without children has
            # length 0 and would otherwise be mistaken for a missing parent.
            if p is None:
                return ""
            p = p.get_parent()
        return header

    @staticmethod
    def _write_output(output, text):
        """
        Write `text` to a path or to a writable object.

        @param output: A path (str or os.PathLike), which is opened for
            writing, or any object with a write() method.
        @raise TypeError: If output is neither of the above.
        """
        import os

        if isinstance(output, (str, os.PathLike)):
            with open(output, "w") as out:
                out.write(text)
        elif hasattr(output, "write"):
            output.write(text)
        else:
            # Previously such a target was ignored and nothing was written.
            raise TypeError(
                "output must be a path or an object with a write() method, "
                "got %r" % type(output).__name__
            )

    # TODO

    def save(self, output=sys.stdout, renumber=True, selection=False, remarks=True):
        """
        Save the coordinates of the entity

        @param output: Where to write coordinates to. Should be an open file (or other object
            with a write method), a path, or sys.stdout.
            By default the output is written to stdout
        @param renumber: Flag whether to renumber the atom serial numbers.
            Default is to renumber the atoms so that the first is 1 etc.
            Use renumber = False to retain the original atom numbering from the pdb file

        @param selection: Provide a selector object to select which children of the entity should be outputted.
            Selection should be a selector object from TcrPDB.Select.
            Some basic selector classes are provided in the module. More complex classes can be created by inheriting from these.
            If selection = False (default) all atoms in the entity are output

        @param remarks: Flag to print out remarks generated by TcrPDB. Default TRUE
        @raise TypeError: If output is neither a path nor an object with a write method.
        Example:
            entity.save("path/to/file/filename.pdb")
            residue.save( "residue1.pdb" )
        """
        if not selection:
            selection = select_all()

        if self.level == "TS" and len(self.child_list) != 1:
            # output method for NMR structures
            output_string = self._get_multi_model_string(selection, renumber)
        else:
            output_string, _ = self._get_output_string(
                selection, 1 if renumber else None
            )

        remark_string = self._get_remark_string() if remarks else ""

        self._write_output(output, remark_string.upper() + output_string)

    def transform(self, rot, tran):
        """
        Apply rotation and translation to the atomic coordinates.

        The call is forwarded to every child of the entity.

        Example:
            >>> rotation=rotmat(pi, Vector(1,0,0))
            >>> translation=array((0,0,1), 'f')
            >>> entity.transform(rotation, translation)

        @param rot: A right multiplying rotation matrix
        @type rot: 3x3 Numeric array

        @param tran: the translation vector
        @type tran: size 3 Numeric array
        """
        for child in self.get_list():
            child.transform(rot, tran)

    def copy(self):
        """
        Return a copy of the entity whose children are copies too.

        Copy has been played with a bit. For my purposes the version in 1.61
        (presumably Biopython 1.61) did not work as explicit copying of the
        child list meant that the child objects became referenced to both
        self and shallow.
        This may be due to overriding the residue and chain classes so may not be a bug in biopython.

        When copying the child_list in the loop, I use the list to iterate over instead of the dictionary.
        This preserves the ordering of the children.

        @return: The copy, detached from any parent.
        """
        shallow = copy(self)
        # Start from empty containers; the children are re-added below.
        shallow.child_list = []
        shallow.child_dict = {}
        shallow.xtra = copy(self.xtra)
        shallow.detach_parent()

        # Order preserving copy
        for child in self.get_list():
            assert (
                child.get_id() in self.child_dict
            ), "child id not in the child_dict keys: this is unexpected and bad :("
            shallow.add(child.copy())
        return shallow
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Created on 9 May 2017
|
|
3
|
+
@author: leem
|
|
4
|
+
Modified version of the ABDB.AbPDB.Fragment class
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .Entity import Entity
|
|
8
|
+
from Bio.PDB.PDBExceptions import PDBConstructionException
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Fragment(Entity):
    """
    An Entity used purely to group existing children, e.g.
    TCR (TCR object) -> TCRchain (TCRchain object) -> Fragment CDRB3 (Fragment object)
    -> Residue B110 (Residue object)

    Adding a child to a fragment leaves the child's own parent untouched, so
    residues can be collected in a fragment (for instance to visualise them)
    without being removed from their chain.
    """

    def __init__(self, id):
        self._id = id
        Entity.__init__(self, id)
        self.level = "F"

    def __repr__(self):
        # Only fragments carrying a chain_type report their TCR chain.
        if not hasattr(self, "chain_type"):
            return "<Fragment %s>" % self.id
        return "<Fragment %s TCRchain: %s>" % (self.id, self.parent.parent.id)

    def add(self, entity):
        """Add a child to the Entity.

        Raises PDBConstructionException if a child with the same id exists.
        """
        key = entity.get_id()
        if self.has_id(key):
            raise PDBConstructionException("%s defined twice" % str(key))

        # Register the child here only; its parent is not changed.
        self.child_dict[key] = entity
        self.child_list.append(entity)

    def insert(self, pos, entity):
        """Add a child to the Entity at a specified position.

        Raises PDBConstructionException if a child with the same id exists.
        """
        key = entity.get_id()
        if self.has_id(key):
            raise PDBConstructionException("%s defined twice" % str(key))

        # Register the child here only; its parent is not changed.
        self.child_dict[key] = entity
        self.child_list.insert(pos, entity)

    def get_residues(self):
        """Yield the fragment's children in order."""
        yield from self

    def get_atoms(self):
        """Yield every atom of every child, child by child."""
        for member in self.get_residues():
            yield from member
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Created on 9 May 2017
|
|
3
|
+
@author: leem
|
|
4
|
+
|
|
5
|
+
A generic holder class that can be used to contain individual chains, etc.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .Entity import Entity
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Holder(Entity):
    """Generic container entity (level "H") for individual chains, etc."""

    def __init__(self, identifier):
        Entity.__init__(self, identifier)
        self.level = "H"

    def __repr__(self):
        # An empty holder is reported with "None" instead of a chain list.
        if not self.child_list:
            return "<Holder %s chains: None>" % (self.id)
        chain_ids = ",".join(member.id for member in self)
        return "<Holder %s chains: %s>" % (self.id, chain_ids)
|