stcrpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. examples/__init__.py +0 -0
  2. examples/egnn.py +425 -0
  3. stcrpy/__init__.py +5 -0
  4. stcrpy/tcr_datasets/__init__.py +0 -0
  5. stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  6. stcrpy/tcr_datasets/tcr_selector.py +0 -0
  7. stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  8. stcrpy/tcr_datasets/utils.py +350 -0
  9. stcrpy/tcr_formats/__init__.py +0 -0
  10. stcrpy/tcr_formats/tcr_formats.py +114 -0
  11. stcrpy/tcr_formats/tcr_haddock.py +556 -0
  12. stcrpy/tcr_geometry/TCRCoM.py +350 -0
  13. stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  14. stcrpy/tcr_geometry/TCRDock.py +261 -0
  15. stcrpy/tcr_geometry/TCRGeom.py +450 -0
  16. stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  17. stcrpy/tcr_geometry/__init__.py +0 -0
  18. stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  19. stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  20. stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  21. stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  22. stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  23. stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  24. stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  25. stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  26. stcrpy/tcr_interactions/PLIPParser.py +147 -0
  27. stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  28. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  29. stcrpy/tcr_interactions/__init__.py +0 -0
  30. stcrpy/tcr_interactions/utils.py +170 -0
  31. stcrpy/tcr_methods/__init__.py +0 -0
  32. stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  33. stcrpy/tcr_methods/tcr_methods.py +150 -0
  34. stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  35. stcrpy/tcr_metrics/__init__.py +2 -0
  36. stcrpy/tcr_metrics/constants.py +39 -0
  37. stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  38. stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  39. stcrpy/tcr_ml/__init__.py +0 -0
  40. stcrpy/tcr_ml/geometry_predictor.py +3 -0
  41. stcrpy/tcr_processing/AGchain.py +89 -0
  42. stcrpy/tcr_processing/Chemical_components.py +48915 -0
  43. stcrpy/tcr_processing/Entity.py +301 -0
  44. stcrpy/tcr_processing/Fragment.py +58 -0
  45. stcrpy/tcr_processing/Holder.py +24 -0
  46. stcrpy/tcr_processing/MHC.py +449 -0
  47. stcrpy/tcr_processing/MHCchain.py +149 -0
  48. stcrpy/tcr_processing/Model.py +37 -0
  49. stcrpy/tcr_processing/Select.py +145 -0
  50. stcrpy/tcr_processing/TCR.py +532 -0
  51. stcrpy/tcr_processing/TCRIO.py +47 -0
  52. stcrpy/tcr_processing/TCRParser.py +1230 -0
  53. stcrpy/tcr_processing/TCRStructure.py +148 -0
  54. stcrpy/tcr_processing/TCRchain.py +160 -0
  55. stcrpy/tcr_processing/__init__.py +3 -0
  56. stcrpy/tcr_processing/annotate.py +480 -0
  57. stcrpy/tcr_processing/utils/__init__.py +0 -0
  58. stcrpy/tcr_processing/utils/common.py +67 -0
  59. stcrpy/tcr_processing/utils/constants.py +367 -0
  60. stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  61. stcrpy/utils/__init__.py +0 -0
  62. stcrpy/utils/error_stream.py +12 -0
  63. stcrpy-1.0.0.dist-info/METADATA +173 -0
  64. stcrpy-1.0.0.dist-info/RECORD +68 -0
  65. stcrpy-1.0.0.dist-info/WHEEL +5 -0
  66. stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
  67. stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  68. stcrpy-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,301 @@
1
+ """
2
+ Created on 9 May 2017
3
+ @author: leem
4
+
5
+ A modified Entity class based on SAbDab's ABDB.AbPDB and Bio.PDB's entity
6
+
7
+ """
8
+
9
+ import sys
10
+ import io
11
+ from copy import copy
12
+ from textwrap import wrap
13
+
14
+ import Bio
15
+ import Bio.PDB
16
+ from Bio.Data.IUPACData import atom_weights
17
+ from .Select import select_all
18
+
19
# printf-style template for one PDB ATOM/HETATM record (80 columns + newline).
# Field order: record name (6 chars), serial, atom name, altloc, residue name,
# chain id, residue number, insertion code, x, y, z, occupancy, B-factor,
# segment id, element, charge.
# The literal blanks are part of the fixed-column layout: three blanks after
# the insertion code (columns 28-30) and six blanks before the segment id
# (columns 67-72). Do not collapse them.
_ATOM_FORMAT_STRING = (
    "%s%5i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %4s%2s%2s\n"
)
22
+
23
+
24
class Entity(Bio.PDB.Entity.Entity):
    """
    A modified entity object that allows for direct writing of coordinates.

    On top of Bio.PDB's Entity it provides:
      - save(): serialise the entity (and its selected children) as PDB text,
      - transform(): apply a rotation/translation to every child,
      - copy(): an order-preserving copy of the entity and its children.
    """

    def _get_atom_line(self, atom, atom_number=None, charge=" "):
        """
        Format one atom as a single PDB ATOM/HETATM record line.
        Modified from TCRDB.Bio.PDB.PDBIO

        @param atom: The atom to write. Its parent supplies the residue id,
            residue name and segid; its grandparent supplies the chain id.
        @param atom_number: Serial number to write. If None, the atom's own
            serial_number is used.
        @param charge: Text for the charge field.
        @return: The formatted record, terminated by a newline.
        @raise ValueError: If the atom has an element that is not a key of
            Bio.Data.IUPACData.atom_weights.
        """
        hetfield, resseq, icode = atom.parent.get_id()
        resname = atom.parent.get_resname()
        segid = atom.parent.get_segid()

        if atom_number is None:
            atom_number = atom.serial_number

        chain_id = atom.parent.parent.get_id()

        # The record name occupies exactly six columns; "ATOM" therefore needs
        # two trailing blanks so that the serial number starts in column 7.
        record_type = "HETATM" if hetfield != " " else "ATOM  "

        if atom.element:
            element = atom.element.strip().upper()
            if element.capitalize() not in atom_weights:
                raise ValueError("Unrecognised element %r" % atom.element)
            element = element.rjust(2)
        else:
            # No element known: leave the two-column element field blank.
            element = "  "

        name = atom.get_fullname()
        altloc = atom.get_altloc()
        x, y, z = atom.get_coord()
        bfactor = atom.get_bfactor()
        occupancy = atom.get_occupancy()

        args = (
            record_type,
            atom_number,
            name,
            altloc,
            resname,
            chain_id,
            resseq,
            icode,
            x,
            y,
            z,
            occupancy,
            bfactor,
            segid,
            element,
            charge,
        )
        return _ATOM_FORMAT_STRING % args

    def _get_output_string(self, selection, n):
        """
        Method to get the atom lines of the entity's children.

        @param selection: Selector object from TcrPDB.Select or inherited class.
            Only children for which selection.accept(child) is true are written.
        @param n: An integer value to number the current atom with.
            If this is falsy (None/False) the original numbering from the pdb
            file is used and the value is passed through unchanged.
        @return: Tuple (text, n) where text holds the atom lines and n is the
            number to give to the next atom (or the unchanged falsy value).
        """
        lines = []

        for child in self.get_list():
            if not selection.accept(child):
                continue
            if child.level == "A":
                lines.append(self._get_atom_line(child, atom_number=n))
                if n:
                    n += 1
            else:
                # Non-atom children write themselves and hand back the counter.
                child_string, n = child._get_output_string(selection, n)
                lines.append(child_string)
        return "".join(lines), n

    # TODO

    @staticmethod
    def _ag_chain_and_type(ags):
        """
        Summarise antigens as two ';'-joined strings: chain ids and types.

        @param ags: Iterable of antigen entities (or a falsy value for none).
        @return: Tuple (chains, types); (None, None) if ags is falsy.
        """
        if not ags:
            return None, None

        chains, types = [], []
        for ag in ags:
            if ag.level == "C":  # peptide/protein/nucleic acid.
                chains.append(ag.id)
                types.append(ag.type)
            elif ag.level == "R":  # hapten / single carb
                chains.append(ag.parent.id)
                types.append(ag.type)
            elif ag.level == "F":  # carbs
                try:
                    chains.append(ag.child_list[0].parent.id)
                    types.append(ag.type)
                except IndexError:
                    # Fragment without children: no chain can be looked up.
                    chains.append("UNKNOWN")
                    types.append("UNKNOWN")
            else:
                chains.append("UNKNOWN")
                types.append("UNKNOWN")
        return ";".join(chains), ";".join(types)

    def _get_structure_remarks(self, structure):
        """
        Build the pairing and warning remarks for the top-level structure.

        @param structure: The ancestor that carries a 'warnings' attribute;
            it is asked for get_TCRs() and get_unpaired_TCRchains().
        @return: The remark lines as one string (possibly empty).
        """
        remark_lines = []

        for tcr in structure.get_TCRs():
            antigen_chain, antigen_type = self._ag_chain_and_type(tcr.antigen)
            mh_chains = tcr.MHC[0].get_id() if tcr.MHC else ""
            # A paired TCR is expected to hold exactly two chains.
            ch0, ch1 = [(c.chain_type, c.id) for c in tcr.get_chains()]
            remark_lines.append(
                "REMARK 5 PAIRED_%s %sCHAIN=%s %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                % (
                    tcr.get_TCR_type(),
                    ch0[0],
                    ch0[1],
                    ch1[0],
                    ch1[1],
                    mh_chains,
                    antigen_chain,
                    antigen_type,
                )
            )

        for tr_chain in structure.get_unpaired_TCRchains():
            antigen_chain, antigen_type = self._ag_chain_and_type(tr_chain.antigen)
            mhc = tr_chain.get_MHC()
            mh_chains = mhc[0].get_id() if mhc else ""
            remark_lines.append(
                "REMARK 5 SINGLE %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                % (
                    tr_chain.chain_type,
                    tr_chain.id,
                    mh_chains,
                    antigen_chain,
                    antigen_type,
                )
            )

        # Each warning is wrapped to 60 characters so the remark stays narrow.
        for warning in str(structure.warnings).split("\n"):
            if warning:
                remark_lines.append(
                    "\n".join(
                        ["REMARK 5 " + w.upper() for w in wrap(warning, width=60)]
                    )
                    + "\n"
                )
        return "".join(remark_lines)

    def _get_remark_string(self):
        """
        Build the REMARK header written in front of the coordinates.

        Walks up from this entity looking for an ancestor with a 'warnings'
        attribute (the top structure). If the parent chain ends before one is
        found, no remarks are produced at all.

        @return: The remark block as one string (possibly empty).
        """
        # NOTE(review): the PDB format places the remark number in columns
        # 8-10; confirm the spacing of the "REMARK 5 " prefix against whatever
        # reads these lines back before changing it.
        remark_string = (
            "REMARK 5 IMGT RENUMBERED STRUCTURE %s GENERATED BY STCRDAB\n"
            % str(self.id).upper()
        )
        remark_string += (
            "REMARK 5 TCR CHAINS ARE RENUMBERED IN THE VARIABLE REGIONS ONLY\n"
        )
        remark_string += "REMARK 5 MHC CHAINS ARE RENUMBERED IN THE G DOMAINS OR FOR B2M-GLOBULIN\n"
        remark_string += "REMARK 5 NON-TCR and NON-MHC CHAINS ARE LEFT WITH RESIDUE IDS AS IN PDB\n"

        p = self
        # only try and go up residue, chain, holder, model, structure
        for _ in range(6):
            if hasattr(p, "warnings"):
                # the entity is the top structure
                return remark_string + self._get_structure_remarks(p)
            # Compare with None explicitly: the truthiness of an entity may
            # depend on how many children it has, which is unrelated to
            # whether there is a parent left to climb to.
            if p is None:
                return ""
            p = p.get_parent()
        return remark_string

    @staticmethod
    def _write_output(output, output_string):
        """
        Write the text to a path or to an already open file-like object.

        @param output: A path (str or path-like) that is opened for writing,
            or any object with a write() method such as sys.stdout.
        @param output_string: The text to write.
        @raise TypeError: If output is neither a path nor writable, so that
            the text is never silently thrown away.
        """
        if isinstance(output, str) or hasattr(output, "__fspath__"):
            with open(output, "w") as out:
                out.write(output_string)
        elif hasattr(output, "write"):
            output.write(output_string)
        else:
            raise TypeError(
                "output must be a file path or an object with a write() method, not %s"
                % type(output).__name__
            )

    def save(self, output=sys.stdout, renumber=True, selection=False, remarks=True):
        """
        Save the coordinates of the entity

        @param output: Where to write coordinates to. Should be an open file, a path (string or path-like) or sys.stdout.
            By default the output is written to stdout
        @param renumber: Flag whether to renumber the atom serial numbers.
            Default is to renumber the atoms so that the first is 1 etc.
            Use renumber = False to retain the original atom numbering from the pdb file

        @param selection: Provide a selector object to select which children of the entity should be outputted.
            Selection should be a selector object from TcrPDB.Select.
            Some basic selector classes are provided in the module. More complex classes can be created by inheriting from these.
            If selection = False (default) all atoms in the entity are output

        @param remarks: Flag to print out remarks generated by TcrPDB. Default TRUE

        @raise TypeError: If output is neither a path nor an object with a write() method.

        Example:
            entity.save("path/to/file/filename.pdb")
            residue.save( "residue1.pdb" )
        """
        n = 1 if renumber else None

        if not selection:
            selection = select_all()

        if self.level != "TS" or len(self.child_list) == 1:
            output_string, _ = self._get_output_string(selection, n)
        else:
            # output method for NMR structures
            # sort models by model id (the child list itself is reordered)
            self.child_list = sorted(self.child_list, key=lambda z: z.id)

            parts = []
            # The counter is threaded through the models so numbering carries
            # on from one model to the next; with renumber=False it stays None
            # and every model keeps its original serial numbers.
            next_number = n
            for mod in self.child_list:
                number = ("%d" % mod.id).rjust(9)  # space by 9 for models
                parts.append("MODEL%s\n" % number)
                model_string, next_number = mod._get_output_string(
                    selection, next_number
                )
                parts.append(model_string)
                parts.append("ENDMDL\n")
            output_string = "".join(parts)

        remark_string = self._get_remark_string() if remarks else ""

        output_string = remark_string.upper() + output_string

        self._write_output(output, output_string)

    def transform(self, rot, tran):
        """
        Apply rotation and translation to the atomic coordinates.

        The call is forwarded to every child of the entity.

        Example:
            >>> rotation=rotmat(pi, Vector(1,0,0))
            >>> translation=array((0,0,1), 'f')
            >>> entity.transform(rotation, translation)

        @param rot: A right multiplying rotation matrix
        @type rot: 3x3 Numeric array

        @param tran: the translation vector
        @type tran: size 3 Numeric array
        """

        for o in self.get_list():
            o.transform(rot, tran)

    def copy(self):
        """
        Copy has been played with a bit. For my purposes the version in 1.61 did not work as explicit copying of the child list
        meant that the child objects became referenced to both self and shallow.
        This may be due to overriding the residue and chain classes so may not be a bug in biopython.

        When copying the child_list in the loop, I use the list to iterate over instead of the dictionary.
        This preserves the ordering of the children.

        @return: A parentless copy of the entity holding copies of its children.
        """
        shallow = copy(self)
        # Start from empty containers; the children are re-added as copies
        # below rather than being shared with the original.
        shallow.child_list = []
        shallow.child_dict = {}
        shallow.xtra = copy(self.xtra)
        shallow.detach_parent()

        # Order preserving copy
        for child in self.get_list():
            assert (
                child.get_id() in self.child_dict
            ), "child id not in the child_dict keys: this is unexpected and bad :("
            shallow.add(child.copy())
        return shallow
@@ -0,0 +1,58 @@
1
+ """
2
+ Created on 9 May 2017
3
+ @author: leem
4
+ Modified version of the ABDB.AbPDB.Fragment class
5
+ """
6
+
7
+ from .Entity import Entity
8
+ from Bio.PDB.PDBExceptions import PDBConstructionException
9
+
10
+
11
class Fragment(Entity):
    """
    An Entity used purely to group existing children, e.g.
        TCR (TCR object) -> TCRchain (TCRchain object) -> Fragment CDRB3 (Fragment object)
                                                       -> Residue B110 (Residue object)
    Children keep their own parent: adding one to a fragment only records it
    in the fragment's child list and dictionary.
    For instance, one might define a fragment and add residues to it in order to visualise them.
    """

    def __init__(self, id):
        self._id = id
        Entity.__init__(self, id)
        self.level = "F"

    def __repr__(self):
        if not hasattr(self, "chain_type"):
            return "<Fragment %s>" % self.id
        return "<Fragment %s TCRchain: %s>" % (self.id, self.parent.parent.id)

    def _require_new_id(self, entity_id):
        "Raise PDBConstructionException if a child with this id already exists."
        if self.has_id(entity_id):
            raise PDBConstructionException("%s defined twice" % str(entity_id))

    def add(self, entity):
        "Add a child to the Entity."
        key = entity.get_id()
        self._require_new_id(key)

        # the child's parent is deliberately left untouched
        self.child_dict[key] = entity
        self.child_list.append(entity)

    def insert(self, pos, entity):
        "Add a child to the Entity at a specified position."
        key = entity.get_id()
        self._require_new_id(key)

        # the child's parent is deliberately left untouched
        self.child_dict[key] = entity
        self.child_list.insert(pos, entity)

    def get_residues(self):
        "Iterate over the fragment's direct children."
        yield from self

    def get_atoms(self):
        "Iterate over every item of every child returned by get_residues()."
        for res in self.get_residues():
            yield from res
@@ -0,0 +1,24 @@
1
+ """
2
+ Created on 9 May 2017
3
+ @author: leem
4
+
5
+ A generic holder class that can be used to contain individual chains, etc.
6
+
7
+ """
8
+
9
+ from .Entity import Entity
10
+
11
+
12
class Holder(Entity):
    """
    Generic container entity (level "H") for grouping individual chains, etc.
    """

    def __init__(self, identifier):
        Entity.__init__(self, identifier)
        self.level = "H"

    def __repr__(self):
        # An empty holder gets a fixed "None" marker instead of an id list.
        if not self.child_list:
            return "<Holder %s chains: None>" % (self.id)
        chain_ids = ",".join(child.id for child in self)
        return "<Holder %s chains: %s>" % (self.id, chain_ids)