stjames 0.0.59__py3-none-any.whl → 0.0.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stjames might be problematic. Click here for more details.
- stjames/atomium_stjames/mmcif.py +216 -162
- stjames/atomium_stjames/utilities.py +58 -46
- stjames/molecule.py +46 -1
- stjames/types.py +10 -1
- stjames/workflows/__init__.py +5 -2
- stjames/workflows/admet.py +15 -4
- stjames/workflows/basic_calculation.py +17 -2
- stjames/workflows/bde.py +5 -5
- stjames/workflows/conformer.py +4 -2
- stjames/workflows/conformer_search.py +3 -3
- stjames/workflows/descriptors.py +16 -3
- stjames/workflows/docking.py +26 -11
- stjames/workflows/electronic_properties.py +5 -2
- stjames/workflows/fukui.py +19 -2
- stjames/workflows/hydrogen_bond_basicity.py +29 -5
- stjames/workflows/irc.py +4 -2
- stjames/workflows/molecular_dynamics.py +28 -3
- stjames/workflows/multistage_opt.py +5 -3
- stjames/workflows/pka.py +36 -2
- stjames/workflows/redox_potential.py +4 -2
- stjames/workflows/scan.py +37 -2
- stjames/workflows/solubility.py +60 -0
- stjames/workflows/spin_states.py +4 -2
- stjames/workflows/tautomer.py +24 -2
- stjames/workflows/workflow.py +27 -4
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/METADATA +2 -3
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/RECORD +30 -29
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/LICENSE +0 -0
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/WHEEL +0 -0
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/top_level.txt +0 -0
stjames/atomium_stjames/mmcif.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import re
|
|
4
4
|
from collections import deque
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, Callable
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
@@ -11,7 +11,8 @@ from .data import CODES
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def mmcif_string_to_mmcif_dict(filestring: str) -> dict[str, Any]:
|
|
14
|
-
"""
|
|
14
|
+
"""
|
|
15
|
+
Converts a .cif filestring into a ``dict`` that represents its
|
|
15
16
|
table structure. Only lines which aren't empty and which don't begin with
|
|
16
17
|
``#`` are used.
|
|
17
18
|
|
|
@@ -19,30 +20,33 @@ def mmcif_string_to_mmcif_dict(filestring: str) -> dict[str, Any]:
|
|
|
19
20
|
then split into the blocks that will become table lists. At the end, quote
|
|
20
21
|
marks are removed from any string which retains them.
|
|
21
22
|
|
|
22
|
-
:param
|
|
23
|
-
|
|
23
|
+
:param filestring: .cif filestring to process
|
|
24
|
+
"""
|
|
24
25
|
|
|
25
26
|
lines = deque(filter(lambda l: l and l[0] != "#", filestring.split("\n")))
|
|
26
27
|
lines = consolidate_strings(lines)
|
|
27
28
|
blocks = mmcif_lines_to_mmcif_blocks(lines)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
else
|
|
33
|
-
|
|
29
|
+
|
|
30
|
+
mmcif_dict = {
|
|
31
|
+
block["category"]: loop_block_to_list(block)
|
|
32
|
+
if block["lines"][0] == "loop_" # keep open
|
|
33
|
+
else non_loop_block_to_list(block)
|
|
34
|
+
for block in blocks
|
|
35
|
+
}
|
|
34
36
|
strip_quotes(mmcif_dict)
|
|
37
|
+
|
|
35
38
|
return mmcif_dict
|
|
36
39
|
|
|
37
40
|
|
|
38
41
|
def consolidate_strings(lines: deque[str]) -> deque[str]:
|
|
39
|
-
"""
|
|
42
|
+
"""
|
|
43
|
+
Generally, .cif files have a one file line to one table row
|
|
40
44
|
correspondence. Sometimes however, a string cell is given a line of its own,
|
|
41
45
|
breaking the row over several lines. This function takes the lines of a .cif
|
|
42
46
|
file and puts all table rows on a single line.
|
|
43
47
|
|
|
44
|
-
:param
|
|
45
|
-
|
|
48
|
+
:param lines: .cif file lines
|
|
49
|
+
"""
|
|
46
50
|
|
|
47
51
|
new_lines: deque[str] = deque()
|
|
48
52
|
while lines:
|
|
@@ -59,12 +63,12 @@ def consolidate_strings(lines: deque[str]) -> deque[str]:
|
|
|
59
63
|
|
|
60
64
|
|
|
61
65
|
def mmcif_lines_to_mmcif_blocks(lines: deque[str]) -> list[dict[str, Any]]:
|
|
62
|
-
"""
|
|
63
|
-
.cif file lines and splits them into
|
|
64
|
-
a ``dict`` containing a category name and a list of lines.
|
|
66
|
+
"""
|
|
67
|
+
Takes a list of .cif file lines and splits them into table blocks. Each
|
|
68
|
+
block will be a ``dict`` containing a category name and a list of lines.
|
|
65
69
|
|
|
66
|
-
:param
|
|
67
|
-
|
|
70
|
+
:param lines: .cif file lines
|
|
71
|
+
"""
|
|
68
72
|
|
|
69
73
|
category = None
|
|
70
74
|
block: list[str] = []
|
|
@@ -92,11 +96,12 @@ def mmcif_lines_to_mmcif_blocks(lines: deque[str]) -> list[dict[str, Any]]:
|
|
|
92
96
|
|
|
93
97
|
|
|
94
98
|
def non_loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
|
|
95
|
-
"""
|
|
99
|
+
"""
|
|
100
|
+
Takes a simple block ``dict`` with no loop and turns it into a table
|
|
96
101
|
``list``.
|
|
97
102
|
|
|
98
|
-
:param
|
|
99
|
-
|
|
103
|
+
:param block: .cif block to process
|
|
104
|
+
"""
|
|
100
105
|
|
|
101
106
|
d = {}
|
|
102
107
|
for index in range(len(block["lines"]) - 1):
|
|
@@ -113,12 +118,13 @@ def non_loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
|
|
|
113
118
|
|
|
114
119
|
|
|
115
120
|
def loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
|
|
116
|
-
"""
|
|
121
|
+
"""
|
|
122
|
+
Takes a loop block ``dict`` where the initial lines are table headers and
|
|
117
123
|
turns it into a table ``list``. Sometimes a row is broken over several lines
|
|
118
124
|
so this function deals with that too.
|
|
119
125
|
|
|
120
|
-
:param
|
|
121
|
-
|
|
126
|
+
:param block: .cif block to process
|
|
127
|
+
"""
|
|
122
128
|
|
|
123
129
|
names, lines, _ = [], [], True
|
|
124
130
|
body_start = 0
|
|
@@ -135,19 +141,21 @@ def loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
|
|
|
135
141
|
lines.pop(n + 1)
|
|
136
142
|
for line in lines:
|
|
137
143
|
l.append({name: value for name, value in zip(names, line)})
|
|
144
|
+
|
|
138
145
|
return l
|
|
139
146
|
|
|
140
147
|
|
|
141
148
|
def split_values(line: str) -> list[str]:
|
|
142
|
-
"""
|
|
149
|
+
"""
|
|
150
|
+
The body of a .cif table is a series of lines, with each cell divided by
|
|
143
151
|
whitespace. This function takes a string line and breaks it into cells.
|
|
144
152
|
|
|
145
153
|
There are a few peculiarities to handle. Sometimes a cell is a string
|
|
146
154
|
enclosed in quote marks, and spaces within this string obviously shouldn't
|
|
147
155
|
be used to break the line. This function handles all of that.
|
|
148
156
|
|
|
149
|
-
:param
|
|
150
|
-
|
|
157
|
+
:param line: .cif line to split
|
|
158
|
+
"""
|
|
151
159
|
|
|
152
160
|
if not re.search("['\"]", line):
|
|
153
161
|
return line.split()
|
|
@@ -167,16 +175,17 @@ def split_values(line: str) -> list[str]:
|
|
|
167
175
|
else:
|
|
168
176
|
value.append(char)
|
|
169
177
|
values.append(value)
|
|
178
|
+
|
|
170
179
|
return ["".join(v) for v in values if v]
|
|
171
180
|
|
|
172
181
|
|
|
173
182
|
def strip_quotes(mmcif_dict: dict[str, Any]) -> None:
|
|
174
|
-
"""
|
|
175
|
-
quote marks from
|
|
183
|
+
"""
|
|
184
|
+
In-place removes unneeded quote marks from a .mmcif dictionary.
|
|
176
185
|
|
|
177
|
-
:param
|
|
178
|
-
|
|
179
|
-
for
|
|
186
|
+
:param mmcif_dict: almost finished .mmcif dictionary to clean
|
|
187
|
+
"""
|
|
188
|
+
for _, table in mmcif_dict.items():
|
|
180
189
|
for row in table:
|
|
181
190
|
for k, value in row.items():
|
|
182
191
|
for char in "'\"":
|
|
@@ -186,11 +195,12 @@ def strip_quotes(mmcif_dict: dict[str, Any]) -> None:
|
|
|
186
195
|
|
|
187
196
|
|
|
188
197
|
def mmcif_dict_to_data_dict(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
|
|
189
|
-
"""
|
|
198
|
+
"""
|
|
199
|
+
Converts an .mmcif dictionary into an atomium data dictionary, with the
|
|
190
200
|
same standard layout that the other file formats get converted into.
|
|
191
201
|
|
|
192
|
-
:param
|
|
193
|
-
|
|
202
|
+
:param mmcif_dict: .mmcif dictionary
|
|
203
|
+
"""
|
|
194
204
|
|
|
195
205
|
data_dict = {
|
|
196
206
|
"description": {"code": None, "title": None, "deposition_date": None, "classification": None, "keywords": [], "authors": []},
|
|
@@ -204,15 +214,18 @@ def mmcif_dict_to_data_dict(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
|
|
|
204
214
|
update_quality_dict(mmcif_dict, data_dict)
|
|
205
215
|
update_geometry_dict(mmcif_dict, data_dict)
|
|
206
216
|
update_models_list(mmcif_dict, data_dict)
|
|
217
|
+
|
|
207
218
|
return data_dict
|
|
208
219
|
|
|
209
220
|
|
|
210
221
|
def update_description_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
|
|
211
|
-
"""
|
|
222
|
+
"""
|
|
223
|
+
Takes a data dictionary and updates its description sub-dictionary with
|
|
212
224
|
information from a .mmcif dictionary.
|
|
213
225
|
|
|
214
|
-
:param
|
|
215
|
-
:param
|
|
226
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
227
|
+
:param data_dict: data dictionary to update
|
|
228
|
+
"""
|
|
216
229
|
|
|
217
230
|
mmcif_to_data_transfer(mmcif_dict, data_dict, "description", "code", "entry", "id")
|
|
218
231
|
mmcif_to_data_transfer(mmcif_dict, data_dict, "description", "title", "struct", "title")
|
|
@@ -223,11 +236,13 @@ def update_description_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any
|
|
|
223
236
|
|
|
224
237
|
|
|
225
238
|
def update_experiment_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
|
|
226
|
-
"""
|
|
239
|
+
"""
|
|
240
|
+
Takes a data dictionary and updates its experiment sub-dictionary with
|
|
227
241
|
information from a .mmcif dictionary.
|
|
228
242
|
|
|
229
|
-
:param
|
|
230
|
-
:param
|
|
243
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
244
|
+
:param data_dict: data dictionary to update
|
|
245
|
+
"""
|
|
231
246
|
|
|
232
247
|
mmcif_to_data_transfer(mmcif_dict, data_dict, "experiment", "technique", "exptl", "method")
|
|
233
248
|
for cat, key in [
|
|
@@ -245,11 +260,13 @@ def update_experiment_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]
|
|
|
245
260
|
|
|
246
261
|
|
|
247
262
|
def update_quality_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
|
|
248
|
-
"""
|
|
263
|
+
"""
|
|
264
|
+
Takes a data dictionary and updates its quality sub-dictionary with
|
|
249
265
|
information from a .mmcif dictionary.
|
|
250
266
|
|
|
251
|
-
:param
|
|
252
|
-
:param
|
|
267
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
268
|
+
:param data_dict: data dictionary to update
|
|
269
|
+
"""
|
|
253
270
|
|
|
254
271
|
mmcif_to_data_transfer(mmcif_dict, data_dict, "quality", "resolution", "reflns", "d_resolution_high", func=float)
|
|
255
272
|
if not data_dict["quality"]["resolution"]:
|
|
@@ -261,11 +278,13 @@ def update_quality_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -
|
|
|
261
278
|
|
|
262
279
|
|
|
263
280
|
def update_geometry_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
|
|
264
|
-
"""
|
|
281
|
+
"""
|
|
282
|
+
Takes a data dictionary and updates its geometry sub-dictionary with
|
|
265
283
|
information from a .mmcif dictionary.
|
|
266
284
|
|
|
267
|
-
:param
|
|
268
|
-
:param
|
|
285
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
286
|
+
:param data_dict: data dictionary to update
|
|
287
|
+
"""
|
|
269
288
|
|
|
270
289
|
data_dict["geometry"]["assemblies"] = [
|
|
271
290
|
{
|
|
@@ -291,11 +310,13 @@ def update_geometry_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any])
|
|
|
291
310
|
|
|
292
311
|
|
|
293
312
|
def assign_metrics_to_assembly(mmcif_dict: dict[str, Any], assembly: dict[str, Any]) -> None:
|
|
294
|
-
"""
|
|
313
|
+
"""
|
|
314
|
+
Takes an assembly dict, and goes through an mmcif dictionary looking for
|
|
295
315
|
relevant energy etc. information to update it with.
|
|
296
316
|
|
|
297
|
-
:param
|
|
298
|
-
:param
|
|
317
|
+
:param mmcif_dict: dictionary to read
|
|
318
|
+
:param assembly: assembly to update
|
|
319
|
+
"""
|
|
299
320
|
|
|
300
321
|
for a in mmcif_dict.get("pdbx_struct_assembly_prop", []):
|
|
301
322
|
if a["biol_id"] == str(assembly["id"]):
|
|
@@ -308,12 +329,14 @@ def assign_metrics_to_assembly(mmcif_dict: dict[str, Any], assembly: dict[str, A
|
|
|
308
329
|
|
|
309
330
|
|
|
310
331
|
def assign_transformations_to_assembly(mmcif_dict: dict[str, Any], operations: Any, assembly: dict[str, Any]) -> None:
|
|
311
|
-
"""
|
|
332
|
+
"""
|
|
333
|
+
Takes an assembly dict, and goes through an mmcif dictionary looking for
|
|
312
334
|
relevant transformation information to update it with.
|
|
313
335
|
|
|
314
|
-
:param
|
|
315
|
-
:param
|
|
316
|
-
:param
|
|
336
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
337
|
+
:param operations: processed operations matrices
|
|
338
|
+
:param assembly: assembly to update
|
|
339
|
+
"""
|
|
317
340
|
|
|
318
341
|
for gen in mmcif_dict.get("pdbx_struct_assembly_gen", []):
|
|
319
342
|
if gen["assembly_id"] == str(assembly["id"]):
|
|
@@ -326,19 +349,20 @@ def assign_transformations_to_assembly(mmcif_dict: dict[str, Any], operations: A
|
|
|
326
349
|
|
|
327
350
|
|
|
328
351
|
def get_operation_id_groups(expression: str) -> list[list[str]]:
|
|
329
|
-
"""
|
|
330
|
-
|
|
331
|
-
becomes [[1, 2, 3]], (1-3)(8-11,17) becomes [[1, 2, 3], [8, 9, 10, 11, 17]],
|
|
332
|
-
and so on.
|
|
352
|
+
"""
|
|
353
|
+
Determines which transformation IDs an operator expression is referring to.
|
|
333
354
|
|
|
334
|
-
|
|
335
|
-
|
|
355
|
+
For example, (1,2,3) becomes [[1, 2, 3]], (1-3)(8-11,17) becomes
|
|
356
|
+
[[1, 2, 3], [8, 9, 10, 11, 17]], and so on.
|
|
336
357
|
|
|
358
|
+
:param str expression: expression to parse
|
|
359
|
+
:return: list of transformation ID groups
|
|
360
|
+
"""
|
|
337
361
|
if expression[0] != "(":
|
|
338
362
|
expression = "({})".format(expression)
|
|
339
|
-
|
|
363
|
+
|
|
340
364
|
group_ids = []
|
|
341
|
-
for group in
|
|
365
|
+
for group in re.findall(r"\((.+?)\)", expression):
|
|
342
366
|
ids = []
|
|
343
367
|
elements = group.split(",")
|
|
344
368
|
for element in elements:
|
|
@@ -347,16 +371,20 @@ def get_operation_id_groups(expression: str) -> list[list[str]]:
|
|
|
347
371
|
ids += [str(n) for n in list(range(bounds[0], bounds[1] + 1))]
|
|
348
372
|
else:
|
|
349
373
|
ids.append(element)
|
|
374
|
+
|
|
350
375
|
group_ids.append(ids)
|
|
376
|
+
|
|
351
377
|
return group_ids
|
|
352
378
|
|
|
353
379
|
|
|
354
380
|
def update_crystallography_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
|
|
355
|
-
"""
|
|
356
|
-
|
|
381
|
+
"""
|
|
382
|
+
Takes a data dictionary and updates its crystallography sub-sub-dictionary
|
|
383
|
+
with information from a .mmcif dictionary.
|
|
357
384
|
|
|
358
|
-
:param
|
|
359
|
-
:param
|
|
385
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
386
|
+
:param data_dict: data dictionary to update
|
|
387
|
+
"""
|
|
360
388
|
|
|
361
389
|
if mmcif_dict.get("cell"):
|
|
362
390
|
mmcif_to_data_transfer(mmcif_dict, data_dict["geometry"], "crystallography", "space_group", "symmetry", "space_group_name_H-M")
|
|
@@ -368,37 +396,42 @@ def update_crystallography_dict(mmcif_dict: dict[str, Any], data_dict: dict[str,
|
|
|
368
396
|
|
|
369
397
|
|
|
370
398
|
def operation_id_groups_to_operations(operations: Any, operation_id_groups: Any) -> Any:
    """
    Creates the operation matrices for an assembly from groups of operation
    IDs, cross multiplying the groups as required.

    Every ID is looked up in ``operations``. A single group is returned as its
    list of matrices. With several groups, each matrix of the running result
    is multiplied (``np.matmul``) by each matrix of the next group, left to
    right, until one flat list remains.

    :param operations: parsed .mmcif operations, indexable by operation ID
    :param operation_id_groups: operation ID groups
    :return: list of operation matrices (empty if there are no groups)
    """
    operation_groups = [[operations[i] for i in ids] for ids in operation_id_groups]

    # No groups means no operations; indexing [0] below would raise IndexError.
    if not operation_groups:
        return []

    combined = operation_groups[0]
    for group in operation_groups[1:]:
        # Cross product: every accumulated matrix times every matrix of the
        # next group, keeping the left-to-right multiplication order.
        combined = [np.matmul(op1, op2) for op1 in combined for op2 in group]

    return combined
|
|
386
415
|
|
|
387
416
|
|
|
388
417
|
def update_models_list(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
|
|
389
|
-
"""
|
|
418
|
+
"""
|
|
419
|
+
Takes a data dictionary and updates its models list with
|
|
390
420
|
information from a .mmcif dictionary.
|
|
391
421
|
|
|
392
|
-
:param
|
|
393
|
-
:param
|
|
422
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
423
|
+
:param data_dict: data dictionary to update
|
|
424
|
+
"""
|
|
394
425
|
|
|
395
426
|
data_dict["models"] = []
|
|
396
427
|
types = {e["id"]: e["type"] for e in mmcif_dict.get("entity", {})}
|
|
397
428
|
names = {e["id"]: e["name"] for e in mmcif_dict.get("chem_comp", {}) if e["mon_nstd_flag"] != "y"}
|
|
398
429
|
entities = {m["id"]: m["entity_id"] for m in mmcif_dict.get("struct_asym", [])}
|
|
430
|
+
|
|
399
431
|
# sequences = make_sequences(mmcif_dict)
|
|
400
432
|
secondary_structure = make_secondary_structure(mmcif_dict)
|
|
401
433
|
aniso = make_aniso(mmcif_dict)
|
|
434
|
+
|
|
402
435
|
model: dict[str, Any] = {"polymer": {}, "non_polymer": {}, "water": {}, "branched": {}}
|
|
403
436
|
model_num = mmcif_dict["atom_site"][0]["pdbx_PDB_model_num"]
|
|
404
437
|
for atom in mmcif_dict["atom_site"]:
|
|
@@ -412,64 +445,67 @@ def update_models_list(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) ->
|
|
|
412
445
|
else:
|
|
413
446
|
add_atom_to_non_polymer(atom, aniso, model, mol_type, names)
|
|
414
447
|
data_dict["models"].append(model)
|
|
448
|
+
|
|
415
449
|
for model in data_dict["models"]:
|
|
416
450
|
add_sequences_to_polymers(model, mmcif_dict, entities)
|
|
417
451
|
add_secondary_structure_to_polymers(model, secondary_structure)
|
|
418
452
|
|
|
419
453
|
|
|
420
454
|
def make_aniso(mmcif_dict: dict[str, Any]) -> dict[int, Any]:
    """
    Builds a lookup of anisotropy values keyed by integer atom ID.

    Each row of the ``atom_site_anisotrop`` table contributes one entry whose
    value is the six floats ``U[1][1]``, ``U[2][2]``, ``U[3][3]``, ``U[1][2]``,
    ``U[1][3]`` and ``U[2][3]``, in that order. A missing table gives an empty
    mapping.

    :param mmcif_dict: .mmcif dict to read
    :return: mapping of atom ID to anisotropy values
    """
    tensor_keys = [f"U[{i}][{j}]" for i, j in ("11", "22", "33", "12", "13", "23")]

    aniso: dict[int, Any] = {}
    for row in mmcif_dict.get("atom_site_anisotrop", []):
        atom_id = int(row["id"])
        aniso[atom_id] = [float(row[key]) for key in tensor_keys]

    return aniso
|
|
430
464
|
|
|
431
465
|
|
|
432
|
-
def make_secondary_structure(mmcif_dict: dict[str, Any]) -> dict[str,
|
|
433
|
-
"""
|
|
466
|
+
def _segment_endpoint_id(row: dict[str, Any], end: str) -> str:
    """
    Formats the residue ID for one end of a secondary structure segment.

    :param row: ``struct_conf`` or ``struct_sheet_range`` row to read
    :param end: which end of the segment to format, ``beg`` or ``end``
    :return: residue ID as ``<auth_asym_id>.<auth_seq_id><insertion code>``
    """
    chain_id = row[f"{end}_auth_asym_id"]
    seq_id = row[f"{end}_auth_seq_id"]
    ins_code = row[f"pdbx_{end}_PDB_ins_code"].replace("?", "")
    # make_residue_id treats both "?" and "." as "no insertion code"; do the
    # same here so segment endpoints agree with the residue IDs.
    if ins_code == ".":
        ins_code = ""
    return f"{chain_id}.{seq_id}{ins_code}"


def make_secondary_structure(mmcif_dict: dict[str, Any]) -> dict[str, list[list[str]]]:
    """
    Creates a dictionary of helices and strands, with each having a list of
    start and end residues.

    Helices are read from the ``struct_conf`` table and strands from the
    ``struct_sheet_range`` table; a missing table yields an empty list. Each
    segment is a two-item list ``[start, end]`` of residue IDs.

    :param mmcif_dict: .mmcif dict to read
    :return: secondary structure dictionary with ``helices`` and ``strands`` keys
    """
    ends = ("beg", "end")

    helices = [[_segment_endpoint_id(helix, x) for x in ends] for helix in mmcif_dict.get("struct_conf", [])]
    strands = [[_segment_endpoint_id(strand, x) for x in ends] for strand in mmcif_dict.get("struct_sheet_range", [])]

    return {"helices": helices, "strands": strands}
|
|
463
498
|
|
|
464
499
|
|
|
465
500
|
def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict[str, Any], names: dict[str, Any]) -> None:
|
|
466
|
-
"""
|
|
467
|
-
dictionary.
|
|
501
|
+
"""
|
|
502
|
+
Takes an MMCIF atom dictionary, converts it, and adds it to a polymer dictionary.
|
|
468
503
|
|
|
469
|
-
:param
|
|
470
|
-
:param
|
|
471
|
-
:param
|
|
472
|
-
:param
|
|
504
|
+
:param atom: .mmcif dictionary to read
|
|
505
|
+
:param aniso: lookup dictionary for anisotropy information
|
|
506
|
+
:param model: model to update
|
|
507
|
+
:param names: lookup dictionary for full name information
|
|
508
|
+
"""
|
|
473
509
|
|
|
474
510
|
mol_id = atom["auth_asym_id"]
|
|
475
511
|
res_id = make_residue_id(atom)
|
|
@@ -501,16 +537,17 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
|
|
|
501
537
|
|
|
502
538
|
|
|
503
539
|
def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict[str, Any], mol_type: str, names: dict[str, Any]) -> None:
|
|
504
|
-
"""
|
|
505
|
-
dictionary.
|
|
506
|
-
|
|
507
|
-
:param
|
|
508
|
-
:param
|
|
509
|
-
:param
|
|
510
|
-
:param
|
|
511
|
-
:param
|
|
512
|
-
|
|
540
|
+
"""
|
|
541
|
+
Takes an MMCIF atom dictionary, converts it, and adds it to a non_polymer dictionary.
|
|
542
|
+
|
|
543
|
+
:param atom: .mmcif dictionary to read
|
|
544
|
+
:param aniso: lookup dictionary for anisotropy information
|
|
545
|
+
:param model: model to update
|
|
546
|
+
:param mol_type: non_polymer or water
|
|
547
|
+
:param names: lookup dictionary for full name information
|
|
548
|
+
"""
|
|
513
549
|
mol_id = make_residue_id(atom)
|
|
550
|
+
|
|
514
551
|
try:
|
|
515
552
|
model[mol_type][mol_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
|
|
516
553
|
except Exception:
|
|
@@ -525,35 +562,39 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
|
|
|
525
562
|
|
|
526
563
|
|
|
527
564
|
def make_residue_id(d: dict[str, Any]) -> str:
    """
    Builds the residue ID string for an atom.

    The ID is ``<auth_asym_id>.<auth_seq_id>`` followed by the insertion code,
    which is dropped when it is one of the placeholder values.

    :param d: atom dictionary to read
    :return: residue ID
    """
    ins_code = d["pdbx_PDB_ins_code"]
    # Substring test on purpose: "?", "." (and "") all count as "no code".
    if ins_code in "?.":
        ins_code = ""

    chain_id = d["auth_asym_id"]
    seq_id = d["auth_seq_id"]
    return f"{chain_id}.{seq_id}{ins_code}"
|
|
535
574
|
|
|
536
575
|
|
|
537
576
|
def add_sequences_to_polymers(model: dict[str, Any], mmcif_dict: dict[str, Any], entities: dict[str, Any]) -> None:
    """
    Sets the ``sequence`` field of every polymer in a model, in place.

    Each polymer's ``internal_id`` is looked up in ``entities`` to find its
    entity ID, and that entity ID is looked up in the mapping produced by
    ``make_sequences``. A polymer with no match at either step gets an empty
    string.

    :param model: model to update
    :param mmcif_dict: .mmcif dictionary to read
    :param entities: mapping of chain IDs to entity IDs
    """
    sequence_by_entity = make_sequences(mmcif_dict)

    for chain in model["polymer"].values():
        entity_id = entities.get(chain["internal_id"], "")
        chain["sequence"] = sequence_by_entity.get(entity_id, "")
|
|
548
588
|
|
|
549
589
|
|
|
550
590
|
def add_secondary_structure_to_polymers(model: dict[str, Any], ss_dict: dict[str, Any]) -> None:
|
|
551
|
-
"""
|
|
591
|
+
"""
|
|
592
|
+
Updates polymer dictionaries with secondary structure information, from
|
|
552
593
|
a previously created mapping.
|
|
553
594
|
|
|
554
|
-
:param
|
|
555
|
-
:param
|
|
556
|
-
|
|
595
|
+
:param model: model to update
|
|
596
|
+
:param ss_dict: mapping to read
|
|
597
|
+
"""
|
|
557
598
|
for ss in ("helices", "strands"):
|
|
558
599
|
for segment in ss_dict[ss]:
|
|
559
600
|
chain = model["polymer"].get(segment[0].split(".")[0])
|
|
@@ -570,11 +611,12 @@ def add_secondary_structure_to_polymers(model: dict[str, Any], ss_dict: dict[str
|
|
|
570
611
|
|
|
571
612
|
|
|
572
613
|
def make_sequences(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
|
|
573
|
-
"""
|
|
574
|
-
|
|
575
|
-
:param dict mmcif_dict: the .mmcif dictionary to read.
|
|
576
|
-
:rtype: ``dict``"""
|
|
614
|
+
"""
|
|
615
|
+
Creates a mapping of entity IDs to sequences.
|
|
577
616
|
|
|
617
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
618
|
+
:return: sequence mapping
|
|
619
|
+
"""
|
|
578
620
|
return {
|
|
579
621
|
e["id"]: "".join([CODES.get(res["mon_id"], "X") for res in mmcif_dict.get("entity_poly_seq", []) if res["entity_id"] == e["id"]])
|
|
580
622
|
for e in mmcif_dict.get("entity", [])
|
|
@@ -583,13 +625,15 @@ def make_sequences(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
|
|
|
583
625
|
|
|
584
626
|
|
|
585
627
|
def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dict[str, Any]:
|
|
586
|
-
"""
|
|
587
|
-
|
|
588
|
-
:param dict d: the .mmcif atom dictionary.
|
|
589
|
-
:param dict d: the mapping of atom IDs to anisotropy.
|
|
590
|
-
:rtype: ``dict``"""
|
|
628
|
+
"""
|
|
629
|
+
Turns an .mmcif atom dictionary into an atomium atom data dictionary.
|
|
591
630
|
|
|
631
|
+
:param d: .mmcif atom dictionary
|
|
632
|
+
:param aniso_dict: mapping of atom IDs to anisotropy
|
|
633
|
+
:return: atom data dictionary
|
|
634
|
+
"""
|
|
592
635
|
charge = "pdbx_formal_charge"
|
|
636
|
+
|
|
593
637
|
atom = {
|
|
594
638
|
"x": d["Cartn_x"],
|
|
595
639
|
"y": d["Cartn_y"],
|
|
@@ -603,9 +647,11 @@ def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dic
|
|
|
603
647
|
"anisotropy": aniso_dict.get(int(d["id"]), [0, 0, 0, 0, 0, 0]),
|
|
604
648
|
"is_hetatm": d.get("group_PDB", "ATOM") == "HETATM",
|
|
605
649
|
}
|
|
650
|
+
|
|
606
651
|
for key in ["x", "y", "z", "charge", "bvalue", "occupancy"]:
|
|
607
652
|
if atom[key] is not None:
|
|
608
653
|
atom[key] = float(atom[key])
|
|
654
|
+
|
|
609
655
|
if atom["charge"] == 0:
|
|
610
656
|
atom["charge"] = None
|
|
611
657
|
if not atom["is_hetatm"]:
|
|
@@ -616,6 +662,7 @@ def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dic
|
|
|
616
662
|
atom["occupancy"] = None
|
|
617
663
|
if atom["name"] == atom["element"]:
|
|
618
664
|
atom["name"] = None
|
|
665
|
+
|
|
619
666
|
return atom
|
|
620
667
|
|
|
621
668
|
|
|
@@ -629,33 +676,40 @@ def mmcif_to_data_transfer(
|
|
|
629
676
|
date: bool = False,
|
|
630
677
|
split: bool = False,
|
|
631
678
|
multi: bool = False,
|
|
632
|
-
func: Any = None,
|
|
679
|
+
func: Callable[[Any], Any] | None = None,
|
|
633
680
|
) -> None:
|
|
634
|
-
"""
|
|
681
|
+
"""
|
|
682
|
+
Function for transferring a bit of data from a .mmcif dictionary to a
|
|
635
683
|
data dictionary, or doing nothing if the data doesn't exist.
|
|
636
684
|
|
|
637
|
-
:param
|
|
638
|
-
:param
|
|
639
|
-
:param
|
|
640
|
-
:param
|
|
641
|
-
:param
|
|
642
|
-
:param
|
|
643
|
-
:param
|
|
644
|
-
:param
|
|
645
|
-
:param
|
|
646
|
-
:param
|
|
647
|
-
|
|
685
|
+
:param mmcif_dict: .mmcif dictionary to read
|
|
686
|
+
:param data_dict: data dictionary to update
|
|
687
|
+
:param d_cat: top-level key in the data dictionary
|
|
688
|
+
:param d_key: data dictionary field to update
|
|
689
|
+
:param m_table: name of the .mmcif table to look in
|
|
690
|
+
:param m_key: .mmcif field to read
|
|
691
|
+
:param date: if True, value will be converted to a date
|
|
692
|
+
:param split: if True, value will be split on commas
|
|
693
|
+
:param multi: if True, every row in the table will be read
|
|
694
|
+
:param func: if given, will be applied to the value
|
|
695
|
+
"""
|
|
648
696
|
try:
|
|
649
697
|
if multi:
|
|
650
698
|
value = [row[m_key] for row in mmcif_dict[m_table]]
|
|
651
699
|
else:
|
|
652
700
|
value = mmcif_dict[m_table][0][m_key]
|
|
701
|
+
|
|
653
702
|
if date:
|
|
654
703
|
value = datetime.strptime(value, "%Y-%m-%d").date() # type: ignore [arg-type, assignment]
|
|
655
704
|
if split:
|
|
656
705
|
value = value.replace(", ", ",").split(",") # type: ignore [attr-defined]
|
|
657
706
|
if func:
|
|
658
707
|
value = func(value)
|
|
659
|
-
|
|
708
|
+
|
|
709
|
+
if isinstance(value, str) and value == "?":
|
|
710
|
+
value = None
|
|
711
|
+
|
|
712
|
+
data_dict[d_cat][d_key] = value
|
|
713
|
+
|
|
660
714
|
except Exception:
|
|
661
715
|
pass
|