stjames 0.0.59__py3-none-any.whl → 0.0.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stjames might be problematic. Click here for more details.

@@ -3,7 +3,7 @@
3
3
  import re
4
4
  from collections import deque
5
5
  from datetime import datetime
6
- from typing import Any
6
+ from typing import Any, Callable
7
7
 
8
8
  import numpy as np
9
9
 
@@ -11,7 +11,8 @@ from .data import CODES
11
11
 
12
12
 
13
13
  def mmcif_string_to_mmcif_dict(filestring: str) -> dict[str, Any]:
14
- """Takes a .cif filestring and turns into a ``dict`` which represents its
14
+ """
15
+ Converts a .cif filestring into a ``dict`` that represents its
15
16
  table structure. Only lines which aren't empty and which don't begin with
16
17
  ``#`` are used.
17
18
 
@@ -19,30 +20,33 @@ def mmcif_string_to_mmcif_dict(filestring: str) -> dict[str, Any]:
19
20
  then split into the blocks that will become table lists. At the end, quote
20
21
  marks are removed from any string which retains them.
21
22
 
22
- :param str filestring: the .cif filestring to process.
23
- :rtype: ``dict``"""
23
+ :param filestring: .cif filestring to process
24
+ """
24
25
 
25
26
  lines = deque(filter(lambda l: l and l[0] != "#", filestring.split("\n")))
26
27
  lines = consolidate_strings(lines)
27
28
  blocks = mmcif_lines_to_mmcif_blocks(lines)
28
- mmcif_dict = {}
29
- for block in blocks:
30
- if block["lines"][0] == "loop_":
31
- mmcif_dict[block["category"]] = loop_block_to_list(block)
32
- else:
33
- mmcif_dict[block["category"]] = non_loop_block_to_list(block)
29
+
30
+ mmcif_dict = {
31
+ block["category"]: loop_block_to_list(block)
32
+ if block["lines"][0] == "loop_" # keep open
33
+ else non_loop_block_to_list(block)
34
+ for block in blocks
35
+ }
34
36
  strip_quotes(mmcif_dict)
37
+
35
38
  return mmcif_dict
36
39
 
37
40
 
38
41
  def consolidate_strings(lines: deque[str]) -> deque[str]:
39
- """Generally, .cif files have a one file line to one table row
42
+ """
43
+ Generally, .cif files have a one file line to one table row
40
44
  correspondence. Sometimes however, a string cell is given a line of its own,
41
45
  breaking the row over several lines. This function takes the lines of a .cif
42
46
  file and puts all table rows on a single line.
43
47
 
44
- :param deque lines: the .cif file lines.
45
- :rtype: ``deque``"""
48
+ :param lines: .cif file lines
49
+ """
46
50
 
47
51
  new_lines: deque[str] = deque()
48
52
  while lines:
@@ -59,12 +63,12 @@ def consolidate_strings(lines: deque[str]) -> deque[str]:
59
63
 
60
64
 
61
65
  def mmcif_lines_to_mmcif_blocks(lines: deque[str]) -> list[dict[str, Any]]:
62
- """A .cif file is ultimately a list of tables. This function takes a list of
63
- .cif file lines and splits them into these table blocks. Each block will be
64
- a ``dict`` containing a category name and a list of lines.
66
+ """
67
+ Takes a list of .cif file lines and splits them into table blocks. Each
68
+ block will be a ``dict`` containing a category name and a list of lines.
65
69
 
66
- :param deque lines: the .cif file lines.
67
- :rtype: ``list``"""
70
+ :param lines: .cif file lines
71
+ """
68
72
 
69
73
  category = None
70
74
  block: list[str] = []
@@ -92,11 +96,12 @@ def mmcif_lines_to_mmcif_blocks(lines: deque[str]) -> list[dict[str, Any]]:
92
96
 
93
97
 
94
98
  def non_loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
95
- """Takes a simple block ``dict`` with no loop and turns it into a table
99
+ """
100
+ Takes a simple block ``dict`` with no loop and turns it into a table
96
101
  ``list``.
97
102
 
98
- :param dict block: the .cif block to process.
99
- :rtype: ``list``"""
103
+ :param block: .cif block to process
104
+ """
100
105
 
101
106
  d = {}
102
107
  for index in range(len(block["lines"]) - 1):
@@ -113,12 +118,13 @@ def non_loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
113
118
 
114
119
 
115
120
  def loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
116
- """Takes a loop block ``dict`` where the initial lines are table headers and
121
+ """
122
+ Takes a loop block ``dict`` where the initial lines are table headers and
117
123
  turns it into a table ``list``. Sometimes a row is broken over several lines
118
124
  so this function deals with that too.
119
125
 
120
- :param dict block: the .cif block to process.
121
- :rtype: ``list``"""
126
+ :param block: .cif block to process
127
+ """
122
128
 
123
129
  names, lines, _ = [], [], True
124
130
  body_start = 0
@@ -135,19 +141,21 @@ def loop_block_to_list(block: dict[str, Any]) -> list[dict[str, Any]]:
135
141
  lines.pop(n + 1)
136
142
  for line in lines:
137
143
  l.append({name: value for name, value in zip(names, line)})
144
+
138
145
  return l
139
146
 
140
147
 
141
148
  def split_values(line: str) -> list[str]:
142
- """The body of a .cif table is a series of lines, with each cell divided by
149
+ """
150
+ The body of a .cif table is a series of lines, with each cell divided by
143
151
  whitespace. This function takes a string line and breaks it into cells.
144
152
 
145
153
  There are a few peculiarities to handle. Sometimes a cell is a string
146
154
  enclosed in quote marks, and spaces within this string obviously shouldn't
147
155
  be used to break the line. This function handles all of that.
148
156
 
149
- :param str line: the .cif line to split.
150
- :rtype: ``list``"""
157
+ :param line: .cif line to split
158
+ """
151
159
 
152
160
  if not re.search("['\"]", line):
153
161
  return line.split()
@@ -167,16 +175,17 @@ def split_values(line: str) -> list[str]:
167
175
  else:
168
176
  value.append(char)
169
177
  values.append(value)
178
+
170
179
  return ["".join(v) for v in values if v]
171
180
 
172
181
 
173
182
  def strip_quotes(mmcif_dict: dict[str, Any]) -> None:
174
- """Goes through each table in the mmcif ``dict`` and removes any unneeded
175
- quote marks from the cells.
183
+ """
184
+ In-place removes unneeded quote marks from a .mmcif dictionary.
176
185
 
177
- :param dict mmcif_dict: the almost finished .mmcif dictionary to clean."""
178
-
179
- for name, table in mmcif_dict.items():
186
+ :param mmcif_dict: almost finished .mmcif dictionary to clean
187
+ """
188
+ for _, table in mmcif_dict.items():
180
189
  for row in table:
181
190
  for k, value in row.items():
182
191
  for char in "'\"":
@@ -186,11 +195,12 @@ def strip_quotes(mmcif_dict: dict[str, Any]) -> None:
186
195
 
187
196
 
188
197
  def mmcif_dict_to_data_dict(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
189
- """Converts an .mmcif dictionary into an atomium data dictionary, with the
198
+ """
199
+ Converts an .mmcif dictionary into an atomium data dictionary, with the
190
200
  same standard layout that the other file formats get converted into.
191
201
 
192
- :param dict mmcif_dict: the .mmcif dictionary.
193
- :rtype: ``dict``"""
202
+ :param mmcif_dict: .mmcif dictionary
203
+ """
194
204
 
195
205
  data_dict = {
196
206
  "description": {"code": None, "title": None, "deposition_date": None, "classification": None, "keywords": [], "authors": []},
@@ -204,15 +214,18 @@ def mmcif_dict_to_data_dict(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
204
214
  update_quality_dict(mmcif_dict, data_dict)
205
215
  update_geometry_dict(mmcif_dict, data_dict)
206
216
  update_models_list(mmcif_dict, data_dict)
217
+
207
218
  return data_dict
208
219
 
209
220
 
210
221
  def update_description_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
211
- """Takes a data dictionary and updates its description sub-dictionary with
222
+ """
223
+ Takes a data dictionary and updates its description sub-dictionary with
212
224
  information from a .mmcif dictionary.
213
225
 
214
- :param dict mmcif_dict: the .mmcif dictionary to read.
215
- :param dict data_dict: the data dictionary to update."""
226
+ :param mmcif_dict: .mmcif dictionary to read
227
+ :param data_dict: data dictionary to update
228
+ """
216
229
 
217
230
  mmcif_to_data_transfer(mmcif_dict, data_dict, "description", "code", "entry", "id")
218
231
  mmcif_to_data_transfer(mmcif_dict, data_dict, "description", "title", "struct", "title")
@@ -223,11 +236,13 @@ def update_description_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any
223
236
 
224
237
 
225
238
  def update_experiment_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
226
- """Takes a data dictionary and updates its experiment sub-dictionary with
239
+ """
240
+ Takes a data dictionary and updates its experiment sub-dictionary with
227
241
  information from a .mmcif dictionary.
228
242
 
229
- :param dict mmcif_dict: the .mmcif dictionary to read.
230
- :param dict data_dict: the data dictionary to update."""
243
+ :param mmcif_dict: .mmcif dictionary to read
244
+ :param data_dict: data dictionary to update
245
+ """
231
246
 
232
247
  mmcif_to_data_transfer(mmcif_dict, data_dict, "experiment", "technique", "exptl", "method")
233
248
  for cat, key in [
@@ -245,11 +260,13 @@ def update_experiment_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]
245
260
 
246
261
 
247
262
  def update_quality_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
248
- """Takes a data dictionary and updates its quality sub-dictionary with
263
+ """
264
+ Takes a data dictionary and updates its quality sub-dictionary with
249
265
  information from a .mmcif dictionary.
250
266
 
251
- :param dict mmcif_dict: the .mmcif dictionary to read.
252
- :param dict data_dict: the data dictionary to update."""
267
+ :param mmcif_dict: .mmcif dictionary to read
268
+ :param data_dict: data dictionary to update
269
+ """
253
270
 
254
271
  mmcif_to_data_transfer(mmcif_dict, data_dict, "quality", "resolution", "reflns", "d_resolution_high", func=float)
255
272
  if not data_dict["quality"]["resolution"]:
@@ -261,11 +278,13 @@ def update_quality_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -
261
278
 
262
279
 
263
280
  def update_geometry_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
264
- """Takes a data dictionary and updates its geometry sub-dictionary with
281
+ """
282
+ Takes a data dictionary and updates its geometry sub-dictionary with
265
283
  information from a .mmcif dictionary.
266
284
 
267
- :param dict mmcif_dict: the .mmcif dictionary to read.
268
- :param dict data_dict: the data dictionary to update."""
285
+ :param mmcif_dict: .mmcif dictionary to read
286
+ :param data_dict: data dictionary to update
287
+ """
269
288
 
270
289
  data_dict["geometry"]["assemblies"] = [
271
290
  {
@@ -291,11 +310,13 @@ def update_geometry_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any])
291
310
 
292
311
 
293
312
  def assign_metrics_to_assembly(mmcif_dict: dict[str, Any], assembly: dict[str, Any]) -> None:
294
- """Takes an assembly dict, and goes through an mmcif dictionary looking for
313
+ """
314
+ Takes an assembly dict, and goes through an mmcif dictionary looking for
295
315
  relevant energy etc. information to update it with.
296
316
 
297
- :param dict mmcif_dict: The dictionary to read.
298
- :param dict assembly: The assembly to update."""
317
+ :param mmcif_dict: dictionary to read
318
+ :param assembly: assembly to update
319
+ """
299
320
 
300
321
  for a in mmcif_dict.get("pdbx_struct_assembly_prop", []):
301
322
  if a["biol_id"] == str(assembly["id"]):
@@ -308,12 +329,14 @@ def assign_metrics_to_assembly(mmcif_dict: dict[str, Any], assembly: dict[str, A
308
329
 
309
330
 
310
331
  def assign_transformations_to_assembly(mmcif_dict: dict[str, Any], operations: Any, assembly: dict[str, Any]) -> None:
311
- """Takes an assembly dict, and goes through an mmcif dictionary looking for
332
+ """
333
+ Takes an assembly dict, and goes through an mmcif dictionary looking for
312
334
  relevant transformation information to update it with.
313
335
 
314
- :param dict mmcif_dict: the .mmcif dictionary to read.
315
- :param dict operations: the processed operations matrices.
316
- :param dict assembly: the assembly to update."""
336
+ :param mmcif_dict: .mmcif dictionary to read
337
+ :param operations: processed operations matrices
338
+ :param assembly: assembly to update
339
+ """
317
340
 
318
341
  for gen in mmcif_dict.get("pdbx_struct_assembly_gen", []):
319
342
  if gen["assembly_id"] == str(assembly["id"]):
@@ -326,19 +349,20 @@ def assign_transformations_to_assembly(mmcif_dict: dict[str, Any], operations: A
326
349
 
327
350
 
328
351
  def get_operation_id_groups(expression: str) -> list[list[str]]:
329
- """Takes an operator expression from an .mmcif transformation dict, and
330
- works out what transformation IDs it is referring to. For example, (1,2,3)
331
- becomes [[1, 2, 3]], (1-3)(8-11,17) becomes [[1, 2, 3], [8, 9, 10, 11, 17]],
332
- and so on.
352
+ """
353
+ Determines which transformation IDs an operator expression is referring to.
333
354
 
334
- :param str expression: The expression to parse.
335
- :rtype: ``list``"""
355
+ For example, (1,2,3) becomes [[1, 2, 3]], (1-3)(8-11,17) becomes
356
+ [[1, 2, 3], [8, 9, 10, 11, 17]], and so on.
336
357
 
358
+ :param str expression: expression to parse
359
+ :return: list of transformation ID groups
360
+ """
337
361
  if expression[0] != "(":
338
362
  expression = "({})".format(expression)
339
- groups = re.findall(r"\((.+?)\)", expression)
363
+
340
364
  group_ids = []
341
- for group in groups:
365
+ for group in re.findall(r"\((.+?)\)", expression):
342
366
  ids = []
343
367
  elements = group.split(",")
344
368
  for element in elements:
@@ -347,16 +371,20 @@ def get_operation_id_groups(expression: str) -> list[list[str]]:
347
371
  ids += [str(n) for n in list(range(bounds[0], bounds[1] + 1))]
348
372
  else:
349
373
  ids.append(element)
374
+
350
375
  group_ids.append(ids)
376
+
351
377
  return group_ids
352
378
 
353
379
 
354
380
  def update_crystallography_dict(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
355
- """Takes a data dictionary and updates its crystallography
356
- sub-sub-dictionary with information from a .mmcif dictionary.
381
+ """
382
+ Takes a data dictionary and updates its crystallography sub-sub-dictionary
383
+ with information from a .mmcif dictionary.
357
384
 
358
- :param dict mmcif_dict: the .mmcif dictionary to read.
359
- :param dict data_dict: the data dictionary to update."""
385
+ :param mmcif_dict: .mmcif dictionary to read
386
+ :param data_dict: data dictionary to update
387
+ """
360
388
 
361
389
  if mmcif_dict.get("cell"):
362
390
  mmcif_to_data_transfer(mmcif_dict, data_dict["geometry"], "crystallography", "space_group", "symmetry", "space_group_name_H-M")
@@ -368,37 +396,42 @@ def update_crystallography_dict(mmcif_dict: dict[str, Any], data_dict: dict[str,
368
396
 
369
397
 
370
398
  def operation_id_groups_to_operations(operations: Any, operation_id_groups: Any) -> Any:
371
- """Creates a list of operation matrices for an assembly, from a list of
372
- operation IDs - cross multiplying as required.
373
-
374
- :param dict operations: the parsed .mmcif operations.
375
- :param list operation_id_groups: the operation IDs."""
376
-
399
+ """
400
+ Creates operation matrices for an assembly, from operation IDs - cross
401
+ multiplying as required.
402
+
403
+ :param operations: parsed .mmcif operations
404
+ :param operation_id_groups: operation IDs
405
+ :return: operation matrices
406
+ """
377
407
  operation_groups = [[operations[i] for i in ids] for ids in operation_id_groups]
408
+
378
409
  while len(operation_groups) and len(operation_groups) != 1:
379
- operations = []
380
- for op1 in operation_groups[0]:
381
- for op2 in operation_groups[1]:
382
- operations.append(np.matmul(op1, op2))
410
+ operations = [np.matmul(op1, op2) for op1 in operation_groups[0] for op2 in operation_groups[1]]
383
411
  operation_groups[0] = operations
384
412
  operation_groups.pop(1)
413
+
385
414
  return operation_groups[0]
386
415
 
387
416
 
388
417
  def update_models_list(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) -> None:
389
- """Takes a data dictionary and updates its models list with
418
+ """
419
+ Takes a data dictionary and updates its models list with
390
420
  information from a .mmcif dictionary.
391
421
 
392
- :param dict mmcif_dict: the .mmcif dictionary to read.
393
- :param dict data_dict: the data dictionary to update."""
422
+ :param mmcif_dict: .mmcif dictionary to read
423
+ :param data_dict: data dictionary to update
424
+ """
394
425
 
395
426
  data_dict["models"] = []
396
427
  types = {e["id"]: e["type"] for e in mmcif_dict.get("entity", {})}
397
428
  names = {e["id"]: e["name"] for e in mmcif_dict.get("chem_comp", {}) if e["mon_nstd_flag"] != "y"}
398
429
  entities = {m["id"]: m["entity_id"] for m in mmcif_dict.get("struct_asym", [])}
430
+
399
431
  # sequences = make_sequences(mmcif_dict)
400
432
  secondary_structure = make_secondary_structure(mmcif_dict)
401
433
  aniso = make_aniso(mmcif_dict)
434
+
402
435
  model: dict[str, Any] = {"polymer": {}, "non_polymer": {}, "water": {}, "branched": {}}
403
436
  model_num = mmcif_dict["atom_site"][0]["pdbx_PDB_model_num"]
404
437
  for atom in mmcif_dict["atom_site"]:
@@ -412,64 +445,67 @@ def update_models_list(mmcif_dict: dict[str, Any], data_dict: dict[str, Any]) ->
412
445
  else:
413
446
  add_atom_to_non_polymer(atom, aniso, model, mol_type, names)
414
447
  data_dict["models"].append(model)
448
+
415
449
  for model in data_dict["models"]:
416
450
  add_sequences_to_polymers(model, mmcif_dict, entities)
417
451
  add_secondary_structure_to_polymers(model, secondary_structure)
418
452
 
419
453
 
420
454
  def make_aniso(mmcif_dict: dict[str, Any]) -> dict[int, Any]:
421
- """Makes a mapping of atom IDs to anisotropy information.
422
-
423
- :param mmcif_dict: the .mmcif dict to read.
424
- :rtype: ``dict``"""
455
+ """
456
+ Makes a mapping of atom IDs to anisotropy information.
425
457
 
458
+ :param mmcif_dict: .mmcif dict to read
459
+ """
426
460
  return {
427
461
  int(a["id"]): [float(a["U[{}][{}]".format(x, y)]) for x, y in ["11", "22", "33", "12", "13", "23"]] # type: ignore [has-type, misc]
428
462
  for a in mmcif_dict.get("atom_site_anisotrop", [])
429
463
  }
430
464
 
431
465
 
432
- def make_secondary_structure(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
433
- """Creates a dictionary of helices and strands, with each having a list of
466
+ def make_secondary_structure(mmcif_dict: dict[str, Any]) -> dict[str, list[list[str]]]:
467
+ """
468
+ Creates a dictionary of helices and strands, with each having a list of
434
469
  start and end residues.
435
470
 
436
- :param mmcif_dict: the .mmcif dict to read.
437
- :rtype: ``dict``"""
438
-
439
- helices, strands = [], []
440
- for helix in mmcif_dict.get("struct_conf", []):
441
- helices.append(
442
- [
443
- "{}.{}{}".format(
444
- helix[f"{x}_auth_asym_id"],
445
- helix[f"{x}_auth_seq_id"],
446
- helix[f"pdbx_{x}_PDB_ins_code"].replace("?", ""),
447
- )
448
- for x in ["beg", "end"]
449
- ]
450
- )
451
- for strand in mmcif_dict.get("struct_sheet_range", []):
452
- strands.append(
453
- [
454
- "{}.{}{}".format(
455
- strand[f"{x}_auth_asym_id"],
456
- strand[f"{x}_auth_seq_id"],
457
- strand[f"pdbx_{x}_PDB_ins_code"].replace("?", ""),
458
- )
459
- for x in ["beg", "end"]
460
- ]
461
- )
471
+ :param mmcif_dict: .mmcif dict to read
472
+ :return: secondary structure dictionary
473
+ """
474
+ helices = [
475
+ [
476
+ "{}.{}{}".format(
477
+ helix[f"{x}_auth_asym_id"],
478
+ helix[f"{x}_auth_seq_id"],
479
+ helix[f"pdbx_{x}_PDB_ins_code"].replace("?", ""),
480
+ )
481
+ for x in ["beg", "end"]
482
+ ]
483
+ for helix in mmcif_dict.get("struct_conf", [])
484
+ ]
485
+
486
+ strands = [
487
+ [
488
+ "{}.{}{}".format(
489
+ strand[f"{x}_auth_asym_id"],
490
+ strand[f"{x}_auth_seq_id"],
491
+ strand[f"pdbx_{x}_PDB_ins_code"].replace("?", ""),
492
+ )
493
+ for x in ["beg", "end"]
494
+ ]
495
+ for strand in mmcif_dict.get("struct_sheet_range", [])
496
+ ]
462
497
  return {"helices": helices, "strands": strands}
463
498
 
464
499
 
465
500
  def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict[str, Any], names: dict[str, Any]) -> None:
466
- """Takes an MMCIF atom dictionary, converts it, and adds it to a polymer
467
- dictionary.
501
+ """
502
+ Takes an MMCIF atom dictionary, converts it, and adds it to a polymer dictionary.
468
503
 
469
- :param dict atom: the .mmcif dictionary to read.
470
- :param dict aniso: lookup dictionary for anisotropy information.
471
- :param dict model: the model to update.
472
- :param dict names: the lookup dictionary for full name information."""
504
+ :param atom: .mmcif dictionary to read
505
+ :param aniso: lookup dictionary for anisotropy information
506
+ :param model: model to update
507
+ :param names: lookup dictionary for full name information
508
+ """
473
509
 
474
510
  mol_id = atom["auth_asym_id"]
475
511
  res_id = make_residue_id(atom)
@@ -501,16 +537,17 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
501
537
 
502
538
 
503
539
  def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict[str, Any], mol_type: str, names: dict[str, Any]) -> None:
504
- """Takes an MMCIF atom dictionary, converts it, and adds it to a non_polymer
505
- dictionary.
506
-
507
- :param dict atom: the .mmcif dictionary to read.
508
- :param dict aniso: lookup dictionary for anisotropy information.
509
- :param dict model: the model to update.
510
- :param str mol_type: non_polymer or water.
511
- :param dict names: the lookup dictionary for full name information."""
512
-
540
+ """
541
+ Takes an MMCIF atom dictionary, converts it, and adds it to a non_polymer dictionary.
542
+
543
+ :param atom: .mmcif dictionary to read
544
+ :param aniso: lookup dictionary for anisotropy information
545
+ :param model: model to update
546
+ :param mol_type: non_polymer or water
547
+ :param names: lookup dictionary for full name information
548
+ """
513
549
  mol_id = make_residue_id(atom)
550
+
514
551
  try:
515
552
  model[mol_type][mol_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
516
553
  except Exception:
@@ -525,35 +562,39 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
525
562
 
526
563
 
527
564
  def make_residue_id(d: dict[str, Any]) -> str:
528
- """Generates a residue ID for an atom.
529
-
530
- :param dict d: the atom dictionary to read.
531
- :rtype: ``str``"""
565
+ """
566
+ Generates a residue ID for an atom.
532
567
 
568
+ :param d: atom dictionary to read
569
+ :return: residue ID
570
+ """
533
571
  insert = "" if d["pdbx_PDB_ins_code"] in "?." else d["pdbx_PDB_ins_code"]
572
+
534
573
  return "{}.{}{}".format(d["auth_asym_id"], d["auth_seq_id"], insert)
535
574
 
536
575
 
537
576
  def add_sequences_to_polymers(model: dict[str, Any], mmcif_dict: dict[str, Any], entities: dict[str, Any]) -> None:
538
- """Takes a pre-populated mapping of chain IDs to entity IDs, and uses them
577
+ """
578
+ Takes a pre-populated mapping of chain IDs to entity IDs, and uses them
539
579
  to add sequence information to a model.
540
580
 
541
- :param dict model: the model to update.
542
- :param dict mmcif_dict: the .mmcif dictionary to read.
543
- :param dict entities: a mapping of chain IDs to entity IDs."""
544
-
581
+ :param model: model to update
582
+ :param mmcif_dict: .mmcif dictionary to read
583
+ :param entities: mapping of chain IDs to entity IDs
584
+ """
545
585
  sequences = make_sequences(mmcif_dict)
546
586
  for polymer in model["polymer"].values():
547
587
  polymer["sequence"] = sequences.get(entities.get(polymer["internal_id"], ""), "")
548
588
 
549
589
 
550
590
  def add_secondary_structure_to_polymers(model: dict[str, Any], ss_dict: dict[str, Any]) -> None:
551
- """Updates polymer dictionaries with secondary structure information, from
591
+ """
592
+ Updates polymer dictionaries with secondary structure information, from
552
593
  a previously created mapping.
553
594
 
554
- :param dict model: the model to update.
555
- :param dict ss_dict: the mapping to read."""
556
-
595
+ :param model: model to update
596
+ :param ss_dict: mapping to read
597
+ """
557
598
  for ss in ("helices", "strands"):
558
599
  for segment in ss_dict[ss]:
559
600
  chain = model["polymer"].get(segment[0].split(".")[0])
@@ -570,11 +611,12 @@ def add_secondary_structure_to_polymers(model: dict[str, Any], ss_dict: dict[str
570
611
 
571
612
 
572
613
  def make_sequences(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
573
- """Creates a mapping of entity IDs to sequences.
574
-
575
- :param dict mmcif_dict: the .mmcif dictionary to read.
576
- :rtype: ``dict``"""
614
+ """
615
+ Creates a mapping of entity IDs to sequences.
577
616
 
617
+ :param mmcif_dict: .mmcif dictionary to read
618
+ :return: sequence mapping
619
+ """
578
620
  return {
579
621
  e["id"]: "".join([CODES.get(res["mon_id"], "X") for res in mmcif_dict.get("entity_poly_seq", []) if res["entity_id"] == e["id"]])
580
622
  for e in mmcif_dict.get("entity", [])
@@ -583,13 +625,15 @@ def make_sequences(mmcif_dict: dict[str, Any]) -> dict[str, Any]:
583
625
 
584
626
 
585
627
  def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dict[str, Any]:
586
- """Turns an .mmcif atom dictionary into an atomium atom data dictionary.
587
-
588
- :param dict d: the .mmcif atom dictionary.
589
- :param dict d: the mapping of atom IDs to anisotropy.
590
- :rtype: ``dict``"""
628
+ """
629
+ Turns an .mmcif atom dictionary into an atomium atom data dictionary.
591
630
 
631
+ :param d: .mmcif atom dictionary
632
+ :param aniso_dict: mapping of atom IDs to anisotropy
633
+ :return: atom data dictionary
634
+ """
592
635
  charge = "pdbx_formal_charge"
636
+
593
637
  atom = {
594
638
  "x": d["Cartn_x"],
595
639
  "y": d["Cartn_y"],
@@ -603,9 +647,11 @@ def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dic
603
647
  "anisotropy": aniso_dict.get(int(d["id"]), [0, 0, 0, 0, 0, 0]),
604
648
  "is_hetatm": d.get("group_PDB", "ATOM") == "HETATM",
605
649
  }
650
+
606
651
  for key in ["x", "y", "z", "charge", "bvalue", "occupancy"]:
607
652
  if atom[key] is not None:
608
653
  atom[key] = float(atom[key])
654
+
609
655
  if atom["charge"] == 0:
610
656
  atom["charge"] = None
611
657
  if not atom["is_hetatm"]:
@@ -616,6 +662,7 @@ def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dic
616
662
  atom["occupancy"] = None
617
663
  if atom["name"] == atom["element"]:
618
664
  atom["name"] = None
665
+
619
666
  return atom
620
667
 
621
668
 
@@ -629,33 +676,40 @@ def mmcif_to_data_transfer(
629
676
  date: bool = False,
630
677
  split: bool = False,
631
678
  multi: bool = False,
632
- func: Any = None,
679
+ func: Callable[[Any], Any] | None = None,
633
680
  ) -> None:
634
- """A function for transfering a bit of data from a .mmcif dictionary to a
681
+ """
682
+ Function for transferring a bit of data from a .mmcif dictionary to a
635
683
  data dictionary, or doing nothing if the data doesn't exist.
636
684
 
637
- :param dict mmcif_dict: the .mmcif dictionary to read.
638
- :param dict data_dict: the data dictionary to update.
639
- :param str d_cat: the top-level key in the data dictionary.
640
- :param str d_key: the data dictionary field to update.
641
- :param str m_table: the name of the .mmcif table to look in.
642
- :param str m_key: the .mmcif field to read.
643
- :param bool date: if True, the value will be converted to a date.
644
- :param bool split: if True, the value will be split on commas.
645
- :param bool multi: if True, every row in the table will be read.
646
- :param function func: if given, this will be applied to the value."""
647
-
685
+ :param mmcif_dict: .mmcif dictionary to read
686
+ :param data_dict: data dictionary to update
687
+ :param d_cat: top-level key in the data dictionary
688
+ :param d_key: data dictionary field to update
689
+ :param m_table: name of the .mmcif table to look in
690
+ :param m_key: .mmcif field to read
691
+ :param date: if True, value will be converted to a date
692
+ :param split: if True, value will be split on commas
693
+ :param multi: if True, every row in the table will be read
694
+ :param func: if given, will be applied to the value
695
+ """
648
696
  try:
649
697
  if multi:
650
698
  value = [row[m_key] for row in mmcif_dict[m_table]]
651
699
  else:
652
700
  value = mmcif_dict[m_table][0][m_key]
701
+
653
702
  if date:
654
703
  value = datetime.strptime(value, "%Y-%m-%d").date() # type: ignore [arg-type, assignment]
655
704
  if split:
656
705
  value = value.replace(", ", ",").split(",") # type: ignore [attr-defined]
657
706
  if func:
658
707
  value = func(value)
659
- data_dict[d_cat][d_key] = None if value == "?" else value # type: ignore [comparison-overlap]
708
+
709
+ if isinstance(value, str) and value == "?":
710
+ value = None
711
+
712
+ data_dict[d_cat][d_key] = value
713
+
660
714
  except Exception:
661
715
  pass