stjames 0.0.52__py3-none-any.whl → 0.0.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stjames might be problematic. Click here for more details.
- stjames/atomium_stjames/__init__.py +5 -0
- stjames/atomium_stjames/data.py +377 -0
- stjames/atomium_stjames/mmcif.py +651 -0
- stjames/atomium_stjames/pdb.py +572 -0
- stjames/atomium_stjames/utilities.py +125 -0
- stjames/pdb.py +482 -10
- stjames/workflows/irc.py +14 -7
- {stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/METADATA +2 -1
- {stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/RECORD +12 -7
- {stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/LICENSE +0 -0
- {stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/WHEEL +0 -0
- {stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/top_level.txt +0 -0
stjames/pdb.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
from datetime import date
|
|
1
|
+
from datetime import date, datetime
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import Any, Literal
|
|
4
4
|
|
|
5
|
-
import atomium # type: ignore [import-untyped]
|
|
6
|
-
from atomium.pdb import pdb_dict_to_data_dict, pdb_string_to_pdb_dict # type: ignore [import-untyped]
|
|
7
5
|
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
8
6
|
|
|
7
|
+
import stjames.atomium_stjames as astj
|
|
8
|
+
from stjames.atomium_stjames.mmcif import mmcif_dict_to_data_dict, mmcif_string_to_mmcif_dict
|
|
9
|
+
from stjames.atomium_stjames.pdb import inverse_make_sequences, pdb_dict_to_data_dict, pdb_string_to_pdb_dict
|
|
9
10
|
from stjames.types import Matrix3x3, Vector3D
|
|
10
11
|
|
|
11
12
|
# Mostly for testing purposes
|
|
@@ -22,12 +23,12 @@ class PDBAtom(BaseModel):
|
|
|
22
23
|
z: float
|
|
23
24
|
element: str
|
|
24
25
|
name: str
|
|
25
|
-
charge: float
|
|
26
|
-
occupancy: float
|
|
26
|
+
charge: float | None
|
|
27
|
+
occupancy: float | None
|
|
27
28
|
alt_loc: str | None
|
|
28
|
-
anisotropy: list[float]
|
|
29
|
+
anisotropy: list[float] | None
|
|
29
30
|
bvalue: float
|
|
30
|
-
is_hetatm: bool
|
|
31
|
+
is_hetatm: bool | None
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class PDBWater(BaseModel):
|
|
@@ -175,6 +176,9 @@ class PDBDescription(BaseModel):
|
|
|
175
176
|
if v is None:
|
|
176
177
|
return v
|
|
177
178
|
|
|
179
|
+
if isinstance(v, date):
|
|
180
|
+
return v.isoformat()
|
|
181
|
+
|
|
178
182
|
return str(date)
|
|
179
183
|
|
|
180
184
|
|
|
@@ -192,14 +196,482 @@ class PDB(BaseModel):
|
|
|
192
196
|
|
|
193
197
|
def read_pdb(path: Path | str) -> PDB:
|
|
194
198
|
"""Read a pdb located at path."""
|
|
195
|
-
return PDB.model_validate(
|
|
199
|
+
return PDB.model_validate(astj.open(str(path), data_dict=True))
|
|
196
200
|
|
|
197
201
|
|
|
198
202
|
def fetch_pdb(code: str) -> PDB:
|
|
199
203
|
"""Fetch a pdb from the Protein Data Bank."""
|
|
200
|
-
return PDB.model_validate(
|
|
204
|
+
return PDB.model_validate(astj.fetch(code, data_dict=True))
|
|
201
205
|
|
|
202
206
|
|
|
203
|
-
def
|
|
207
|
+
def pdb_from_pdb_filestring(pdb: str) -> PDB:
    """
    Read a PDB from a string.

    The text is parsed with ``pdb_string_to_pdb_dict``, converted with
    ``pdb_dict_to_data_dict`` and the result is validated into a ``PDB`` model.

    Args:
        pdb (str): text to parse (presumably the contents of a ``.pdb`` file).

    Returns:
        PDB: the validated model.
    """
    return PDB.model_validate(pdb_dict_to_data_dict(pdb_string_to_pdb_dict(pdb)))
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def pdb_from_mmcif_filestring(pdb: str) -> PDB:
    """
    Read a PDB object from an mmCIF string.

    The text is parsed with ``mmcif_string_to_mmcif_dict``, converted with
    ``mmcif_dict_to_data_dict`` and the result is validated into a ``PDB`` model.

    Args:
        pdb (str): mmCIF text to parse (despite the parameter name, this is not
            PDB-format text).

    Returns:
        PDB: the validated model.
    """
    return PDB.model_validate(mmcif_dict_to_data_dict(mmcif_string_to_mmcif_dict(pdb)))
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def pdb_object_to_pdb_filestring(pdb: PDB) -> str:
    """
    Serialize a PDB object to a PDB-style text string.

    The output is assembled in this order: header, source and keyword sections,
    secondary-structure/SEQRES lines, HETNAM lines, REMARK lines, the CRYST1 line,
    then one coordinate section per model (polymers, non-polymers, water, branched),
    and a final END record. MODEL/ENDMDL records are only written when the object
    holds more than one model.

    Args:
        pdb (PDB): object to serialize.

    Returns:
        str: newline-joined records, terminated by a trailing newline.
    """
    pdb_lines: list[str] = []
    # NOTE(review): this initial value is overwritten a few lines below.
    chains: list[str] = []
    # Header
    pdb_lines.extend(_build_header_section(pdb))
    pdb_lines.extend(_build_source_section(pdb))
    pdb_lines.extend(_build_keyword_section(pdb))

    # Filled in by _build_secondary_structure_and_seqres, consumed by _build_hetname_section
    full_name_dict: dict[str, str] = {}
    seqres_lines, chains = _build_secondary_structure_and_seqres(pdb, full_name_dict)

    pdb_lines.extend(seqres_lines)
    pdb_lines.extend(_build_hetname_section(full_name_dict))

    pdb_lines.extend(_build_remark_section(pdb, chains))

    pdb_lines.extend(_build_crystallography_section(pdb))

    for model_index, model in enumerate(pdb.models, start=1):
        # If more than one model, add MODEL line
        if len(pdb.models) > 1:
            pdb_lines.append(f"MODEL {model_index:>4}")

        # === 1) Polymers (protein, DNA, etc.) ===
        for chain_id, polymer in model.polymer.items():
            # Prefer the polymer's internal_id as the chain ID,
            # otherwise fall back to the dictionary key
            this_chain_id = polymer.internal_id or chain_id

            for _residue_id, residue in polymer.residues.items():
                assert residue.name is not None
                for _atom_id, atom in residue.atoms.items():
                    line = _format_atom_line(
                        serial=_atom_id,
                        atom=atom,
                        chain_id=this_chain_id,
                        res_name=residue.name,
                        # residue ids are sliced from index 2 on; presumably "<chain>.<number>" - TODO confirm
                        res_num=int(_residue_id[2:]),
                        alt_loc=atom.alt_loc or "",
                    )
                    pdb_lines.append(line)
                    # Only emit ANISOU when anisotropy is present and not all zeros
                    if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
                        line = _format_anisou_line(
                            serial=_atom_id,
                            atom=atom,
                            chain_id=this_chain_id,
                            res_name=residue.name,
                            res_num=int(_residue_id[2:]),
                            alt_loc=atom.alt_loc or "",
                        )
                        pdb_lines.append(line)

            # NOTE(review): TER reuses the loop variables left over from the loops above
            # (_atom_id, residue, _residue_id). For a chain with no residues/atoms these are
            # either undefined or stale from a previous chain - confirm that cannot happen.
            # NOTE(review): nesting level of this statement assumed to be "once per chain".
            pdb_lines.append(f"TER {_atom_id + 1:>5} {residue.name:>3} {this_chain_id}{int(_residue_id[2:]):>4}")

        # === 2) Non-polymers (e.g. ligands, ions) ===
        for _np_id, nonpoly in model.non_polymer.items():
            # Each non-polymer is written with chain ID = nonpoly.polymer (or "Z" when unset)
            chain_id_for_np = nonpoly.polymer or "Z"

            # The residue name is nonpoly.name; the residue number is taken from the
            # dictionary key (characters from index 2 on).
            for _atom_id, atom in nonpoly.atoms.items():
                line = _format_atom_line(
                    serial=_atom_id,
                    atom=atom,
                    chain_id=chain_id_for_np,
                    res_name=nonpoly.name,
                    res_num=int(_np_id[2:]),
                )
                pdb_lines.append(line)
                if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
                    line = _format_anisou_line(
                        serial=_atom_id,
                        atom=atom,
                        chain_id=chain_id_for_np,
                        res_name=nonpoly.name,
                        res_num=int(_np_id[2:]),
                    )
                    pdb_lines.append(line)

        # === 3) Water ===
        for _w_id, water in model.water.items():
            # Water is written with the fixed residue name "HOH"
            for _atom_id, atom in water.atoms.items():
                line = _format_atom_line(
                    serial=_atom_id,
                    atom=atom,
                    chain_id=_w_id[0],  # first character of the water's key
                    res_name="HOH",
                    res_num=int(_w_id[2:]),  # remainder of the key from index 2 on
                )
                pdb_lines.append(line)
                if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
                    line = _format_anisou_line(
                        serial=_atom_id,
                        atom=atom,
                        chain_id=_w_id[0],
                        res_name="HOH",
                        res_num=int(_w_id[2:]),
                    )
                    pdb_lines.append(line)

        # === 4) Branched ===
        # Branched entries are only written when they are dicts with an "atoms" key;
        # they get the fixed chain "B", residue name "BRN" and residue number 1.
        for _b_id, branched_obj in model.branched.items():
            # NOTE(review): atoms found here are accessed by attribute (atom.anisotropy etc.),
            # so they are assumed to be PDBAtom-like objects rather than plain dicts - confirm.
            if isinstance(branched_obj, dict) and "atoms" in branched_obj:
                for _atom_id, atom in branched_obj["atoms"].items():
                    line = _format_atom_line(
                        serial=_atom_id,
                        atom=atom,
                        chain_id="B",
                        res_name="BRN",  # or branched_obj.get("name", "BRN")
                        res_num=1,
                    )
                    pdb_lines.append(line)
                    if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
                        line = _format_anisou_line(
                            serial=_atom_id,
                            atom=atom,
                            chain_id="B",
                            res_name="BRN",
                            res_num=1,
                        )
                        pdb_lines.append(line)

        if len(pdb.models) > 1:
            pdb_lines.append("ENDMDL")

    # Finally, the file ends with an END record
    pdb_lines.append("END")

    resulting_string = _create_filestring(pdb_lines)
    return resulting_string
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _create_filestring(lines: list[str]) -> str:
|
|
359
|
+
# Join the lines with newline characters and add a newline at the end if desired
|
|
360
|
+
filestring = "\n".join(lines) + "\n"
|
|
361
|
+
return filestring
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _format_date(date_str: str | None) -> str | None:
|
|
365
|
+
"""
|
|
366
|
+
Formats a date string from "YYYY-MM-DD" to "DD-MMM-YY".
|
|
367
|
+
|
|
368
|
+
Args:
|
|
369
|
+
date_str (str): Date string in "YYYY-MM-DD" format.
|
|
370
|
+
|
|
371
|
+
Returns:
|
|
372
|
+
str: Formatted date string in "DD-MMM-YY" format.
|
|
373
|
+
"""
|
|
374
|
+
if date_str is None:
|
|
375
|
+
return None
|
|
376
|
+
date_obj = datetime.strptime(date_str, "%Y-%m-%d").date()
|
|
377
|
+
return date_obj.strftime("%d-%b-%y").upper()
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _format_atom_line(
    serial: int,
    atom: PDBAtom,
    chain_id: str,
    res_name: str,
    res_num: int | None,
    alt_loc: str = "",
) -> str:
    """
    Return a single PDB ATOM/HETATM record line as a string, using standard
    column alignment conventions.

    See https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf for details

    Args:
        serial (int): atom serial number (columns 7-11).
        atom (PDBAtom): atom supplying name, coordinates, occupancy, bvalue, element and charge.
        chain_id (str): chain identifier; only the first character is used, "A" if empty.
        res_name (str): residue name; truncated to 3 characters, "UNK" if empty.
        res_num (int | None): residue sequence number; 1 is used when None.
        alt_loc (str): alternate location indicator; blank when empty.

    Returns:
        str: the formatted record.
    """
    # The record name occupies columns 1-6, so "ATOM" is padded to six characters.
    record_type = "HETATM" if atom.is_hetatm else "ATOM  "

    # Fall back to defaults for fields that may be missing.
    alt_loc_char = alt_loc if alt_loc else " "
    residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
    chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
    residue_num = res_num if res_num is not None else 1

    # occupancy is optional on the model; formatting None with ":6.2f" would raise,
    # so fall back to full occupancy.
    occupancy = 1.0 if atom.occupancy is None else atom.occupancy

    # Charge is written as a (right-aligned) number, blank when zero or missing.
    if atom.charge and abs(atom.charge) > 0:
        # Integral charges are written without a decimal part, e.g. 1.0 -> " 1", -2.0 -> "-2"
        chg_val = int(atom.charge) if float(atom.charge).is_integer() else atom.charge
        chg = f"{chg_val:2}"
    else:
        chg = "  "

    # Construct the line with the exact field widths of the fixed-column layout.
    line = (
        f"{record_type}"  # record name (columns 1-6)
        f"{serial:5d} "  # atom serial number (columns 7-11) + blank column 12
        f"{atom.name:<4}"  # atom name (columns 13-16, left-justified)
        f"{alt_loc_char}"  # altLoc (column 17)
        f"{residue_name:>3}"  # residue name (columns 18-20)
        f" {chain_char}"  # blank column 21 + chain ID (column 22)
        f"{residue_num:4d}"  # residue sequence number (columns 23-26)
        f"    "  # columns 27-30 (insertion code plus spacing)
        f"{atom.x:8.3f}"  # x (columns 31-38)
        f"{atom.y:8.3f}"  # y (columns 39-46)
        f"{atom.z:8.3f}"  # z (columns 47-54)
        f"{occupancy:6.2f}"  # occupancy (columns 55-60)
        f"{atom.bvalue:6.2f}"  # temp factor (columns 61-66)
        f"          "  # columns 67-76 (padding)
        f"{atom.element:>2}"  # element (columns 77-78)
        f"{chg:>2}"  # charge (columns 79-80)
    )
    return line
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def _format_anisou_line(
    serial: int,
    atom: PDBAtom,
    chain_id: str,
    res_name: str,
    res_num: int | None,
    alt_loc: str = "",
) -> str:
    """
    Return a single PDB ANISOU record line as a string, using standard
    column alignment conventions.

    See https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf for details

    Args:
        serial (int): atom serial number (columns 7-11).
        atom (PDBAtom): atom supplying name, anisotropy (six values), element and charge.
        chain_id (str): chain identifier; only the first character is used, "A" if empty.
        res_name (str): residue name; truncated to 3 characters, "UNK" if empty.
        res_num (int | None): residue sequence number; 1 is used when None.
        alt_loc (str): alternate location indicator; blank when empty.

    Returns:
        str: the formatted record.

    Raises:
        IndexError: if ``atom.anisotropy`` is non-empty but holds fewer than six values.
    """
    record_type = "ANISOU"

    # Fall back to defaults for fields that may be missing.
    alt_loc_char = alt_loc if alt_loc else " "
    residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
    chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
    residue_num = res_num if res_num is not None else 1

    # Charge is written as a (right-aligned) number, blank when zero or missing.
    if atom.charge and abs(atom.charge) > 0:
        chg_val = int(atom.charge) if float(atom.charge).is_integer() else atom.charge
        chg = f"{chg_val:2}"
    else:
        chg = "  "

    # Six 7-character fields covering columns 29-70:
    # U(1,1) 29-35, U(2,2) 36-42, U(3,3) 43-49, U(1,2) 50-56, U(1,3) 57-63, U(2,3) 64-70.
    # NOTE(review): the order of the values inside atom.anisotropy is taken as-is.
    if atom.anisotropy:
        aniso_fields = "".join(f"{_float_to_pdb_string(atom.anisotropy[i]):>7}" for i in range(6))
    else:
        aniso_fields = " " * 42  # keep the columns aligned when no values are available

    # Construct the line with the exact field widths of the fixed-column layout.
    line = (
        f"{record_type}"  # record name (columns 1-6)
        f"{serial:5d} "  # atom serial number (columns 7-11) + blank column 12
        f"{atom.name:<4}"  # atom name (columns 13-16, left-justified)
        f"{alt_loc_char}"  # altLoc (column 17)
        f"{residue_name:>3}"  # residue name (columns 18-20)
        f" {chain_char}"  # blank column 21 + chain ID (column 22)
        f"{residue_num:4d}"  # residue sequence number (columns 23-26)
        f"  "  # columns 27-28 (insertion code plus spacing)
        f"{aniso_fields}"  # anisotropic temperature factors (columns 29-70)
        f"      "  # columns 71-76 (padding)
        f"{atom.element:>2}"  # element (columns 77-78)
        f"{chg:>2}"  # charge (columns 79-80)
    )
    return line
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
# chat code
|
|
511
|
+
def _float_to_pdb_string(x: float) -> str:
|
|
512
|
+
# Determine the sign
|
|
513
|
+
sign = "-" if x < 0 else ""
|
|
514
|
+
a = abs(x)
|
|
515
|
+
|
|
516
|
+
if a < 1:
|
|
517
|
+
# Format with exactly 4 decimals, e.g. 0.0044 -> "0.0044"
|
|
518
|
+
s = f"{a:.4f}"
|
|
519
|
+
# Remove the "0." and then remove any leading zeros.
|
|
520
|
+
significant = s[2:].lstrip("0")
|
|
521
|
+
return sign + significant
|
|
522
|
+
else:
|
|
523
|
+
# Format with exactly 4 decimals. For example, 1.131 -> "1.1310"
|
|
524
|
+
s = f"{a:.4f}"
|
|
525
|
+
# Split into integer and fractional parts.
|
|
526
|
+
integer_part, fractional_part = s.split(".")
|
|
527
|
+
# We want a total of 5 digits. So, the number of fractional digits we need is:
|
|
528
|
+
needed = 5 - len(integer_part)
|
|
529
|
+
# Use the needed number of digits from the fractional part.
|
|
530
|
+
result = integer_part + fractional_part[:needed]
|
|
531
|
+
return sign + result
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _helix_list_to_pdb_helix(polymer_dict: dict[str, PDBPolymer], helices: list[list[str]]) -> list[str]:
    """
    Build one HELIX record per helix.

    Each helix is a list of residue ids; the first character of an id selects the
    polymer in ``polymer_dict`` and the characters from index 2 on are written as
    the residue number. Records are numbered from 1 in the order given.
    """
    records = []
    for serial, residue_ids in enumerate(helices, start=1):
        first_id, last_id = residue_ids[0], residue_ids[-1]
        first_chain, last_chain = first_id[0], last_id[0]
        first_name = polymer_dict[first_chain].residues[first_id].name
        last_name = polymer_dict[last_chain].residues[last_id].name
        records.append(
            f"HELIX {serial:>3} {serial:>3} {first_name} {first_chain} {first_id[2:]:>4} "
            f"{last_name} {last_chain} {last_id[2:]:>4} 1{len(residue_ids):>36}"
        )
    return records
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def _strand_list_to_pdb_sheets(polymer_dict: dict[str, PDBPolymer], strands: list[list[str]]) -> list[str]:
    """
    Build one SHEET record per strand.

    Each strand is a list of residue ids; the first character of an id selects the
    polymer in ``polymer_dict`` and the characters from index 2 on are written as
    the residue number. The first strand is written with sense 0, all later ones
    with sense -1.
    """
    strand_count = len(strands)
    records = []
    for serial, residue_ids in enumerate(strands, start=1):
        first_id, last_id = residue_ids[0], residue_ids[-1]
        first_chain, last_chain = first_id[0], last_id[0]
        first_name = polymer_dict[first_chain].residues[first_id].name
        last_name = polymer_dict[last_chain].residues[last_id].name
        sense = 0 if serial == 1 else -1
        records.append(
            f"SHEET {serial:>3} {first_chain:>3}{strand_count:>2} {first_name} {first_chain}{first_id[2:]:>4} "
            f"{last_name} {last_chain}{last_id[2:]:>4} {sense:>2}"
        )
    return records
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def _build_header_section(pdb: PDB) -> list[str]:
    """Builds the HEADER, TITLE, EXPDTA and AUTHOR lines, in that order."""
    description = pdb.description

    # Missing values are written as empty (space-padded) fields.
    classification = description.classification or ""
    deposited = _format_date(description.deposition_date) or ""
    code = description.code or ""
    title_text = description.title or ""
    technique = pdb.experiment.technique or ""
    author_list = ",".join(description.authors).upper()

    return [
        f"HEADER {classification:<40}{deposited:<10} {code:<5}",
        f"TITLE {title_text:<70}",
        f"EXPDTA {technique:<69}",
        f"AUTHOR {author_list:<69}",
    ]
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def _build_source_section(pdb: PDB) -> list[str]:
    """Builds the two SOURCE lines: source organism first, expression system second."""
    experiment = pdb.experiment

    # A present value is terminated with ";", an absent one is left as an empty field.
    if experiment.source_organism:
        organism = experiment.source_organism + ";"
    else:
        organism = ""

    if experiment.expression_system:
        expression = experiment.expression_system + ";"
    else:
        expression = ""

    return [
        f"SOURCE ORGANISM_SCIENTIFIC: {organism:<69}",
        f"SOURCE EXPRESSION_SYSTEM: {expression:<69}",
    ]
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def _build_keyword_section(pdb: PDB) -> list[str]:
    """Builds one KEYWDS line per keyword; every keyword except the last gets a trailing comma."""
    keywords = pdb.description.keywords
    last_position = len(keywords) - 1

    entries = []
    for position, keyword in enumerate(keywords):
        text = keyword if position == last_position else keyword + ","
        entries.append(f"KEYWDS {text:<79}")
    return entries
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def _build_secondary_structure_and_seqres(pdb: PDB, full_name_dict: dict[str, str]) -> tuple[list[str], list[str]]:
    """
    Iterates over models and polymers to build secondary structure lines (e.g. sheets, helices)
    and sequence records (SEQRES). Also collects full names for heterogen records.

    Args:
        pdb (PDB): object whose models are scanned.
        full_name_dict (dict[str, str]): filled in place, mapping a residue or
            non-polymer name to its full name (later entries overwrite earlier ones).

    Returns:
        tuple[list[str], list[str]]: (list of seqres (and secondary structure) lines,
        list of chain IDs).
    """
    seqres_lines: list[str] = []
    chains: list[str] = []

    # NOTE(review): every model contributes, so a multi-model object yields repeated
    # chain IDs and repeated records - confirm this is intended.
    for model in pdb.models:
        for chain_id, polymer in model.polymer.items():
            chains.append(chain_id)
            # Add sheet and helix records (if available); sheets are emitted before helices
            for strand_line in _strand_list_to_pdb_sheets(model.polymer, polymer.strands):
                seqres_lines.append(strand_line)
            for helix_line in _helix_list_to_pdb_helix(model.polymer, polymer.helices):
                seqres_lines.append(helix_line)
            # Add SEQRES lines from the polymer's sequence
            if polymer.sequence:
                seqres_lines.extend(inverse_make_sequences(polymer.sequence, chain_id))
            # Collect full names from each residue
            for _, residue in polymer.residues.items():
                if residue.full_name and residue.name:
                    full_name_dict[residue.name] = residue.full_name
        # Also collect full names for non-polymer molecules
        # NOTE(review): nesting assumed to be per model (outside the polymer loop).
        for _, non_polymer in model.non_polymer.items():
            if non_polymer.full_name and non_polymer.name:
                full_name_dict[non_polymer.name] = non_polymer.full_name

    return seqres_lines, chains
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def _build_hetname_section(full_name_dict: dict[str, str]) -> list[str]:
|
|
617
|
+
"""Builds the HETNAM lines for non-polymer molecules."""
|
|
618
|
+
lines = []
|
|
619
|
+
for name, full_name in full_name_dict.items():
|
|
620
|
+
if len(full_name) > 55:
|
|
621
|
+
for i in range(0, len(full_name), 55):
|
|
622
|
+
lines.append(f"HETNAM {int(i / 55):>2} {name:<3} {full_name[i : i + 55]:<55}")
|
|
623
|
+
else:
|
|
624
|
+
lines.append(f"HETNAM {name:<3} {full_name:<55}")
|
|
625
|
+
return lines
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def _build_remark_section(pdb: PDB, chains: list[str]) -> list[str]:
    """
    Builds REMARK lines: resolution (REMARK 2), R factors (REMARK 3, only when set),
    a fixed identity biomolecule block (REMARK 350) applied to ``chains``, and the
    missing-residue table (REMARK 465).
    """
    quality = pdb.quality

    # REMARK 2 / 3: resolution is always written, the R factors only when truthy
    remark_lines = [f"REMARK 2 RESOLUTION. {quality.resolution:>7} ANGSTROMS."]
    if quality.rfree:
        remark_lines.append(f"REMARK 3 FREE R VALUE : {quality.rfree}")
    if quality.rvalue:
        remark_lines.append(f"REMARK 3 R VALUE (WORKING SET) : {quality.rvalue}")

    # REMARK 350: biomolecule details (single identity transformation)
    remark_lines += [
        "REMARK 350",
        "REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN",
        "REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE",
        "REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS",
        "REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND",
        "REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN.",
        "REMARK 350",
        "REMARK 350 BIOMOLECULE: 1",
        "REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: MONOMERIC",
        f"REMARK 350 APPLY THE FOLLOWING TO CHAINS: {', '.join(chains)}",
        "REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000",
        "REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000",
        "REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000",
    ]

    # REMARK 465: missing residues (header, then one line per residue)
    remark_lines += [
        "REMARK 465 MISSING RESIDUES",
        "REMARK 465 THE FOLLOWING RESIDUES WERE NOT LOCATED IN THE",
        "REMARK 465 EXPERIMENT. (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN",
        "REMARK 465 IDENTIFIER; SSSEQ=SEQUENCE NUMBER; I=INSERTION CODE.)",
        "REMARK 465",
        "REMARK 465 M RES C SSSEQI",
    ]
    remark_lines.extend(
        f"REMARK 465 {missing.name} {missing.id[0]} {missing.id[2:]}" for missing in pdb.experiment.missing_residues
    )
    return remark_lines
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def _build_crystallography_section(pdb: PDB) -> list[str]:
    """
    Builds the CRYST1 line if unit cell data is provided.

    Returns an empty list when there is no unit cell; otherwise a one-element list
    holding the first three unit-cell values (9 wide each), the next three
    (7 wide each) and the space group (empty when unset).
    """
    crystallography = pdb.geometry.crystallography
    cell = crystallography.unit_cell
    if not cell:
        return []

    first_three = "".join(f"{cell[index]:>9}" for index in range(3))
    last_three = "".join(f"{cell[index]:>7}" for index in range(3, 6))
    space_group = crystallography.space_group or ""
    return [f"CRYST1{first_three}{last_three} {space_group:<11}"]
|
stjames/workflows/irc.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Self
|
|
2
2
|
|
|
3
|
-
from pydantic import Field, model_validator
|
|
3
|
+
from pydantic import Field, PositiveFloat, field_validator, model_validator
|
|
4
4
|
|
|
5
5
|
from ..method import XTB_METHODS, Method
|
|
6
6
|
from ..mode import Mode
|
|
@@ -24,28 +24,26 @@ class IRCWorkflow(Workflow):
|
|
|
24
24
|
:param settings: Settings for running the IRC (only for manual mode)
|
|
25
25
|
:param solvent: Solvent for the calculation (non-Manual mode only)
|
|
26
26
|
:param preopt: whether to optimize the geometry before starting the IRC
|
|
27
|
-
:param
|
|
27
|
+
:param max_irc_steps: maximum number of steps for the IRC
|
|
28
|
+
:param step_size: step size for the IRC (Å)
|
|
28
29
|
|
|
29
30
|
Results:
|
|
30
31
|
:param starting_TS: optimized TS before the IRC (==initial_molecule if preopt=False)
|
|
31
32
|
:param irc_forward: forward calculations
|
|
32
33
|
:param irc_backward: reverse calculations
|
|
33
|
-
:param opt_forward: optimization steps after the forward IRC
|
|
34
|
-
:param opt_backward: optimization steps after the reverse IRC
|
|
35
34
|
"""
|
|
36
35
|
|
|
37
36
|
settings: Settings = _sentinel_settings
|
|
38
37
|
solvent: Solvent | None = None
|
|
39
38
|
|
|
40
39
|
preopt: bool = False
|
|
41
|
-
|
|
40
|
+
max_irc_steps: int = 10
|
|
41
|
+
step_size: PositiveFloat = 0.05
|
|
42
42
|
|
|
43
43
|
starting_TS: UUID | None = None
|
|
44
44
|
|
|
45
45
|
irc_forward: list[UUID] = Field(default_factory=list)
|
|
46
46
|
irc_backward: list[UUID] = Field(default_factory=list)
|
|
47
|
-
opt_forward: list[UUID] = Field(default_factory=list)
|
|
48
|
-
opt_backward: list[UUID] = Field(default_factory=list)
|
|
49
47
|
|
|
50
48
|
def __str__(self) -> str:
|
|
51
49
|
return repr(self)
|
|
@@ -62,6 +60,15 @@ class IRCWorkflow(Workflow):
|
|
|
62
60
|
"""Level of theory for the workflow."""
|
|
63
61
|
return self.settings.level_of_theory
|
|
64
62
|
|
|
63
|
+
@field_validator("step_size", mode="after")
@classmethod
def validate_step_size(cls, step_size: float) -> float:
    """Reject step sizes below 0.001 or above 0.1; return accepted values unchanged."""
    too_small = step_size < 1e-3
    too_large = step_size > 0.1
    if too_small or too_large:
        raise ValueError(f"Step size must be between 0.001 and 0.1 Å, got: {step_size}")

    return step_size
|
|
71
|
+
|
|
65
72
|
@model_validator(mode="after")
|
|
66
73
|
def validate_mode(self) -> Self:
|
|
67
74
|
"""Convert the mode to settings."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: stjames
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.53
|
|
4
4
|
Summary: standardized JSON atom/molecule encoding scheme
|
|
5
5
|
Author-email: Corin Wagen <corin@rowansci.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/rowansci/stjames
|
|
@@ -12,6 +12,7 @@ Requires-Dist: atomium<2,>=1
|
|
|
12
12
|
Requires-Dist: pydantic>=2.4
|
|
13
13
|
Requires-Dist: numpy
|
|
14
14
|
Requires-Dist: atomium<2.0,>=1.0
|
|
15
|
+
Requires-Dist: requests
|
|
15
16
|
|
|
16
17
|
# stjames
|
|
17
18
|
|
|
@@ -15,7 +15,7 @@ stjames/method.py,sha256=5hBHk2xQLpxZ52LwJ9FHWaqQMdFKnsbQEOxaVe6O4Go,2321
|
|
|
15
15
|
stjames/mode.py,sha256=xw46Cc7f3eTS8i35qECi-8DocAlANhayK3w4akD4HBU,496
|
|
16
16
|
stjames/molecule.py,sha256=DeNYmFdvbuKeXvLqlu-UxHMyZVK6y4j-Lw3HITGMnHw,12406
|
|
17
17
|
stjames/opt_settings.py,sha256=gxXGtjy9l-Q5Wen9eO6T6HHRCuS8rfOofdVQIJj0JcI,550
|
|
18
|
-
stjames/pdb.py,sha256=
|
|
18
|
+
stjames/pdb.py,sha256=yAEqFV2BxStd-G1PDNqtB8Qy_8x4sWZDiaSk8ifM1U0,25130
|
|
19
19
|
stjames/periodic_cell.py,sha256=eV_mArsY_MPEFSrFEsTC-CyCc6V8ITAXdk7yhjjNI7M,1080
|
|
20
20
|
stjames/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
stjames/scf_settings.py,sha256=WotVgVrayQ_8PUHP39zVtG7iLT9PV41lpzruttFACP8,2356
|
|
@@ -25,6 +25,11 @@ stjames/status.py,sha256=wTKNcNxStoEHrxxgr_zTyN90NITa3rxMQZzOgrCifEw,332
|
|
|
25
25
|
stjames/task.py,sha256=OLINRqe66o7t8arffilwmggrF_7TH0L79u6DhGruxV8,329
|
|
26
26
|
stjames/thermochem_settings.py,sha256=ZTLz31v8Ltutde5Nfm0vH5YahWjcfFWfr_R856KffxE,517
|
|
27
27
|
stjames/types.py,sha256=hw-3UBikESvN3DzfK5doZB030kIEfx9gC3yBkIbebsI,3764
|
|
28
|
+
stjames/atomium_stjames/__init__.py,sha256=gZkzC7i9D_fmWUTN55gtygITo3-qvJUda5CXLR0jyCQ,306
|
|
29
|
+
stjames/atomium_stjames/data.py,sha256=-hzwBpTHq5JetsOVyopUJswKnKAkMtJ_XkONxjXVupU,5675
|
|
30
|
+
stjames/atomium_stjames/mmcif.py,sha256=16LNhQW7GkwEmRAG2lDEnhQaeBabtzIiEbzjjBnLhNg,27108
|
|
31
|
+
stjames/atomium_stjames/pdb.py,sha256=nkCqdc6fy6rKNcIZZDysDLTdlPJWWRmTYBYEFr1wcAQ,22365
|
|
32
|
+
stjames/atomium_stjames/utilities.py,sha256=B_TNLTrsiGaEPBG5-4mhTcj0v4VgYUi55ICF9IR_LG0,4776
|
|
28
33
|
stjames/data/__init__.py,sha256=O59Ksp7AIqwOELCWymfCx7YeBzwNOGCMlGQi7tNLqiE,24
|
|
29
34
|
stjames/data/bragg_radii.json,sha256=hhbn-xyZNSdmnULIjN2Cvq-_BGIZIqG243Ls_mey61w,1350
|
|
30
35
|
stjames/data/elements.py,sha256=9BW01LZlyJ0H5s7Q26vUmjZIST41fwOYYrGvmPd7q0w,858
|
|
@@ -43,7 +48,7 @@ stjames/workflows/docking.py,sha256=K6zy4lo1XfrrMd7ZmKAe_Fd9wvKhtCMoK66gp-TsuJA,
|
|
|
43
48
|
stjames/workflows/electronic_properties.py,sha256=uAIcGKKLhqoHyDgcOZulEXwTU2EjidyvOndZDYyeJEk,4003
|
|
44
49
|
stjames/workflows/fukui.py,sha256=2J23RjkSOZ-40AM3AdnbJkRBGaCevkjkhnV3pVfa6lo,738
|
|
45
50
|
stjames/workflows/hydrogen_bond_basicity.py,sha256=Luvov2DlDvZN06W-mU6YaN7wcIrTLwzdoWww-jNE3x4,517
|
|
46
|
-
stjames/workflows/irc.py,sha256=
|
|
51
|
+
stjames/workflows/irc.py,sha256=3lA3EOXju6d0vETs-PEnRBnGCkshTXDtjw4dVDj0N5A,3333
|
|
47
52
|
stjames/workflows/molecular_dynamics.py,sha256=4HmYETU1VT2BA4-PqAayRZLjnj1WuYxd5bqpIyH9g5k,2465
|
|
48
53
|
stjames/workflows/multistage_opt.py,sha256=0ou-UYMGIrewZIg3QZIgwS_eweYdsh2pRplxgRCqLcE,13572
|
|
49
54
|
stjames/workflows/pka.py,sha256=vSbMc7wuUKATNLq2kQyfCyX6aUthCj-XGSoXnuk4GMo,1031
|
|
@@ -52,8 +57,8 @@ stjames/workflows/scan.py,sha256=uNSuUmVMAV4exNvcv1viVe7930i7GZMn7RtEimnwEE8,100
|
|
|
52
57
|
stjames/workflows/spin_states.py,sha256=b-uCf-pHjF_JHbExeb5GdRToE0pIxP0JTd50U130ckI,4693
|
|
53
58
|
stjames/workflows/tautomer.py,sha256=x3TC8hkMs87ZUodLyhce5EUzYoV276ePfPMi7ISWyNU,651
|
|
54
59
|
stjames/workflows/workflow.py,sha256=tIu5naADYgYS7kdW8quvGEWHWosBcrIdcD7L86v-uMQ,976
|
|
55
|
-
stjames-0.0.
|
|
56
|
-
stjames-0.0.
|
|
57
|
-
stjames-0.0.
|
|
58
|
-
stjames-0.0.
|
|
59
|
-
stjames-0.0.
|
|
60
|
+
stjames-0.0.53.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
|
|
61
|
+
stjames-0.0.53.dist-info/METADATA,sha256=rXyJcZw2ooRmEXl7y7YNoca93EyheOn780kZPISC-Aw,1713
|
|
62
|
+
stjames-0.0.53.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
63
|
+
stjames-0.0.53.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
|
|
64
|
+
stjames-0.0.53.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|