stjames 0.0.52__py3-none-any.whl → 0.0.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stjames might be problematic. Click here for more details.

stjames/pdb.py CHANGED
@@ -1,11 +1,12 @@
1
- from datetime import date
1
+ from datetime import date, datetime
2
2
  from pathlib import Path
3
3
  from typing import Any, Literal
4
4
 
5
- import atomium # type: ignore [import-untyped]
6
- from atomium.pdb import pdb_dict_to_data_dict, pdb_string_to_pdb_dict # type: ignore [import-untyped]
7
5
  from pydantic import BaseModel, ConfigDict, Field, field_validator
8
6
 
7
+ import stjames.atomium_stjames as astj
8
+ from stjames.atomium_stjames.mmcif import mmcif_dict_to_data_dict, mmcif_string_to_mmcif_dict
9
+ from stjames.atomium_stjames.pdb import inverse_make_sequences, pdb_dict_to_data_dict, pdb_string_to_pdb_dict
9
10
  from stjames.types import Matrix3x3, Vector3D
10
11
 
11
12
  # Mostly for testing purposes
@@ -22,12 +23,12 @@ class PDBAtom(BaseModel):
22
23
  z: float
23
24
  element: str
24
25
  name: str
25
- charge: float
26
- occupancy: float
26
+ charge: float | None
27
+ occupancy: float | None
27
28
  alt_loc: str | None
28
- anisotropy: list[float]
29
+ anisotropy: list[float] | None
29
30
  bvalue: float
30
- is_hetatm: bool
31
+ is_hetatm: bool | None
31
32
 
32
33
 
33
34
  class PDBWater(BaseModel):
@@ -175,6 +176,9 @@ class PDBDescription(BaseModel):
175
176
  if v is None:
176
177
  return v
177
178
 
179
+ if isinstance(v, date):
180
+ return v.isoformat()
181
+
178
182
  return str(date)
179
183
 
180
184
 
@@ -192,14 +196,482 @@ class PDB(BaseModel):
192
196
 
193
197
  def read_pdb(path: Path | str) -> PDB:
194
198
  """Read a pdb located at path."""
195
- return PDB.model_validate(atomium.open(str(path), data_dict=True))
199
+ return PDB.model_validate(astj.open(str(path), data_dict=True))
196
200
 
197
201
 
198
202
  def fetch_pdb(code: str) -> PDB:
199
203
  """Fetch a pdb from the Protein Data Bank."""
200
- return PDB.model_validate(atomium.fetch(code, data_dict=True))
204
+ return PDB.model_validate(astj.fetch(code, data_dict=True))
201
205
 
202
206
 
203
- def pdb_from_string(pdb: str) -> PDB:
207
+ def pdb_from_pdb_filestring(pdb: str) -> PDB:
204
208
  """Read a PDB from a string."""
205
209
  return PDB.model_validate(pdb_dict_to_data_dict(pdb_string_to_pdb_dict(pdb)))
210
+
211
+
212
+ def pdb_from_mmcif_filestring(pdb: str) -> PDB:
213
+ """Read a PDB from a string."""
214
+ return PDB.model_validate(mmcif_dict_to_data_dict(mmcif_string_to_mmcif_dict(pdb)))
215
+
216
+
217
+ def pdb_object_to_pdb_filestring(pdb: PDB) -> str:
218
+ pdb_lines: list[str] = []
219
+ chains: list[str] = []
220
+ # Header
221
+ pdb_lines.extend(_build_header_section(pdb))
222
+ pdb_lines.extend(_build_source_section(pdb))
223
+ pdb_lines.extend(_build_keyword_section(pdb))
224
+
225
+ full_name_dict: dict[str, str] = {}
226
+ seqres_lines, chains = _build_secondary_structure_and_seqres(pdb, full_name_dict)
227
+
228
+ pdb_lines.extend(seqres_lines)
229
+ pdb_lines.extend(_build_hetname_section(full_name_dict))
230
+
231
+ pdb_lines.extend(_build_remark_section(pdb, chains))
232
+
233
+ pdb_lines.extend(_build_crystallography_section(pdb))
234
+
235
+ for model_index, model in enumerate(pdb.models, start=1):
236
+ # If more than one model, add MODEL line
237
+ if len(pdb.models) > 1:
238
+ pdb_lines.append(f"MODEL {model_index:>4}")
239
+
240
+ # === 1) Polymers (protein, DNA, etc.) ===
241
+ for chain_id, polymer in model.polymer.items():
242
+ # Use polymer's internal_id if you want that as the chain ID
243
+ # otherwise just use the dictionary key
244
+ this_chain_id = polymer.internal_id or chain_id
245
+
246
+ for _residue_id, residue in polymer.residues.items():
247
+ assert residue.name is not None
248
+ for _atom_id, atom in residue.atoms.items():
249
+ line = _format_atom_line(
250
+ serial=_atom_id,
251
+ atom=atom,
252
+ chain_id=this_chain_id,
253
+ res_name=residue.name,
254
+ res_num=int(_residue_id[2:]),
255
+ alt_loc=atom.alt_loc or "",
256
+ )
257
+ pdb_lines.append(line)
258
+ if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
259
+ line = _format_anisou_line(
260
+ serial=_atom_id,
261
+ atom=atom,
262
+ chain_id=this_chain_id,
263
+ res_name=residue.name,
264
+ res_num=int(_residue_id[2:]),
265
+ alt_loc=atom.alt_loc or "",
266
+ )
267
+ pdb_lines.append(line)
268
+
269
+ pdb_lines.append(f"TER {_atom_id + 1:>5} {residue.name:>3} {this_chain_id}{int(_residue_id[2:]):>4}")
270
+
271
+ # === 2) Non-polymers (e.g. ligands, ions) ===
272
+ for _np_id, nonpoly in model.non_polymer.items():
273
+ # We'll treat each non-polymer as if it had a chain ID = nonpoly.polymer (or "Z")
274
+ chain_id_for_np = nonpoly.polymer or "Z"
275
+
276
+ # For residue name, we can just use nonpoly.name or a 3-letter variant
277
+ # There's no standard "residue number" for these, so pick something
278
+ # or let the user define it in the original model. We'll just use 1 for demonstration.
279
+ # If you prefer incremental numbering, keep a separate counter.
280
+ for _atom_id, atom in nonpoly.atoms.items():
281
+ line = _format_atom_line(
282
+ serial=_atom_id,
283
+ atom=atom,
284
+ chain_id=chain_id_for_np,
285
+ res_name=nonpoly.name,
286
+ res_num=int(_np_id[2:]),
287
+ )
288
+ pdb_lines.append(line)
289
+ if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
290
+ line = _format_anisou_line(
291
+ serial=_atom_id,
292
+ atom=atom,
293
+ chain_id=chain_id_for_np,
294
+ res_name=nonpoly.name,
295
+ res_num=int(_np_id[2:]),
296
+ )
297
+ pdb_lines.append(line)
298
+
299
+ # === 3) Water ===
300
+ for _w_id, water in model.water.items():
301
+ # Water is typically "HOH" in PDB
302
+ for _atom_id, atom in water.atoms.items():
303
+ line = _format_atom_line(
304
+ serial=_atom_id,
305
+ atom=atom,
306
+ chain_id=_w_id[0], # Or you can use water.polymer if set
307
+ res_name="HOH",
308
+ res_num=int(_w_id[2:]), # or an incrementing value
309
+ )
310
+ pdb_lines.append(line)
311
+ if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
312
+ line = _format_anisou_line(
313
+ serial=_atom_id,
314
+ atom=atom,
315
+ chain_id=_w_id[0],
316
+ res_name="HOH",
317
+ res_num=int(_w_id[2:]),
318
+ )
319
+ pdb_lines.append(line)
320
+
321
+ # === 4) Branched ===
322
+ # If your structure has branched molecules (glycans, etc.),
323
+ # adapt similarly. For now, let's demonstrate if there's anything in branched
324
+ for _b_id, branched_obj in model.branched.items():
325
+ # "branched_obj" could be a custom structure. We'll assume it
326
+ # mirrors the format of non_polymer or something similar.
327
+ # If it has `.atoms`, we do the same:
328
+ if isinstance(branched_obj, dict) and "atoms" in branched_obj:
329
+ for _atom_id, atom in branched_obj["atoms"].items():
330
+ line = _format_atom_line(
331
+ serial=_atom_id,
332
+ atom=atom,
333
+ chain_id="B",
334
+ res_name="BRN", # or branched_obj.get("name", "BRN")
335
+ res_num=1,
336
+ )
337
+ pdb_lines.append(line)
338
+ if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
339
+ line = _format_anisou_line(
340
+ serial=_atom_id,
341
+ atom=atom,
342
+ chain_id="B",
343
+ res_name="BRN",
344
+ res_num=1,
345
+ )
346
+ pdb_lines.append(line)
347
+
348
+ if len(pdb.models) > 1:
349
+ pdb_lines.append("ENDMDL")
350
+
351
+ # Finally, the PDB standard ends with an END record
352
+ pdb_lines.append("END")
353
+
354
+ resulting_string = _create_filestring(pdb_lines)
355
+ return resulting_string
356
+
357
+
358
+ def _create_filestring(lines: list[str]) -> str:
359
+ # Join the lines with newline characters and add a newline at the end if desired
360
+ filestring = "\n".join(lines) + "\n"
361
+ return filestring
362
+
363
+
364
+ def _format_date(date_str: str | None) -> str | None:
365
+ """
366
+ Formats a date string from "YYYY-MM-DD" to "DD-MMM-YY".
367
+
368
+ Args:
369
+ date_str (str): Date string in "YYYY-MM-DD" format.
370
+
371
+ Returns:
372
+ str: Formatted date string in "DD-MMM-YY" format.
373
+ """
374
+ if date_str is None:
375
+ return None
376
+ date_obj = datetime.strptime(date_str, "%Y-%m-%d").date()
377
+ return date_obj.strftime("%d-%b-%y").upper()
378
+
379
+
380
+ def _format_atom_line(
381
+ serial: int,
382
+ atom: PDBAtom,
383
+ chain_id: str,
384
+ res_name: str,
385
+ res_num: int | None,
386
+ alt_loc: str = "",
387
+ ) -> str:
388
+ """
389
+ Return a single PDB ATOM/HETATM record line as a string, using standard
390
+ column alignment conventions:
391
+
392
+ See https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf for details
393
+ """
394
+ record_type = "HETATM" if atom.is_hetatm else "ATOM "
395
+
396
+ # Columns are typically strict. We'll use Python formatting with fixed widths.
397
+ # Some fields might need defaults if missing.
398
+ alt_loc_char = alt_loc if alt_loc else " "
399
+ residue_name = (res_name or "UNK")[:3] # limit to 3 chars
400
+ chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
401
+ residue_num = res_num if res_num is not None else 1
402
+
403
+ # Format charge: PDB uses e.g. " 2-", " 1+" in columns 79-80
404
+ # If your model stores charges differently, adapt as needed.
405
+ # For simplicity, let's store integer/float charges as strings, e.g. " 0", " 2", etc.
406
+ # Or we can leave it blank if zero.
407
+ chg = ""
408
+ if atom.charge and abs(atom.charge) > 0:
409
+ # E.g., +1.0 -> " +1", -2.0 -> " -2"
410
+ # Convert to integer if it's always integral
411
+ chg_val = int(atom.charge) if float(atom.charge).is_integer() else atom.charge
412
+ chg = f"{chg_val:2}"
413
+ else:
414
+ chg = " "
415
+
416
+ # Construct the line.
417
+ # Use exact spacing & field widths to match PDB guidelines.
418
+ line = (
419
+ f"{record_type}"
420
+ f"{serial:5d} " # atom serial number (columns 7-11)
421
+ f"{atom.name:<4}" # atom name (columns 13-16, left-justified in this snippet)
422
+ f"{alt_loc_char}" # altLoc (column 17)
423
+ f"{residue_name:>3}" # residue name (columns 18-20)
424
+ f" {chain_char}" # chain ID (column 22)
425
+ f"{residue_num:4d}" # residue sequence number (columns 23-26)
426
+ f" " # columns 27-30 (insertion code plus spacing)
427
+ f"{atom.x:8.3f}" # x (columns 31-38)
428
+ f"{atom.y:8.3f}" # y (columns 39-46)
429
+ f"{atom.z:8.3f}" # z (columns 47-54)
430
+ f"{atom.occupancy:6.2f}" # occupancy (columns 55-60)
431
+ f"{atom.bvalue:6.2f}" # temp factor (columns 61-66)
432
+ f" " # columns 67-76 (padding)
433
+ f"{atom.element:>2}" # element (columns 77-78)
434
+ f"{chg:>2}" # charge (columns 79-80)
435
+ )
436
+ return line
437
+
438
+
439
+ def _format_anisou_line(
440
+ serial: int,
441
+ atom: PDBAtom,
442
+ chain_id: str,
443
+ res_name: str,
444
+ res_num: int | None,
445
+ alt_loc: str = "",
446
+ ) -> str:
447
+ """
448
+ Return a single PDB ANISOU record line as a string, using standard
449
+ column alignment conventions:
450
+
451
+ See https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf for details
452
+ """
453
+ record_type = "ANISOU"
454
+
455
+ # Columns are typically strict. We'll use Python formatting with fixed widths.
456
+ # Some fields might need defaults if missing.
457
+ alt_loc_char = alt_loc if alt_loc else " "
458
+ residue_name = (res_name or "UNK")[:3] # limit to 3 chars
459
+ chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
460
+ residue_num = res_num if res_num is not None else 1
461
+
462
+ chg = ""
463
+ if atom.charge and abs(atom.charge) > 0:
464
+ # E.g., +1.0 -> " +1", -2.0 -> " -2"
465
+ # Convert to integer if it's always integral
466
+ chg_val = int(atom.charge) if float(atom.charge).is_integer() else atom.charge
467
+ chg = f"{chg_val:2}"
468
+ else:
469
+ chg = " "
470
+
471
+ if atom.anisotropy:
472
+ aniso_lines = (
473
+ f"{_float_to_pdb_string(atom.anisotropy[0]):>7}" # x (columns 29-35)
474
+ f"{_float_to_pdb_string(atom.anisotropy[1]):>7}" # x (columns 36-42)
475
+ f"{_float_to_pdb_string(atom.anisotropy[2]):>7}" # x (columns 43-49)
476
+ f"{_float_to_pdb_string(atom.anisotropy[3]):>7}" # x (columns 50-56)
477
+ f"{_float_to_pdb_string(atom.anisotropy[4]):>7}" # x (columns 57-63)
478
+ f"{_float_to_pdb_string(atom.anisotropy[5]):>7}"
479
+ )
480
+ else:
481
+ space = " "
482
+ aniso_lines = (
483
+ f"{space:>7}" # x (columns 29-35)
484
+ f"{space:>7}" # x (columns 36-42)
485
+ f"{space:>7}" # x (columns 43-49)
486
+ f"{space:>7}" # x (columns 50-56)
487
+ f"{space:>7}" # x (columns 57-63)
488
+ f"{space:>7}"
489
+ )
490
+
491
+ # Construct the line.
492
+ # Use exact spacing & field widths to match PDB guidelines.
493
+ line = (
494
+ f"{record_type}"
495
+ f"{serial:5d} " # atom serial number (columns 7-11)
496
+ f"{atom.name:<4}" # atom name (columns 13-16, left-justified in this snippet)
497
+ f"{alt_loc_char}" # altLoc (column 17)
498
+ f"{residue_name:>3}" # residue name (columns 18-20)
499
+ f" {chain_char}" # chain ID (column 22)
500
+ f"{residue_num:4d}" # residue sequence number (columns 23-26)
501
+ f" " # columns 27-28 (insertion code plus spacing)
502
+ f"{aniso_lines}"
503
+ f" " # columns 70-76 (padding)
504
+ f"{atom.element:>2}" # element (columns 77-78)
505
+ f"{chg:>2}" # charge (columns 79-80)
506
+ )
507
+ return line
508
+
509
+
510
+ # chat code
511
+ def _float_to_pdb_string(x: float) -> str:
512
+ # Determine the sign
513
+ sign = "-" if x < 0 else ""
514
+ a = abs(x)
515
+
516
+ if a < 1:
517
+ # Format with exactly 4 decimals, e.g. 0.0044 -> "0.0044"
518
+ s = f"{a:.4f}"
519
+ # Remove the "0." and then remove any leading zeros.
520
+ significant = s[2:].lstrip("0")
521
+ return sign + significant
522
+ else:
523
+ # Format with exactly 4 decimals. For example, 1.131 -> "1.1310"
524
+ s = f"{a:.4f}"
525
+ # Split into integer and fractional parts.
526
+ integer_part, fractional_part = s.split(".")
527
+ # We want a total of 5 digits. So, the number of fractional digits we need is:
528
+ needed = 5 - len(integer_part)
529
+ # Use the needed number of digits from the fractional part.
530
+ result = integer_part + fractional_part[:needed]
531
+ return sign + result
532
+
533
+
534
+ def _helix_list_to_pdb_helix(polymer_dict: dict[str, PDBPolymer], helices: list[list[str]]) -> list[str]:
535
+ helix_lines = []
536
+ for i, helix in enumerate(helices, start=1):
537
+ start_aa_name = polymer_dict[helix[0][0]].residues[helix[0]].name
538
+ end_aa_name = polymer_dict[helix[-1][0]].residues[helix[-1]].name
539
+ helix_line = f"HELIX {i:>3} {i:>3} {start_aa_name} {helix[0][0]} {helix[0][2:]:>4} {end_aa_name} {helix[-1][0]} {helix[-1][2:]:>4} 1{len(helix):>36}"
540
+ helix_lines.append(helix_line)
541
+ return helix_lines
542
+
543
+
544
+ def _strand_list_to_pdb_sheets(polymer_dict: dict[str, PDBPolymer], strands: list[list[str]]) -> list[str]:
545
+ strand_lines = []
546
+ for i, strand in enumerate(strands, start=1):
547
+ start_aa_name = polymer_dict[strand[0][0]].residues[strand[0]].name
548
+ end_aa_name = polymer_dict[strand[-1][0]].residues[strand[-1]].name
549
+ helix_line = (
550
+ f"SHEET {i:>3} {strand[0][0]:>3}{len(strands):>2} {start_aa_name} {strand[0][0]}{strand[0][2:]:>4} "
551
+ f"{end_aa_name} {strand[-1][0]}{strand[-1][2:]:>4} {-1 if i != 1 else 0:>2}"
552
+ )
553
+ strand_lines.append(helix_line)
554
+ return strand_lines
555
+
556
+
557
+ def _build_header_section(pdb: PDB) -> list[str]:
558
+ header = f"HEADER {pdb.description.classification or '':<40}{_format_date(pdb.description.deposition_date) or '':<10} {pdb.description.code or '':<5}"
559
+ title = f"TITLE {pdb.description.title or '':<70}"
560
+ exp_dta = f"EXPDTA {pdb.experiment.technique or '':<69}"
561
+ authors = f"AUTHOR {','.join(pdb.description.authors).upper():<69}"
562
+
563
+ return [header, title, exp_dta, authors]
564
+
565
+
566
+ def _build_source_section(pdb: PDB) -> list[str]:
567
+ """Builds the source organism and expression system lines."""
568
+ organism_line = f"SOURCE ORGANISM_SCIENTIFIC: {(pdb.experiment.source_organism + ';') if pdb.experiment.source_organism else '':<69}"
569
+ expression_line = f"SOURCE EXPRESSION_SYSTEM: {(pdb.experiment.expression_system + ';') if pdb.experiment.expression_system else '':<69}"
570
+ return [organism_line, expression_line]
571
+
572
+
573
+ def _build_keyword_section(pdb: PDB) -> list[str]:
574
+ """Builds the keyword (KEYWDS) lines."""
575
+ lines = []
576
+ for i, keyword in enumerate(pdb.description.keywords):
577
+ if i == len(pdb.description.keywords) - 1:
578
+ lines.append(f"KEYWDS {keyword:<79}")
579
+ else:
580
+ lines.append(f"KEYWDS {keyword + ',':<79}")
581
+ return lines
582
+
583
+
584
+ def _build_secondary_structure_and_seqres(pdb: PDB, full_name_dict: dict[str, str]) -> tuple[list[str], list[str]]:
585
+ """
586
+ Iterates over models and polymers to build secondary structure lines (e.g. sheets, helices)
587
+ and sequence records (SEQRES). Also collects full names for heterogen records.
588
+ Returns a tuple: (list of seqres (and secondary structure) lines, list of chain IDs).
589
+ """
590
+ seqres_lines = []
591
+ chains = []
592
+
593
+ for model in pdb.models:
594
+ for chain_id, polymer in model.polymer.items():
595
+ chains.append(chain_id)
596
+ # Add sheet and helix records (if available)
597
+ for strand_line in _strand_list_to_pdb_sheets(model.polymer, polymer.strands):
598
+ seqres_lines.append(strand_line)
599
+ for helix_line in _helix_list_to_pdb_helix(model.polymer, polymer.helices):
600
+ seqres_lines.append(helix_line)
601
+ # Add SEQRES lines from the polymer’s sequence
602
+ if polymer.sequence:
603
+ seqres_lines.extend(inverse_make_sequences(polymer.sequence, chain_id))
604
+ # Collect full names from each residue
605
+ for _, residue in polymer.residues.items():
606
+ if residue.full_name and residue.name:
607
+ full_name_dict[residue.name] = residue.full_name
608
+ # Also collect full names for non-polymer molecules
609
+ for _, non_polymer in model.non_polymer.items():
610
+ if non_polymer.full_name and non_polymer.name:
611
+ full_name_dict[non_polymer.name] = non_polymer.full_name
612
+
613
+ return seqres_lines, chains
614
+
615
+
616
+ def _build_hetname_section(full_name_dict: dict[str, str]) -> list[str]:
617
+ """Builds the HETNAM lines for non-polymer molecules."""
618
+ lines = []
619
+ for name, full_name in full_name_dict.items():
620
+ if len(full_name) > 55:
621
+ for i in range(0, len(full_name), 55):
622
+ lines.append(f"HETNAM {int(i / 55):>2} {name:<3} {full_name[i : i + 55]:<55}")
623
+ else:
624
+ lines.append(f"HETNAM {name:<3} {full_name:<55}")
625
+ return lines
626
+
627
+
628
+ def _build_remark_section(pdb: PDB, chains: list[str]) -> list[str]:
629
+ """Builds REMARK lines (resolution, R factors, biomolecule and missing residues)."""
630
+ lines = []
631
+ lines.append(f"REMARK 2 RESOLUTION. {pdb.quality.resolution:>7} ANGSTROMS.")
632
+ if pdb.quality.rfree:
633
+ lines.append(f"REMARK 3 FREE R VALUE : {pdb.quality.rfree}")
634
+ if pdb.quality.rvalue:
635
+ lines.append(f"REMARK 3 R VALUE (WORKING SET) : {pdb.quality.rvalue}")
636
+
637
+ # REMARK 350: Biomolecule details
638
+ lines.append("REMARK 350")
639
+ lines.append("REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN")
640
+ lines.append("REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE")
641
+ lines.append("REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS")
642
+ lines.append("REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND")
643
+ lines.append("REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN.")
644
+ lines.append("REMARK 350")
645
+ lines.append("REMARK 350 BIOMOLECULE: 1")
646
+ lines.append("REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: MONOMERIC")
647
+ lines.append(f"REMARK 350 APPLY THE FOLLOWING TO CHAINS: {', '.join(chains)}")
648
+ lines.append("REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000")
649
+ lines.append("REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000")
650
+ lines.append("REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000")
651
+
652
+ # REMARK 465: Missing residues
653
+ lines.append("REMARK 465 MISSING RESIDUES")
654
+ lines.append("REMARK 465 THE FOLLOWING RESIDUES WERE NOT LOCATED IN THE")
655
+ lines.append("REMARK 465 EXPERIMENT. (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN")
656
+ lines.append("REMARK 465 IDENTIFIER; SSSEQ=SEQUENCE NUMBER; I=INSERTION CODE.)")
657
+ lines.append("REMARK 465")
658
+ lines.append("REMARK 465 M RES C SSSEQI")
659
+ for missing_residue in pdb.experiment.missing_residues:
660
+ lines.append(f"REMARK 465 {missing_residue.name} {missing_residue.id[0]} {missing_residue.id[2:]}")
661
+ return lines
662
+
663
+
664
+ def _build_crystallography_section(pdb: PDB) -> list[str]:
665
+ """Builds the CRYST1 line if unit cell data is provided."""
666
+ lines = []
667
+ if pdb.geometry.crystallography.unit_cell:
668
+ lines.append(
669
+ f"CRYST1{pdb.geometry.crystallography.unit_cell[0]:>9}"
670
+ f"{pdb.geometry.crystallography.unit_cell[1]:>9}"
671
+ f"{pdb.geometry.crystallography.unit_cell[2]:>9}"
672
+ f"{pdb.geometry.crystallography.unit_cell[3]:>7}"
673
+ f"{pdb.geometry.crystallography.unit_cell[4]:>7}"
674
+ f"{pdb.geometry.crystallography.unit_cell[5]:>7} "
675
+ f"{pdb.geometry.crystallography.space_group or '':<11}"
676
+ )
677
+ return lines
stjames/workflows/irc.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from typing import Self
2
2
 
3
- from pydantic import Field, model_validator
3
+ from pydantic import Field, PositiveFloat, field_validator, model_validator
4
4
 
5
5
  from ..method import XTB_METHODS, Method
6
6
  from ..mode import Mode
@@ -24,28 +24,26 @@ class IRCWorkflow(Workflow):
24
24
  :param settings: Settings for running the IRC (only for manual mode)
25
25
  :param solvent: Solvent for the calculation (non-Manual mode only)
26
26
  :param preopt: whether to optimize the geometry before starting the IRC
27
- :param final_opt: whether to optimize the final IRC geometry to a minimum
27
+ :param max_irc_steps: maximum number of steps for the IRC
28
+ :param step_size: step size for the IRC (Å)
28
29
 
29
30
  Results:
30
31
  :param starting_TS: optimized TS before the IRC (==initial_molecule if preopt=False)
31
32
  :param irc_forward: forward calculations
32
33
  :param irc_backward: reverse calculations
33
- :param opt_forward: optimization steps after the forward IRC
34
- :param opt_backward: optimization steps after the reverse IRC
35
34
  """
36
35
 
37
36
  settings: Settings = _sentinel_settings
38
37
  solvent: Solvent | None = None
39
38
 
40
39
  preopt: bool = False
41
- final_opt: bool = False
40
+ max_irc_steps: int = 10
41
+ step_size: PositiveFloat = 0.05
42
42
 
43
43
  starting_TS: UUID | None = None
44
44
 
45
45
  irc_forward: list[UUID] = Field(default_factory=list)
46
46
  irc_backward: list[UUID] = Field(default_factory=list)
47
- opt_forward: list[UUID] = Field(default_factory=list)
48
- opt_backward: list[UUID] = Field(default_factory=list)
49
47
 
50
48
  def __str__(self) -> str:
51
49
  return repr(self)
@@ -62,6 +60,15 @@ class IRCWorkflow(Workflow):
62
60
  """Level of theory for the workflow."""
63
61
  return self.settings.level_of_theory
64
62
 
63
+ @field_validator("step_size", mode="after")
64
+ @classmethod
65
+ def validate_step_size(cls, step_size: float) -> float:
66
+ """Validate the step size."""
67
+ if step_size < 1e-3 or step_size > 0.1:
68
+ raise ValueError(f"Step size must be between 0.001 and 0.1 Å, got: {step_size}")
69
+
70
+ return step_size
71
+
65
72
  @model_validator(mode="after")
66
73
  def validate_mode(self) -> Self:
67
74
  """Convert the mode to settings."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: stjames
3
- Version: 0.0.52
3
+ Version: 0.0.53
4
4
  Summary: standardized JSON atom/molecule encoding scheme
5
5
  Author-email: Corin Wagen <corin@rowansci.com>
6
6
  Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: atomium<2,>=1
12
12
  Requires-Dist: pydantic>=2.4
13
13
  Requires-Dist: numpy
14
14
  Requires-Dist: atomium<2.0,>=1.0
15
+ Requires-Dist: requests
15
16
 
16
17
  # stjames
17
18
 
@@ -15,7 +15,7 @@ stjames/method.py,sha256=5hBHk2xQLpxZ52LwJ9FHWaqQMdFKnsbQEOxaVe6O4Go,2321
15
15
  stjames/mode.py,sha256=xw46Cc7f3eTS8i35qECi-8DocAlANhayK3w4akD4HBU,496
16
16
  stjames/molecule.py,sha256=DeNYmFdvbuKeXvLqlu-UxHMyZVK6y4j-Lw3HITGMnHw,12406
17
17
  stjames/opt_settings.py,sha256=gxXGtjy9l-Q5Wen9eO6T6HHRCuS8rfOofdVQIJj0JcI,550
18
- stjames/pdb.py,sha256=LDAxBLwd_xAzpEmls5G3fdvq77O7Cf7nMY8_ApDPfH8,4742
18
+ stjames/pdb.py,sha256=yAEqFV2BxStd-G1PDNqtB8Qy_8x4sWZDiaSk8ifM1U0,25130
19
19
  stjames/periodic_cell.py,sha256=eV_mArsY_MPEFSrFEsTC-CyCc6V8ITAXdk7yhjjNI7M,1080
20
20
  stjames/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  stjames/scf_settings.py,sha256=WotVgVrayQ_8PUHP39zVtG7iLT9PV41lpzruttFACP8,2356
@@ -25,6 +25,11 @@ stjames/status.py,sha256=wTKNcNxStoEHrxxgr_zTyN90NITa3rxMQZzOgrCifEw,332
25
25
  stjames/task.py,sha256=OLINRqe66o7t8arffilwmggrF_7TH0L79u6DhGruxV8,329
26
26
  stjames/thermochem_settings.py,sha256=ZTLz31v8Ltutde5Nfm0vH5YahWjcfFWfr_R856KffxE,517
27
27
  stjames/types.py,sha256=hw-3UBikESvN3DzfK5doZB030kIEfx9gC3yBkIbebsI,3764
28
+ stjames/atomium_stjames/__init__.py,sha256=gZkzC7i9D_fmWUTN55gtygITo3-qvJUda5CXLR0jyCQ,306
29
+ stjames/atomium_stjames/data.py,sha256=-hzwBpTHq5JetsOVyopUJswKnKAkMtJ_XkONxjXVupU,5675
30
+ stjames/atomium_stjames/mmcif.py,sha256=16LNhQW7GkwEmRAG2lDEnhQaeBabtzIiEbzjjBnLhNg,27108
31
+ stjames/atomium_stjames/pdb.py,sha256=nkCqdc6fy6rKNcIZZDysDLTdlPJWWRmTYBYEFr1wcAQ,22365
32
+ stjames/atomium_stjames/utilities.py,sha256=B_TNLTrsiGaEPBG5-4mhTcj0v4VgYUi55ICF9IR_LG0,4776
28
33
  stjames/data/__init__.py,sha256=O59Ksp7AIqwOELCWymfCx7YeBzwNOGCMlGQi7tNLqiE,24
29
34
  stjames/data/bragg_radii.json,sha256=hhbn-xyZNSdmnULIjN2Cvq-_BGIZIqG243Ls_mey61w,1350
30
35
  stjames/data/elements.py,sha256=9BW01LZlyJ0H5s7Q26vUmjZIST41fwOYYrGvmPd7q0w,858
@@ -43,7 +48,7 @@ stjames/workflows/docking.py,sha256=K6zy4lo1XfrrMd7ZmKAe_Fd9wvKhtCMoK66gp-TsuJA,
43
48
  stjames/workflows/electronic_properties.py,sha256=uAIcGKKLhqoHyDgcOZulEXwTU2EjidyvOndZDYyeJEk,4003
44
49
  stjames/workflows/fukui.py,sha256=2J23RjkSOZ-40AM3AdnbJkRBGaCevkjkhnV3pVfa6lo,738
45
50
  stjames/workflows/hydrogen_bond_basicity.py,sha256=Luvov2DlDvZN06W-mU6YaN7wcIrTLwzdoWww-jNE3x4,517
46
- stjames/workflows/irc.py,sha256=y4KXeHPfPoT9jsroABxWyFdg6k5y3r99hdtvo8g26_A,3148
51
+ stjames/workflows/irc.py,sha256=3lA3EOXju6d0vETs-PEnRBnGCkshTXDtjw4dVDj0N5A,3333
47
52
  stjames/workflows/molecular_dynamics.py,sha256=4HmYETU1VT2BA4-PqAayRZLjnj1WuYxd5bqpIyH9g5k,2465
48
53
  stjames/workflows/multistage_opt.py,sha256=0ou-UYMGIrewZIg3QZIgwS_eweYdsh2pRplxgRCqLcE,13572
49
54
  stjames/workflows/pka.py,sha256=vSbMc7wuUKATNLq2kQyfCyX6aUthCj-XGSoXnuk4GMo,1031
@@ -52,8 +57,8 @@ stjames/workflows/scan.py,sha256=uNSuUmVMAV4exNvcv1viVe7930i7GZMn7RtEimnwEE8,100
52
57
  stjames/workflows/spin_states.py,sha256=b-uCf-pHjF_JHbExeb5GdRToE0pIxP0JTd50U130ckI,4693
53
58
  stjames/workflows/tautomer.py,sha256=x3TC8hkMs87ZUodLyhce5EUzYoV276ePfPMi7ISWyNU,651
54
59
  stjames/workflows/workflow.py,sha256=tIu5naADYgYS7kdW8quvGEWHWosBcrIdcD7L86v-uMQ,976
55
- stjames-0.0.52.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
56
- stjames-0.0.52.dist-info/METADATA,sha256=4vbKOo5yexzEQRYM3x6_fYAyAt72lhpU-UKSn_hdOwE,1689
57
- stjames-0.0.52.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
58
- stjames-0.0.52.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
59
- stjames-0.0.52.dist-info/RECORD,,
60
+ stjames-0.0.53.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
61
+ stjames-0.0.53.dist-info/METADATA,sha256=rXyJcZw2ooRmEXl7y7YNoca93EyheOn780kZPISC-Aw,1713
62
+ stjames-0.0.53.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
63
+ stjames-0.0.53.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
64
+ stjames-0.0.53.dist-info/RECORD,,