stjames 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stjames might be problematic. Click here for more details.

@@ -516,7 +516,7 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
516
516
  try:
517
517
  model["polymer"][mol_id]["residues"][res_id] = {
518
518
  "name": name,
519
- "full_name": names.get(name),
519
+ "full_name": names.get(name).upper() if names.get(name) is not None else None, # type: ignore [union-attr]
520
520
  "atoms": {int(atom["id"]): atom_dict_to_atom_dict(atom, aniso)},
521
521
  "number": len(model["polymer"][mol_id]["residues"]) + 1,
522
522
  }
@@ -530,7 +530,7 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
530
530
  "name": name,
531
531
  "atoms": {int(atom["id"]): atom_dict_to_atom_dict(atom, aniso)},
532
532
  "number": 1,
533
- "full_name": names.get(name),
533
+ "full_name": names.get(name).upper() if names.get(name) is not None else None, # type: ignore [union-attr]
534
534
  }
535
535
  },
536
536
  }
@@ -547,6 +547,8 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
547
547
  :param names: lookup dictionary for full name information
548
548
  """
549
549
  mol_id = make_residue_id(atom)
550
+ if mol_type == "non-polymer":
551
+ mol_type = "non_polymer"
550
552
 
551
553
  try:
552
554
  model[mol_type][mol_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
@@ -554,7 +556,7 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
554
556
  name = atom["auth_comp_id"]
555
557
  model[mol_type][mol_id] = {
556
558
  "name": name,
557
- "full_name": names.get(name),
559
+ "full_name": names.get(name).upper() if names.get(name) is not None and names.get(name).lower() != "water" else None, # type: ignore [union-attr]
558
560
  "internal_id": atom["label_asym_id"],
559
561
  "polymer": atom["auth_asym_id"],
560
562
  "atoms": {int(atom["id"]): atom_dict_to_atom_dict(atom, aniso)},
@@ -644,7 +646,7 @@ def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dic
644
646
  "bvalue": d.get("B_iso_or_equiv"),
645
647
  "charge": d.get(charge, 0) if d.get(charge) != "?" else 0,
646
648
  "alt_loc": d.get("label_alt_id") if d.get("label_alt_id") != "." else None,
647
- "anisotropy": aniso_dict.get(int(d["id"]), [0, 0, 0, 0, 0, 0]),
649
+ "anisotropy": aniso_dict.get(int(d["id"]), None),
648
650
  "is_hetatm": d.get("group_PDB", "ATOM") == "HETATM",
649
651
  }
650
652
 
stjames/molecule.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  from pathlib import Path
3
- from typing import Annotated, Iterable, Optional, Self, TypeAlias
3
+ from typing import Annotated, Any, Iterable, Optional, Self, Sequence, TypeAlias, TypedDict, TypeVar
4
4
 
5
5
  import pydantic
6
6
  from pydantic import AfterValidator, NonNegativeInt, PositiveInt, ValidationError
@@ -9,6 +9,7 @@ from rdkit.Chem import AllChem
9
9
 
10
10
  from .atom import Atom
11
11
  from .base import Base, round_float, round_optional_float
12
+ from .data import SYMBOL_ELEMENT
12
13
  from .periodic_cell import PeriodicCell
13
14
  from .types import (
14
15
  FloatPerAtom,
@@ -34,6 +35,7 @@ class VibrationalMode(Base):
34
35
  reduced_mass: Annotated[float, AfterValidator(round_float(3))] # amu
35
36
  force_constant: Annotated[float, AfterValidator(round_float(3))] # mDyne/Å
36
37
  displacements: Annotated[Vector3DPerAtom, AfterValidator(round_vector3d_per_atom(6))] # Å
38
+ ir_intensity: Annotated[Optional[float], AfterValidator(round_optional_float(3))] = None # km/mol
37
39
 
38
40
 
39
41
  class Molecule(Base):
@@ -245,28 +247,102 @@ class Molecule(Base):
245
247
  return cls.from_extxyz_lines(extxyz.strip().splitlines(), charge=charge, multiplicity=multiplicity)
246
248
 
247
249
  @classmethod
248
- def from_extxyz_lines(cls: type[Self], lines: Iterable[str], charge: int = 0, multiplicity: PositiveInt = 1) -> Self:
249
- # ensure first line is number of atoms
250
- lines = list(lines)
250
+ def from_extxyz_lines(
251
+ cls: type[Self],
252
+ lines: Iterable[str],
253
+ charge: int | None = None,
254
+ multiplicity: PositiveInt | None = None,
255
+ cell: PeriodicCell | None = None,
256
+ ) -> Self:
257
+ """
258
+ Parses an EXTXYZ file, extracting atom positions, forces (if present), and metadata.
259
+
260
+ Supports:
261
+ - Lattice vectors (cell information)
262
+ - Properties field (species, positions, forces, etc.)
263
+ - Other metadata like charge, multiplicity, energy, etc.
264
+
265
+ :param lines: Iterable of lines from an EXTXYZ file
266
+ :param charge: total charge of the molecule (default: 0 if not found)
267
+ :param multiplicity: spin multiplicity of the molecule (default: 1 if not found)
268
+ :param cell: PeriodicCell containing lattice vectors
269
+ :return: Molecule
270
+ :raises MoleculeReadError: if the file is not in the correct format
271
+ """
272
+ if not isinstance(lines, Sequence):
273
+ lines = list(lines)
274
+
275
+ # Ensure first line contains number of atoms
251
276
  if len(lines[0].split()) == 1:
252
277
  natoms = lines[0].strip()
253
- if not natoms.isdigit() or (int(lines[0]) != len(lines) - 2):
254
- raise MoleculeReadError(f"First line of EXTXYZ file should be the number of atoms, got: {lines[0]} != {len(lines) - 2}")
255
- lines = lines[1:]
278
+ if not natoms.isdigit() or (int(natoms) != len(lines) - 2):
279
+ raise MoleculeReadError(f"First line should be number of atoms, got: {lines[0]} != {len(lines) - 2}")
280
+ data_line, *lines = lines[1:]
256
281
  else:
257
- raise MoleculeReadError(f"First line of EXTXYZ should be only an int denoting number of atoms. Got {lines[0].split()}")
282
+ raise MoleculeReadError(f"First line should be an integer denoting atom count. Got {lines[0].split()}")
258
283
 
259
- # ensure second line contains key-value pairs
260
- if "=" not in lines[0]:
261
- raise MoleculeReadError(f"Invalid property line, got {lines[0]}")
284
+ metadata = parse_extxyz_comment_line(data_line)
262
285
 
263
- cell = parse_comment_line(lines[0])
264
- lines = lines[1:]
286
+ T = TypeVar("T")
265
287
 
266
- try:
267
- return cls(atoms=[Atom.from_xyz(line) for line in lines], cell=cell, charge=charge, multiplicity=multiplicity)
268
- except (ValueError, ValidationError) as e:
269
- raise MoleculeReadError("Error reading molecule from extxyz") from e
288
+ def metadata_optional_get(key: str, value: T | None, default: T) -> T:
289
+ """Set key to default if not found in metadata"""
290
+ if value is None:
291
+ return metadata.get(key, default) # type: ignore [return-value]
292
+
293
+ return value
294
+
295
+ charge = metadata_optional_get("total_charge", charge, 0)
296
+ multiplicity = metadata_optional_get("multiplicity", multiplicity, 1)
297
+ cell = cell or metadata.get("cell")
298
+ energy = metadata.get("energy", None)
299
+
300
+ force_idx = None
301
+ if properties := metadata.get("properties", "").split(":"):
302
+ if properties[0].lower() != "species":
303
+ raise MoleculeReadError(f"Invalid or missing 'Properties' field in EXTXYZ, got: {properties}")
304
+
305
+ # Identify column indices for position and force data
306
+ pos_idx = None
307
+ current_idx = 0 # Start after 'species:S'
308
+
309
+ while current_idx < len(properties):
310
+ if properties[current_idx].lower() == "pos" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
311
+ pos_idx = current_idx
312
+ elif properties[current_idx].lower() == "forces" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
313
+ force_idx = current_idx
314
+ current_idx += 3
315
+
316
+ if pos_idx is None:
317
+ raise MoleculeReadError("No position data ('pos:R:3') found in Properties field.")
318
+
319
+ def parse_line_atoms(line: str) -> Atom:
320
+ symbol, sx, sy, sz, *_ = line.split()
321
+ atomic_number = SYMBOL_ELEMENT[symbol.title()]
322
+ x, y, z = map(float, (sx, sy, sz))
323
+
324
+ return Atom(atomic_number=atomic_number, position=(x, y, z))
325
+
326
+ def parse_line_with_grad(line: str) -> tuple[Atom, Vector3D]:
327
+ symbol, sx, sy, sz, sgx, sgy, sgz, *_ = line.split()
328
+ atomic_number = SYMBOL_ELEMENT[symbol.title()]
329
+ x, y, z = map(float, (sx, sy, sz))
330
+ gx, gy, gz = map(float, (sgx, sgy, sgz))
331
+
332
+ return (
333
+ Atom(atomic_number=atomic_number, position=(x, y, z)),
334
+ (-gx, -gy, -gz),
335
+ )
336
+
337
+ atoms: list[Atom]
338
+ gradients: list[Vector3D] | None
339
+ if force_idx is not None:
340
+ atoms, gradients = zip(*map(parse_line_with_grad, lines), strict=True) # type: ignore [assignment]
341
+ else:
342
+ atoms = [parse_line_atoms(line) for line in lines]
343
+ gradients = None
344
+
345
+ return cls(atoms=atoms, cell=cell, charge=charge, multiplicity=multiplicity, energy=energy, gradient=gradients)
270
346
 
271
347
  @classmethod
272
348
  def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0) -> Self:
@@ -312,43 +388,62 @@ def _embed_rdkit_mol(rdkm: RdkitMol) -> RdkitMol:
312
388
  return rdkm
313
389
 
314
390
 
315
class EXTXYZMetadata(TypedDict, total=False):
    """Key/value pairs extracted from an EXTXYZ comment line (all keys optional)."""

    properties: Any
    total_charge: int
    multiplicity: int
    energy: float
    cell: PeriodicCell


def parse_extxyz_comment_line(line: str) -> EXTXYZMetadata:
    """
    Parse the comment line of an EXTXYZ file, extracting lattice, properties, and metadata.

    Keys are lower-cased and surrounding quotes are stripped from values.

    Supports:
        - Lattice vectors (stored under ``cell`` as a PeriodicCell)
        - Properties field (stored verbatim as a string)
        - ``total_charge`` and ``multiplicity`` (converted to int), ``energy`` (converted to float)
        - Any other key, stored as its raw string value

    :param line: comment line from an EXTXYZ file
    :return: parsed properties
    :raises MoleculeReadError: if the lattice or a numeric field cannot be parsed

    >>> parse_extxyz_comment_line('Lattice="6.0 0.0 0.0 6.0 0.0 0.0 6.0 0.0 0.0"Properties=species:S:1:pos:R:3')
    {'cell': PeriodicCell(lattice_vectors=((6.0, 0.0, 0.0), (6.0, 0.0, 0.0), (6.0, 0.0, 0.0)), is_periodic=(True, True, True), volume=0.0), 'properties': 'species:S:1:pos:R:3'}
    """  # noqa: E501

    # Regular expression to match key="value", key='value', or key=value
    pattern = r"(\S+?=(?:\".*?\"|\'.*?\'|\S+))"

    prop_dict: EXTXYZMetadata = {}
    for pair in re.findall(pattern, line):
        key, value = pair.split("=", 1)
        key = key.lower().strip()
        value = value.strip("'\"")

        if key == "lattice":
            lattice_values = value.split()
            if len(lattice_values) != 9:
                raise MoleculeReadError(f"Lattice should have 9 entries, got {len(lattice_values)}")

            # Convert the nine entries to a 3x3 tuple of tuples of floats
            try:
                cell = tuple(tuple(map(float, lattice_values[i : i + 3])) for i in range(0, 9, 3))
            except ValueError as e:
                raise MoleculeReadError(f"Lattice should be floats, got {lattice_values}") from e

            prop_dict["cell"] = PeriodicCell(lattice_vectors=cell)

        elif key == "properties":
            prop_dict["properties"] = value

        elif key in ("total_charge", "multiplicity"):
            # Report malformed integers the same way as malformed lattices
            try:
                prop_dict[key] = int(value)  # type: ignore [literal-required]
            except ValueError as e:
                raise MoleculeReadError(f"{key} should be an integer, got {value!r}") from e

        elif key == "energy":
            try:
                prop_dict["energy"] = float(value)
            except ValueError as e:
                raise MoleculeReadError(f"energy should be a float, got {value!r}") from e

        else:
            # Unknown keys are kept as raw strings
            prop_dict[key] = value  # type: ignore [literal-required]

    return prop_dict
stjames/pdb.py CHANGED
@@ -1,3 +1,4 @@
1
+ import re
1
2
  from datetime import date, datetime
2
3
  from pathlib import Path
3
4
  from typing import Any, Literal
@@ -204,6 +205,12 @@ def fetch_pdb(code: str) -> PDB:
204
205
  return PDB.model_validate(astj.fetch(code, data_dict=True))
205
206
 
206
207
 
208
def fetch_pdb_from_mmcif(code: str) -> PDB:
    """
    Fetch a structure by code, requesting its mmCIF (".cif") file, and validate it as a PDB.

    :param code: structure code without a file extension (".cif" is appended here)
    :return: PDB validated from the data dictionary returned by ``astj.fetch``
    """
    mmcif_code = code + ".cif"
    data_dict = astj.fetch(mmcif_code, data_dict=True)
    return PDB.model_validate(data_dict)
212
+
213
+
207
214
  def pdb_from_pdb_filestring(pdb: str) -> PDB:
208
215
  """Read a PDB from a string."""
209
216
  return PDB.model_validate(pdb_dict_to_data_dict(pdb_string_to_pdb_dict(pdb)))
@@ -270,7 +277,7 @@ def pdb_object_to_pdb_filestring(
270
277
  atom=atom,
271
278
  chain_id=this_chain_id,
272
279
  res_name=residue.name,
273
- res_num=int(_residue_id[2:]),
280
+ res_num=_residue_id[2:],
274
281
  alt_loc=atom.alt_loc or "",
275
282
  )
276
283
  pdb_lines.append(line)
@@ -280,7 +287,7 @@ def pdb_object_to_pdb_filestring(
280
287
  atom=atom,
281
288
  chain_id=this_chain_id,
282
289
  res_name=residue.name,
283
- res_num=int(_residue_id[2:]),
290
+ res_num=_residue_id[2:],
284
291
  alt_loc=atom.alt_loc or "",
285
292
  )
286
293
  pdb_lines.append(line)
@@ -302,7 +309,7 @@ def pdb_object_to_pdb_filestring(
302
309
  atom=atom,
303
310
  chain_id=chain_id_for_np,
304
311
  res_name=nonpoly.name,
305
- res_num=int(_np_id[2:]),
312
+ res_num=_np_id[2:],
306
313
  )
307
314
  pdb_lines.append(line)
308
315
  if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -311,7 +318,7 @@ def pdb_object_to_pdb_filestring(
311
318
  atom=atom,
312
319
  chain_id=chain_id_for_np,
313
320
  res_name=nonpoly.name,
314
- res_num=int(_np_id[2:]),
321
+ res_num=_np_id[2:],
315
322
  )
316
323
  pdb_lines.append(line)
317
324
 
@@ -324,7 +331,7 @@ def pdb_object_to_pdb_filestring(
324
331
  atom=atom,
325
332
  chain_id=_w_id[0], # Or you can use water.polymer if set
326
333
  res_name="HOH",
327
- res_num=int(_w_id[2:]), # or an incrementing value
334
+ res_num=_w_id[2:], # or an incrementing value
328
335
  )
329
336
  pdb_lines.append(line)
330
337
  if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -333,7 +340,7 @@ def pdb_object_to_pdb_filestring(
333
340
  atom=atom,
334
341
  chain_id=_w_id[0],
335
342
  res_name="HOH",
336
- res_num=int(_w_id[2:]),
343
+ res_num=_w_id[2:],
337
344
  )
338
345
  pdb_lines.append(line)
339
346
 
@@ -351,7 +358,7 @@ def pdb_object_to_pdb_filestring(
351
358
  atom=atom,
352
359
  chain_id="B",
353
360
  res_name="BRN", # or branched_obj.get("name", "BRN")
354
- res_num=1,
361
+ res_num="1",
355
362
  )
356
363
  pdb_lines.append(line)
357
364
  if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -360,7 +367,7 @@ def pdb_object_to_pdb_filestring(
360
367
  atom=atom,
361
368
  chain_id="B",
362
369
  res_name="BRN",
363
- res_num=1,
370
+ res_num="1",
364
371
  )
365
372
  pdb_lines.append(line)
366
373
 
@@ -401,7 +408,7 @@ def _format_atom_line(
401
408
  atom: PDBAtom,
402
409
  chain_id: str,
403
410
  res_name: str,
404
- res_num: int | None,
411
+ res_num: str | None,
405
412
  alt_loc: str = "",
406
413
  ) -> str:
407
414
  """
@@ -417,7 +424,15 @@ def _format_atom_line(
417
424
  alt_loc_char = alt_loc if alt_loc else " "
418
425
  residue_name = (res_name or "UNK")[:3] # limit to 3 chars
419
426
  chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
420
- residue_num = res_num if res_num is not None else 1
427
+ residue_num_str = "1"
428
+ insertion_code = " "
429
+ if res_num:
430
+ match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
431
+ if match:
432
+ residue_num_str, insertion_code = match.groups()
433
+ insertion_code = insertion_code if insertion_code != "" else " "
434
+
435
+ residue_num = int(residue_num_str)
421
436
 
422
437
  # Format charge: PDB uses e.g. " 2-", " 1+" in columns 79-80
423
438
  # If your model stores charges differently, adapt as needed.
@@ -445,7 +460,8 @@ def _format_atom_line(
445
460
  f"{residue_name:>3}" # residue name (columns 18-20)
446
461
  f" {chain_char}" # chain ID (column 22)
447
462
  f"{residue_num:4d}" # residue sequence number (columns 23-26)
448
- f" " # columns 27-30 (insertion code plus spacing)
463
+ f"{insertion_code}"
464
+ f" " # columns 27-30 (spacing)
449
465
  f"{atom.x:8.3f}" # x (columns 31-38)
450
466
  f"{atom.y:8.3f}" # y (columns 39-46)
451
467
  f"{atom.z:8.3f}" # z (columns 47-54)
@@ -463,7 +479,7 @@ def _format_anisou_line(
463
479
  atom: PDBAtom,
464
480
  chain_id: str,
465
481
  res_name: str,
466
- res_num: int | None,
482
+ res_num: str | None,
467
483
  alt_loc: str = "",
468
484
  ) -> str:
469
485
  """
@@ -479,7 +495,15 @@ def _format_anisou_line(
479
495
  alt_loc_char = alt_loc if alt_loc else " "
480
496
  residue_name = (res_name or "UNK")[:3] # limit to 3 chars
481
497
  chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
482
- residue_num = res_num if res_num is not None else 1
498
+ residue_num_str = "1"
499
+ insertion_code = " "
500
+ if res_num:
501
+ match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
502
+ if match:
503
+ residue_num_str, insertion_code = match.groups()
504
+ insertion_code = insertion_code if insertion_code != "" else " "
505
+
506
+ residue_num = int(residue_num_str)
483
507
 
484
508
  chg = ""
485
509
  if atom.charge and abs(atom.charge) > 0:
@@ -522,7 +546,8 @@ def _format_anisou_line(
522
546
  f"{residue_name:>3}" # residue name (columns 18-20)
523
547
  f" {chain_char}" # chain ID (column 22)
524
548
  f"{residue_num:4d}" # residue sequence number (columns 23-26)
525
- f" " # columns 27-28 (insertion code plus spacing)
549
+ f"{insertion_code}"
550
+ f" " # columns 27-28 (plus spacing)
526
551
  f"{aniso_lines}"
527
552
  f" " # columns 70-76 (padding)
528
553
  f"{atom.element:>2}" # element (columns 77-78)
@@ -12,6 +12,7 @@ from .docking import *
12
12
  from .electronic_properties import *
13
13
  from .fukui import *
14
14
  from .hydrogen_bond_basicity import *
15
+ from .ion_mobility import *
15
16
  from .irc import *
16
17
  from .molecular_dynamics import *
17
18
  from .multistage_opt import *
@@ -34,6 +35,7 @@ WORKFLOW_NAME = Literal[
34
35
  "electronic_properties",
35
36
  "fukui",
36
37
  "hydrogen_bond_basicity",
38
+ "ion_mobility",
37
39
  "irc",
38
40
  "molecular_dynamics",
39
41
  "multistage_opt",
@@ -56,6 +58,7 @@ WORKFLOW_MAPPING: dict[WORKFLOW_NAME, Workflow] = {
56
58
  "electronic_properties": ElectronicPropertiesWorkflow, # type: ignore [dict-item]
57
59
  "fukui": FukuiIndexWorkflow, # type: ignore [dict-item]
58
60
  "hydrogen_bond_basicity": HydrogenBondBasicityWorkflow, # type: ignore [dict-item]
61
+ "ion_mobility": IonMobilityWorkflow, # type: ignore [dict-item]
59
62
  "irc": IRCWorkflow, # type: ignore [dict-item]
60
63
  "molecular_dynamics": MolecularDynamicsWorkflow, # type: ignore [dict-item]
61
64
  "multistage_opt": MultiStageOptWorkflow, # type: ignore [dict-item]
@@ -1,18 +1,39 @@
1
1
  """ADME-Tox property prediction workflow."""
2
2
 
3
- from .workflow import MoleculeWorkflow
3
+ import warnings
4
+ from typing import Self
4
5
 
6
+ from pydantic import model_validator
5
7
 
6
- class ADMETWorkflow(MoleculeWorkflow):
8
+ from ..molecule import Molecule
9
+ from .workflow import MoleculeWorkflow, SMILESWorkflow
10
+
11
+
12
class ADMETWorkflow(SMILESWorkflow, MoleculeWorkflow):
    """
    A workflow for predicting ADME-Tox properties.

    Inherited:
    :param initial_smiles: SMILES string of molecule (mutually exclusive with initial_molecule)
    :param initial_molecule: Molecule of interest (deprecated)
    :param mode: Mode for workflow (currently unused)

    New:
    :param properties: predicted properties
    """

    initial_smiles: str = ""
    initial_molecule: Molecule | None = None  # type: ignore [assignment]  # Deprecated
    properties: dict[str, float | int] | None = None

    @model_validator(mode="after")
    def validate_mol_input(self) -> Self:
        """
        Ensure that exactly one of initial_molecule or initial_smiles is set.

        :raises ValueError: if both or neither of the two inputs are set
        """
        # XOR: reject both "neither set" and "both set"
        if not (bool(self.initial_smiles) ^ bool(self.initial_molecule)):
            raise ValueError("Must set exactly one of initial_molecule and initial_smiles")

        if self.initial_molecule is not None:
            warnings.warn(DeprecationWarning("initial_molecule is deprecated. Use initial_smiles instead."))

        return self
@@ -0,0 +1,36 @@
1
+ """Ion mobility workflow."""
2
+
3
+ from ..types import UUID
4
+ from .workflow import MoleculeWorkflow
5
+
6
+
7
class IonMobilityWorkflow(MoleculeWorkflow):
    """
    Workflow for calculating ion mobility (collision cross sections).

    Inherited:
    :param initial_molecule: Molecule of interest
    :param mode: Mode for workflow (currently unused)

    New:
    :param do_csearch: whether to perform a conformational search
    :param do_optimization: whether to perform an optimization

    Results:
    :param conformers: UUIDs of the conformers (presumably in the same order as the per-conformer lists)
    :param conformer_ccs: the collision cross section (Å**2) per conformer
    :param conformer_ccs_stdev: the uncertainty in the same
    :param boltzmann_weights: the Boltzmann weights at RT
    :param average_ccs: the Boltzmann-weighted CCS for the ensemble
    :param average_ccs_stdev: the uncertainty in the same
    """

    do_csearch: bool = True
    do_optimization: bool = True
    conformers: list[UUID] = []

    conformer_ccs: list[float] = []
    conformer_ccs_stdev: list[float] = []
    boltzmann_weights: list[float] = []

    average_ccs: float | None = None
    average_ccs_stdev: float | None = None
@@ -0,0 +1,72 @@
1
+ """pKa workflow."""
2
+
3
+ from typing import Annotated, Self
4
+
5
+ from pydantic import AfterValidator, model_validator
6
+
7
+ from ..base import Base, round_float
8
+ from ..types import round_list
9
+ from .workflow import SMILESWorkflow
10
+
11
+
12
class MacropKaMicrostate(Base):
    """
    A microstate for pKa calculations.

    :param smiles: SMILES string for this microstate
    :param energy: free energy of this microstate (rounded to 3 decimal places)
    :param charge: the total charge
    """

    smiles: str
    energy: Annotated[float, AfterValidator(round_float(3))]  # free energy
    charge: int
24
+
25
+
26
class MacropKaValue(Base):
    """
    A macroscopic pKa linking two total-charge states.

    :param initial_charge: the charge of the initial state
    :param final_charge: the charge of the final state
    :param pKa: the pKa for the transition (rounded to 3 decimal places)
    """

    # charge before and after the transition
    initial_charge: int
    final_charge: int

    # pKa of the transition
    pKa: Annotated[float, AfterValidator(round_float(3))]
38
+
39
+
40
class MacropKaWorkflow(SMILESWorkflow):
    """
    Workflow for calculating macroscopic pKa values.

    Inherited:
    :param initial_smiles: SMILES string of the molecule

    New:
    :param temperature: the temperature, in K
    :param min_pH: lower pH bound for the precomputed microstate weights
    :param max_pH: upper pH bound for the precomputed microstate weights

    Results:
    :param microstates: microstates
    :param pKa_values: macroscopic pKa values
    :param microstate_weights_by_pH: precomputed weights of the microstates, keyed by pH
    """

    # Inputs (each rounded to 3 decimal places)
    temperature: Annotated[float, AfterValidator(round_float(3))] = 298.0
    min_pH: Annotated[float, AfterValidator(round_float(3))] = 0.0
    max_pH: Annotated[float, AfterValidator(round_float(3))] = 14.0

    # Results
    microstates: list[MacropKaMicrostate] = []
    pKa_values: list[MacropKaValue] = []
    microstate_weights_by_pH: dict[float, Annotated[list[float], AfterValidator(round_list(6))]] = {}

    @model_validator(mode="after")
    def check_weights(self) -> Self:
        """
        Ensure every per-pH weight list has one entry per microstate.

        :raises ValueError: if any weight list differs in length from ``microstates``
        """
        n_microstates = len(self.microstates)
        mismatched = any(len(weights) != n_microstates for weights in self.microstate_weights_by_pH.values())
        if mismatched:
            raise ValueError("Length of microstate weights doesn't match!")

        return self
stjames/workflows/scan.py CHANGED
@@ -4,7 +4,7 @@ from typing import Annotated
4
4
 
5
5
  import numpy as np
6
6
  from numpy.typing import NDArray
7
- from pydantic import AfterValidator
7
+ from pydantic import AfterValidator, field_validator
8
8
 
9
9
  from ..base import Base, round_optional_float
10
10
  from ..molecule import Molecule
@@ -62,15 +62,33 @@ class ScanWorkflow(MoleculeWorkflow):
62
62
  :param mode: Mode for workflow (currently unused)
63
63
 
64
64
  New:
65
- :param scan_settings: information about what coordinate to scan
65
+ :param scan_settings: what coordinate(s) to scan; if more than one, all will be performed simultaneously and should have the same number of steps
66
+ :param scan_settings_2d: what additional coordinate(s) to scan; makes a grid with `scan_settings`
67
+ :param wavefront propagation: whether to use wavefront propagation (10.1063/5.0009232) for more expensive but smoother scans
66
68
  :param calc_settings: settings for the calculation
67
69
  :param calc_engine: engine to use for the calculation
68
70
  :param scan_points: points along the scan
69
71
  """
70
72
 
71
- scan_settings: ScanSettings
73
+ scan_settings: ScanSettings | list[ScanSettings]
74
+ scan_settings_2d: ScanSettings | list[ScanSettings] = []
72
75
  calc_settings: Settings
73
76
  calc_engine: str
74
77
 
78
+ wavefront_propagation: bool = True
79
+
75
80
  # UUIDs of scan points
76
81
  scan_points: list[UUID | None] = []
82
+
83
+ @field_validator("scan_settings", "scan_settings_2d", mode="after")
84
+ @classmethod
85
+ def validate_scan_settings(cls, val: ScanSettings | list[ScanSettings]) -> list[ScanSettings]:
86
+ """Ensure that scan_settings is a list, and that every list item has the same number of steps."""
87
+ if isinstance(val, ScanSettings):
88
+ val = [val]
89
+
90
+ for ss in val:
91
+ if ss.num != val[0].num:
92
+ raise ValueError("Concerted scan settings must have same number of steps!")
93
+
94
+ return val
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: stjames
3
- Version: 0.0.64
3
+ Version: 0.0.66
4
4
  Summary: standardized JSON atom/molecule encoding scheme
5
5
  Author-email: Corin Wagen <corin@rowansci.com>
6
6
  Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
12
12
  Requires-Dist: numpy
13
13
  Requires-Dist: requests
14
14
  Requires-Dist: rdkit
15
+ Dynamic: license-file
15
16
 
16
17
  # stjames
17
18
 
@@ -13,9 +13,9 @@ stjames/int_settings.py,sha256=5HXp8opt5ZyY1UpmfaK7NVloWVLM5jkG0elEEqpVLUo,896
13
13
  stjames/message.py,sha256=Rq6QqmHZKecWxYH8fVyXmuoCCPZv8YinvgykSeorXSU,216
14
14
  stjames/method.py,sha256=5hBHk2xQLpxZ52LwJ9FHWaqQMdFKnsbQEOxaVe6O4Go,2321
15
15
  stjames/mode.py,sha256=xw46Cc7f3eTS8i35qECi-8DocAlANhayK3w4akD4HBU,496
16
- stjames/molecule.py,sha256=2BRXYKtkm5ztYiywyC2S__Zu4a-QoDEgb7LR7F4xHvs,14268
16
+ stjames/molecule.py,sha256=4dakMkn-_I5bSWsijLLY0tn5NkBEuZhmtYDj-MDSJE0,17987
17
17
  stjames/opt_settings.py,sha256=gxXGtjy9l-Q5Wen9eO6T6HHRCuS8rfOofdVQIJj0JcI,550
18
- stjames/pdb.py,sha256=-i0H029NEX-pcyCqdVyq7D62ZDvmUPWK7l83WdoDmpk,25759
18
+ stjames/pdb.py,sha256=Ens-RNO8s1rxJzjWFXM4Q1-7s8wyXzEMPZrc6o4QDzk,26465
19
19
  stjames/periodic_cell.py,sha256=eV_mArsY_MPEFSrFEsTC-CyCc6V8ITAXdk7yhjjNI7M,1080
20
20
  stjames/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  stjames/scf_settings.py,sha256=WotVgVrayQ_8PUHP39zVtG7iLT9PV41lpzruttFACP8,2356
@@ -27,7 +27,7 @@ stjames/thermochem_settings.py,sha256=ZTLz31v8Ltutde5Nfm0vH5YahWjcfFWfr_R856Kffx
27
27
  stjames/types.py,sha256=rs2CdpkruIfU-PS98rjr9HAJNFGdZDB_zl-u3wa5rAs,4092
28
28
  stjames/atomium_stjames/__init__.py,sha256=gZkzC7i9D_fmWUTN55gtygITo3-qvJUda5CXLR0jyCQ,306
29
29
  stjames/atomium_stjames/data.py,sha256=-hzwBpTHq5JetsOVyopUJswKnKAkMtJ_XkONxjXVupU,5675
30
- stjames/atomium_stjames/mmcif.py,sha256=llbJ65p2B-aZN31-E_ODVDmrVeBoSw9y_Mg5XjyQvTA,26755
30
+ stjames/atomium_stjames/mmcif.py,sha256=B5t_gxvMTG8OblXUWF0dnKgOHhn-8bufOv_a7ccbiqU,27081
31
31
  stjames/atomium_stjames/pdb.py,sha256=C2mEcBDDrnoXD9ZCMIH2uJpjiWPJy6ktXq8IFZsrQKM,22482
32
32
  stjames/atomium_stjames/utilities.py,sha256=-YtM7sRMvMk0wWrC3svWUWH4CGI0NtY77nXsg9tjHfc,4964
33
33
  stjames/data/__init__.py,sha256=O59Ksp7AIqwOELCWymfCx7YeBzwNOGCMlGQi7tNLqiE,24
@@ -37,8 +37,8 @@ stjames/data/isotopes.json,sha256=5ba8QnLrHD_Ypv2xekv2cIRwYrX3MQ19-1FOFtt0RuU,83
37
37
  stjames/data/nist_isotopes.json,sha256=d5DNk1dX0iB1waEYIRR6JMHuA7AuYwSBEgBvb4EKyhM,14300
38
38
  stjames/data/read_nist_isotopes.py,sha256=y10FNjW43QpC45qib7VHsIghEwT7GG5rsNwHdc9osRI,3309
39
39
  stjames/data/symbol_element.json,sha256=vl_buFusTqBd-muYQtMLtTDLy2OtBI6KkBeqkaWRQrg,1186
40
- stjames/workflows/__init__.py,sha256=5KX0IcuYElj8K3qE2c-XY8dL-vPdG87US7ErfZlyK88,2293
41
- stjames/workflows/admet.py,sha256=m8yGWe-UeYK5F7TOeNsQMPTzdWL-aaRSTQsyO7SVa6k,421
40
+ stjames/workflows/__init__.py,sha256=sHKzK6ZtVt8TkaX3JoIrg46SVK9rDi_elA7qCIAqBpE,2410
41
+ stjames/workflows/admet.py,sha256=h8ph6oeRCxU3-_jqRRWPg3RZcheu9JzCHiWqSC9VYKY,1296
42
42
  stjames/workflows/basic_calculation.py,sha256=ZX3KwhfyyCTjc2ougQIL4If7gtwZP9WjqpL45mBquW0,573
43
43
  stjames/workflows/bde.py,sha256=hdTjwma5L9SrU5F5r6dB1ruB_B6buBUtZHf2sanNW2k,9802
44
44
  stjames/workflows/conformer.py,sha256=18aO6ngMBeGAmQkBdLGCCHr398RIYr1v2hD2IT1u4cc,3005
@@ -48,18 +48,20 @@ stjames/workflows/docking.py,sha256=GCW_-JeEZcMXKZ9EQFOxWUYRo0jsbzwIv10aSz8KuaQ,
48
48
  stjames/workflows/electronic_properties.py,sha256=GT3-NC7w-dbcOJ-3AzJ7LgzH6frTbiH2Iyb9BCa-SvY,4112
49
49
  stjames/workflows/fukui.py,sha256=e7CF7Mp2Dt1JTipQx-Sz_37W1urL-iRpjXY-9ItSvhM,1268
50
50
  stjames/workflows/hydrogen_bond_basicity.py,sha256=XDpHEluw6DQ9Zk5g2Je2a81HqIkqPglZ-6f2YZnd4Bc,1159
51
+ stjames/workflows/ion_mobility.py,sha256=e6XSidrud5qSkrAcjzOzgHaf-G09JoP09V76myjdyjc,1097
51
52
  stjames/workflows/irc.py,sha256=ZP7icylW8rgo_Uh7h3bmyumn0ru1IyF-61nP5Jnmq3M,3402
53
+ stjames/workflows/macropka.py,sha256=d3jUKZIUU7ifcMCB6KuTEpP91Pl9sQA0WAmjnODbp6g,2058
52
54
  stjames/workflows/molecular_dynamics.py,sha256=kxugE73Ntzpj-xpJSoQ1EwGzXXdvi_NTyeP4913EVwE,3173
53
55
  stjames/workflows/multistage_opt.py,sha256=pPLAZDztHd37q8cxCUkdq8EzOFyrTzZJHNfDV5auiHs,13638
54
56
  stjames/workflows/pka.py,sha256=j3vBh2YM3nJzJ1XJKPsmYahRCeaU9n3P-G-u9_moaFw,2065
55
57
  stjames/workflows/redox_potential.py,sha256=7S18t9Y3eynSnA3lZbRlvLfdbgeBopdiigLzt1zxg5c,3871
56
- stjames/workflows/scan.py,sha256=vGS1wWMpMSogb63DEED6U6oHsLgV0D2hXVQg2UWWJgs,1913
58
+ stjames/workflows/scan.py,sha256=Lph2VhsxJMpzY-wtmUV7U1TOS7mIwjgkLniaipGDo8I,2899
57
59
  stjames/workflows/solubility.py,sha256=kGfVyPPGDLRpf2j6dSY7woCkfsoXSbUzdSImA4mcMpw,1898
58
60
  stjames/workflows/spin_states.py,sha256=0degmE-frovgoXweshZyjfjqL7nkbaFoO9YoJhvQnaI,4748
59
61
  stjames/workflows/tautomer.py,sha256=7eYKziGPg8Km6lfowTzSkgJfJ4SHUPrAmnTf8Bi-SB0,1164
60
62
  stjames/workflows/workflow.py,sha256=sk2BUz59wdIkT_EyKOnMt5woNrjo3aHVK38cU8x8I7Q,1423
61
- stjames-0.0.64.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
62
- stjames-0.0.64.dist-info/METADATA,sha256=33n-Ix3Rfcue2dKm4SUqmWow8MeXv1_SF6UcpfOPYiQ,1672
63
- stjames-0.0.64.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
64
- stjames-0.0.64.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
65
- stjames-0.0.64.dist-info/RECORD,,
63
+ stjames-0.0.66.dist-info/licenses/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
64
+ stjames-0.0.66.dist-info/METADATA,sha256=vyqa0iG3iF1_oGzGK_R6KhwKrP1HZQVUKXB3MP91CSU,1694
65
+ stjames-0.0.66.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
66
+ stjames-0.0.66.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
67
+ stjames-0.0.66.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (77.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5