PyPI - stjames - Versions diffs - 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl - Mend

stjames 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of stjames might be problematic. Click here for more details.

Files changed (13) hide show

stjames/atomium_stjames/mmcif.py +6 -4
stjames/molecule.py +138 -43
stjames/pdb.py +39 -14
stjames/workflows/__init__.py +3 -0
stjames/workflows/admet.py +24 -3
stjames/workflows/ion_mobility.py +36 -0
stjames/workflows/macropka.py +72 -0
stjames/workflows/scan.py +21 -3
{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/METADATA +3 -2
{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/RECORD +13 -11
{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/WHEEL +1 -1
{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info/licenses}/LICENSE +0 -0
{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/top_level.txt +0 -0

stjames/atomium_stjames/mmcif.py CHANGED Viewed

@@ -516,7 +516,7 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
         try:
             model["polymer"][mol_id]["residues"][res_id] = {
                 "name": name,
-                "full_name": names.get(name),
+                "full_name": names.get(name).upper() if names.get(name) is not None else None,  # type: ignore [union-attr]
                 "atoms": {int(atom["id"]): atom_dict_to_atom_dict(atom, aniso)},
                 "number": len(model["polymer"][mol_id]["residues"]) + 1,
             }
@@ -530,7 +530,7 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
                         "name": name,
                         "atoms": {int(atom["id"]): atom_dict_to_atom_dict(atom, aniso)},
                         "number": 1,
-                        "full_name": names.get(name),
+                        "full_name": names.get(name).upper() if names.get(name) is not None else None,  # type: ignore [union-attr]
                     }
                 },
             }
@@ -547,6 +547,8 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
     :param names: lookup dictionary for full name information
     """
     mol_id = make_residue_id(atom)
+    if mol_type == "non-polymer":
+        mol_type = "non_polymer"
     try:
         model[mol_type][mol_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
@@ -554,7 +556,7 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
         name = atom["auth_comp_id"]
         model[mol_type][mol_id] = {
             "name": name,
-            "full_name": names.get(name),
+            "full_name": names.get(name).upper() if names.get(name) is not None and names.get(name).lower() != "water" else None,  # type: ignore [union-attr]
             "internal_id": atom["label_asym_id"],
             "polymer": atom["auth_asym_id"],
             "atoms": {int(atom["id"]): atom_dict_to_atom_dict(atom, aniso)},
@@ -644,7 +646,7 @@ def atom_dict_to_atom_dict(d: dict[str, Any], aniso_dict: dict[int, Any]) -> dic
         "bvalue": d.get("B_iso_or_equiv"),
         "charge": d.get(charge, 0) if d.get(charge) != "?" else 0,
         "alt_loc": d.get("label_alt_id") if d.get("label_alt_id") != "." else None,
-        "anisotropy": aniso_dict.get(int(d["id"]), [0, 0, 0, 0, 0, 0]),
+        "anisotropy": aniso_dict.get(int(d["id"]), None),
         "is_hetatm": d.get("group_PDB", "ATOM") == "HETATM",
     }

stjames/molecule.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from typing import Annotated, Iterable, Optional, Self, TypeAlias
+from typing import Annotated, Any, Iterable, Optional, Self, Sequence, TypeAlias, TypedDict, TypeVar
 import pydantic
 from pydantic import AfterValidator, NonNegativeInt, PositiveInt, ValidationError
@@ -9,6 +9,7 @@ from rdkit.Chem import AllChem
 from .atom import Atom
 from .base import Base, round_float, round_optional_float
+from .data import SYMBOL_ELEMENT
 from .periodic_cell import PeriodicCell
 from .types import (
     FloatPerAtom,
@@ -34,6 +35,7 @@ class VibrationalMode(Base):
     reduced_mass: Annotated[float, AfterValidator(round_float(3))]  # amu
     force_constant: Annotated[float, AfterValidator(round_float(3))]  # mDyne/Å
     displacements: Annotated[Vector3DPerAtom, AfterValidator(round_vector3d_per_atom(6))]  # Å
+    ir_intensity: Annotated[Optional[float], AfterValidator(round_optional_float(3))] = None  # km/mol
 class Molecule(Base):
@@ -245,28 +247,102 @@ class Molecule(Base):
         return cls.from_extxyz_lines(extxyz.strip().splitlines(), charge=charge, multiplicity=multiplicity)
     @classmethod
-    def from_extxyz_lines(cls: type[Self], lines: Iterable[str], charge: int = 0, multiplicity: PositiveInt = 1) -> Self:
-        # ensure first line is number of atoms
-        lines = list(lines)
+    def from_extxyz_lines(
+        cls: type[Self],
+        lines: Iterable[str],
+        charge: int | None = None,
+        multiplicity: PositiveInt | None = None,
+        cell: PeriodicCell | None = None,
+    ) -> Self:
+        """
+        Parses an EXTXYZ file, extracting atom positions, forces (if present), and metadata.
+        Supports:
+        - Lattice vectors (cell information)
+        - Properties field (species, positions, forces, etc.)
+        - Other metadata like charge, multiplicity, energy, etc.
+        :param lines: Iterable of lines from an EXTXYZ file
+        :param charge: total charge of the molecule (default: 0 if not found)
+        :param multiplicity: spin multiplicity of the molecule (default: 1 if not found)
+        :param cell: PeriodicCell containing lattice vectors
+        :return: Molecule
+        :raises MoleculeReadError: if the file is not in the correct format
+        """
+        if not isinstance(lines, Sequence):
+            lines = list(lines)
+        # Ensure first line contains number of atoms
         if len(lines[0].split()) == 1:
             natoms = lines[0].strip()
-            if not natoms.isdigit() or (int(lines[0]) != len(lines) - 2):
-                raise MoleculeReadError(f"First line of EXTXYZ file should be the number of atoms, got: {lines[0]} != {len(lines) - 2}")
-            lines = lines[1:]
+            if not natoms.isdigit() or (int(natoms) != len(lines) - 2):
+                raise MoleculeReadError(f"First line should be number of atoms, got: {lines[0]} != {len(lines) - 2}")
+            data_line, *lines = lines[1:]
         else:
-            raise MoleculeReadError(f"First line of EXTXYZ should be only an int denoting number of atoms. Got {lines[0].split()}")
+            raise MoleculeReadError(f"First line should be an integer denoting atom count. Got {lines[0].split()}")
-        # ensure second line contains key-value pairs
-        if "=" not in lines[0]:
-            raise MoleculeReadError(f"Invalid property line, got {lines[0]}")
+        metadata = parse_extxyz_comment_line(data_line)
-        cell = parse_comment_line(lines[0])
-        lines = lines[1:]
+        T = TypeVar("T")
-        try:
-            return cls(atoms=[Atom.from_xyz(line) for line in lines], cell=cell, charge=charge, multiplicity=multiplicity)
-        except (ValueError, ValidationError) as e:
-            raise MoleculeReadError("Error reading molecule from extxyz") from e
+        def metadata_optional_get(key: str, value: T | None, default: T) -> T:
+            """Set key to default if not found in metadata"""
+            if value is None:
+                return metadata.get(key, default)  # type: ignore [return-value]
+            return value
+        charge = metadata_optional_get("total_charge", charge, 0)
+        multiplicity = metadata_optional_get("multiplicity", multiplicity, 1)
+        cell = cell or metadata.get("cell")
+        energy = metadata.get("energy", None)
+        force_idx = None
+        if properties := metadata.get("properties", "").split(":"):
+            if properties[0].lower() != "species":
+                raise MoleculeReadError(f"Invalid or missing 'Properties' field in EXTXYZ, got: {properties}")
+            # Identify column indices for position and force data
+            pos_idx = None
+            current_idx = 0  # Start after 'species:S'
+            while current_idx < len(properties):
+                if properties[current_idx].lower() == "pos" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
+                    pos_idx = current_idx
+                elif properties[current_idx].lower() == "forces" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
+                    force_idx = current_idx
+                current_idx += 3
+            if pos_idx is None:
+                raise MoleculeReadError("No position data ('pos:R:3') found in Properties field.")
+        def parse_line_atoms(line: str) -> Atom:
+            symbol, sx, sy, sz, *_ = line.split()
+            atomic_number = SYMBOL_ELEMENT[symbol.title()]
+            x, y, z = map(float, (sx, sy, sz))
+            return Atom(atomic_number=atomic_number, position=(x, y, z))
+        def parse_line_with_grad(line: str) -> tuple[Atom, Vector3D]:
+            symbol, sx, sy, sz, sgx, sgy, sgz, *_ = line.split()
+            atomic_number = SYMBOL_ELEMENT[symbol.title()]
+            x, y, z = map(float, (sx, sy, sz))
+            gx, gy, gz = map(float, (sgx, sgy, sgz))
+            return (
+                Atom(atomic_number=atomic_number, position=(x, y, z)),
+                (-gx, -gy, -gz),
+            )
+        atoms: list[Atom]
+        gradients: list[Vector3D] | None
+        if force_idx is not None:
+            atoms, gradients = zip(*map(parse_line_with_grad, lines), strict=True)  # type: ignore [assignment]
+        else:
+            atoms = [parse_line_atoms(line) for line in lines]
+            gradients = None
+        return cls(atoms=atoms, cell=cell, charge=charge, multiplicity=multiplicity, energy=energy, gradient=gradients)
     @classmethod
     def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0) -> Self:
@@ -312,43 +388,62 @@ def _embed_rdkit_mol(rdkm: RdkitMol) -> RdkitMol:
     return rdkm
-def parse_comment_line(line: str) -> PeriodicCell:
-    """
-    currently only supporting lattice and porperites fields from comment line
-    modify in future to support other fields from comment from_xyz_lines
-    ex: name, mulitplicity, charge, etc.
+class EXTXYZMetadata(TypedDict, total=False):
+    properties: Any
+    total_charge: int
+    multiplicity: int
+    energy: float
+    cell: PeriodicCell
+def parse_extxyz_comment_line(line: str) -> EXTXYZMetadata:
     """
-    cell = None
+    Parse the comment line of an EXTXYZ file, extracting lattice, properties, and metadata.
+    Supports:
+    - Lattice vectors (cell information)
+    - Properties field (species, positions, forces, etc.)
+    - Other metadata fields like charge, multiplicity, energy, etc.
+    :param line: comment line from an EXTXYZ file
+    :return: parsed properties
+    >>> parse_extxyz_comment_line('Lattice="6.0 0.0 0.0 6.0 0.0 0.0 6.0 0.0 0.0"Properties=species:S:1:pos:R:3')
+    {'cell': PeriodicCell(lattice_vectors=((6.0, 0.0, 0.0), (6.0, 0.0, 0.0), (6.0, 0.0, 0.0)), is_periodic=(True, True, True), volume=0.0), 'properties': 'species:S:1:pos:R:3'}
+    """  # noqa: E501
     # Regular expression to match key="value", key='value', or key=value
     pattern = r"(\S+?=(?:\".*?\"|\'.*?\'|\S+))"
     pairs = re.findall(pattern, line)
-    prop_dict = {}
+    prop_dict: EXTXYZMetadata = {}
     for pair in pairs:
         key, value = pair.split("=", 1)
-        if key.lower() == "lattice":
-            value = value.strip("'\"").split()
-            if len(value) != 9:
-                raise MoleculeReadError(f"Lattice should have 9 entries got {len(value)}")
+        key = key.lower().strip()
+        value = value.strip("'\"")
+        if key == "lattice":
+            lattice_values = value.split()
+            if len(lattice_values) != 9:
+                raise MoleculeReadError(f"Lattice should have 9 entries, got {len(lattice_values)}")
-            # Convert the value to a 3x3 tuple of tuples of floats
             try:
-                cell = tuple(tuple(map(float, value[i : i + 3])) for i in range(0, 9, 3))
+                cell = tuple(tuple(map(float, lattice_values[i : i + 3])) for i in range(0, 9, 3))
             except ValueError:
-                raise MoleculeReadError(f"Lattice should be floats, got {value}")
+                raise MoleculeReadError(f"Lattice should be floats, got {lattice_values}")
-            prop_dict[key] = value
+            prop_dict["cell"] = PeriodicCell(lattice_vectors=cell)
-        elif key.lower() == "properties":
-            if value.lower() != "species:s:1:pos:r:3":
-                raise MoleculeReadError(f"Only accepting properties of form species:S:1:pos:R:3, got {value}")
-            prop_dict[key] = value
-        else:
-            raise MoleculeReadError(f"Currently only accepting lattice and propery keys. Got {key}")
+        elif key == "properties":
+            prop_dict["properties"] = value
-    if cell is None:
-        raise MoleculeReadError("Lattice field is required but missing.")
+        elif key == "total_charge":
+            prop_dict["total_charge"] = int(value)
+        elif key == "multiplicity":
+            prop_dict["multiplicity"] = int(value)
+        elif key == "energy":
+            prop_dict["energy"] = float(value)
+        else:
+            prop_dict[key] = value  # type: ignore [literal-required]
-    if "properties" not in [key.lower() for key in prop_dict.keys()]:
-        raise MoleculeReadError(f"Property field is required, got keys {prop_dict.keys()}")
-    return PeriodicCell(lattice_vectors=cell)
+    return prop_dict

stjames/pdb.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import re
 from datetime import date, datetime
 from pathlib import Path
 from typing import Any, Literal
@@ -204,6 +205,12 @@ def fetch_pdb(code: str) -> PDB:
     return PDB.model_validate(astj.fetch(code, data_dict=True))
+def fetch_pdb_from_mmcif(code: str) -> PDB:
+    """Fetch a pdb from the Protein Data Bank."""
+    code += ".cif"
+    return PDB.model_validate(astj.fetch(code, data_dict=True))
 def pdb_from_pdb_filestring(pdb: str) -> PDB:
     """Read a PDB from a string."""
     return PDB.model_validate(pdb_dict_to_data_dict(pdb_string_to_pdb_dict(pdb)))
@@ -270,7 +277,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id=this_chain_id,
                         res_name=residue.name,
-                        res_num=int(_residue_id[2:]),
+                        res_num=_residue_id[2:],
                         alt_loc=atom.alt_loc or "",
                     )
                     pdb_lines.append(line)
@@ -280,7 +287,7 @@ def pdb_object_to_pdb_filestring(
                             atom=atom,
                             chain_id=this_chain_id,
                             res_name=residue.name,
-                            res_num=int(_residue_id[2:]),
+                            res_num=_residue_id[2:],
                             alt_loc=atom.alt_loc or "",
                         )
                         pdb_lines.append(line)
@@ -302,7 +309,7 @@ def pdb_object_to_pdb_filestring(
                     atom=atom,
                     chain_id=chain_id_for_np,
                     res_name=nonpoly.name,
-                    res_num=int(_np_id[2:]),
+                    res_num=_np_id[2:],
                 )
                 pdb_lines.append(line)
                 if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -311,7 +318,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id=chain_id_for_np,
                         res_name=nonpoly.name,
-                        res_num=int(_np_id[2:]),
+                        res_num=_np_id[2:],
                     )
                     pdb_lines.append(line)
@@ -324,7 +331,7 @@ def pdb_object_to_pdb_filestring(
                     atom=atom,
                     chain_id=_w_id[0],  # Or you can use water.polymer if set
                     res_name="HOH",
-                    res_num=int(_w_id[2:]),  # or an incrementing value
+                    res_num=_w_id[2:],  # or an incrementing value
                 )
                 pdb_lines.append(line)
                 if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -333,7 +340,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id=_w_id[0],
                         res_name="HOH",
-                        res_num=int(_w_id[2:]),
+                        res_num=_w_id[2:],
                     )
                     pdb_lines.append(line)
@@ -351,7 +358,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id="B",
                         res_name="BRN",  # or branched_obj.get("name", "BRN")
-                        res_num=1,
+                        res_num="1",
                     )
                     pdb_lines.append(line)
                     if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -360,7 +367,7 @@ def pdb_object_to_pdb_filestring(
                             atom=atom,
                             chain_id="B",
                             res_name="BRN",
-                            res_num=1,
+                            res_num="1",
                         )
                         pdb_lines.append(line)
@@ -401,7 +408,7 @@ def _format_atom_line(
     atom: PDBAtom,
     chain_id: str,
     res_name: str,
-    res_num: int | None,
+    res_num: str | None,
     alt_loc: str = "",
 ) -> str:
     """
@@ -417,7 +424,15 @@ def _format_atom_line(
     alt_loc_char = alt_loc if alt_loc else " "
     residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
     chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
-    residue_num = res_num if res_num is not None else 1
+    residue_num_str = "1"
+    insertion_code = " "
+    if res_num:
+        match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
+        if match:
+            residue_num_str, insertion_code = match.groups()
+            insertion_code = insertion_code if insertion_code != "" else " "
+    residue_num = int(residue_num_str)
     # Format charge: PDB uses e.g. " 2-", " 1+" in columns 79-80
     # If your model stores charges differently, adapt as needed.
@@ -445,7 +460,8 @@ def _format_atom_line(
         f"{residue_name:>3}"  # residue name (columns 18-20)
         f" {chain_char}"  # chain ID (column 22)
         f"{residue_num:4d}"  # residue sequence number (columns 23-26)
-        f"    "  # columns 27-30 (insertion code plus spacing)
+        f"{insertion_code}"
+        f"   "  # columns 27-30 (spacing)
         f"{atom.x:8.3f}"  # x (columns 31-38)
         f"{atom.y:8.3f}"  # y (columns 39-46)
         f"{atom.z:8.3f}"  # z (columns 47-54)
@@ -463,7 +479,7 @@ def _format_anisou_line(
     atom: PDBAtom,
     chain_id: str,
     res_name: str,
-    res_num: int | None,
+    res_num: str | None,
     alt_loc: str = "",
 ) -> str:
     """
@@ -479,7 +495,15 @@ def _format_anisou_line(
     alt_loc_char = alt_loc if alt_loc else " "
     residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
     chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
-    residue_num = res_num if res_num is not None else 1
+    residue_num_str = "1"
+    insertion_code = " "
+    if res_num:
+        match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
+        if match:
+            residue_num_str, insertion_code = match.groups()
+            insertion_code = insertion_code if insertion_code != "" else " "
+    residue_num = int(residue_num_str)
     chg = ""
     if atom.charge and abs(atom.charge) > 0:
@@ -522,7 +546,8 @@ def _format_anisou_line(
         f"{residue_name:>3}"  # residue name (columns 18-20)
         f" {chain_char}"  # chain ID (column 22)
         f"{residue_num:4d}"  # residue sequence number (columns 23-26)
-        f"  "  # columns 27-28 (insertion code plus spacing)
+        f"{insertion_code}"
+        f" "  # columns 27-28 (plus spacing)
         f"{aniso_lines}"
         f"      "  # columns 70-76 (padding)
         f"{atom.element:>2}"  # element (columns 77-78)

stjames/workflows/__init__.py CHANGED Viewed

@@ -12,6 +12,7 @@ from .docking import *
 from .electronic_properties import *
 from .fukui import *
 from .hydrogen_bond_basicity import *
+from .ion_mobility import *
 from .irc import *
 from .molecular_dynamics import *
 from .multistage_opt import *
@@ -34,6 +35,7 @@ WORKFLOW_NAME = Literal[
     "electronic_properties",
     "fukui",
     "hydrogen_bond_basicity",
+    "ion_mobility",
     "irc",
     "molecular_dynamics",
     "multistage_opt",
@@ -56,6 +58,7 @@ WORKFLOW_MAPPING: dict[WORKFLOW_NAME, Workflow] = {
     "electronic_properties": ElectronicPropertiesWorkflow,  # type: ignore [dict-item]
     "fukui": FukuiIndexWorkflow,  # type: ignore [dict-item]
     "hydrogen_bond_basicity": HydrogenBondBasicityWorkflow,  # type: ignore [dict-item]
+    "ion_mobility": IonMobilityWorkflow,  # type: ignore [dict-item]
     "irc": IRCWorkflow,  # type: ignore [dict-item]
     "molecular_dynamics": MolecularDynamicsWorkflow,  # type: ignore [dict-item]
     "multistage_opt": MultiStageOptWorkflow,  # type: ignore [dict-item]

stjames/workflows/admet.py CHANGED Viewed

@@ -1,18 +1,39 @@
 """ADME-Tox property prediction workflow."""
-from .workflow import MoleculeWorkflow
+import warnings
+from typing import Self
+from pydantic import model_validator
-class ADMETWorkflow(MoleculeWorkflow):
+from ..molecule import Molecule
+from .workflow import MoleculeWorkflow, SMILESWorkflow
+class ADMETWorkflow(SMILESWorkflow, MoleculeWorkflow):
     """
     A workflow for predicting ADME-Tox properties.
     Inherited:
-    :param initial_molecule: Molecule of interest
+    :param initial_smiles: SMILES string of molecule (mutually exclusive with initial_molecule)
+    :param initial_molecule: Molecule of interest (deprecated)
     :param mode: Mode for workflow (currently unused)
     New:
     :param properties: predicted properties
     """
+    initial_smiles: str = ""
+    initial_molecule: Molecule | None = None  # type: ignore [assignment]  # Deprecated
     properties: dict[str, float | int] | None = None
+    @model_validator(mode="after")
+    def validate_mol_input(self) -> Self:
+        """Ensure that only one of initial_molecule or initial_smiles is set."""
+        if not (bool(self.initial_smiles) ^ bool(self.initial_molecule)):
+            raise ValueError("Can only set one of initial_molecule should and initial_smiles")
+        if self.initial_molecule is not None:
+            warnings.warn(DeprecationWarning("initial_molecule is deprecated. Use initial_smiles instead."))
+        return self

stjames/workflows/ion_mobility.py ADDED Viewed

@@ -0,0 +1,36 @@
+"""Ion mobility workflow."""
+from ..types import UUID
+from .workflow import MoleculeWorkflow
+class IonMobilityWorkflow(MoleculeWorkflow):
+    """
+    Workflow for calculating hydrogen bond basicity.
+    Inherited:
+    :param initial_molecule: Molecule of interest
+    :param mode: Mode for workflow (currently unused)
+    New:
+    :param do_csearch: whether to perform a conformational search
+    :param do_optimization: whether to perform an optimization
+    Results:
+    :param conformer_ccs: the collision cross section (Å**2) per conformer
+    :param conformer_ccs_stdev: the uncertainty in the same
+    :param conformer_weights: the Boltzmann weights at RT
+    :param average_ccs: the Boltzmann-weighted CCS for the ensemble
+    :param average_ccs_stdev: the uncertainty in the same
+    """
+    do_csearch: bool = True
+    do_optimization: bool = True
+    conformers: list[UUID] = []
+    conformer_ccs: list[float] = []
+    conformer_ccs_stdev: list[float] = []
+    boltzmann_weights: list[float] = []
+    average_ccs: float | None = None
+    average_ccs_stdev: float | None = None

stjames/workflows/macropka.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""pKa workflow."""
+from typing import Annotated, Self
+from pydantic import AfterValidator, model_validator
+from ..base import Base, round_float
+from ..types import round_list
+from .workflow import SMILESWorkflow
+class MacropKaMicrostate(Base):
+    """
+    A microstate for pKa calculations.
+    :param smiles: SMILES string for this conformer
+    :param energy: free energy of this conformer
+    :param charge: the total charge
+    """
+    smiles: str
+    energy: Annotated[float, AfterValidator(round_float(3))]  # free energy
+    charge: int
+class MacropKaValue(Base):
+    """
+    Represents a change in pKa.
+    :param initial_charge: the charge of the initial state
+    :param final_charge: the charge of the final state
+    :param pKa: the pKa for the transition
+    """
+    initial_charge: int
+    final_charge: int
+    pKa: Annotated[float, AfterValidator(round_float(3))]
+class MacropKaWorkflow(SMILESWorkflow):
+    """
+    Workflow for calculating pKa.
+    Inherited:
+    :param initial_smiles:
+    New:
+    :param temperature: the temperature, in K
+    :param min_pH: for precomputed microstate weights
+    :param max_pH: for precomputed microstate weights
+    Results:
+    :param microstates: microstates
+    :param pKa_values: macroscopic pKa values
+    :param microstate_weights_by_pH: precompute the % of different microstates
+    """
+    temperature: Annotated[float, AfterValidator(round_float(3))] = 298.0
+    min_pH: Annotated[float, AfterValidator(round_float(3))] = 0.0
+    max_pH: Annotated[float, AfterValidator(round_float(3))] = 14.0
+    microstates: list[MacropKaMicrostate] = []
+    pKa_values: list[MacropKaValue] = []
+    microstate_weights_by_pH: dict[float, Annotated[list[float], AfterValidator(round_list(6))]] = {}
+    @model_validator(mode="after")
+    def check_weights(self) -> Self:
+        for weights in self.microstate_weights_by_pH.values():
+            if len(weights) != len(self.microstates):
+                raise ValueError("Length of microstate weights doesn't match!")
+        return self

stjames/workflows/scan.py CHANGED Viewed

@@ -4,7 +4,7 @@ from typing import Annotated
 import numpy as np
 from numpy.typing import NDArray
-from pydantic import AfterValidator
+from pydantic import AfterValidator, field_validator
 from ..base import Base, round_optional_float
 from ..molecule import Molecule
@@ -62,15 +62,33 @@ class ScanWorkflow(MoleculeWorkflow):
     :param mode: Mode for workflow (currently unused)
     New:
-    :param scan_settings: information about what coordinate to scan
+    :param scan_settings: what coordinate(s) to scan; if more than one, all will be performed simultaneously and should have the same number of steps
+    :param scan_settings_2d: what additional coordinate(s) to scan; makes a grid with `scan_settings`
+    :param wavefront propagation: whether to use wavefront propagation (10.1063/5.0009232) for more expensive but smoother scans
     :param calc_settings: settings for the calculation
     :param calc_engine: engine to use for the calculation
     :param scan_points: points along the scan
     """
-    scan_settings: ScanSettings
+    scan_settings: ScanSettings | list[ScanSettings]
+    scan_settings_2d: ScanSettings | list[ScanSettings] = []
     calc_settings: Settings
     calc_engine: str
+    wavefront_propagation: bool = True
     # UUIDs of scan points
     scan_points: list[UUID | None] = []
+    @field_validator("scan_settings", "scan_settings_2d", mode="after")
+    @classmethod
+    def validate_scan_settings(cls, val: ScanSettings | list[ScanSettings]) -> list[ScanSettings]:
+        """Ensure that scan_settings is a list, and that every list item has the same number of steps."""
+        if isinstance(val, ScanSettings):
+            val = [val]
+        for ss in val:
+            if ss.num != val[0].num:
+                raise ValueError("Concerted scan settings must have same number of steps!")
+        return val

{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: stjames
-Version: 0.0.64
+Version: 0.0.66
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
 Requires-Dist: numpy
 Requires-Dist: requests
 Requires-Dist: rdkit
+Dynamic: license-file
 # stjames

{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/RECORD RENAMED Viewed

@@ -13,9 +13,9 @@ stjames/int_settings.py,sha256=5HXp8opt5ZyY1UpmfaK7NVloWVLM5jkG0elEEqpVLUo,896
 stjames/message.py,sha256=Rq6QqmHZKecWxYH8fVyXmuoCCPZv8YinvgykSeorXSU,216
 stjames/method.py,sha256=5hBHk2xQLpxZ52LwJ9FHWaqQMdFKnsbQEOxaVe6O4Go,2321
 stjames/mode.py,sha256=xw46Cc7f3eTS8i35qECi-8DocAlANhayK3w4akD4HBU,496
-stjames/molecule.py,sha256=2BRXYKtkm5ztYiywyC2S__Zu4a-QoDEgb7LR7F4xHvs,14268
+stjames/molecule.py,sha256=4dakMkn-_I5bSWsijLLY0tn5NkBEuZhmtYDj-MDSJE0,17987
 stjames/opt_settings.py,sha256=gxXGtjy9l-Q5Wen9eO6T6HHRCuS8rfOofdVQIJj0JcI,550
-stjames/pdb.py,sha256=-i0H029NEX-pcyCqdVyq7D62ZDvmUPWK7l83WdoDmpk,25759
+stjames/pdb.py,sha256=Ens-RNO8s1rxJzjWFXM4Q1-7s8wyXzEMPZrc6o4QDzk,26465
 stjames/periodic_cell.py,sha256=eV_mArsY_MPEFSrFEsTC-CyCc6V8ITAXdk7yhjjNI7M,1080
 stjames/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 stjames/scf_settings.py,sha256=WotVgVrayQ_8PUHP39zVtG7iLT9PV41lpzruttFACP8,2356
@@ -27,7 +27,7 @@ stjames/thermochem_settings.py,sha256=ZTLz31v8Ltutde5Nfm0vH5YahWjcfFWfr_R856Kffx
 stjames/types.py,sha256=rs2CdpkruIfU-PS98rjr9HAJNFGdZDB_zl-u3wa5rAs,4092
 stjames/atomium_stjames/__init__.py,sha256=gZkzC7i9D_fmWUTN55gtygITo3-qvJUda5CXLR0jyCQ,306
 stjames/atomium_stjames/data.py,sha256=-hzwBpTHq5JetsOVyopUJswKnKAkMtJ_XkONxjXVupU,5675
-stjames/atomium_stjames/mmcif.py,sha256=llbJ65p2B-aZN31-E_ODVDmrVeBoSw9y_Mg5XjyQvTA,26755
+stjames/atomium_stjames/mmcif.py,sha256=B5t_gxvMTG8OblXUWF0dnKgOHhn-8bufOv_a7ccbiqU,27081
 stjames/atomium_stjames/pdb.py,sha256=C2mEcBDDrnoXD9ZCMIH2uJpjiWPJy6ktXq8IFZsrQKM,22482
 stjames/atomium_stjames/utilities.py,sha256=-YtM7sRMvMk0wWrC3svWUWH4CGI0NtY77nXsg9tjHfc,4964
 stjames/data/__init__.py,sha256=O59Ksp7AIqwOELCWymfCx7YeBzwNOGCMlGQi7tNLqiE,24
@@ -37,8 +37,8 @@ stjames/data/isotopes.json,sha256=5ba8QnLrHD_Ypv2xekv2cIRwYrX3MQ19-1FOFtt0RuU,83
 stjames/data/nist_isotopes.json,sha256=d5DNk1dX0iB1waEYIRR6JMHuA7AuYwSBEgBvb4EKyhM,14300
 stjames/data/read_nist_isotopes.py,sha256=y10FNjW43QpC45qib7VHsIghEwT7GG5rsNwHdc9osRI,3309
 stjames/data/symbol_element.json,sha256=vl_buFusTqBd-muYQtMLtTDLy2OtBI6KkBeqkaWRQrg,1186
-stjames/workflows/__init__.py,sha256=5KX0IcuYElj8K3qE2c-XY8dL-vPdG87US7ErfZlyK88,2293
-stjames/workflows/admet.py,sha256=m8yGWe-UeYK5F7TOeNsQMPTzdWL-aaRSTQsyO7SVa6k,421
+stjames/workflows/__init__.py,sha256=sHKzK6ZtVt8TkaX3JoIrg46SVK9rDi_elA7qCIAqBpE,2410
+stjames/workflows/admet.py,sha256=h8ph6oeRCxU3-_jqRRWPg3RZcheu9JzCHiWqSC9VYKY,1296
 stjames/workflows/basic_calculation.py,sha256=ZX3KwhfyyCTjc2ougQIL4If7gtwZP9WjqpL45mBquW0,573
 stjames/workflows/bde.py,sha256=hdTjwma5L9SrU5F5r6dB1ruB_B6buBUtZHf2sanNW2k,9802
 stjames/workflows/conformer.py,sha256=18aO6ngMBeGAmQkBdLGCCHr398RIYr1v2hD2IT1u4cc,3005
@@ -48,18 +48,20 @@ stjames/workflows/docking.py,sha256=GCW_-JeEZcMXKZ9EQFOxWUYRo0jsbzwIv10aSz8KuaQ,
 stjames/workflows/electronic_properties.py,sha256=GT3-NC7w-dbcOJ-3AzJ7LgzH6frTbiH2Iyb9BCa-SvY,4112
 stjames/workflows/fukui.py,sha256=e7CF7Mp2Dt1JTipQx-Sz_37W1urL-iRpjXY-9ItSvhM,1268
 stjames/workflows/hydrogen_bond_basicity.py,sha256=XDpHEluw6DQ9Zk5g2Je2a81HqIkqPglZ-6f2YZnd4Bc,1159
+stjames/workflows/ion_mobility.py,sha256=e6XSidrud5qSkrAcjzOzgHaf-G09JoP09V76myjdyjc,1097
 stjames/workflows/irc.py,sha256=ZP7icylW8rgo_Uh7h3bmyumn0ru1IyF-61nP5Jnmq3M,3402
+stjames/workflows/macropka.py,sha256=d3jUKZIUU7ifcMCB6KuTEpP91Pl9sQA0WAmjnODbp6g,2058
 stjames/workflows/molecular_dynamics.py,sha256=kxugE73Ntzpj-xpJSoQ1EwGzXXdvi_NTyeP4913EVwE,3173
 stjames/workflows/multistage_opt.py,sha256=pPLAZDztHd37q8cxCUkdq8EzOFyrTzZJHNfDV5auiHs,13638
 stjames/workflows/pka.py,sha256=j3vBh2YM3nJzJ1XJKPsmYahRCeaU9n3P-G-u9_moaFw,2065
 stjames/workflows/redox_potential.py,sha256=7S18t9Y3eynSnA3lZbRlvLfdbgeBopdiigLzt1zxg5c,3871
-stjames/workflows/scan.py,sha256=vGS1wWMpMSogb63DEED6U6oHsLgV0D2hXVQg2UWWJgs,1913
+stjames/workflows/scan.py,sha256=Lph2VhsxJMpzY-wtmUV7U1TOS7mIwjgkLniaipGDo8I,2899
 stjames/workflows/solubility.py,sha256=kGfVyPPGDLRpf2j6dSY7woCkfsoXSbUzdSImA4mcMpw,1898
 stjames/workflows/spin_states.py,sha256=0degmE-frovgoXweshZyjfjqL7nkbaFoO9YoJhvQnaI,4748
 stjames/workflows/tautomer.py,sha256=7eYKziGPg8Km6lfowTzSkgJfJ4SHUPrAmnTf8Bi-SB0,1164
 stjames/workflows/workflow.py,sha256=sk2BUz59wdIkT_EyKOnMt5woNrjo3aHVK38cU8x8I7Q,1423
-stjames-0.0.64.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
-stjames-0.0.64.dist-info/METADATA,sha256=33n-Ix3Rfcue2dKm4SUqmWow8MeXv1_SF6UcpfOPYiQ,1672
-stjames-0.0.64.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-stjames-0.0.64.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
-stjames-0.0.64.dist-info/RECORD,,
+stjames-0.0.66.dist-info/licenses/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
+stjames-0.0.66.dist-info/METADATA,sha256=vyqa0iG3iF1_oGzGK_R6KhwKrP1HZQVUKXB3MP91CSU,1694
+stjames-0.0.66.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
+stjames-0.0.66.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
+stjames-0.0.66.dist-info/RECORD,,

{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (77.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{stjames-0.0.64.dist-info → stjames-0.0.66.dist-info}/top_level.txt RENAMED Viewed

File without changes

stjames 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl

Potentially problematic release.

stjames 0.0.64__py3-none-any.whl → 0.0.66__py3-none-any.whl