PyPI - stjames - Versions diffs - 0.0.76__tar.gz → 0.0.78__tar.gz - Mend

stjames 0.0.76.tar.gz → 0.0.78.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of stjames might be problematic. Click here for more details.

Files changed (79)

{stjames-0.0.76/stjames.egg-info → stjames-0.0.78}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: stjames
-Version: 0.0.76
+Version: 0.0.78
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
 Requires-Dist: numpy
 Requires-Dist: requests
 Requires-Dist: rdkit
+Requires-Dist: more-itertools
 Dynamic: license-file
 # stjames

{stjames-0.0.76 → stjames-0.0.78}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "stjames"
-version = "0.0.76"
+version = "0.0.78"
 description = "standardized JSON atom/molecule encoding scheme"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -13,6 +13,7 @@ dependencies = [
     "numpy",
     "requests",
     "rdkit",
+    "more-itertools",
 ]
 [build-system]

{stjames-0.0.76 → stjames-0.0.78}/stjames/atomium_stjames/pdb.py RENAMED Viewed

@@ -3,7 +3,7 @@
 import re
 from datetime import datetime
 from itertools import chain, groupby
-from typing import Any, Callable
+from typing import Any, Callable, TypedDict
 from .data import CODES
 from .mmcif import add_secondary_structure_to_polymers
@@ -476,15 +476,17 @@ def add_atom_to_polymer(line: str, model: dict[Any, Any], chain_id: str, res_id:
     :param str res_id: the molecule ID to add to.
     :param dict aniso_dict: lookup dictionary for anisotropy information."""
+    atom = atom_line_to_dict(line, aniso_dict)
     try:
-        model["polymer"][chain_id]["residues"][res_id]["atoms"][int(line[6:11])] = atom_line_to_dict(line, aniso_dict)
+        model["polymer"][chain_id]["residues"][res_id]["atoms"][int(line[6:11])] = atom
     except Exception:
         name = line[17:20].strip()
         try:
             model["polymer"][chain_id]["residues"][res_id] = {
                 "name": name,
                 "full_name": full_names.get(name),
-                "atoms": {int(line[6:11]): atom_line_to_dict(line, aniso_dict)},
+                "atoms": {int(line[6:11]): atom},
                 "number": len(model["polymer"][chain_id]["residues"]) + 1,
             }
         except Exception:
@@ -495,7 +497,7 @@ def add_atom_to_polymer(line: str, model: dict[Any, Any], chain_id: str, res_id:
                 "residues": {
                     res_id: {
                         "name": line[17:20].strip(),
-                        "atoms": {int(line[6:11]): atom_line_to_dict(line, aniso_dict)},
+                        "atoms": {int(line[6:11]): atom},
                         "number": 1,
                         "full_name": None,
                     }
@@ -511,10 +513,11 @@ def add_atom_to_non_polymer(line: str, model: dict[Any, Any], res_id: str, aniso
     :param dict model: the model to update.
     :param str res_id: the molecule ID to add to.
     :param dict aniso_dict: lookup dictionary for anisotropy information."""
+    atom = atom_line_to_dict(line, aniso_dict)
     key = "water" if line[17:20] in ["HOH", "DOD"] else "non_polymer"
     try:
-        model[key][res_id]["atoms"][int(line[6:11])] = atom_line_to_dict(line, aniso_dict)
+        model[key][res_id]["atoms"][int(line[6:11])] = atom
     except Exception:
         name = line[17:20].strip()
         model[key][res_id] = {
@@ -522,18 +525,55 @@ def add_atom_to_non_polymer(line: str, model: dict[Any, Any], res_id: str, aniso
             "full_name": full_names.get(name),
             "internal_id": line[21],
             "polymer": line[21],
-            "atoms": {int(line[6:11]): atom_line_to_dict(line, aniso_dict)},
+            "atoms": {int(line[6:11]): atom},
         }
-def atom_line_to_dict(line: str, aniso_dict: dict[Any, Any]) -> dict[str, Any]:
-    """Converts an ATOM or HETATM record to an atom dictionary.
+def guess_element_from_name(atom_name: str) -> str | None:
+    atom_name = atom_name.strip()
+    if not atom_name:
+        return None
+    # Case 1: Atom name starts with a digit (e.g. '1HG1') → element is second character
+    if atom_name[0].isdigit() and len(atom_name) > 1:
+        return atom_name[1].upper()
+    # # Case 2: Atom name starts with a letter
+    # if len(atom_name) >= 2 and atom_name[:2].isalpha():
+    #     possible = atom_name[:2].strip().capitalize()
+    #     # Check for common two-letter elements
+    #     if possible in {"Cl", "Br", "Fe", "Mg", "Zn", "Ca", "Na", "Cu", "Mn", "Co", "Ni"}:
+    #         return possible
+    # Fallback to first letter
+    return atom_name[0].upper()
+class AtomDict(TypedDict, total=False):
+    """A dictionary representing an atom in a PDB file."""
+    occupancy: float | None
+    bvalue: float | None
+    charge: int | None
+    anisotropy: float | None
+    is_hetatm: bool | None
+    name: str | None
+    alt_loc: str | None
+    x: float
+    y: float
+    z: float
+    element: str | None
+def atom_line_to_dict(line: str, aniso_dict: dict[Any, Any]) -> AtomDict:
+    """
+    Converts an ATOM or HETATM record to an atom dictionary.
     :param str line: the record to convert.
     :param dict aniso_dict: the anisotropy dictionary to use.
-    :rtype: ``dict``"""
+    :return: atom dictionary
+    """
-    a = {"occupancy": 1, "bvalue": None, "charge": 0, "anisotropy": aniso_dict.get(int(line[6:11].strip()), None)}
+    a: AtomDict = {"occupancy": 1, "bvalue": None, "charge": 0, "anisotropy": aniso_dict.get(int(line[6:11].strip()), None)}
     a["is_hetatm"] = line[:6] == "HETATM"
     a["name"] = line[12:16].strip() or None
     a["alt_loc"] = line[16].strip() or None
@@ -545,6 +585,11 @@ def atom_line_to_dict(line: str, aniso_dict: dict[Any, Any]) -> dict[str, Any]:
     if line[60:66].strip():
         a["bvalue"] = float(line[60:66].strip())
     a["element"] = line[76:78].strip() or None
+    if not a["element"]:
+        if not a["name"]:
+            raise ValueError("Cannot guess element from empty name.")
+        assert isinstance(a["name"], str)
+        a["element"] = guess_element_from_name(a["name"])
     if line[78:80].strip():
         try:
             a["charge"] = int(line[78:80].strip())
@@ -561,6 +606,7 @@ def atom_line_to_dict(line: str, aniso_dict: dict[Any, Any]) -> dict[str, Any]:
         a["occupancy"] = None
     if a["name"] == a["element"]:
         a["name"] = None
     return a

{stjames-0.0.76 → stjames-0.0.78}/stjames/molecule.py RENAMED Viewed

@@ -2,6 +2,7 @@ import re
 from pathlib import Path
 from typing import Annotated, Any, Iterable, Optional, Self, Sequence, TypeAlias, TypedDict, TypeVar
+import numpy as np
 import pydantic
 from pydantic import AfterValidator, NonNegativeInt, PositiveInt, ValidationError
 from rdkit import Chem
@@ -75,15 +76,41 @@ class Molecule(Base):
     def __len__(self) -> int:
         return len(self.atoms)
-    def distance(self, atom1: PositiveInt, atom2: PositiveInt) -> float:
+    def distance(self, i: PositiveInt, j: PositiveInt) -> float:
         r"""
-        Get the distance between atoms.
+        Calculate the distance between atoms.
         >>> mol = Molecule.from_xyz("H 0 1 0\nH 0 0 1")
         >>> mol.distance(1, 2)
         1.4142135623730951
         """
-        return sum((q2 - q1) ** 2 for q1, q2 in zip(self.atoms[atom1 - 1].position, self.atoms[atom2 - 1].position)) ** 0.5  # type: ignore [no-any-return,unused-ignore]
+        return sum((q2 - q1) ** 2 for q1, q2 in zip(self.atoms[i - 1].position, self.atoms[j - 1].position)) ** 0.5  # type: ignore [no-any-return,unused-ignore]
+    def angle(self, i: PositiveInt, j: PositiveInt, k: PositiveInt, degrees: bool = True) -> float:
+        r"""
+        Calculate the angle between three atoms.
+        >>> Molecule.from_xyz("H 0 0 0\nO 0 0 1\nH 0 1 1").angle(1, 2, 3)
+        90.0
+        """
+        return angle(self.coordinates[i - 1], self.coordinates[j - 1], self.coordinates[k - 1], degrees=degrees)
+    def dihedral(self, i: int, j: int, k: int, l: int, degrees: bool = True, positive_domain: bool = True) -> float:
+        r"""
+        Calculate the dihedral angle between four atoms.
+        >>> Molecule.from_xyz("H 0 0 0\nO 0 0 1\nO 0 1 1\nH 1 1 1").dihedral(1, 2, 3, 4)
+        270.0
+        """
+        return dihedral(
+            self.coordinates[i - 1],
+            self.coordinates[j - 1],
+            self.coordinates[k - 1],
+            self.coordinates[l - 1],
+            degrees=degrees,
+            positive_domain=positive_domain,
+        )
     @property
     def coordinates(self) -> Vector3DPerAtom:
@@ -345,19 +372,17 @@ class Molecule(Base):
         return cls(atoms=atoms, cell=cell, charge=charge, multiplicity=multiplicity, energy=energy, gradient=gradients)
     @classmethod
-    def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0) -> Self:
+    def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0, multiplicity: int = 1) -> Self:
         if len(rdkm.GetConformers()) == 0:
             rdkm = _embed_rdkit_mol(rdkm)
-        atoms = []
         atomic_numbers = [atom.GetAtomicNum() for atom in rdkm.GetAtoms()]  # type: ignore [no-untyped-call, unused-ignore]
-        geom = rdkm.GetConformers()[cid].GetPositions()
-        for i in range(len(atomic_numbers)):
-            atoms.append(Atom(atomic_number=atomic_numbers[i], position=geom[i]))
+        atoms = [
+            Atom(atomic_number=atom, position=xyz)  # keep open
+            for atom, xyz in zip(atomic_numbers, rdkm.GetConformers()[cid].GetPositions(), strict=True)
+        ]
         charge = Chem.GetFormalCharge(rdkm)
-        multiplicity = 1
         return cls(atoms=atoms, charge=charge, multiplicity=multiplicity)
@@ -447,3 +472,67 @@ def parse_extxyz_comment_line(line: str) -> EXTXYZMetadata:
             prop_dict[key] = value  # type: ignore [literal-required]
     return prop_dict
+def angle(p0: Sequence[float], p1: Sequence[float], p2: Sequence[float], degrees: bool = True) -> float:
+    """
+    Angle between three points.
+    :param i, j, k: positions of points
+    :param degrees: whether to return in degrees
+    :return: angle in radians or degrees
+    """
+    a0, a1, a2 = map(np.asarray, (p0, p1, p2))
+    u = a1 - a0
+    v = a1 - a2
+    nu = np.linalg.norm(u)
+    nv = np.linalg.norm(v)
+    cos_theta = np.dot(u, v) / (nu * nv)
+    cos_theta = np.clip(cos_theta, -1.0, 1.0)
+    ang = np.arccos(cos_theta)
+    if degrees:
+        return float(np.degrees(ang))
+    return float(ang)
+def dihedral(p0: Sequence[float], p1: Sequence[float], p2: Sequence[float], p3: Sequence[float], degrees: bool = True, positive_domain: bool = True) -> float:
+    """
+    Dihedral angle between four points.
+    :param p0, p1, p2, p3: points
+    :param degrees: whether to return in degrees
+    :param positive_domain: (0, 360] if True else (-180, 180]
+    :return: angle in degrees or radians (or nan if collinearities detected)
+    >>> a = [0, 0, 0]
+    >>> b = [0, 0, 1]
+    >>> c = [0, 1, 1]
+    >>> d1 = [0, 1, 2]
+    >>> d2 = [0.5, 1, 1.5]
+    >>> dihedral(a, b, c, d1)
+    180.0
+    >>> dihedral(a, b, c, d2, positive_domain=False)
+    -135.0
+    """
+    a0, a1, a2, a3 = map(np.asarray, (p0, p1, p2, p3))
+    b0 = a1 - a0
+    b1 = a2 - a1
+    b2 = a3 - a2
+    b1 = b1 / np.linalg.norm(b1)
+    v = b1 * np.dot(b0, b1) - b0
+    w = b2 - b1 * np.dot(b2, b1)
+    x = np.dot(v, w)
+    y = np.dot(np.cross(b1, v), w)
+    ang = np.arctan2(y, x)
+    if positive_domain and ang < 0:
+        ang += 2 * np.pi
+    if degrees:
+        return float(np.degrees(ang))
+    return float(ang)

{stjames-0.0.76 → stjames-0.0.78}/stjames/workflows/hydrogen_bond_basicity.py RENAMED Viewed

@@ -7,10 +7,10 @@ from .workflow import MoleculeWorkflow
 class HydrogenBondAcceptorSite(Base):
     """
-    A hydrogen bond acceptor site.
+    A hydrogen-bond-acceptor site.
     :param atom_idx: index of the atom
-    :param pkbhx: Hydrogen bond basicity
+    :param pkbhx: Hydrogen-bond basicity
     :param position: position of the atom
     :param name: name of the atom
     """
@@ -21,9 +21,23 @@ class HydrogenBondAcceptorSite(Base):
     name: str | None = None
+class HydrogenBondDonorSite(Base):
+    """
+    A hydrogen-bond-donor site.
+    :param atom_idx: index of the atom
+    :param pk_alpha: Hydrogen-bond acidity
+    :param position: position of the atom
+    """
+    atom_idx: int  # zero-indexed
+    pk_alpha: float
+    position: tuple[float, float, float]
 class HydrogenBondBasicityWorkflow(MoleculeWorkflow):
     """
-    Workflow for calculating hydrogen bond basicity.
+    Workflow for calculating hydrogen-bond basicity and acidity.
     Inherited:
     :param initial_molecule: Molecule of interest
@@ -36,6 +50,7 @@ class HydrogenBondBasicityWorkflow(MoleculeWorkflow):
     Results:
     :param optimization: UUID of optimization
     :param hba_sites: hydrogen-bond-acceptor sites
+    :param hbd_sites: hydrogen-bond-donor sites
     """
     do_csearch: bool = True
@@ -43,3 +58,4 @@ class HydrogenBondBasicityWorkflow(MoleculeWorkflow):
     optimization: UUID | None = None
     hba_sites: list[HydrogenBondAcceptorSite] = []  # noqa: RUF012
+    hbd_sites: list[HydrogenBondDonorSite] = []  # noqa: RUF012

{stjames-0.0.76 → stjames-0.0.78}/stjames/workflows/multistage_opt.py RENAMED Viewed

@@ -1,15 +1,19 @@
 """Multi-stage optimization workflow."""
+import re
 from typing import Self, Sequence
+import more_itertools as mit
 from pydantic import BaseModel, Field, model_validator
+from stjames.correction import Correction
 from ..constraint import Constraint
 from ..method import XTB_METHODS, Method
 from ..mode import Mode
 from ..opt_settings import OptimizationSettings
 from ..settings import Settings
-from ..solvent import Solvent, SolventSettings
+from ..solvent import Solvent, SolventModel, SolventSettings
 from ..task import Task
 from ..types import UUID
 from .workflow import MoleculeWorkflow
@@ -264,6 +268,73 @@ class MultiStageOptMixin(BaseModel):
         return self
+def mso_settings_from_method_string(
+    methods: str,
+    solvent: Solvent | None = None,
+    use_solvent_for_opt: bool = False,
+    constraints: list[Constraint] | None = None,
+    transition_state: bool = False,
+    frequencies: bool = False,
+) -> MultiStageOptSettings:
+    """
+    Helper function to construct multi-stage opt settings objects from a method string.
+    >>> mso_settings_from_method_string("r2SCAN-3c/CPCM(Water)//B3LYP-D3/6-31G(d)/ALPB(Water)//GFN2-xTB/CPCM(Water)//GFN0-xTB").level_of_theory
+    'r2scan_3c/cpcm(water)//b3lyp-d3/6-31g(d)/alpb(water)//gfn2_xtb/cpcm(water)//gfn0_xtb'
+    """
+    solvent_models = "|".join(model.name for model in SolventModel)
+    pattern = rf"""
+        (?P<method>[^/()]+)                                 # Method + optional corrections
+        (?:/(?P<basis_set>(?!{solvent_models})[^/]+?))?     # Optional basis_set, not starting with solvent model name
+        (?:/(?P<solvent_model>{solvent_models})             # Optional solvent model
+            \((?P<solvent>[^()]+)\))?                       # Solvent name in parentheses
+        (?:\/\/|$)                                          # End or separator
+"""
+    constraints = constraints or []
+    opt_settings = OptimizationSettings(constraints=constraints, transition_state=transition_state)
+    OPT = [Task.OPTIMIZE if not transition_state else Task.OPTIMIZE_TS]
+    valid_corrections = {c.name.lower() for c in Correction}  # Python3.11 hack
+    def process(match: re.Match[str]) -> Settings:
+        data = match.groupdict()
+        method, corrections = mit.partition(lambda x: x.lower() in valid_corrections, data["method"].split("-"))
+        solvent_settings = SolventSettings(solvent=data["solvent"], model=data["solvent_model"]) if data["solvent"] else None
+        return Settings(
+            method="-".join(method),
+            basis_set=data["basis_set"],
+            tasks=OPT,
+            solvent_settings=solvent_settings,
+            opt_settings=opt_settings,
+            corrections=list(corrections),
+        )
+    optimization_settings = [process(match) for match in re.finditer(pattern, methods, re.VERBOSE | re.IGNORECASE)]
+    if len(optimization_settings) > 1:
+        sp_settings = optimization_settings.pop(0)
+        sp_settings.tasks = [Task.ENERGY]
+    else:
+        sp_settings = None
+    optimization_settings = optimization_settings[::-1]
+    if frequencies:
+        optimization_settings[-1].tasks.append(Task.FREQUENCIES)
+    return MultiStageOptSettings(
+        mode=Mode.MANUAL,
+        optimization_settings=optimization_settings,
+        singlepoint_settings=sp_settings,
+        solvent=solvent,
+        xtb_preopt=False,
+        constraints=constraints,
+        transition_state=transition_state,
+        frequencies=frequencies,
+    )
 def build_mso_settings(
     sp_method: Method,
     sp_basis_set: str | None,

{stjames-0.0.76 → stjames-0.0.78}/stjames/workflows/protein_cofolding.py RENAMED Viewed

@@ -12,6 +12,7 @@ class CofoldingModel(LowercaseStrEnum):
     CHAI_1R = "chai_1r"
     BOLTZ_1 = "boltz_1"
+    BOLTZ_2 = "boltz_2"
 class CofoldingScores(BaseModel):
@@ -20,6 +21,15 @@ class CofoldingScores(BaseModel):
     iptm: float  # interface predicted template modeling score
+class AffinityScore(BaseModel):
+    pred_value: float
+    probability_binary: float
+    pred_value1: float
+    probability_binary1: float
+    pred_value2: float
+    probability_binary2: float
 class ProteinCofoldingWorkflow(FASTAWorkflow):
     """
     A workflow for predicting structures. Especially protein structures.
@@ -38,4 +48,5 @@ class ProteinCofoldingWorkflow(FASTAWorkflow):
     use_templates_server: bool = False
     predicted_structure_uuid: UUID | None = None
     scores: CofoldingScores | None = None
-    model: CofoldingModel = CofoldingModel.CHAI_1R
+    model: CofoldingModel = CofoldingModel.BOLTZ_2
+    affinity_score: AffinityScore | None = None

{stjames-0.0.76 → stjames-0.0.78}/stjames/workflows/workflow.py RENAMED Viewed

@@ -32,6 +32,7 @@ class FASTAWorkflow(Workflow):
     initial_protein_sequences: list[str]
     initial_smiles_list: list[str] | None = None
+    ligand_binding_affinity_index: int | None = None
     def __repr__(self) -> str:
         return f"<{type(self).__name__} {self.initial_protein_sequences} {self.initial_smiles_list}>"

{stjames-0.0.76 → stjames-0.0.78/stjames.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: stjames
-Version: 0.0.76
+Version: 0.0.78
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
 Requires-Dist: numpy
 Requires-Dist: requests
 Requires-Dist: rdkit
+Requires-Dist: more-itertools
 Dynamic: license-file
 # stjames