PyPI - stjames - Versions diffs - 0.0.115__tar.gz → 0.0.117__tar.gz - Mend

stjames-0.0.115.tar.gz → stjames-0.0.117.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of stjames might be problematic. Click here for more details.

Files changed (84)

{stjames-0.0.115/stjames.egg-info → stjames-0.0.117}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: stjames
-Version: 0.0.115
+Version: 0.0.117
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames

{stjames-0.0.115 → stjames-0.0.117}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "stjames"
-version = "0.0.115"
+version = "0.0.117"
 description = "standardized JSON atom/molecule encoding scheme"
 readme = "README.md"
 requires-python = ">=3.11"

{stjames-0.0.115 → stjames-0.0.117}/stjames/atomium_stjames/mmcif.py RENAMED Viewed

@@ -512,7 +512,7 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
     try:
         model["polymer"][mol_id]["residues"][res_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
     except Exception:
-        name = atom["auth_comp_id"]
+        name = atom.get("auth_comp_id") or atom.get("label_comp_id") or "UNKNOWN"
         try:
             model["polymer"][mol_id]["residues"][res_id] = {
                 "name": name,
@@ -553,7 +553,7 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
     try:
         model[mol_type][mol_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
     except Exception:
-        name = atom["auth_comp_id"]
+        name = atom.get("auth_comp_id") or atom.get("label_comp_id") or "UNKNOWN"
         model[mol_type][mol_id] = {
             "name": name,
             "full_name": names.get(name).upper() if names.get(name) is not None and names.get(name).lower() != "water" else None,  # type: ignore [union-attr]

{stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/__init__.py RENAMED Viewed

@@ -21,6 +21,7 @@ from .multistage_opt import *
 from .nmr import *
 from .pka import *
 from .pose_analysis_md import *
+from .protein_binder_design import *
 from .protein_cofolding import *
 from .redox_potential import *
 from .scan import *
@@ -51,6 +52,7 @@ WORKFLOW_NAME = Literal[
     "pka",
     "pose_analysis_md",
     "protein_cofolding",
+    "protein_binder_design",
     "redox_potential",
     "scan",
     "solubility",
@@ -80,6 +82,7 @@ WORKFLOW_MAPPING: dict[WORKFLOW_NAME, Workflow] = {
     "pka": pKaWorkflow,  # type: ignore [dict-item]
     "pose_analysis_md": PoseAnalysisMolecularDynamicsWorkflow,  # type: ignore [dict-item]
     "protein_cofolding": ProteinCofoldingWorkflow,  # type: ignore [dict-item]
+    "protein_binder_design": ProteinBinderDesignWorkflow,  # type: ignore [dict-item]
     "redox_potential": RedoxPotentialWorkflow,  # type: ignore [dict-item]
     "scan": ScanWorkflow,  # type: ignore [dict-item]
     "solubility": SolubilityWorkflow,  # type: ignore [dict-item]

{stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/conformer_search.py RENAMED Viewed

@@ -9,9 +9,10 @@ from ..base import LowercaseStrEnum
 from ..constraint import Constraint
 from ..method import Method, XTBMethod
 from ..mode import Mode
+from ..molecule import Molecule
 from ..types import UUID, FloatPerAtom, round_float_per_atom
 from .multistage_opt import MultiStageOptMixin
-from .workflow import MoleculeWorkflow
+from .workflow import MoleculeWorkflow, SMILESWorkflow
 _sentinel = object()
@@ -377,12 +378,16 @@ class ConformerSearchMixin(ConformerGenMixin, MultiStageOptMixin):
         return self
-class ConformerSearchWorkflow(ConformerSearchMixin, MoleculeWorkflow):
+class ConformerSearchWorkflow(ConformerSearchMixin, SMILESWorkflow, MoleculeWorkflow):
     """
     ConformerSearch Workflow.
+    This workflow supports both SMILES and 3D molecular input. Some conformer generation settings
+    support both methods; others (like CREST) require 3D information. Only one should be supplied.
     Inherited:
     :param initial_molecule: Molecule of interest
+    :param initial_smiles: SMILES of the molecule of interest
     :param conf_gen_mode: Mode for calculations
     :param conf_gen_settings: settings for conformer generation
     :param mso_mode: Mode for MultiStageOptSettings
@@ -401,6 +406,21 @@ class ConformerSearchWorkflow(ConformerSearchMixin, MoleculeWorkflow):
     :param energies: energies of the molecules
     """
+    initial_smiles: str = ""
+    initial_molecule: Molecule | None = None  # type: ignore [assignment]
     # Results
     conformer_uuids: list[list[UUID | None]] = Field(default_factory=list)
     energies: Annotated[FloatPerAtom, AfterValidator(round_float_per_atom(6))] = Field(default_factory=list)
+    @model_validator(mode="after")
+    def validate_mol_input(self) -> Self:
+        """Ensure that exactly one of initial_molecule or initial_smiles is set."""
+        if not (bool(self.initial_smiles) ^ bool(self.initial_molecule)):
+            raise ValueError("Can only set one of initial_molecule and initial_smiles")
+        if isinstance(self.conf_gen_settings, iMTDSettings) and (self.initial_molecule is None):
+            raise ValueError("iMTDSettings requires `initial_molecule` to be set")
+        return self

{stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/docking.py RENAMED Viewed

@@ -1,6 +1,6 @@
 """Docking workflow."""
-from typing import Annotated, Self, TypeAlias
+from typing import Annotated, Literal, Self, TypeAlias
 from pydantic import AfterValidator, ConfigDict, field_validator, model_validator
@@ -46,12 +46,23 @@ class VinaSettings(DockingSettings):
     """
     Controls how AutoDock Vina is run.
+    :param executable: which Vina implementation is run.
+    :param scoring_function: which scoring function is employed.
     :param exhaustiveness: how many times Vina attempts to find a pose.
         8 is typical, 32 is considered relatively careful.
     """
+    executable: Literal["qvina2", "vina"] = "vina"
+    scoring_function: Literal["vinardo", "vina"] = "vinardo"
     exhaustiveness: int = 8
+    @model_validator(mode="after")
+    def check_executable_scoring_function(self) -> Self:
+        """Check if the combination of executable and scoring function is supported."""
+        if (self.executable == "qvina2") and (self.scoring_function == "vinardo"):
+            raise ValueError("qvina2 does not implement the vinardo scoring function!")
+        return self
 class DockingWorkflow(MoleculeWorkflow):
     """

{stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/pose_analysis_md.py RENAMED Viewed

@@ -76,8 +76,8 @@ class PoseAnalysisMolecularDynamicsWorkflow(SMILESWorkflow):
     protein_uuid: UUID | None = None
     ligand_residue_name: str = "LIG"
-    num_trajectories: PositiveInt = 4
-    equilibration_time_ns: Annotated[PositiveFloat, AfterValidator(round_float(3))] = 5
+    num_trajectories: PositiveInt = 1
+    equilibration_time_ns: Annotated[PositiveFloat, AfterValidator(round_float(3))] = 1
     simulation_time_ns: Annotated[PositiveFloat, AfterValidator(round_float(3))] = 10
     temperature: Annotated[PositiveFloat, AfterValidator(round_float(3))] = 300

stjames-0.0.117/stjames/workflows/protein_binder_design.py ADDED Viewed

@@ -0,0 +1,303 @@
+"""Protein-binder-design workflow."""
+from enum import Enum
+from typing import Annotated, TypeAlias
+from pydantic import AfterValidator
+from ..base import Base, LowercaseStrEnum, round_optional_float
+from ..types import UUID
+from .workflow import Workflow
+ProteinUUID: TypeAlias = UUID
+class BoltzGenSecondaryStructure(Base):
+    """
+    Represents the secondary structure assignments for a protein.
+    :param id: Optional identifier for this secondary structure annotation.
+    :param sheet: String encoding the residue indices comprising β-sheet structures
+                  (e.g., "1,3..11" for residues 1, and 3 through 11).
+    :param helix: String encoding residue indices comprising helices.
+    :param loop: String encoding residue indices comprising loop or coil regions.
+    """
+    id: str | None = None
+    sheet: str | None = None
+    helix: str | None = None
+    loop: str | None = None
+class BoltzGenProteinEntity(Base):
+    """
+    Represents a protein entity, either a designed or natural sequence.
+    :param id: Unique identifier for the protein.
+    :param sequence: Protein sequence, may contain amino acids and numbers for designed regions.
+    :param secondary_structure: Optional assigned secondary structure.
+    :param cyclic: Whether the protein is cyclic (True/False). Optional.
+    """
+    id: str
+    sequence: str  # can include amino acids as well as numbers for designed regions
+    secondary_structure: BoltzGenSecondaryStructure | None = None
+    # binding_types: BindingType | None = None - we may want to add this later but not used in examples.
+    cyclic: bool | None = None
+class BoltzGenLigandEntity(Base):
+    """
+    Represents a ligand entity (non-protein), such as a small molecule.
+    :param id: Unique identifier for the ligand.
+    :param smiles: SMILES string representation of the ligand.
+    """
+    id: str
+    smiles: str
+    # binding_types: str | None = None - we may want to add this later but not used in examples.
+class BoltzGenRegionSelection(Base):
+    """
+    Defines a region of a protein chain by specifying its chain identifier and (optionally) residue indices.
+    :param chain_id: Identifier for the protein chain (e.g., 'A', 'B', etc.).
+    :param residue_indices: Residues to select, specified as a string in the format "5..7,13" or "5..15,50..".
+    """
+    chain_id: str | None = None
+    residue_indices: str | None = None
+class BoltzGenProximityRegionSelection(BoltzGenRegionSelection):
+    """
+    Defines a region of a protein chain based on spatial proximity to a selection of residues.
+    Inherits:
+        BoltzGenRegionSelection
+    :param radius: Radius in angstroms (Å) used to select all residues within proximity to the specified region.
+    """
+    radius: int | None = None
+class BoltzGenBindingType(Base):
+    """
+    Represents the binding interface specification for a given protein chain.
+    :param chain_id: Identifier for the protein chain (e.g., 'A', 'B', etc.).
+    :param binding: Residue indices or regions that are required to participate in binding
+        (e.g., "5..7,13" or "all" for the whole chain).
+    :param not_binding: Residue indices or regions that should explicitly not participate in binding
+        (e.g., "5..7,13" or "all" for excluding the entire chain).
+    """
+    chain_id: str | None = None
+    binding: str | None = None
+    not_binding: str | None = None
+class BoltzGenSecondaryStructureOptions(str, Enum):
+    """
+    Secondary-structure labels that can be requested for a design insertion.
+    Each member's value is the same upper-case string as its name.
+    """
+    UNSPECIFIED = "UNSPECIFIED"
+    LOOP = "LOOP"
+    HELIX = "HELIX"
+    SHEET = "SHEET"
+class BoltzGenDesignInsertion(Base):
+    """
+    Represents an insertion site for protein design in a specific chain.
+    :param chain_id: Identifier of the chain where the insertion occurs.
+    :param residue_index: Position in the chain after which the insertion is to be made.
+    :param number_of_residues: Number of residues to insert at the specified site (can be a string pattern).
+    :param secondary_structure: Desired secondary structure type for the inserted residues
+        ("UNSPECIFIED", "LOOP", "HELIX", or "SHEET"). Optional.
+    """
+    chain_id: str
+    residue_index: int
+    number_of_residues: str
+    secondary_structure: BoltzGenSecondaryStructureOptions | None = None
+class BoltzGenFileEntity(Base):
+    """
+    Represents a protein structure input and its associated region selection and design specifications
+    for the BoltzGen binder design workflow.
+    :param uuid: Unique identifier for the protein structure.
+    :param include: List of regions to include in the design or analysis context.
+    :param exclude: List of regions to explicitly exclude from consideration (e.g., for ignoring noisy/irrelevant regions).
+    :param include_proximity: List of regions defined by spatial proximity (e.g., residues within a given radius).
+    :param binding_types: List of binding type constraints or permitted interface regions.
+    :param design: List of regions that are being subject to design (mutable, allowed to change).
+    :param secondary_structure: List of desired or annotated secondary structure definitions for selected regions.
+    :param design_insertions: List of new regions to be inserted with specified properties (e.g., insertion sites, structure preferences).
+    """
+    uuid: ProteinUUID
+    include: list[BoltzGenRegionSelection] = []
+    exclude: list[BoltzGenRegionSelection] = []
+    # fuse: None  - we may want to add this later but not used in examples.
+    include_proximity: list[BoltzGenProximityRegionSelection] = []
+    binding_types: list[BoltzGenBindingType] = []
+    # structure_groups: None - we may want to add this later but not used in examples.
+    design: list[BoltzGenRegionSelection] = []
+    secondary_structure: list[BoltzGenSecondaryStructure] = []
+    design_insertions: list[BoltzGenDesignInsertion] = []
+class BoltzGenAtomSpecification(Base):
+    """
+    Atom specification for a protein chain, used for applying constraints or referencing atoms.
+    :param chain_id: Identifier for the protein chain (e.g., "A", "B").
+    :param index: Residue index the atom belongs to (integer, 1-based).
+    :param atom_name: Name of the atom (e.g., "CA", "N", "O", etc.).
+    """
+    chain_id: str
+    index: int
+    atom_name: str
+class BoltzGenConstraint(Base):
+    """
+    Describes a covalent or spatial constraint between two specified atoms in the context of protein design.
+    :param atom1: First atom in the constraint.
+    :param atom2: Second atom in the constraint.
+    """
+    atom1: BoltzGenAtomSpecification
+    atom2: BoltzGenAtomSpecification
+class BoltzGenInput(Base):
+    """
+    Represents the primary input schema for the boltzgen application.
+    :param protein_entities: Protein chains that are designed or targeted for binding.
+    :param ligand_entities: Small molecules or other non-protein ligands relevant to the design.
+    :param file_entities: 3d protein structures and input settings related to them.
+    :param constraints: Covalent bond constraints
+    """
+    protein_entities: list[BoltzGenProteinEntity] = []
+    ligand_entities: list[BoltzGenLigandEntity] = []
+    file_entities: list[BoltzGenFileEntity] = []
+    constraints: list[BoltzGenConstraint] = []
+class BoltzGenScores(Base):
+    """
+    Compact, interpretable metrics for a designed binder.
+    ↑ higher is better, ↓ lower is better
+    All fields are optional and default to None; every float-valued field is
+    post-validated with ``round_optional_float(3)``.
+    :param quality_score: aggregate model quality (↑)
+    :param num_filters_passed: number of QC/heuristic filters passed (↑)
+    :param iptm: inter-chain pTM confidence, 0–1 (↑)
+    :param design_ptm: design pTM confidence, 0–1 (↑)
+    :param min_design_to_target_pae: minimum design-to-target PAE (presumably in Å, ↓; confirm)
+    :param design_to_target_iptm: design-to-target ipTM confidence (presumably 0–1, ↑; confirm)
+    :param min_interaction_pae: minimum interface PAE in Å (↓)
+    :param bb_rmsd: backbone RMSD in Å (↓)
+    :param delta_sasa_refolded: ΔSASA of interface after refolding, Å² (↑ typically indicates better burial)
+    :param plip_hbonds_refolded: count of hydrogen bonds at the interface (↑)
+    :param plip_saltbridge_refolded: count of salt bridges at the interface (↑)
+    :param liability_score: composite liabilities score (↓)
+    :param liability_high_severity_violations: count of high-severity liabilities (↓)
+    :param liability_num_violations: total liability count (↓)
+    :param helix: fraction helical content, 0–1
+    :param sheet: fraction β-sheet content, 0–1
+    :param loop: fraction loop/coil content, 0–1
+    :param design_largest_hydrophobic_patch_refolded: largest hydrophobic patch area after refolding, Å²
+    :param design_hydrophobicity: overall design hydrophobicity score (unitless)
+    :param num_tokens: sequence length / token count
+    """
+    quality_score: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    num_filters_passed: int | None = None
+    iptm: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    design_ptm: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    min_design_to_target_pae: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    design_to_target_iptm: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    min_interaction_pae: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    bb_rmsd: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    delta_sasa_refolded: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    plip_hbonds_refolded: int | None = None
+    plip_saltbridge_refolded: int | None = None
+    liability_score: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    liability_high_severity_violations: int | None = None
+    liability_num_violations: int | None = None
+    helix: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    sheet: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    loop: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    design_largest_hydrophobic_patch_refolded: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    design_hydrophobicity: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
+    num_tokens: int | None = None
+class ProteinBinderDesignResult(Base):
+    """
+    The output; a designed binder.
+    All fields are optional and default to None.
+    :param binder_sequence: the sequence of the designed binder
+    :param bound_structure: UUID referencing the structure (PDB) of the binder bound to the target
+    :param scores: the scores for the generated structure
+    """
+    binder_sequence: str | None = None
+    bound_structure: ProteinUUID | None = None
+    scores: BoltzGenScores | None = None
+class BoltzGenProtocol(LowercaseStrEnum):
+    """
+    The predefined protocol used for generation + filtering.
+    """
+    PROTEIN_ANYTHING = "protein-anything"
+    PEPTIDE_ANYTHING = "peptide-anything"
+    PROTEIN_SMALL_MOLECULE = "protein-small_molecule"
+    NANOBODY_ANYTHING = "nanobody-anything"
+class BoltzGenSettings(Base):
+    """
+    The settings for running BoltzGen.
+    :param protocol: which protocol to use (defaults to "protein-anything")
+    :param num_designs: how many designs to generate (defaults to 100)
+    :param budget: defaults to 20.
+        NOTE(review): undocumented in the original; presumably the number of designs
+        retained after filtering — confirm against the BoltzGen documentation.
+    NOTE(review): the original docstring also described the following parameter, but no
+    ``binding_residue`` field is declared on this class — confirm whether it was dropped
+    or is still to be added.
+    :param binding_residue: a dict mapping the chain ID to which residues should bind.
+        the string follows the BoltzGen format of specifying ranges of residue indices (refer to their documentation).
+        examples include "5..7,13" or "5..15,50..".
+    """
+    protocol: BoltzGenProtocol = BoltzGenProtocol.PROTEIN_ANYTHING
+    num_designs: int = 100
+    budget: int = 20
+class ProteinBinderDesignWorkflow(Workflow):
+    """
+    A workflow for generating proteins or peptides that bind to something.
+    New:
+    :param binder_design_input: the input to the protein binder design workflow
+    :param binder_design_settings: the settings for the protein generation method employed
+    :param generated_binders: the output structures
+    """
+    binder_design_input: BoltzGenInput = BoltzGenInput()
+    binder_design_settings: BoltzGenSettings = BoltzGenSettings()
+    generated_binders: list[ProteinBinderDesignResult] = []

{stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/protein_cofolding.py RENAMED Viewed

@@ -1,4 +1,4 @@
-"""Protein cofolding Workflow."""
+"""Protein cofolding workflow."""
 from typing import Annotated, Literal, TypeAlias
@@ -26,6 +26,7 @@ class Token(BaseModel):
     input_type: Literal["ligand", "protein"]
     input_index: int
     token_index: int
+    atom_name: str | None = None
 class ContactConstraint(BaseModel):

{stjames-0.0.115 → stjames-0.0.117/stjames.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: stjames
-Version: 0.0.115
+Version: 0.0.117
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames

{stjames-0.0.115 → stjames-0.0.117}/stjames.egg-info/SOURCES.txt RENAMED Viewed

@@ -65,6 +65,7 @@ stjames/workflows/multistage_opt.py
 stjames/workflows/nmr.py
 stjames/workflows/pka.py
 stjames/workflows/pose_analysis_md.py
+stjames/workflows/protein_binder_design.py
 stjames/workflows/protein_cofolding.py
 stjames/workflows/redox_potential.py
 stjames/workflows/scan.py