stjames 0.0.115__tar.gz → 0.0.117__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stjames might be problematic. Click here for more details.
- {stjames-0.0.115/stjames.egg-info → stjames-0.0.117}/PKG-INFO +1 -1
- {stjames-0.0.115 → stjames-0.0.117}/pyproject.toml +1 -1
- {stjames-0.0.115 → stjames-0.0.117}/stjames/atomium_stjames/mmcif.py +2 -2
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/__init__.py +3 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/conformer_search.py +22 -2
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/docking.py +12 -1
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/pose_analysis_md.py +2 -2
- stjames-0.0.117/stjames/workflows/protein_binder_design.py +303 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/protein_cofolding.py +2 -1
- {stjames-0.0.115 → stjames-0.0.117/stjames.egg-info}/PKG-INFO +1 -1
- {stjames-0.0.115 → stjames-0.0.117}/stjames.egg-info/SOURCES.txt +1 -0
- {stjames-0.0.115 → stjames-0.0.117}/LICENSE +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/README.md +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/setup.cfg +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/__init__.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/_deprecated_solvent_settings.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/atom.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/atomium_stjames/__init__.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/atomium_stjames/data.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/atomium_stjames/pdb.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/atomium_stjames/utilities.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/base.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/basis_set.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/calculation.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/compute_settings.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/constraint.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/correction.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/data/__init__.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/data/bragg_radii.json +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/data/elements.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/data/isotopes.json +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/data/nist_isotopes.json +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/data/read_nist_isotopes.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/data/symbol_element.json +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/engine.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/message.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/method.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/mode.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/molecule.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/opt_settings.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/optimization/__init__.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/optimization/freezing_string_method.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/pdb.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/periodic_cell.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/py.typed +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/scf_settings.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/settings.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/solvent.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/status.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/task.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/thermochem_settings.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/types.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/admet.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/basic_calculation.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/bde.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/conformer.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/descriptors.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/double_ended_ts_search.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/electronic_properties.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/fukui.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/hydrogen_bond_basicity.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/ion_mobility.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/irc.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/macropka.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/molecular_dynamics.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/multistage_opt.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/nmr.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/pka.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/redox_potential.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/scan.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/solubility.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/spin_states.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/strain.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/tautomer.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames/workflows/workflow.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames.egg-info/dependency_links.txt +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames.egg-info/requires.txt +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/stjames.egg-info/top_level.txt +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/tests/test_constraints.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/tests/test_from_extxyz.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/tests/test_molecule.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/tests/test_pdb.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/tests/test_rounding.py +0 -0
- {stjames-0.0.115 → stjames-0.0.117}/tests/test_settings.py +0 -0
|
@@ -512,7 +512,7 @@ def add_atom_to_polymer(atom: dict[str, Any], aniso: dict[int, Any], model: dict
|
|
|
512
512
|
try:
|
|
513
513
|
model["polymer"][mol_id]["residues"][res_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
|
|
514
514
|
except Exception:
|
|
515
|
-
name = atom
|
|
515
|
+
name = atom.get("auth_comp_id") or atom.get("label_comp_id") or "UNKNOWN"
|
|
516
516
|
try:
|
|
517
517
|
model["polymer"][mol_id]["residues"][res_id] = {
|
|
518
518
|
"name": name,
|
|
@@ -553,7 +553,7 @@ def add_atom_to_non_polymer(atom: dict[str, Any], aniso: dict[int, Any], model:
|
|
|
553
553
|
try:
|
|
554
554
|
model[mol_type][mol_id]["atoms"][int(atom["id"])] = atom_dict_to_atom_dict(atom, aniso)
|
|
555
555
|
except Exception:
|
|
556
|
-
name = atom
|
|
556
|
+
name = atom.get("auth_comp_id") or atom.get("label_comp_id") or "UNKNOWN"
|
|
557
557
|
model[mol_type][mol_id] = {
|
|
558
558
|
"name": name,
|
|
559
559
|
"full_name": names.get(name).upper() if names.get(name) is not None and names.get(name).lower() != "water" else None, # type: ignore [union-attr]
|
|
@@ -21,6 +21,7 @@ from .multistage_opt import *
|
|
|
21
21
|
from .nmr import *
|
|
22
22
|
from .pka import *
|
|
23
23
|
from .pose_analysis_md import *
|
|
24
|
+
from .protein_binder_design import *
|
|
24
25
|
from .protein_cofolding import *
|
|
25
26
|
from .redox_potential import *
|
|
26
27
|
from .scan import *
|
|
@@ -51,6 +52,7 @@ WORKFLOW_NAME = Literal[
|
|
|
51
52
|
"pka",
|
|
52
53
|
"pose_analysis_md",
|
|
53
54
|
"protein_cofolding",
|
|
55
|
+
"protein_binder_design",
|
|
54
56
|
"redox_potential",
|
|
55
57
|
"scan",
|
|
56
58
|
"solubility",
|
|
@@ -80,6 +82,7 @@ WORKFLOW_MAPPING: dict[WORKFLOW_NAME, Workflow] = {
|
|
|
80
82
|
"pka": pKaWorkflow, # type: ignore [dict-item]
|
|
81
83
|
"pose_analysis_md": PoseAnalysisMolecularDynamicsWorkflow, # type: ignore [dict-item]
|
|
82
84
|
"protein_cofolding": ProteinCofoldingWorkflow, # type: ignore [dict-item]
|
|
85
|
+
"protein_binder_design": ProteinBinderDesignWorkflow, # type: ignore [dict-item]
|
|
83
86
|
"redox_potential": RedoxPotentialWorkflow, # type: ignore [dict-item]
|
|
84
87
|
"scan": ScanWorkflow, # type: ignore [dict-item]
|
|
85
88
|
"solubility": SolubilityWorkflow, # type: ignore [dict-item]
|
|
@@ -9,9 +9,10 @@ from ..base import LowercaseStrEnum
|
|
|
9
9
|
from ..constraint import Constraint
|
|
10
10
|
from ..method import Method, XTBMethod
|
|
11
11
|
from ..mode import Mode
|
|
12
|
+
from ..molecule import Molecule
|
|
12
13
|
from ..types import UUID, FloatPerAtom, round_float_per_atom
|
|
13
14
|
from .multistage_opt import MultiStageOptMixin
|
|
14
|
-
from .workflow import MoleculeWorkflow
|
|
15
|
+
from .workflow import MoleculeWorkflow, SMILESWorkflow
|
|
15
16
|
|
|
16
17
|
_sentinel = object()
|
|
17
18
|
|
|
@@ -377,12 +378,16 @@ class ConformerSearchMixin(ConformerGenMixin, MultiStageOptMixin):
|
|
|
377
378
|
return self
|
|
378
379
|
|
|
379
380
|
|
|
380
|
-
class ConformerSearchWorkflow(ConformerSearchMixin, MoleculeWorkflow):
|
|
381
|
+
class ConformerSearchWorkflow(ConformerSearchMixin, SMILESWorkflow, MoleculeWorkflow):
|
|
381
382
|
"""
|
|
382
383
|
ConformerSearch Workflow.
|
|
383
384
|
|
|
385
|
+
This workflow supports both SMILES and 3D molecular input. Some conformer generation settings
|
|
386
|
+
support both methods; others (like CREST) require 3D information. Only one should be supplied.
|
|
387
|
+
|
|
384
388
|
Inherited:
|
|
385
389
|
:param initial_molecule: Molecule of interest
|
|
390
|
+
:param initial_smiles: SMILES of the molecule of interest
|
|
386
391
|
:param conf_gen_mode: Mode for calculations
|
|
387
392
|
:param conf_gen_settings: settings for conformer generation
|
|
388
393
|
:param mso_mode: Mode for MultiStageOptSettings
|
|
@@ -401,6 +406,21 @@ class ConformerSearchWorkflow(ConformerSearchMixin, MoleculeWorkflow):
|
|
|
401
406
|
:param energies: energies of the molecules
|
|
402
407
|
"""
|
|
403
408
|
|
|
409
|
+
initial_smiles: str = ""
|
|
410
|
+
initial_molecule: Molecule | None = None # type: ignore [assignment]
|
|
411
|
+
|
|
404
412
|
# Results
|
|
405
413
|
conformer_uuids: list[list[UUID | None]] = Field(default_factory=list)
|
|
406
414
|
energies: Annotated[FloatPerAtom, AfterValidator(round_float_per_atom(6))] = Field(default_factory=list)
|
|
415
|
+
|
|
416
|
+
@model_validator(mode="after")
|
|
417
|
+
def validate_mol_input(self) -> Self:
|
|
418
|
+
"""Ensure that only one of initial_molecule or initial_smiles is set."""
|
|
419
|
+
|
|
420
|
+
if not (bool(self.initial_smiles) ^ bool(self.initial_molecule)):
|
|
421
|
+
raise ValueError("Can only set one of initial_molecule and initial_smiles")
|
|
422
|
+
|
|
423
|
+
if isinstance(self.conf_gen_settings, iMTDSettings) and (self.initial_molecule is None):
|
|
424
|
+
raise ValueError("iMTDSettings requires `initial_molecule` to be set")
|
|
425
|
+
|
|
426
|
+
return self
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Docking workflow."""
|
|
2
2
|
|
|
3
|
-
from typing import Annotated, Self, TypeAlias
|
|
3
|
+
from typing import Annotated, Literal, Self, TypeAlias
|
|
4
4
|
|
|
5
5
|
from pydantic import AfterValidator, ConfigDict, field_validator, model_validator
|
|
6
6
|
|
|
@@ -46,12 +46,23 @@ class VinaSettings(DockingSettings):
|
|
|
46
46
|
"""
|
|
47
47
|
Controls how AutoDock Vina is run.
|
|
48
48
|
|
|
49
|
+
:param executable: which Vina implementation is run.
|
|
50
|
+
:param scoring_function: which scoring function is employed.
|
|
49
51
|
:param exhaustiveness: how many times Vina attempts to find a pose.
|
|
50
52
|
8 is typical, 32 is considered relatively careful.
|
|
51
53
|
"""
|
|
52
54
|
|
|
55
|
+
executable: Literal["qvina2", "vina"] = "vina"
|
|
56
|
+
scoring_function: Literal["vinardo", "vina"] = "vinardo"
|
|
53
57
|
exhaustiveness: int = 8
|
|
54
58
|
|
|
59
|
+
@model_validator(mode="after")
|
|
60
|
+
def check_executable_scoring_function(self) -> Self:
|
|
61
|
+
"""Check if the combination of executable and scoring function is supported."""
|
|
62
|
+
if (self.executable == "qvina2") and (self.scoring_function == "vinardo"):
|
|
63
|
+
raise ValueError("qvina2 does not implement the vinardo scoring function!")
|
|
64
|
+
return self
|
|
65
|
+
|
|
55
66
|
|
|
56
67
|
class DockingWorkflow(MoleculeWorkflow):
|
|
57
68
|
"""
|
|
@@ -76,8 +76,8 @@ class PoseAnalysisMolecularDynamicsWorkflow(SMILESWorkflow):
|
|
|
76
76
|
protein_uuid: UUID | None = None
|
|
77
77
|
ligand_residue_name: str = "LIG"
|
|
78
78
|
|
|
79
|
-
num_trajectories: PositiveInt =
|
|
80
|
-
equilibration_time_ns: Annotated[PositiveFloat, AfterValidator(round_float(3))] =
|
|
79
|
+
num_trajectories: PositiveInt = 1
|
|
80
|
+
equilibration_time_ns: Annotated[PositiveFloat, AfterValidator(round_float(3))] = 1
|
|
81
81
|
simulation_time_ns: Annotated[PositiveFloat, AfterValidator(round_float(3))] = 10
|
|
82
82
|
|
|
83
83
|
temperature: Annotated[PositiveFloat, AfterValidator(round_float(3))] = 300
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""Protein-binder-design workflow."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Annotated, TypeAlias
|
|
5
|
+
|
|
6
|
+
from pydantic import AfterValidator
|
|
7
|
+
|
|
8
|
+
from ..base import Base, LowercaseStrEnum, round_optional_float
|
|
9
|
+
from ..types import UUID
|
|
10
|
+
from .workflow import Workflow
|
|
11
|
+
|
|
12
|
+
ProteinUUID: TypeAlias = UUID
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BoltzGenSecondaryStructure(Base):
|
|
16
|
+
"""
|
|
17
|
+
Represents the secondary structure assignments for a protein.
|
|
18
|
+
|
|
19
|
+
:param id: Optional identifier for this secondary structure annotation.
|
|
20
|
+
:param sheet: String encoding the residue indices comprising β-sheet structures
|
|
21
|
+
(e.g., "1,3..11" for residues 1, and 3 through 11).
|
|
22
|
+
:param helix: String encoding residue indices comprising helices.
|
|
23
|
+
:param loop: String encoding residue indices comprising loop or coil regions.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
id: str | None = None
|
|
27
|
+
sheet: str | None = None
|
|
28
|
+
helix: str | None = None
|
|
29
|
+
loop: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class BoltzGenProteinEntity(Base):
|
|
33
|
+
"""
|
|
34
|
+
Represents a protein entity, either a designed or natural sequence.
|
|
35
|
+
|
|
36
|
+
:param id: Unique identifier for the protein.
|
|
37
|
+
:param sequence: Protein sequence, may contain amino acids and numbers for designed regions.
|
|
38
|
+
:param secondary_structure: Optional assigned secondary structure.
|
|
39
|
+
:param cyclic: Whether the protein is cyclic (True/False). Optional.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
id: str
|
|
43
|
+
sequence: str # can include amino acids as well as numbers for designed regions
|
|
44
|
+
secondary_structure: BoltzGenSecondaryStructure | None = None
|
|
45
|
+
# binding_types: BindingType | None = None - we may want to add this later but not used in examples.
|
|
46
|
+
cyclic: bool | None = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BoltzGenLigandEntity(Base):
|
|
50
|
+
"""
|
|
51
|
+
Represents a ligand entity (non-protein), such as a small molecule.
|
|
52
|
+
|
|
53
|
+
:param id: Unique identifier for the ligand.
|
|
54
|
+
:param smiles: SMILES string representation of the ligand.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
id: str
|
|
58
|
+
smiles: str
|
|
59
|
+
# binding_types: str | None = None - we may want to add this later but not used in examples.
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class BoltzGenRegionSelection(Base):
|
|
63
|
+
"""
|
|
64
|
+
Defines a region of a protein chain by specifying its chain identifier and (optionally) residue indices.
|
|
65
|
+
|
|
66
|
+
:param chain_id: Identifier for the protein chain (e.g., 'A', 'B', etc.).
|
|
67
|
+
:param residue_indices: Residues to select, specified as a string in the format "5..7,13" or "5..15,50..".
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
chain_id: str | None = None
|
|
71
|
+
residue_indices: str | None = None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class BoltzGenProximityRegionSelection(BoltzGenRegionSelection):
|
|
75
|
+
"""
|
|
76
|
+
Defines a region of a protein chain based on spatial proximity to a selection of residues.
|
|
77
|
+
|
|
78
|
+
Inherits:
|
|
79
|
+
BoltzGenRegionSelection
|
|
80
|
+
|
|
81
|
+
:param radius: Radius in angstroms (Å) used to select all residues within proximity to the specified region.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
radius: int | None = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class BoltzGenBindingType(Base):
|
|
88
|
+
"""
|
|
89
|
+
Represents the binding interface specification for a given protein chain.
|
|
90
|
+
|
|
91
|
+
:param chain_id: Identifier for the protein chain (e.g., 'A', 'B', etc.).
|
|
92
|
+
:param binding: Residue indices or regions that are required to participate in binding
|
|
93
|
+
(e.g., "5..7,13" or "all" for the whole chain).
|
|
94
|
+
:param not_binding: Residue indices or regions that should explicitly not participate in binding
|
|
95
|
+
(e.g., "5..7,13" or "all" for excluding the entire chain).
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
chain_id: str | None = None
|
|
99
|
+
binding: str | None = None
|
|
100
|
+
not_binding: str | None = None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class BoltzGenSecondaryStructureOptions(str, Enum):
|
|
104
|
+
UNSPECIFIED = "UNSPECIFIED"
|
|
105
|
+
LOOP = "LOOP"
|
|
106
|
+
HELIX = "HELIX"
|
|
107
|
+
SHEET = "SHEET"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class BoltzGenDesignInsertion(Base):
|
|
111
|
+
"""
|
|
112
|
+
Represents an insertion site for protein design in a specific chain.
|
|
113
|
+
|
|
114
|
+
:param chain_id: Identifier of the chain where the insertion occurs.
|
|
115
|
+
:param residue_index: Position in the chain after which the insertion is to be made.
|
|
116
|
+
:param number_of_residues: Number of residues to insert at the specified site (can be a string pattern).
|
|
117
|
+
:param secondary_structure: Desired secondary structure type for the inserted residues
|
|
118
|
+
("UNSPECIFIED", "LOOP", "HELIX", or "SHEET"). Optional.
|
|
119
|
+
"""
|
|
120
|
+
|
|
121
|
+
chain_id: str
|
|
122
|
+
residue_index: int
|
|
123
|
+
number_of_residues: str
|
|
124
|
+
secondary_structure: BoltzGenSecondaryStructureOptions | None = None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class BoltzGenFileEntity(Base):
|
|
128
|
+
"""
|
|
129
|
+
Represents a protein structure input and its associated region selection and design specifications
|
|
130
|
+
for the BoltzGen binder design workflow.
|
|
131
|
+
|
|
132
|
+
:param uuid: Unique identifier for the protein structure.
|
|
133
|
+
:param include: List of regions to include in the design or analysis context.
|
|
134
|
+
:param exclude: List of regions to explicitly exclude from consideration (e.g., for ignoring noisy/irrelevant regions).
|
|
135
|
+
:param include_proximity: List of regions defined by spatial proximity (e.g., residues within a given radius).
|
|
136
|
+
:param binding_types: List of binding type constraints or permitted interface regions.
|
|
137
|
+
:param design: List of regions that are being subject to design (mutable, allowed to change).
|
|
138
|
+
:param secondary_structure: List of desired or annotated secondary structure definitions for selected regions.
|
|
139
|
+
:param design_insertions: List of new regions to be inserted with specified properties (e.g., insertion sites, structure preferences).
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
uuid: ProteinUUID
|
|
143
|
+
include: list[BoltzGenRegionSelection] = []
|
|
144
|
+
exclude: list[BoltzGenRegionSelection] = []
|
|
145
|
+
# fuse: None - we may want to add this later but not used in examples.
|
|
146
|
+
include_proximity: list[BoltzGenProximityRegionSelection] = []
|
|
147
|
+
binding_types: list[BoltzGenBindingType] = []
|
|
148
|
+
# structure_groups: None - we may want to add this later but not used in examples.
|
|
149
|
+
design: list[BoltzGenRegionSelection] = []
|
|
150
|
+
secondary_structure: list[BoltzGenSecondaryStructure] = []
|
|
151
|
+
design_insertions: list[BoltzGenDesignInsertion] = []
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class BoltzGenAtomSpecification(Base):
|
|
155
|
+
"""
|
|
156
|
+
Atom specification for a protein chain, used for applying constraints or referencing atoms.
|
|
157
|
+
|
|
158
|
+
:param chain_id: Identifier for the protein chain (e.g., "A", "B").
|
|
159
|
+
:param index: Residue index the atom belongs to (integer, 1-based).
|
|
160
|
+
:param atom_name: Name of the atom (e.g., "CA", "N", "O", etc.).
|
|
161
|
+
"""
|
|
162
|
+
|
|
163
|
+
chain_id: str
|
|
164
|
+
index: int
|
|
165
|
+
atom_name: str
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class BoltzGenConstraint(Base):
|
|
169
|
+
"""
|
|
170
|
+
Describes a covalent or spatial constraint between two specified atoms in the context of protein design.
|
|
171
|
+
|
|
172
|
+
:param atom1: First atom in the constraint.
|
|
173
|
+
:param atom2: Second atom in the constraint.
|
|
174
|
+
"""
|
|
175
|
+
|
|
176
|
+
atom1: BoltzGenAtomSpecification
|
|
177
|
+
atom2: BoltzGenAtomSpecification
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class BoltzGenInput(Base):
|
|
181
|
+
"""
|
|
182
|
+
Represents the primary input schema for the boltzgen application.
|
|
183
|
+
|
|
184
|
+
:param protein_entities: Protein chains that are designed or targeted for binding.
|
|
185
|
+
:param ligand_entities: Small molecules or other non-protein ligands relevant to the design.
|
|
186
|
+
:param file_entities: 3d protein structures and input settings related to them.
|
|
187
|
+
:param constraints: Covalent bond constraints
|
|
188
|
+
"""
|
|
189
|
+
|
|
190
|
+
protein_entities: list[BoltzGenProteinEntity] = []
|
|
191
|
+
ligand_entities: list[BoltzGenLigandEntity] = []
|
|
192
|
+
file_entities: list[BoltzGenFileEntity] = []
|
|
193
|
+
constraints: list[BoltzGenConstraint] = []
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class BoltzGenScores(Base):
|
|
197
|
+
"""
|
|
198
|
+
Compact, interpretable metrics for a designed binder.
|
|
199
|
+
↑ higher is better, ↓ lower is better
|
|
200
|
+
|
|
201
|
+
:param quality_score: aggregate model quality (↑)
|
|
202
|
+
:param num_filters_passed: number of QC/heuristic filters passed (↑)
|
|
203
|
+
:param iptm: inter-chain pTM confidence, 0–1 (↑)
|
|
204
|
+
:param design_ptm: design pTM confidence, 0–1 (↑)
|
|
205
|
+
:param min_interaction_pae: minimum interface PAE in Å (↓)
|
|
206
|
+
:param bb_rmsd: backbone RMSD in Å (↓)
|
|
207
|
+
:param delta_sasa_refolded: ΔSASA of interface after refolding, Ų (↑ typically indicates better burial)
|
|
208
|
+
:param plip_hbonds_refolded: count of hydrogen bonds at the interface (↑)
|
|
209
|
+
:param plip_saltbridge_refolded: count of salt bridges at the interface (↑)
|
|
210
|
+
:param liability_score: composite liabilities score (↓)
|
|
211
|
+
:param liability_high_severity_violations: count of high-severity liabilities (↓)
|
|
212
|
+
:param liability_num_violations: total liability count (↓)
|
|
213
|
+
:param helix: fraction helical content, 0–1
|
|
214
|
+
:param sheet: fraction β-sheet content, 0–1
|
|
215
|
+
:param loop: fraction loop/coil content, 0–1
|
|
216
|
+
:param design_largest_hydrophobic_patch_refolded: largest hydrophobic patch area after refolding, Ų
|
|
217
|
+
:param design_hydrophobicity: overall design hydrophobicity score (unitless)
|
|
218
|
+
:param num_tokens: sequence length / token count
|
|
219
|
+
"""
|
|
220
|
+
|
|
221
|
+
quality_score: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
222
|
+
num_filters_passed: int | None = None
|
|
223
|
+
|
|
224
|
+
iptm: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
225
|
+
design_ptm: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
226
|
+
min_design_to_target_pae: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
227
|
+
design_to_target_iptm: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
228
|
+
min_interaction_pae: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
229
|
+
|
|
230
|
+
bb_rmsd: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
231
|
+
delta_sasa_refolded: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
232
|
+
|
|
233
|
+
plip_hbonds_refolded: int | None = None
|
|
234
|
+
plip_saltbridge_refolded: int | None = None
|
|
235
|
+
|
|
236
|
+
liability_score: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
237
|
+
liability_high_severity_violations: int | None = None
|
|
238
|
+
liability_num_violations: int | None = None
|
|
239
|
+
|
|
240
|
+
helix: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
241
|
+
sheet: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
242
|
+
loop: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
243
|
+
design_largest_hydrophobic_patch_refolded: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
244
|
+
design_hydrophobicity: Annotated[float | None, AfterValidator(round_optional_float(3))] = None
|
|
245
|
+
num_tokens: int | None = None
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
class ProteinBinderDesignResult(Base):
|
|
249
|
+
"""
|
|
250
|
+
The output; a designed binder.
|
|
251
|
+
|
|
252
|
+
:param binder_sequence: the sequence of the designed binder
|
|
253
|
+
:param bound_structure: the PDB of the structure bound to the target
|
|
254
|
+
:param scores: the scores for the generated structure
|
|
255
|
+
"""
|
|
256
|
+
|
|
257
|
+
binder_sequence: str | None = None
|
|
258
|
+
bound_structure: ProteinUUID | None = None
|
|
259
|
+
scores: BoltzGenScores | None = None
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class BoltzGenProtocol(LowercaseStrEnum):
|
|
263
|
+
"""
|
|
264
|
+
The predefined protocol used for generation + filtering.
|
|
265
|
+
"""
|
|
266
|
+
|
|
267
|
+
PROTEIN_ANYTHING = "protein-anything"
|
|
268
|
+
PEPTIDE_ANYTHING = "peptide-anything"
|
|
269
|
+
PROTEIN_SMALL_MOLECULE = "protein-small_molecule"
|
|
270
|
+
NANOBODY_ANYTHING = "nanobody-anything"
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class BoltzGenSettings(Base):
|
|
274
|
+
"""
|
|
275
|
+
The settings for running BoltzGen.
|
|
276
|
+
|
|
277
|
+
:param num_designs: how many designs to generate
|
|
278
|
+
:param protocol: which protocol to use
|
|
279
|
+
:param binding_residue: a dict mapping the chain ID to which residues should bind.
|
|
280
|
+
the string follows the BoltzGen format of specifying ranges of residue indices (refer to their documentation).
|
|
281
|
+
examples include "5..7,13" or "5..15,50..".
|
|
282
|
+
"""
|
|
283
|
+
|
|
284
|
+
protocol: BoltzGenProtocol = BoltzGenProtocol.PROTEIN_ANYTHING
|
|
285
|
+
num_designs: int = 100
|
|
286
|
+
budget: int = 20
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class ProteinBinderDesignWorkflow(Workflow):
|
|
290
|
+
"""
|
|
291
|
+
A workflow for generating proteins or peptides that bind to something.
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
New:
|
|
295
|
+
:param binder_design_input: the input to the protein binder design workflow
|
|
296
|
+
:param binder_design_settings: the settings for the protein generation method employed
|
|
297
|
+
:param generated_binders: the output structures
|
|
298
|
+
"""
|
|
299
|
+
|
|
300
|
+
binder_design_input: BoltzGenInput = BoltzGenInput()
|
|
301
|
+
binder_design_settings: BoltzGenSettings = BoltzGenSettings()
|
|
302
|
+
|
|
303
|
+
generated_binders: list[ProteinBinderDesignResult] = []
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Protein cofolding
|
|
1
|
+
"""Protein cofolding workflow."""
|
|
2
2
|
|
|
3
3
|
from typing import Annotated, Literal, TypeAlias
|
|
4
4
|
|
|
@@ -26,6 +26,7 @@ class Token(BaseModel):
|
|
|
26
26
|
input_type: Literal["ligand", "protein"]
|
|
27
27
|
input_index: int
|
|
28
28
|
token_index: int
|
|
29
|
+
atom_name: str | None = None
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
class ContactConstraint(BaseModel):
|
|
@@ -65,6 +65,7 @@ stjames/workflows/multistage_opt.py
|
|
|
65
65
|
stjames/workflows/nmr.py
|
|
66
66
|
stjames/workflows/pka.py
|
|
67
67
|
stjames/workflows/pose_analysis_md.py
|
|
68
|
+
stjames/workflows/protein_binder_design.py
|
|
68
69
|
stjames/workflows/protein_cofolding.py
|
|
69
70
|
stjames/workflows/redox_potential.py
|
|
70
71
|
stjames/workflows/scan.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|