stjames 0.0.59__py3-none-any.whl → 0.0.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stjames might be problematic. Click here for more details.
- stjames/atomium_stjames/mmcif.py +216 -162
- stjames/atomium_stjames/utilities.py +58 -46
- stjames/molecule.py +46 -1
- stjames/types.py +10 -1
- stjames/workflows/__init__.py +5 -2
- stjames/workflows/admet.py +15 -4
- stjames/workflows/basic_calculation.py +17 -2
- stjames/workflows/bde.py +5 -5
- stjames/workflows/conformer.py +4 -2
- stjames/workflows/conformer_search.py +3 -3
- stjames/workflows/descriptors.py +16 -3
- stjames/workflows/docking.py +26 -11
- stjames/workflows/electronic_properties.py +5 -2
- stjames/workflows/fukui.py +19 -2
- stjames/workflows/hydrogen_bond_basicity.py +29 -5
- stjames/workflows/irc.py +4 -2
- stjames/workflows/molecular_dynamics.py +28 -3
- stjames/workflows/multistage_opt.py +5 -3
- stjames/workflows/pka.py +36 -2
- stjames/workflows/redox_potential.py +4 -2
- stjames/workflows/scan.py +37 -2
- stjames/workflows/solubility.py +60 -0
- stjames/workflows/spin_states.py +4 -2
- stjames/workflows/tautomer.py +24 -2
- stjames/workflows/workflow.py +27 -4
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/METADATA +2 -3
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/RECORD +30 -29
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/LICENSE +0 -0
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/WHEEL +0 -0
- {stjames-0.0.59.dist-info → stjames-0.0.62.dist-info}/top_level.txt +0 -0
|
@@ -2,17 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
import builtins
|
|
4
4
|
import gzip
|
|
5
|
-
from
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Callable
|
|
6
7
|
|
|
7
|
-
from requests import get # type: ignore
|
|
8
|
+
from requests import get # type: ignore[import-untyped]
|
|
8
9
|
|
|
9
10
|
from .mmcif import mmcif_dict_to_data_dict, mmcif_string_to_mmcif_dict
|
|
10
11
|
from .pdb import pdb_dict_to_data_dict, pdb_string_to_pdb_dict
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
def open(path: str,
|
|
14
|
-
"""
|
|
15
|
-
it
|
|
14
|
+
def open(path: Path | str, file_dict: bool = False, data_dict: bool = False) -> dict[str, Any]:
|
|
15
|
+
"""
|
|
16
|
+
Opens a file at a given path, works out what filetype it is, and parses it
|
|
17
|
+
accordingly.
|
|
16
18
|
|
|
17
19
|
For example:
|
|
18
20
|
open('/path/to/file.pdb', data_dict=True)
|
|
@@ -26,26 +28,29 @@ def open(path: str, *args, **kwargs) -> Any: # type: ignore [no-untyped-def]
|
|
|
26
28
|
:param bool file_dict: if ``True``, parsing will stop at the file ``dict``.
|
|
27
29
|
:param bool data_dict: if ``True``, parsing will stop at the data ``dict``.
|
|
28
30
|
:rtype: ``File``"""
|
|
31
|
+
path = Path(path)
|
|
29
32
|
|
|
30
|
-
if
|
|
33
|
+
if path.suffix == ".gz":
|
|
31
34
|
try:
|
|
32
35
|
with gzip.open(path) as f:
|
|
33
36
|
filestring = f.read().decode()
|
|
34
37
|
except Exception:
|
|
35
38
|
with gzip.open(path, "rt") as f:
|
|
36
39
|
filestring = f.read()
|
|
37
|
-
return parse_string(filestring, path[:-3], *args, **kwargs)
|
|
38
|
-
else:
|
|
39
|
-
try:
|
|
40
|
-
with builtins.open(path) as f:
|
|
41
|
-
filestring = f.read()
|
|
42
|
-
except Exception:
|
|
43
|
-
with builtins.open(path, "rb") as f:
|
|
44
|
-
filestring = f.read() # type: ignore [assignment]
|
|
45
|
-
return parse_string(filestring, path, *args, **kwargs)
|
|
46
40
|
|
|
41
|
+
return parse_string(filestring, path.suffix, file_dict=file_dict, data_dict=data_dict)
|
|
42
|
+
|
|
43
|
+
try:
|
|
44
|
+
with builtins.open(path) as f:
|
|
45
|
+
filestring = f.read()
|
|
46
|
+
except Exception:
|
|
47
|
+
with builtins.open(path, "rb") as f:
|
|
48
|
+
filestring = f.read() # type: ignore [assignment]
|
|
47
49
|
|
|
48
|
-
|
|
50
|
+
return parse_string(filestring, path, file_dict=file_dict, data_dict=data_dict)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def fetch(code: str, file_dict: bool = False, data_dict: bool = False) -> dict[str, Any]:
|
|
49
54
|
"""Fetches a file from a remote location via HTTP.
|
|
50
55
|
|
|
51
56
|
If a PDB code is given, the .cif form of that structure will be fetched from
|
|
@@ -59,11 +64,11 @@ def fetch(code: str, *args, **kwargs) -> Any: # type: ignore [no-untyped-def]
|
|
|
59
64
|
This will get the .mmtf version of structure 1LOL, but only go as far as
|
|
60
65
|
converting it to an atomium file dictionary.
|
|
61
66
|
|
|
62
|
-
:param
|
|
63
|
-
:param
|
|
64
|
-
:param
|
|
65
|
-
:raises ValueError: if no file is found
|
|
66
|
-
|
|
67
|
+
:param code: the file to fetch.
|
|
68
|
+
:param file_dict: if ``True``, parsing will stop at the file ``dict``
|
|
69
|
+
:param data_dict: if ``True``, parsing will stop at the data ``dict``
|
|
70
|
+
:raises ValueError: if no file is found
|
|
71
|
+
"""
|
|
67
72
|
|
|
68
73
|
if code.startswith("http"):
|
|
69
74
|
url = code
|
|
@@ -76,50 +81,57 @@ def fetch(code: str, *args, **kwargs) -> Any: # type: ignore [no-untyped-def]
|
|
|
76
81
|
response = get(url, stream=True)
|
|
77
82
|
if response.status_code == 200:
|
|
78
83
|
text = response.content if code.endswith(".mmtf") else response.text
|
|
79
|
-
return parse_string(text, code,
|
|
80
|
-
raise ValueError("Could not find anything at {}".format(url))
|
|
84
|
+
return parse_string(text, code, file_dict=file_dict, data_dict=data_dict)
|
|
81
85
|
|
|
86
|
+
raise ValueError(f"Could not find anything at {url}")
|
|
82
87
|
|
|
83
|
-
|
|
84
|
-
|
|
88
|
+
|
|
89
|
+
def parse_string(filestring: Any, path: Path | str, file_dict: bool = False, data_dict: bool = False) -> dict[str, Any]:
|
|
90
|
+
"""
|
|
91
|
+
Takes a filestring and parses it in the appropriate way. You must provide
|
|
85
92
|
the string to parse itself, and some other string that ends in either .cif,
|
|
86
93
|
.mmtf, or .pdb - that will determine how the file is parsed.
|
|
87
94
|
|
|
88
95
|
(If this cannot be inferred from the path string, atomium will guess based
|
|
89
96
|
on the filestring contents.)
|
|
90
97
|
|
|
91
|
-
:param
|
|
92
|
-
:param
|
|
93
|
-
:param
|
|
94
|
-
:param
|
|
95
|
-
:
|
|
98
|
+
:param filestring: contents of some file
|
|
99
|
+
:param path: filename of the file of origin
|
|
100
|
+
:param file_dict: if ``True``, parsing will stop at the file ``dict``
|
|
101
|
+
:param data_dict: if ``True``, parsing will stop at the data ``dict``
|
|
102
|
+
:return: File
|
|
103
|
+
"""
|
|
96
104
|
|
|
97
105
|
file_func, data_func = get_parse_functions(filestring, path)
|
|
98
106
|
parsed = file_func(filestring)
|
|
107
|
+
|
|
99
108
|
if not file_dict:
|
|
100
109
|
parsed = data_func(parsed)
|
|
110
|
+
|
|
101
111
|
return parsed
|
|
102
112
|
|
|
103
113
|
|
|
104
|
-
def get_parse_functions(filestring: str, path: str) -> Any:
|
|
105
|
-
"""
|
|
106
|
-
|
|
114
|
+
def get_parse_functions(filestring: str, path: Path | str) -> tuple[Callable[[str], dict[str, Any]], Callable[[dict[str, Any]], dict[str, Any]]]:
|
|
115
|
+
"""
|
|
116
|
+
Determines the parsing functions to use for a given filestring and path.
|
|
107
117
|
|
|
108
118
|
(If this cannot be inferred from the path string, atomium will guess based
|
|
109
119
|
on the filestring contents.)
|
|
110
120
|
|
|
111
|
-
:param
|
|
112
|
-
:param
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
121
|
+
:param filestring: the filestring to inspect
|
|
122
|
+
:param path: the path to inspect
|
|
123
|
+
"""
|
|
124
|
+
path = Path(path)
|
|
125
|
+
|
|
126
|
+
funcs = {
|
|
127
|
+
".mmtf": (mmcif_string_to_mmcif_dict, mmcif_dict_to_data_dict),
|
|
128
|
+
".cif": (mmcif_string_to_mmcif_dict, mmcif_dict_to_data_dict),
|
|
129
|
+
".pdb": (pdb_string_to_pdb_dict, pdb_dict_to_data_dict),
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
if path.suffix:
|
|
133
|
+
return funcs.get(path.suffix, (pdb_string_to_pdb_dict, pdb_dict_to_data_dict))
|
|
122
134
|
elif "_atom_sites" in filestring:
|
|
123
135
|
return (mmcif_string_to_mmcif_dict, mmcif_dict_to_data_dict)
|
|
124
|
-
|
|
125
|
-
|
|
136
|
+
|
|
137
|
+
return (pdb_string_to_pdb_dict, pdb_dict_to_data_dict)
|
stjames/molecule.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Annotated, Iterable, Optional, Self
|
|
3
|
+
from typing import Annotated, Iterable, Optional, Self, TypeAlias
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from pydantic import AfterValidator, NonNegativeInt, PositiveInt, ValidationError
|
|
7
|
+
from rdkit import Chem
|
|
8
|
+
from rdkit.Chem import AllChem
|
|
7
9
|
|
|
8
10
|
from .atom import Atom
|
|
9
11
|
from .base import Base, round_float, round_optional_float
|
|
@@ -20,6 +22,8 @@ from .types import (
|
|
|
20
22
|
round_vector3d_per_atom,
|
|
21
23
|
)
|
|
22
24
|
|
|
25
|
+
RdkitMol: TypeAlias = Chem.rdchem.Mol | Chem.rdchem.RWMol
|
|
26
|
+
|
|
23
27
|
|
|
24
28
|
class MoleculeReadError(RuntimeError):
|
|
25
29
|
pass
|
|
@@ -263,6 +267,47 @@ class Molecule(Base):
|
|
|
263
267
|
except (ValueError, ValidationError) as e:
|
|
264
268
|
raise MoleculeReadError("Error reading molecule from extxyz") from e
|
|
265
269
|
|
|
270
|
+
@classmethod
|
|
271
|
+
def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0) -> Self:
|
|
272
|
+
if len(rdkm.GetConformers()) == 0:
|
|
273
|
+
rdkm = _embed_rdkit_mol(rdkm)
|
|
274
|
+
|
|
275
|
+
atoms = []
|
|
276
|
+
atomic_numbers = [atom.GetAtomicNum() for atom in rdkm.GetAtoms()] # type: ignore [no-untyped-call]
|
|
277
|
+
geom = rdkm.GetConformers()[cid].GetPositions()
|
|
278
|
+
|
|
279
|
+
for i in range(len(atomic_numbers)):
|
|
280
|
+
atoms.append(Atom(atomic_number=atomic_numbers[i], position=geom[i]))
|
|
281
|
+
|
|
282
|
+
charge = Chem.GetFormalCharge(rdkm)
|
|
283
|
+
multiplicity = 1
|
|
284
|
+
|
|
285
|
+
return cls(atoms=atoms, charge=charge, multiplicity=multiplicity)
|
|
286
|
+
|
|
287
|
+
@classmethod
|
|
288
|
+
def from_smiles(cls: type[Self], smiles: str) -> Self:
|
|
289
|
+
return cls.from_rdkit(Chem.MolFromSmiles(smiles))
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _embed_rdkit_mol(rdkm: RdkitMol) -> RdkitMol:
|
|
293
|
+
try:
|
|
294
|
+
AllChem.SanitizeMol(rdkm) # type: ignore [attr-defined]
|
|
295
|
+
except Exception as e:
|
|
296
|
+
raise ValueError("Molecule could not be generated -- invalid chemistry!\n") from e
|
|
297
|
+
|
|
298
|
+
rdkm = AllChem.AddHs(rdkm) # type: ignore [attr-defined]
|
|
299
|
+
try:
|
|
300
|
+
status1 = AllChem.EmbedMolecule(rdkm, maxAttempts=200) # type: ignore [attr-defined]
|
|
301
|
+
assert status1 >= 0
|
|
302
|
+
except Exception as e:
|
|
303
|
+
status1 = AllChem.EmbedMolecule(rdkm, maxAttempts=200, useRandomCoords=True) # type: ignore [attr-defined]
|
|
304
|
+
if status1 < 0:
|
|
305
|
+
raise ValueError(f"Cannot embed molecule! Error: {e}")
|
|
306
|
+
|
|
307
|
+
AllChem.MMFFOptimizeMolecule(rdkm, maxIters=200) # type: ignore [attr-defined]
|
|
308
|
+
|
|
309
|
+
return rdkm
|
|
310
|
+
|
|
266
311
|
|
|
267
312
|
def parse_comment_line(line: str) -> PeriodicCell:
|
|
268
313
|
"""
|
stjames/types.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Callable, TypeAlias
|
|
1
|
+
from typing import Callable, Iterable, TypeAlias
|
|
2
2
|
|
|
3
3
|
UUID: TypeAlias = str
|
|
4
4
|
|
|
@@ -10,6 +10,15 @@ FloatPerAtom: TypeAlias = list[float]
|
|
|
10
10
|
Matrix3x3: TypeAlias = tuple[Vector3D, Vector3D, Vector3D]
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
def round_list(round_to: int = 6) -> Callable[[Iterable[float]], list[float]]:
|
|
14
|
+
"""Create a validator that rounds each float in a list to a given number of decimal places."""
|
|
15
|
+
|
|
16
|
+
def rounder(values: Iterable[float]) -> list[float]:
|
|
17
|
+
return [round(value, round_to) for value in values]
|
|
18
|
+
|
|
19
|
+
return rounder
|
|
20
|
+
|
|
21
|
+
|
|
13
22
|
def round_vector3d(round_to: int = 6) -> Callable[[Vector3D], Vector3D]:
|
|
14
23
|
"""Create a validator that rounds each component of a Vector3D to a given number of decimal places."""
|
|
15
24
|
|
stjames/workflows/__init__.py
CHANGED
|
@@ -18,9 +18,10 @@ from .multistage_opt import *
|
|
|
18
18
|
from .pka import *
|
|
19
19
|
from .redox_potential import *
|
|
20
20
|
from .scan import *
|
|
21
|
+
from .solubility import *
|
|
21
22
|
from .spin_states import *
|
|
22
23
|
from .tautomer import *
|
|
23
|
-
from .workflow import
|
|
24
|
+
from .workflow import *
|
|
24
25
|
|
|
25
26
|
WORKFLOW_NAME = Literal[
|
|
26
27
|
"admet",
|
|
@@ -39,11 +40,12 @@ WORKFLOW_NAME = Literal[
|
|
|
39
40
|
"pka",
|
|
40
41
|
"redox_potential",
|
|
41
42
|
"scan",
|
|
43
|
+
"solubility",
|
|
42
44
|
"spin_states",
|
|
43
45
|
"tautomers",
|
|
44
46
|
]
|
|
45
47
|
|
|
46
|
-
WORKFLOW_MAPPING: dict[
|
|
48
|
+
WORKFLOW_MAPPING: dict[WORKFLOW_NAME, Workflow] = {
|
|
47
49
|
"admet": ADMETWorkflow, # type: ignore [dict-item]
|
|
48
50
|
"basic_calculation": BasicCalculationWorkflow, # type: ignore [dict-item]
|
|
49
51
|
"bde": BDEWorkflow, # type: ignore [dict-item]
|
|
@@ -60,6 +62,7 @@ WORKFLOW_MAPPING: dict[str, Workflow] = {
|
|
|
60
62
|
"pka": pKaWorkflow, # type: ignore [dict-item]
|
|
61
63
|
"redox_potential": RedoxPotentialWorkflow, # type: ignore [dict-item]
|
|
62
64
|
"scan": ScanWorkflow, # type: ignore [dict-item]
|
|
65
|
+
"solubility": SolubilityWorkflow, # type: ignore [dict-item]
|
|
63
66
|
"spin_states": SpinStatesWorkflow, # type: ignore [dict-item]
|
|
64
67
|
"tautomers": TautomerWorkflow, # type: ignore [dict-item]
|
|
65
68
|
}
|
stjames/workflows/admet.py
CHANGED
|
@@ -1,7 +1,18 @@
|
|
|
1
|
-
|
|
1
|
+
"""ADME-Tox property prediction workflow."""
|
|
2
2
|
|
|
3
|
-
from .workflow import
|
|
3
|
+
from .workflow import MoleculeWorkflow
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class ADMETWorkflow(
|
|
7
|
-
|
|
6
|
+
class ADMETWorkflow(MoleculeWorkflow):
|
|
7
|
+
"""
|
|
8
|
+
A workflow for predicting ADME-Tox properties.
|
|
9
|
+
|
|
10
|
+
Inherited:
|
|
11
|
+
:param initial_molecule: Molecule of interest
|
|
12
|
+
:param mode: Mode for workflow (currently unused)
|
|
13
|
+
|
|
14
|
+
New:
|
|
15
|
+
:param properties: predicted properties
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
properties: dict[str, float | int] | None = None
|
|
@@ -1,9 +1,24 @@
|
|
|
1
|
+
"""Basic calculation workflow."""
|
|
2
|
+
|
|
1
3
|
from ..settings import Settings
|
|
2
4
|
from ..types import UUID
|
|
3
|
-
from .workflow import
|
|
5
|
+
from .workflow import MoleculeWorkflow
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BasicCalculationWorkflow(MoleculeWorkflow):
|
|
9
|
+
"""
|
|
10
|
+
Workflow for a basic calculation.
|
|
11
|
+
|
|
12
|
+
Inherited:
|
|
13
|
+
:param initial_molecule: Molecule of interest
|
|
14
|
+
:param mode: Mode for workflow
|
|
4
15
|
|
|
16
|
+
New:
|
|
17
|
+
:param settings: Settings for running the calculation
|
|
18
|
+
:param engine: Engine to use
|
|
19
|
+
:param calculation_uuid: UUID of the calculation
|
|
20
|
+
"""
|
|
5
21
|
|
|
6
|
-
class BasicCalculationWorkflow(Workflow):
|
|
7
22
|
settings: Settings
|
|
8
23
|
engine: str
|
|
9
24
|
calculation_uuid: UUID | None = None
|
stjames/workflows/bde.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Bond
|
|
1
|
+
"""Bond-dissociation energy (BDE) workflow."""
|
|
2
2
|
|
|
3
3
|
import itertools
|
|
4
4
|
from typing import Annotated, Any, Iterable, Self, TypeVar
|
|
@@ -10,7 +10,7 @@ from ..mode import Mode
|
|
|
10
10
|
from ..molecule import Molecule
|
|
11
11
|
from ..types import UUID
|
|
12
12
|
from .multistage_opt import MultiStageOptMixin
|
|
13
|
-
from .workflow import
|
|
13
|
+
from .workflow import MoleculeWorkflow
|
|
14
14
|
|
|
15
15
|
# the id of a mutable object may change, thus using object()
|
|
16
16
|
_sentinel_mso_mode = object()
|
|
@@ -19,7 +19,7 @@ _T = TypeVar("_T")
|
|
|
19
19
|
|
|
20
20
|
class BDE(BaseModel):
|
|
21
21
|
"""
|
|
22
|
-
Bond
|
|
22
|
+
Bond-dissociation energy (BDE) result.
|
|
23
23
|
|
|
24
24
|
energy => (E_{fragment1} + E_{fragment2}) - E_{starting molecule}
|
|
25
25
|
|
|
@@ -49,9 +49,9 @@ class BDE(BaseModel):
|
|
|
49
49
|
return f"<{type(self).__name__} {self.fragment_idxs} {energy}>"
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
class BDEWorkflow(
|
|
52
|
+
class BDEWorkflow(MoleculeWorkflow, MultiStageOptMixin):
|
|
53
53
|
"""
|
|
54
|
-
Bond
|
|
54
|
+
Bond-dissociation energy (BDE) workflow.
|
|
55
55
|
|
|
56
56
|
Uses the modes from MultiStageOptSettings to compute BDEs.
|
|
57
57
|
|
stjames/workflows/conformer.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Deprecated conformer search workflow, use ConformerSearchWorkflow instead."""
|
|
2
|
+
|
|
1
3
|
from typing import Annotated, Any, Optional
|
|
2
4
|
|
|
3
5
|
from pydantic import AfterValidator
|
|
@@ -7,7 +9,7 @@ from ..constraint import Constraint
|
|
|
7
9
|
from ..method import Method
|
|
8
10
|
from ..mode import Mode
|
|
9
11
|
from ..solvent import Solvent
|
|
10
|
-
from .workflow import
|
|
12
|
+
from .workflow import MoleculeWorkflow
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class ConformerSettings(Base):
|
|
@@ -42,7 +44,7 @@ class Conformer(Base):
|
|
|
42
44
|
uuid: Optional[str] = None
|
|
43
45
|
|
|
44
46
|
|
|
45
|
-
class ConformerWorkflow(
|
|
47
|
+
class ConformerWorkflow(MoleculeWorkflow):
|
|
46
48
|
mode: Mode = Mode.RAPID
|
|
47
49
|
settings: ConformerSettings = ConformerSettings()
|
|
48
50
|
conformers: list[Conformer] = []
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Conformer
|
|
1
|
+
"""Conformer search workflow."""
|
|
2
2
|
|
|
3
3
|
from abc import ABC
|
|
4
4
|
from typing import Annotated, Self, Sequence, TypeVar
|
|
@@ -11,7 +11,7 @@ from ..method import Method, XTBMethod
|
|
|
11
11
|
from ..mode import Mode
|
|
12
12
|
from ..types import UUID, FloatPerAtom, round_float_per_atom
|
|
13
13
|
from .multistage_opt import MultiStageOptMixin
|
|
14
|
-
from .workflow import
|
|
14
|
+
from .workflow import MoleculeWorkflow
|
|
15
15
|
|
|
16
16
|
_sentinel = object()
|
|
17
17
|
|
|
@@ -338,7 +338,7 @@ class ConformerSearchMixin(ConformerGenMixin, MultiStageOptMixin):
|
|
|
338
338
|
return self
|
|
339
339
|
|
|
340
340
|
|
|
341
|
-
class ConformerSearchWorkflow(ConformerSearchMixin,
|
|
341
|
+
class ConformerSearchWorkflow(ConformerSearchMixin, MoleculeWorkflow):
|
|
342
342
|
"""
|
|
343
343
|
ConformerSearch Workflow.
|
|
344
344
|
|
stjames/workflows/descriptors.py
CHANGED
|
@@ -1,11 +1,24 @@
|
|
|
1
|
+
"""Molecular descriptors workflow."""
|
|
2
|
+
|
|
1
3
|
from ..types import UUID
|
|
2
|
-
from .workflow import
|
|
4
|
+
from .workflow import MoleculeWorkflow
|
|
3
5
|
|
|
4
6
|
Descriptors = dict[str, dict[str, float] | tuple[float | None, ...] | float]
|
|
5
7
|
|
|
6
8
|
|
|
7
|
-
class DescriptorsWorkflow(
|
|
8
|
-
|
|
9
|
+
class DescriptorsWorkflow(MoleculeWorkflow):
|
|
10
|
+
"""
|
|
11
|
+
A workflow for calculating molecular descriptors.
|
|
12
|
+
|
|
13
|
+
Inherited:
|
|
14
|
+
:param initial_molecule: Molecule of interest
|
|
15
|
+
:param mode: Mode for workflow
|
|
16
|
+
|
|
17
|
+
New:
|
|
18
|
+
:param optimization: UUID of optimization
|
|
19
|
+
:param descriptors: calculated descriptors
|
|
20
|
+
"""
|
|
21
|
+
|
|
9
22
|
optimization: UUID | None = None
|
|
10
23
|
|
|
11
24
|
descriptors: Descriptors | None = None
|
stjames/workflows/docking.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
"""Docking workflow."""
|
|
2
2
|
|
|
3
|
-
from typing import Annotated
|
|
3
|
+
from typing import Annotated, Self
|
|
4
4
|
|
|
5
|
-
from pydantic import AfterValidator, ConfigDict, field_validator
|
|
5
|
+
from pydantic import AfterValidator, ConfigDict, field_validator, model_validator
|
|
6
6
|
|
|
7
7
|
from ..base import Base, round_float
|
|
8
8
|
from ..pdb import PDB
|
|
9
9
|
from ..types import UUID, Vector3D
|
|
10
|
-
from .workflow import
|
|
10
|
+
from .workflow import MoleculeWorkflow
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class Score(Base):
|
|
@@ -22,10 +22,14 @@ class Score(Base):
|
|
|
22
22
|
score: Annotated[float, AfterValidator(round_float(3))]
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
class DockingWorkflow(
|
|
25
|
+
class DockingWorkflow(MoleculeWorkflow):
|
|
26
26
|
"""
|
|
27
27
|
Docking workflow.
|
|
28
28
|
|
|
29
|
+
Note that the protein can be supplied either by UUID or raw PDB object.
|
|
30
|
+
We anticipate that the former will dominate deployed usage, but the latter is handy for isolated testing.
|
|
31
|
+
If, for whatever reason, the workflow is initialized with both a `target_uuid` and a `target`, the UUID will be ignored.
|
|
32
|
+
|
|
29
33
|
Inherited:
|
|
30
34
|
:param initial_molecule: Molecule of interest
|
|
31
35
|
:param mode: Mode for workflow (currently unused)
|
|
@@ -36,7 +40,8 @@ class DockingWorkflow(Workflow):
|
|
|
36
40
|
:param do_csearch: whether to csearch starting structures
|
|
37
41
|
:param do_optimization: whether to optimize starting structures
|
|
38
42
|
:param conformers: UUIDs of optimized conformers
|
|
39
|
-
:param target: PDB of the protein
|
|
43
|
+
:param target: PDB of the protein.
|
|
44
|
+
:param target_uuid: UUID of the protein.
|
|
40
45
|
:param pocket: center (x, y, z) and size (x, y, z) of the pocket
|
|
41
46
|
|
|
42
47
|
Results:
|
|
@@ -49,7 +54,8 @@ class DockingWorkflow(Workflow):
|
|
|
49
54
|
do_optimization: bool = True
|
|
50
55
|
conformers: list[UUID] = []
|
|
51
56
|
|
|
52
|
-
target: PDB
|
|
57
|
+
target: PDB | None = None
|
|
58
|
+
target_uuid: UUID | None = None
|
|
53
59
|
pocket: tuple[Vector3D, Vector3D]
|
|
54
60
|
|
|
55
61
|
do_pose_hydrogen_refinement: bool = True
|
|
@@ -60,16 +66,25 @@ class DockingWorkflow(Workflow):
|
|
|
60
66
|
|
|
61
67
|
def __repr__(self) -> str:
|
|
62
68
|
"""Return a string representation of the Docking workflow."""
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
69
|
+
if self.target is not None:
|
|
70
|
+
desc = self.target.description
|
|
71
|
+
target = desc.code or desc.title
|
|
72
|
+
else:
|
|
73
|
+
target = ""
|
|
66
74
|
|
|
75
|
+
ligand = "".join(atom.atomic_symbol for atom in self.initial_molecule.atoms)
|
|
67
76
|
return f"<{type(self).__name__} {target} {ligand}>"
|
|
68
77
|
|
|
78
|
+
@model_validator(mode="after")
|
|
79
|
+
def check_protein(self) -> Self:
|
|
80
|
+
"""Check if protein is provided."""
|
|
81
|
+
if not self.target and not self.target_uuid:
|
|
82
|
+
raise ValueError("Must provide either molecules or smiles")
|
|
83
|
+
return self
|
|
84
|
+
|
|
69
85
|
@field_validator("pocket", mode="after")
|
|
70
86
|
def validate_pocket(cls, pocket: tuple[Vector3D, Vector3D]) -> tuple[Vector3D, Vector3D]:
|
|
71
|
-
|
|
87
|
+
_center, size = pocket
|
|
72
88
|
if any(q <= 0 for q in size):
|
|
73
89
|
raise ValueError(f"Pocket size must be positive, got: {size}")
|
|
74
|
-
|
|
75
90
|
return pocket
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Electronic properties workflow."""
|
|
2
|
+
|
|
1
3
|
from typing import Annotated
|
|
2
4
|
|
|
3
5
|
from pydantic import AfterValidator, NonNegativeFloat, NonNegativeInt
|
|
@@ -5,7 +7,7 @@ from pydantic import AfterValidator, NonNegativeFloat, NonNegativeInt
|
|
|
5
7
|
from ..base import Base, round_float
|
|
6
8
|
from ..settings import Settings
|
|
7
9
|
from ..types import UUID, FloatPerAtom, Matrix3x3, Vector3D, round_optional_float_per_atom, round_optional_matrix3x3, round_optional_vector3d
|
|
8
|
-
from .workflow import
|
|
10
|
+
from .workflow import MoleculeWorkflow
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
class PropertyCubePoint(Base):
|
|
@@ -34,12 +36,13 @@ class MolecularOrbitalCube(PropertyCube):
|
|
|
34
36
|
energy: Annotated[float, AfterValidator(round_float(6))]
|
|
35
37
|
|
|
36
38
|
|
|
37
|
-
class ElectronicPropertiesWorkflow(
|
|
39
|
+
class ElectronicPropertiesWorkflow(MoleculeWorkflow):
|
|
38
40
|
"""
|
|
39
41
|
Workflow for computing electronic properties.
|
|
40
42
|
|
|
41
43
|
Inherited
|
|
42
44
|
:param initial_molecule: Molecule of interest
|
|
45
|
+
:param mode: Mode for workflow (currently unused)
|
|
43
46
|
|
|
44
47
|
Config settings:
|
|
45
48
|
:param settings: settings for the calculation
|
stjames/workflows/fukui.py
CHANGED
|
@@ -1,13 +1,30 @@
|
|
|
1
|
+
"""Fukui index workflow."""
|
|
2
|
+
|
|
1
3
|
from typing import Annotated
|
|
2
4
|
|
|
3
5
|
from pydantic import AfterValidator
|
|
4
6
|
|
|
5
7
|
from ..base import round_optional_float
|
|
6
8
|
from ..types import UUID, FloatPerAtom, round_optional_float_per_atom
|
|
7
|
-
from .workflow import
|
|
9
|
+
from .workflow import MoleculeWorkflow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FukuiIndexWorkflow(MoleculeWorkflow):
|
|
13
|
+
"""
|
|
14
|
+
Workflow for calculating Fukui indices.
|
|
15
|
+
|
|
16
|
+
Inherited:
|
|
17
|
+
:param initial_molecule: Molecule of interest
|
|
18
|
+
:param mode: Mode for workflow (currently unused)
|
|
8
19
|
|
|
20
|
+
Results:
|
|
21
|
+
:param optimization: UUID of optimization
|
|
22
|
+
:param global_electrophilicity_index: global electrophilicity index
|
|
23
|
+
:param fukui_positive: Fukui index for positive charges
|
|
24
|
+
:param fukui_negative: Fukui index for negative charges
|
|
25
|
+
:param fukui_zero: Fukui index for zero charges
|
|
26
|
+
"""
|
|
9
27
|
|
|
10
|
-
class FukuiIndexWorkflow(Workflow):
|
|
11
28
|
# UUID of optimization
|
|
12
29
|
optimization: UUID | None = None
|
|
13
30
|
|
|
@@ -1,21 +1,45 @@
|
|
|
1
|
+
"""Hydrogen-bond-basicity workflow."""
|
|
2
|
+
|
|
1
3
|
from ..base import Base
|
|
2
4
|
from ..types import UUID
|
|
3
|
-
from .workflow import
|
|
5
|
+
from .workflow import MoleculeWorkflow
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
class HydrogenBondAcceptorSite(Base):
|
|
9
|
+
"""
|
|
10
|
+
A hydrogen bond acceptor site.
|
|
11
|
+
|
|
12
|
+
:param atom_idx: index of the atom
|
|
13
|
+
:param pkbhx: Hydrogen bond basicity
|
|
14
|
+
:param position: position of the atom
|
|
15
|
+
:param name: name of the atom
|
|
16
|
+
"""
|
|
17
|
+
|
|
7
18
|
atom_idx: int # zero-indexed
|
|
8
19
|
pkbhx: float
|
|
9
20
|
position: tuple[float, float, float]
|
|
10
21
|
name: str | None = None
|
|
11
22
|
|
|
12
23
|
|
|
13
|
-
class HydrogenBondBasicityWorkflow(
|
|
24
|
+
class HydrogenBondBasicityWorkflow(MoleculeWorkflow):
|
|
25
|
+
"""
|
|
26
|
+
Workflow for calculating hydrogen bond basicity.
|
|
27
|
+
|
|
28
|
+
Inherited:
|
|
29
|
+
:param initial_molecule: Molecule of interest
|
|
30
|
+
:param mode: Mode for workflow (currently unused)
|
|
31
|
+
|
|
32
|
+
New:
|
|
33
|
+
:param do_csearch: whether to perform a conformational search
|
|
34
|
+
:param do_optimization: whether to perform an optimization
|
|
35
|
+
|
|
36
|
+
Results:
|
|
37
|
+
:param optimization: UUID of optimization
|
|
38
|
+
:param hba_sites: hydrogen-bond-acceptor sites
|
|
39
|
+
"""
|
|
40
|
+
|
|
14
41
|
do_csearch: bool = True
|
|
15
42
|
do_optimization: bool = True
|
|
16
43
|
|
|
17
|
-
# UUID of optimization
|
|
18
44
|
optimization: UUID | None = None
|
|
19
|
-
|
|
20
|
-
# hydrogen-bond-acceptor sites
|
|
21
45
|
hba_sites: list[HydrogenBondAcceptorSite] = [] # noqa: RUF012
|
stjames/workflows/irc.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Intrinsic reaction coordinate (IRC) workflow."""
|
|
2
|
+
|
|
1
3
|
from typing import Self
|
|
2
4
|
|
|
3
5
|
from pydantic import Field, PositiveFloat, field_validator, model_validator
|
|
@@ -7,12 +9,12 @@ from ..mode import Mode
|
|
|
7
9
|
from ..settings import Settings
|
|
8
10
|
from ..solvent import Solvent, SolventModel, SolventSettings
|
|
9
11
|
from ..types import UUID
|
|
10
|
-
from .workflow import
|
|
12
|
+
from .workflow import MoleculeWorkflow
|
|
11
13
|
|
|
12
14
|
_sentinel_settings: Settings = object() # type: ignore [assignment]
|
|
13
15
|
|
|
14
16
|
|
|
15
|
-
class IRCWorkflow(
|
|
17
|
+
class IRCWorkflow(MoleculeWorkflow):
|
|
16
18
|
"""
|
|
17
19
|
Workflow for Intrinsic Reaction Coordinate (IRC) calculations.
|
|
18
20
|
|