stjames 0.0.121__py3-none-any.whl → 0.0.123__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stjames might be problematic. Click here for more details.

@@ -18,6 +18,7 @@ from .ion_mobility import *
18
18
  from .irc import *
19
19
  from .macropka import *
20
20
  from .molecular_dynamics import *
21
+ from .msa import *
21
22
  from .multistage_opt import *
22
23
  from .nmr import *
23
24
  from .pka import *
@@ -49,6 +50,7 @@ WORKFLOW_NAME = Literal[
49
50
  "irc",
50
51
  "macropka",
51
52
  "molecular_dynamics",
53
+ "msa",
52
54
  "multistage_opt",
53
55
  "nmr",
54
56
  "pka",
@@ -81,6 +83,7 @@ WORKFLOW_MAPPING: dict[WORKFLOW_NAME, Workflow] = {
81
83
  "macropka": MacropKaWorkflow, # type: ignore [dict-item]
82
84
  "molecular_dynamics": MolecularDynamicsWorkflow, # type: ignore [dict-item]
83
85
  "multistage_opt": MultiStageOptWorkflow, # type: ignore [dict-item]
86
+ "msa": MSAWorkflow, # type: ignore [dict-item]
84
87
  "nmr": NMRSpectroscopyWorkflow, # type: ignore [dict-item]
85
88
  "pka": pKaWorkflow, # type: ignore [dict-item]
86
89
  "pose_analysis_md": PoseAnalysisMolecularDynamicsWorkflow, # type: ignore [dict-item]
@@ -3,9 +3,9 @@
3
3
  from abc import ABC
4
4
  from typing import Annotated, Literal, Self, Sequence, TypeVar
5
5
 
6
- from pydantic import AfterValidator, BaseModel, Field, field_validator, model_validator
6
+ from pydantic import AfterValidator, BaseModel, Field, PositiveInt, field_validator, model_validator
7
7
 
8
- from ..base import LowercaseStrEnum
8
+ from ..base import Base, LowercaseStrEnum
9
9
  from ..constraint import Constraint
10
10
  from ..method import Method, XTBMethod
11
11
  from ..mode import Mode
@@ -41,6 +41,50 @@ class ScreeningSettings(BaseModel):
41
41
  max_confs: int | None = None
42
42
 
43
43
 
44
+ class ConformerClusteringDescriptor(LowercaseStrEnum):
45
+ """
46
+ Potential descriptors to employ in conformer clustering.
47
+ """
48
+
49
+ SOLVENT_ACCESSIBLE_SURFACE_AREA = "solvent_accessible_surface_area"
50
+ POLAR_SOLVENT_ACCESSIBLE_SURACE_AREA = "polar_solvent_accessible_surface_area"
51
+ RADIUS_OF_GYRATION = "radius_of_gyration"
52
+ PLANE_OF_BEST_FIT = "plane_of_best_fit"
53
+ NORMALIZED_PRINCIPAL_MOMENT_RATIO_1 = "normalized_principal_moment_ratio_1"
54
+ NORMALIZED_PRINCIPAL_MOMENT_RATIO_2 = "normalized_principal_moment_ratio_2"
55
+
56
+
57
+ class ConformerClusteringSettings(Base):
58
+ """
59
+ Settings for clustering conformers based on their three-dimensional properties.
60
+
61
+ The properties used for clustering by default are:
62
+ - Solvent-accessible surface area
63
+ - Polar solvent-accessible surface area
64
+ - Radius of gyration
65
+ - Plane of best fit
66
+ - Normalized principal moment ratios 1 and 2
67
+
68
+ Rowan uses k-means clustering to identify representative conformers.
69
+ This loosely follows Wilcken and co-workers (10.1007/s10822-020-00337-7).
70
+
71
+ :param num_clusters: the number of clusters to include
72
+ :param conformers_per_cluster: the number of conformers to pick from each cluster
73
+ """
74
+
75
+ descriptors: list[ConformerClusteringDescriptor] = [
76
+ ConformerClusteringDescriptor.SOLVENT_ACCESSIBLE_SURFACE_AREA,
77
+ ConformerClusteringDescriptor.POLAR_SOLVENT_ACCESSIBLE_SURACE_AREA,
78
+ ConformerClusteringDescriptor.RADIUS_OF_GYRATION,
79
+ ConformerClusteringDescriptor.PLANE_OF_BEST_FIT,
80
+ ConformerClusteringDescriptor.NORMALIZED_PRINCIPAL_MOMENT_RATIO_1,
81
+ ConformerClusteringDescriptor.NORMALIZED_PRINCIPAL_MOMENT_RATIO_2,
82
+ ]
83
+
84
+ num_clusters: PositiveInt = 5
85
+ conformers_per_cluster: PositiveInt = 3
86
+
87
+
44
88
  class ConformerGenSettings(BaseModel):
45
89
  """
46
90
  Conformer generation settings.
@@ -302,6 +346,7 @@ class ConformerGenMixin(BaseModel):
302
346
  :param constraints: constraints to add
303
347
  :param nci: add a constraining potential for non-covalent interactions
304
348
  :param max_confs: maximum number of conformers to keep
349
+ :param conformer_clustering_settings: how to cluster the conformers (if at all)
305
350
  """
306
351
 
307
352
  conf_gen_mode: Mode = Mode.RAPID
@@ -310,6 +355,8 @@ class ConformerGenMixin(BaseModel):
310
355
  nci: bool = False
311
356
  max_confs: int | None = None
312
357
 
358
+ conformer_clustering_settings: ConformerClusteringSettings | None = None
359
+
313
360
  @model_validator(mode="after")
314
361
  def validate_and_build_conf_gen_settings(self) -> Self:
315
362
  """Validate and build the ConformerGenSettings."""
@@ -0,0 +1,13 @@
1
+ """DNA-related workflow data models."""
2
+
3
+ from ..base import Base
4
+
5
+
6
+ class DNASequence(Base):
7
+ """
8
+ DNA sequence metadata.
9
+
10
+ :param sequence: nucleotide string
11
+ """
12
+
13
+ sequence: str
@@ -0,0 +1,31 @@
1
+ from ..base import LowercaseStrEnum
2
+ from .workflow import ProteinSequenceWorkflow
3
+
4
+
5
+ class MSAFormat(LowercaseStrEnum):
6
+ """Format of the MSA."""
7
+
8
+ COLABFOLD_DEFAULT = "colabfold_default"
9
+ AF3_JSON = "af3_json"
10
+
11
+
12
+ class MSAWorkflow(ProteinSequenceWorkflow):
13
+ """
14
+ Workflow for generating a multiple sequence alignment (MSA) from protein sequences.
15
+
16
+ Inherited:
17
+ :param initial_protein_sequences: protein sequences of interest
18
+
19
+ New:
20
+ :param format: the format of the MSA return files
21
+
22
+ Results:
23
+ :param a3m_file: A3M file string
24
+ :param m8_file: M8 file string
25
+ :param af3_json_file: AF3 JSON file string
26
+ """
27
+
28
+ format: MSAFormat = MSAFormat.COLABFOLD_DEFAULT
29
+ a3m_file: str | None = None
30
+ m8_file: str | None = None
31
+ af3_json_file: str | None = None
@@ -0,0 +1,15 @@
1
+ """Protein-related workflow data models."""
2
+
3
+ from ..base import Base
4
+
5
+
6
+ class ProteinSequence(Base):
7
+ """
8
+ Protein sequence metadata including cyclic flag.
9
+
10
+ :param sequence: amino-acid sequence string
11
+ :param cyclic: whether this sequence forms a cyclic peptide (defaults to False)
12
+ """
13
+
14
+ sequence: str
15
+ cyclic: bool = False
@@ -68,10 +68,14 @@ class AffinityScore(BaseModel):
68
68
 
69
69
  class ProteinCofoldingWorkflow(FASTAWorkflow):
70
70
  """
71
- A workflow for predicting structures. Especially protein structures.
71
+ Workflow for predicting structures.
72
+
73
+ Primarily intended for protein structures. At least one biological sequence (protein, DNA, or RNA) is required.
72
74
 
73
75
  Inherited:
74
76
  :param initial_protein_sequences: protein sequences of interest
77
+ :param initial_dna_sequences: DNA sequences of interest
78
+ :param initial_rna_sequences: RNA sequences of interest
75
79
  :param initial_smiles_list: SMILES strings of interest
76
80
 
77
81
  New:
@@ -0,0 +1,13 @@
1
+ """RNA-related workflow data models."""
2
+
3
+ from ..base import Base
4
+
5
+
6
+ class RNASequence(Base):
7
+ """
8
+ RNA sequence metadata.
9
+
10
+ :param sequence: nucleotide string
11
+ """
12
+
13
+ sequence: str
@@ -1,5 +1,7 @@
1
1
  """Base classes for workflows."""
2
2
 
3
+ from typing import Any
4
+
3
5
  from pydantic import field_validator
4
6
 
5
7
  from ..base import Base
@@ -7,6 +9,9 @@ from ..message import Message
7
9
  from ..mode import Mode
8
10
  from ..molecule import Molecule
9
11
  from ..types import UUID
12
+ from .dna import DNASequence
13
+ from .protein import ProteinSequence
14
+ from .rna import RNASequence
10
15
 
11
16
 
12
17
  class Workflow(Base):
@@ -22,31 +27,30 @@ class Workflow(Base):
22
27
  return repr(self)
23
28
 
24
29
 
25
- class ProteinSequence(Base):
26
- """
27
- Protein sequence metadata including cyclic flag.
28
-
29
- :param sequence: amino-acid sequence string
30
- :param cyclic: whether this sequence forms a cyclic peptide (defaults to False)
31
- """
32
-
33
- sequence: str
34
- cyclic: bool = False
35
-
36
-
37
30
  class FASTAWorkflow(Workflow):
38
31
  """
39
- Base class for Workflows that operate on protein sequences and SMILES.
32
+ Base class for Workflows that operate on biological sequences and SMILES.
40
33
 
41
- :param initial_protein_sequences: proteins to evaluate, either plain sequence strings or ProteinSequence objects with cyclic flags
34
+ :param initial_protein_sequences: protein sequences to evaluate, either plain sequence strings or ProteinSequence objects with metadata
35
+ :param initial_dna_sequences: DNA sequences to evaluate, either plain sequence strings or DNASequence objects with metadata
36
+ :param initial_rna_sequences: RNA sequences to evaluate, either plain sequence strings or RNASequence objects with metadata
42
37
  :param initial_smiles_list: SMILES strings of interest
43
38
  :param ligand_binding_affinity_index: optional index selecting which ligand affinity to evaluate
39
+ :raises ValueError: if none of the sequence lists are provided
44
40
  """
45
41
 
46
- initial_protein_sequences: list[ProteinSequence] | list[str]
47
- initial_smiles_list: list[str] | None = None
42
+ initial_protein_sequences: list[ProteinSequence] | list[str] = []
43
+ initial_dna_sequences: list[DNASequence] = []
44
+ initial_rna_sequences: list[RNASequence] = []
45
+ initial_smiles_list: list[str] = []
48
46
  ligand_binding_affinity_index: int | None = None
49
47
 
48
+ def model_post_init(self, __context: Any) -> None:
49
+ if not (self.initial_protein_sequences or self.initial_dna_sequences or self.initial_rna_sequences):
50
+ raise ValueError(
51
+ "Provide at least one of `initial_protein_sequences`, `initial_dna_sequences`, or `initial_rna_sequences`.",
52
+ )
53
+
50
54
 
51
55
  class SMILESWorkflow(Workflow):
52
56
  """
@@ -95,6 +99,16 @@ class MoleculeWorkflow(Workflow):
95
99
  return mode
96
100
 
97
101
 
102
+ class ProteinSequenceWorkflow(Workflow):
103
+ """
104
+ Base class for Workflows that operate on protein sequences.
105
+
106
+ :param initial_protein_sequences: protein sequences to evaluate, either plain sequence strings or ProteinSequence objects with metadata
107
+ """
108
+
109
+ initial_protein_sequences: list[ProteinSequence] | list[str] = []
110
+
111
+
98
112
  class DBCalculation(Base):
99
113
  """Encodes a calculation that's in the database. This isn't terribly useful by itself."""
100
114
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stjames
3
- Version: 0.0.121
3
+ Version: 0.0.123
4
4
  Summary: standardized JSON atom/molecule encoding scheme
5
5
  Author-email: Corin Wagen <corin@rowansci.com>
6
6
  Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -37,14 +37,15 @@ stjames/data/read_nist_isotopes.py,sha256=y10FNjW43QpC45qib7VHsIghEwT7GG5rsNwHdc
37
37
  stjames/data/symbol_element.json,sha256=vl_buFusTqBd-muYQtMLtTDLy2OtBI6KkBeqkaWRQrg,1186
38
38
  stjames/optimization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
39
  stjames/optimization/freezing_string_method.py,sha256=eEQBqbYHgJH9gVRLDIFtGuPcsHHMLAAt1hF3jtq70lo,2285
40
- stjames/workflows/__init__.py,sha256=GiSXuBCA2tE5eXHnVYJvjtoqX_zvCn8t0wpkvK1HEls,3438
40
+ stjames/workflows/__init__.py,sha256=wzLBi71MgMxlxRXw3N7feazCym6qNRJ8e4pRk3UXPHM,3520
41
41
  stjames/workflows/admet.py,sha256=qFUpCFiLW-3gzuEjCMNBJ6DEG_vquJcPAsN4SVZRfdE,1289
42
42
  stjames/workflows/basic_calculation.py,sha256=wIiq2lFRN4nxN0__X_JbPSeaUeJ4tKUyg6NaB1xqoEY,1079
43
43
  stjames/workflows/batch_docking.py,sha256=o-t5FkfLlbe60jX1_ZeqWSJJ5vsjEIbTa5GLN3ny_tk,1590
44
44
  stjames/workflows/bde.py,sha256=g_In-caftXiimrhfdptHjpfrYQUs3vF58qYmRnaTN8g,10825
45
45
  stjames/workflows/conformer.py,sha256=18aO6ngMBeGAmQkBdLGCCHr398RIYr1v2hD2IT1u4cc,3005
46
- stjames/workflows/conformer_search.py,sha256=1kBUT0yCcTPTCtxg1tlTKHRRXkfNYNqzla_89lDEL9k,15696
46
+ stjames/workflows/conformer_search.py,sha256=TaDI3unNjaxBKj4YHASwSaNQHlVSwuZDZdTIxk7MC4Y,17638
47
47
  stjames/workflows/descriptors.py,sha256=T4tc7xdtBdxESGO86KR323jPQ2pgwxBqgV0khA6MEgQ,584
48
+ stjames/workflows/dna.py,sha256=_M79WikFujCsUWr4YaEdOoeDDKPV_DhKptWt0ptktko,194
48
49
  stjames/workflows/docking.py,sha256=t30kqeFXQ0yrlqvN6Jdwt0SdfnJLDsfK-7yFi0gwNbY,4753
49
50
  stjames/workflows/double_ended_ts_search.py,sha256=ovJgEVFc6c3mijCE3TKAY70YvqNmAZ5Y4XgV4-tIxBI,3127
50
51
  stjames/workflows/electronic_properties.py,sha256=GT3-NC7w-dbcOJ-3AzJ7LgzH6frTbiH2Iyb9BCa-SvY,4112
@@ -54,21 +55,24 @@ stjames/workflows/ion_mobility.py,sha256=5vUjEYCnF9sN3dTqqEgWAq0jAfdoFvkLubemoXE
54
55
  stjames/workflows/irc.py,sha256=ZP7icylW8rgo_Uh7h3bmyumn0ru1IyF-61nP5Jnmq3M,3402
55
56
  stjames/workflows/macropka.py,sha256=Krj0xXuB-u57Kqlf4bbRiHDUWCpliFr6YPiYqPmYaWk,3803
56
57
  stjames/workflows/molecular_dynamics.py,sha256=cgjede9TWf-eXRFeUcM59cyVQAhSduL6L0J0oMrX3xc,3543
58
+ stjames/workflows/msa.py,sha256=nK3KmPQv9Sr7yRjNfDWSXIgBrk3PnhvDYHnsVoqoS-M,773
57
59
  stjames/workflows/multistage_opt.py,sha256=UN-4WLsT2WEjO5KqDPrcCkb708Co-ZScHx3g2bto768,16597
58
60
  stjames/workflows/nmr.py,sha256=1QEF4SB6dWIr-jzLEZ7V972UnRUOTufOJSHwIGyV3dM,2681
59
61
  stjames/workflows/pka.py,sha256=i-jzl2lN0yRWc0tgrWSBCplITEByfRyEQrlUhjnzcBc,4580
60
62
  stjames/workflows/pose_analysis_md.py,sha256=dpWVKC-8fPdw6ExIXk9xbeVBDUMUYQECpixb-oFa23I,4803
63
+ stjames/workflows/protein.py,sha256=MfwJ3qn24cCoUEczMH7A2NyBlI9t_VcwOowohTPDtYM,346
61
64
  stjames/workflows/protein_binder_design.py,sha256=KnPKQJTMrSO5xfw64Bh7T0wHck1NtysVwkVgHs1cGws,12013
62
- stjames/workflows/protein_cofolding.py,sha256=w7Sg_ttU4bcJb7wlVcI_AAsLM9WVAJcU5ucbNb5Iyzw,4326
65
+ stjames/workflows/protein_cofolding.py,sha256=5rs6wgksW5zKpcEkv0_B4Clt9dSQ-42oKF8fJtSRoaY,4495
63
66
  stjames/workflows/redox_potential.py,sha256=7S18t9Y3eynSnA3lZbRlvLfdbgeBopdiigLzt1zxg5c,3871
67
+ stjames/workflows/rna.py,sha256=FZO3UkRhgG3EnGXsQ70eAZTbeEY2P9u60oLD6JuPPMc,194
64
68
  stjames/workflows/scan.py,sha256=lgpvrrFG03GWQj2tWeBqPQVYSCGLgYdnQsU9bAUZQok,3317
65
69
  stjames/workflows/solubility.py,sha256=lfCVvJjqEaddLUpK6WBxjB7u12Sci-K95A5_qIMkIRM,3028
66
70
  stjames/workflows/spin_states.py,sha256=0degmE-frovgoXweshZyjfjqL7nkbaFoO9YoJhvQnaI,4748
67
71
  stjames/workflows/strain.py,sha256=paYxDDQTB1eYP_c2kLVz1-QX7Vpw0LLb3ujnFin_SOM,1834
68
72
  stjames/workflows/tautomer.py,sha256=7eYKziGPg8Km6lfowTzSkgJfJ4SHUPrAmnTf8Bi-SB0,1164
69
- stjames/workflows/workflow.py,sha256=cYNP_tK1afJTscXJzYv5VGe8n2Est6PpthS268oRi1U,2494
70
- stjames-0.0.121.dist-info/licenses/LICENSE,sha256=i05z7xEhyrg6f8j0lR3XYjShnF-MJGFQ-DnpsZ8yiVI,1084
71
- stjames-0.0.121.dist-info/METADATA,sha256=TeYRvGYD3m3c1KxoQa197eGf6oJ9KvyNkJ0gLN4kEJ0,1725
72
- stjames-0.0.121.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
73
- stjames-0.0.121.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
74
- stjames-0.0.121.dist-info/RECORD,,
73
+ stjames/workflows/workflow.py,sha256=dJYK9hY8FqNsD-foQsTI7Mpk2aTY87ASjMFgerx6gtQ,3432
74
+ stjames-0.0.123.dist-info/licenses/LICENSE,sha256=i05z7xEhyrg6f8j0lR3XYjShnF-MJGFQ-DnpsZ8yiVI,1084
75
+ stjames-0.0.123.dist-info/METADATA,sha256=r1v4SR_lfdasc-yuQRdmxI7nJXSnVKvbjQiMz4mwW54,1725
76
+ stjames-0.0.123.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
77
+ stjames-0.0.123.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
78
+ stjames-0.0.123.dist-info/RECORD,,