PyPI - stjames - Versions diffs - 0.0.42__tar.gz → 0.0.44__tar.gz - Mend

stjames-0.0.42.tar.gz → stjames-0.0.44.tar.gz

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of stjames might be problematic. Click here for more details.

Files changed (61)

{stjames-0.0.42/stjames.egg-info → stjames-0.0.44}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: stjames
-Version: 0.0.42
+Version: 0.0.44
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames

{stjames-0.0.42 → stjames-0.0.44}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "stjames"
-version = "0.0.42"
+version = "0.0.44"
 description = "standardized JSON atom/molecule encoding scheme"
 readme = "README.md"
 requires-python = ">=3.11"

{stjames-0.0.42 → stjames-0.0.44}/stjames/atom.py RENAMED Viewed

@@ -60,7 +60,7 @@ class Atom(Base):
         Atom(1, [0.00000, 0.00000, 0.00000])
         """
         name, *xyz = xyz_line.split()
-        symbol = int(name) if name.isdigit() else SYMBOL_ELEMENT[name]
+        symbol = int(name) if name.isdigit() else SYMBOL_ELEMENT[name.title()]
         if not len(xyz) == 3:
             raise ValueError("XYZ file should have 3 coordinates per atom")
         return cls(atomic_number=symbol, position=xyz)

{stjames-0.0.42 → stjames-0.0.44}/stjames/molecule.py RENAMED Viewed

@@ -1,8 +1,9 @@
+import re
 from pathlib import Path
 from typing import Iterable, Optional, Self
 import pydantic
-from pydantic import NonNegativeInt, PositiveInt
+from pydantic import NonNegativeInt, PositiveInt, ValidationError
 from .atom import Atom
 from .base import Base
@@ -54,6 +55,8 @@ class Molecule(Base):
     thermal_enthalpy_corr: Optional[float] = None
     thermal_free_energy_corr: Optional[float] = None
+    smiles: Optional[str] = None
     def __len__(self) -> int:
         return len(self.atoms)
@@ -135,6 +138,8 @@ class Molecule(Base):
             match format:
                 case "xyz":
                     return cls.from_xyz_lines(f.readlines(), charge=charge, multiplicity=multiplicity)
+                case "extxyz":
+                    return cls.from_extxyz_lines(f.readlines(), charge=charge, multiplicity=multiplicity)
                 case _:
                     raise ValueError(f"Unsupported {format=}")
@@ -161,7 +166,7 @@ class Molecule(Base):
         try:
             return cls(atoms=[Atom.from_xyz(line) for line in lines], charge=charge, multiplicity=multiplicity)
-        except Exception as e:
+        except (ValueError, ValidationError) as e:
             raise MoleculeReadError("Error reading molecule from xyz") from e
     def to_xyz(self, comment: str = "", out_file: Path | str | None = None) -> str:
@@ -190,3 +195,85 @@ class Molecule(Base):
                 f.write(out)
         return out
+    @classmethod
+    def from_extxyz(cls: type[Self], extxyz: str, charge: int = 0, multiplicity: PositiveInt = 1) -> Self:
+        r"""
+        Generate a Molecule from a EXTXYZ string. Currently only supporting Lattice and Properties fields.
+        >>> Molecule.from_extxyz('''
+        ... 2
+        ... Lattice="6.0 0.0 0.0 6.0 0.0 0.0 6.0 0.0 0.0"Properties=species:S:1:pos:R:3
+        ... H 0 0 0
+        ... H 0 0 1
+        ... ''').cell.lattice_vectors
+        ((6.0, 0.0, 0.0), (6.0, 0.0, 0.0), (6.0, 0.0, 0.0))
+        """
+        return cls.from_extxyz_lines(extxyz.strip().splitlines(), charge=charge, multiplicity=multiplicity)
+    @classmethod
+    def from_extxyz_lines(cls: type[Self], lines: Iterable[str], charge: int = 0, multiplicity: PositiveInt = 1) -> Self:
+        # ensure first line is number of atoms
+        lines = list(lines)
+        if len(lines[0].split()) == 1:
+            natoms = lines[0].strip()
+            if not natoms.isdigit() or (int(lines[0]) != len(lines) - 2):
+                raise MoleculeReadError(f"First line of EXTXYZ file should be the number of atoms, got: {lines[0]} != {len(lines) - 2}")
+            lines = lines[1:]
+        else:
+            raise MoleculeReadError(f"First line of EXTXYZ should be only an int denoting number of atoms. Got {lines[0].split()}")
+        # ensure second line contains key-value pairs
+        if "=" not in lines[0]:
+            raise MoleculeReadError(f"Invalid property line, got {lines[0]}")
+        cell = parse_comment_line(lines[0])
+        lines = lines[1:]
+        try:
+            return cls(atoms=[Atom.from_xyz(line) for line in lines], cell=cell, charge=charge, multiplicity=multiplicity)
+        except (ValueError, ValidationError) as e:
+            raise MoleculeReadError("Error reading molecule from extxyz") from e
+def parse_comment_line(line: str) -> PeriodicCell:
+    """
+    currently only supporting lattice and porperites fields from comment line
+    modify in future to support other fields from comment from_xyz_lines
+    ex: name, mulitplicity, charge, etc.
+    """
+    cell = None
+    # Regular expression to match key="value", key='value', or key=value
+    pattern = r"(\S+?=(?:\".*?\"|\'.*?\'|\S+))"
+    pairs = re.findall(pattern, line)
+    prop_dict = {}
+    for pair in pairs:
+        key, value = pair.split("=", 1)
+        if key.lower() == "lattice":
+            value = value.strip("'\"").split()
+            if len(value) != 9:
+                raise MoleculeReadError(f"Lattice should have 9 entries got {len(value)}")
+            # Convert the value to a 3x3 tuple of tuples of floats
+            try:
+                cell = tuple(tuple(map(float, value[i : i + 3])) for i in range(0, 9, 3))
+            except ValueError:
+                raise MoleculeReadError(f"Lattice should be floats, got {value}")
+            prop_dict[key] = value
+        elif key.lower() == "properties":
+            if value.lower() != "species:s:1:pos:r:3":
+                raise MoleculeReadError(f"Only accepting properties of form species:S:1:pos:R:3, got {value}")
+            prop_dict[key] = value
+        else:
+            raise MoleculeReadError(f"Currently only accepting lattice and propery keys. Got {key}")
+    if cell is None:
+        raise MoleculeReadError("Lattice field is required but missing.")
+    if "properties" not in [key.lower() for key in prop_dict.keys()]:
+        raise MoleculeReadError(f"Property field is required, got keys {prop_dict.keys()}")
+    return PeriodicCell(lattice_vectors=cell)

stjames-0.0.44/stjames/settings.py ADDED Viewed

@@ -0,0 +1,216 @@
+from typing import Any, Optional, Self, TypeVar
+from pydantic import computed_field, field_validator, model_validator
+from .base import Base, UniqueList
+from .basis_set import BasisSet
+from .correction import Correction
+from .method import METHODS_WITH_CORRECTION, PREPACKAGED_METHODS, Method
+from .mode import Mode
+from .opt_settings import OptimizationSettings
+from .scf_settings import SCFSettings
+from .solvent import SolventSettings
+from .task import Task
+from .thermochem_settings import ThermochemistrySettings
+_T = TypeVar("_T")
+class Settings(Base):
+    mode: Mode = Mode.AUTO
+    method: Method = Method.HARTREE_FOCK
+    basis_set: Optional[BasisSet] = None
+    tasks: UniqueList[Task] = [Task.ENERGY, Task.CHARGE, Task.DIPOLE]
+    corrections: UniqueList[Correction] = []
+    solvent_settings: Optional[SolventSettings] = None
+    # scf/opt settings will be set automatically based on mode, but can be overridden manually
+    scf_settings: SCFSettings = SCFSettings()
+    opt_settings: OptimizationSettings = OptimizationSettings()
+    thermochem_settings: ThermochemistrySettings = ThermochemistrySettings()
+    # mypy has this dead wrong (https://docs.pydantic.dev/2.0/usage/computed_fields/)
+    # Python 3.12 narrows the reason for the ignore to prop-decorator
+    @computed_field  # type: ignore[misc, prop-decorator, unused-ignore]
+    @property
+    def level_of_theory(self) -> str:
+        corrections = list(filter(lambda x: x not in (None, ""), self.corrections))
+        if self.method in PREPACKAGED_METHODS or self.basis_set is None:
+            method = self.method.value
+        elif self.method in METHODS_WITH_CORRECTION or len(corrections) == 0:
+            method = f"{self.method.value}/{self.basis_set.name.lower()}"
+        else:
+            method = f"{self.method.value}-{'-'.join([c.value for c in corrections])}/{self.basis_set.name.lower()}"
+        if self.solvent_settings is not None:
+            method += f"/{self.solvent_settings.model.value}({self.solvent_settings.solvent.value})"
+        return method
+    @field_validator("mode")
+    @classmethod
+    def set_mode_auto(cls, mode: Mode) -> Mode:
+        """Set the mode to RAPID if AUTO is selected."""
+        if mode == Mode.AUTO:
+            return Mode.RAPID
+        return mode
+    @model_validator(mode="after")
+    def validate_and_build(self) -> Self:
+        if self.mode == Mode.AUTO:
+            self.mode = Mode.RAPID
+        self.scf_settings = _assign_scf_settings_by_mode(self.mode, self.scf_settings)
+        self.opt_settings = _assign_opt_settings_by_mode(self.mode, self.opt_settings)
+        return self
+    def model_post_init(self, __context: Any) -> None:
+        # figure out `optimize_ts`
+        if Task.OPTIMIZE_TS in self.tasks:
+            self.tasks.pop(self.tasks.index(Task.OPTIMIZE_TS))
+            self.tasks.append(Task.OPTIMIZE)
+            self.opt_settings.transition_state = True
+        # composite methods have their own basis sets, so overwrite user stuff
+        if self.method == Method.HF3C:
+            self.basis_set = BasisSet(name="minix")
+        elif self.method == Method.B973C:
+            self.basis_set = BasisSet(name="def2-mTZVP")
+        elif self.method == Method.R2SCAN3C:
+            self.basis_set = BasisSet(name="def2-mTZVPP")
+        elif self.method == Method.WB97X3C:
+            self.basis_set = BasisSet(name="vDZP")
+    @field_validator("basis_set", mode="before")
+    @classmethod
+    def parse_basis_set(cls, v: Any) -> BasisSet | dict[str, Any] | None:
+        """Turn a string into a ``BasisSet`` object. (This is a little crude.)"""
+        if isinstance(v, BasisSet):
+            return None if v.name is None else v
+        elif isinstance(v, dict):
+            return None if v.get("name") is None else v
+        elif isinstance(v, str):
+            if len(v):
+                return BasisSet(name=v)
+            # "" is basically None, let's be real here...
+            return None
+        elif v is None:
+            return None
+        else:
+            raise ValueError(f"invalid value ``{v}`` for ``basis_set``")
+    @field_validator("corrections", mode="before")
+    @classmethod
+    def remove_empty_string(cls, v: list[_T]) -> list[_T]:
+        """Remove empty string values."""
+        return [c for c in v if c] if v is not None else v
+def _assign_scf_settings_by_mode(mode: Mode, scf_settings: SCFSettings) -> SCFSettings:
+    """
+    Assign SCF settings based on the mode.
+    Values based off of the following sources:
+    QChem:
+        - https://manual.q-chem.com/5.2/Ch4.S3.SS2.html
+        - https://manual.q-chem.com/5.2/Ch4.S5.SS2.html
+    Gaussian:
+        - https://gaussian.com/integral/
+        - https://gaussian.com/overlay5/
+    Orca:
+        - manual 4.2.1, §9.6.1 and §9.7.3
+    Psi4:
+        - https://psicode.org/psi4manual/master/autodir_options_c/module__scf.html
+        - https://psicode.org/psi4manual/master/autodoc_glossary_options_c.html
+    TeraChem:
+        - Manual, it's easy to locate everything.
+    The below values are my best attempt at homogenizing various sources.
+    In general, eri_threshold should be 3 OOM lower than SCF convergence.
+    """
+    if mode == Mode.MANUAL:
+        return scf_settings
+    match mode:
+        case Mode.RECKLESS:
+            scf_settings.energy_threshold = 1e-5
+            scf_settings.rms_error_threshold = 1e-7
+            scf_settings.max_error_threshold = 1e-5
+            scf_settings.rebuild_frequency = 100
+            scf_settings.int_settings.eri_threshold = 1e-8
+            scf_settings.int_settings.csam_multiplier = 3.0
+            scf_settings.int_settings.pair_overlap_threshold = 1e-8
+        case Mode.RAPID | Mode.CAREFUL:
+            scf_settings.energy_threshold = 1e-6
+            scf_settings.rms_error_threshold = 1e-9
+            scf_settings.max_error_threshold = 1e-7
+            scf_settings.rebuild_frequency = 10
+            scf_settings.int_settings.eri_threshold = 1e-10
+            scf_settings.int_settings.csam_multiplier = 1.0
+            scf_settings.int_settings.pair_overlap_threshold = 1e-10
+        case Mode.METICULOUS:
+            scf_settings.energy_threshold = 1e-8
+            scf_settings.rms_error_threshold = 1e-9
+            scf_settings.max_error_threshold = 1e-7
+            scf_settings.rebuild_frequency = 5
+            scf_settings.int_settings.eri_threshold = 1e-12
+            scf_settings.int_settings.csam_multiplier = 1.0
+            scf_settings.int_settings.pair_overlap_threshold = 1e-12
+        case Mode.DEBUG:
+            scf_settings.energy_threshold = 1e-9
+            scf_settings.rms_error_threshold = 1e-10
+            scf_settings.max_error_threshold = 1e-9
+            scf_settings.rebuild_frequency = 1
+            scf_settings.int_settings.eri_threshold = 1e-14
+            scf_settings.int_settings.csam_multiplier = 1e10  # in other words, disable CSAM
+            scf_settings.int_settings.pair_overlap_threshold = 1e-14
+        case _:
+            raise ValueError(f"Unknown mode ``{mode.value}``!")
+    return scf_settings
+def _assign_opt_settings_by_mode(mode: Mode, opt_settings: OptimizationSettings) -> OptimizationSettings:
+    """
+    Assign optimization settings based on the mode.
+    Constraints lead to a lot of noise, so we need to loosen the thresholds.
+    cf. DLFIND manual, and https://www.cup.uni-muenchen.de/ch/compchem/geom/basic.html
+    and the discussion at https://geometric.readthedocs.io/en/latest/how-it-works.html
+    in periodic systems, "normal" is 0.05 eV/Å ~= 2e-3 Hartree/Å, and "careful" is 0.01 ~= 4e-4
+    Note: thresholds here are in units of Hartree/Å, not Hartree/Bohr as listed in many places.
+    """
+    opt_settings.energy_threshold = 1e-6
+    match mode:
+        case Mode.RECKLESS:
+            opt_settings.energy_threshold = 2e-5
+            opt_settings.max_gradient_threshold = 7e-3
+            opt_settings.rms_gradient_threshold = 6e-3
+        case Mode.RAPID:
+            opt_settings.energy_threshold = 5e-5
+            opt_settings.max_gradient_threshold = 5e-3
+            opt_settings.rms_gradient_threshold = 3.5e-3
+        case Mode.CAREFUL:
+            opt_settings.max_gradient_threshold = 9e-4
+            opt_settings.rms_gradient_threshold = 6e-4
+        case Mode.METICULOUS:
+            opt_settings.max_gradient_threshold = 3e-5
+            opt_settings.rms_gradient_threshold = 2e-5
+        case Mode.DEBUG:
+            opt_settings.max_gradient_threshold = 4e-6
+            opt_settings.rms_gradient_threshold = 2e-6
+        case _:
+            raise ValueError(f"Unknown mode ``{mode.value}``!")
+    return opt_settings

{stjames-0.0.42 → stjames-0.0.44}/stjames/workflows/bde.py RENAMED Viewed

@@ -56,6 +56,7 @@ class BDEWorkflow(Workflow, MultiStageOptMixin):
     Inherited:
     :param initial_molecule: Molecule of interest
+    :param mode: Mode for workflow
     :param multistage_opt_settings: set by mode unless mode=MANUAL (ignores additional settings if set)
     :param solvent: solvent to use
     :param xtb_preopt: pre-optimize with xtb (sets based on mode when None)
@@ -69,7 +70,6 @@ class BDEWorkflow(Workflow, MultiStageOptMixin):
     :param transition_state: whether this is a transition state (not supported)
     New:
-    :param mode: Mode for workflow
     :param optimize_fragments: whether to optimize the fragments, or just the starting molecule (default depends on mode)
     :param atoms: atoms to dissociate (1-indexed)
     :param fragment_indices: fragments to dissociate (all fields feed into this, 1-indexed)
@@ -80,7 +80,6 @@ class BDEWorkflow(Workflow, MultiStageOptMixin):
     :param bdes: BDE results
     """
-    mode: Mode
     mso_mode: Mode = _sentinel_mso_mode  # type: ignore [assignment]
     frequencies: bool = False
     optimize_fragments: bool = None  # type: ignore [assignment]
@@ -107,15 +106,6 @@ class BDEWorkflow(Workflow, MultiStageOptMixin):
         """
         return f"{type(self).__name__} {self.mode.name}\n" + "\n".join(map(str, self.fragment_indices))
-    def __repr__(self) -> str:
-        """
-        Return a string representation of the BDE workflow.
-        >>> BDEWorkflow(initial_molecule=Molecule.from_xyz("He 0 0 0"), mode=Mode.METICULOUS, atoms=[])
-        <BDEWorkflow METICULOUS>
-        """
-        return f"<{type(self).__name__} {self.mode.name}>"
     @property
     def energies(self) -> tuple[float | None, ...]:
         return tuple(bde.energy for bde in self.bdes)
@@ -128,22 +118,21 @@ class BDEWorkflow(Workflow, MultiStageOptMixin):
         return value
-    @field_validator("mode")
-    @classmethod
-    def set_mode_auto(cls, mode: Mode) -> Mode:
-        if mode == Mode.AUTO:
-            return Mode.RAPID
-        return mode
     @field_validator("initial_molecule", mode="before")
     @classmethod
-    def no_charge_or_spin(cls, mol: Molecule) -> Molecule:
+    def no_charge_or_spin(cls, val: Molecule | dict[str, Any]) -> Molecule | dict[str, Any]:
         """Ensure the molecule has no charge or spin."""
+        if isinstance(val, dict):
+            mol = Molecule(**val)
+        elif isinstance(val, Molecule):
+            mol = val
+        else:
+            raise ValueError(f"{val=} is not a Molecule.")
         if mol.charge != 0 or mol.multiplicity != 1:
             raise ValueError("Charge and spin partitioning undefined for BDE, only neutral singlet molecules supported.")
-        return mol
+        return val
     @model_validator(mode="before")
     @classmethod
@@ -159,10 +148,10 @@ class BDEWorkflow(Workflow, MultiStageOptMixin):
         self.fragment_indices = tuple(map(tuple, self.fragment_indices))
         match self.mode:
-            case Mode.RECKLESS | Mode.RAPID:
-                # Default off
-                self.optimize_fragments = self.optimize_fragments or False
-            case Mode.CAREFUL | Mode.METICULOUS:
+            case Mode.RECKLESS:
+                # GFN-FF doesn't support open-shell species
+                self.optimize_fragments = False
+            case Mode.RAPID | Mode.CAREFUL | Mode.METICULOUS:
                 # Default on
                 self.optimize_fragments = self.optimize_fragments or self.optimize_fragments is None
             case _:

{stjames-0.0.42 → stjames-0.0.44}/stjames/workflows/molecular_dynamics.py RENAMED Viewed

@@ -56,5 +56,7 @@ class MolecularDynamicsWorkflow(Workflow):
     calc_settings: Settings
     calc_engine: str | None = None
+    save_interval: PositiveInt = 10
     # uuids of scan points
     frames: list[Frame] = []

{stjames-0.0.42 → stjames-0.0.44}/stjames/workflows/multistage_opt.py RENAMED Viewed

@@ -22,9 +22,9 @@ class MultiStageOptSettings(BaseModel):
     RAPID *default
         r²SCAN-3c//GFN2-xTB with GFN0-xTB pre-opt (off by default)
     CAREFUL
-        wB97X-3c//B97-3c with GFN2-xTB pre-opt
+        wB97X-3c//r²SCAN-3c with GFN2-xTB pre-opt
     METICULOUS
-        wB97M-D3BJ/def2-TZVPPD//wB97X-3c//B97-3c with GFN2-xTB pre-opt
+        wB97M-D3BJ/def2-TZVPPD//wB97X-3c//r²SCAN-3c with GFN2-xTB pre-opt
     Notes:
     - No solvent in pre-opt
@@ -163,7 +163,7 @@ class MultiStageOptSettings(BaseModel):
                 self.xtb_preopt = (self.xtb_preopt is None) or self.xtb_preopt
                 self.optimization_settings = [
                     *gfn2_pre_opt * self.xtb_preopt,
-                    opt(Method.B973C, solvent=self.solvent, freq=self.frequencies),
+                    opt(Method.R2SCAN3C, solvent=self.solvent, freq=self.frequencies),
                 ]
                 self.singlepoint_settings = sp(Method.WB97X3C, solvent=self.solvent)
@@ -171,7 +171,7 @@ class MultiStageOptSettings(BaseModel):
                 self.xtb_preopt = (self.xtb_preopt is None) or self.xtb_preopt
                 self.optimization_settings = [
                     *gfn2_pre_opt * self.xtb_preopt,
-                    opt(Method.B973C, solvent=self.solvent),
+                    opt(Method.R2SCAN3C, solvent=self.solvent),
                     opt(Method.WB97X3C, solvent=self.solvent, freq=self.frequencies),
                 ]
                 self.singlepoint_settings = sp(Method.WB97MD3BJ, "def2-TZVPPD", solvent=self.solvent)
@@ -212,6 +212,12 @@ class MultiStageOptWorkflow(Workflow, MultiStageOptSettings):
     # Populated while running the workflow
     calculations: list[UUID | None] = Field(default_factory=list)
+    def __repr__(self) -> str:
+        if self.mode != Mode.MANUAL:
+            return f"<{type(self).__name__} {self.mode.name}>"
+        return f"<{type(self).__name__} {self.level_of_theory}>"
 # the id of a mutable object may change, thus using object()
 _sentinel_msos = object()

{stjames-0.0.42 → stjames-0.0.44}/stjames/workflows/spin_states.py RENAMED Viewed

@@ -49,6 +49,7 @@ class SpinStatesWorkflow(Workflow, MultiStageOptMixin):
     Inherited
     :param initial_molecule: Molecule of interest
+    :param mode: Mode for workflow
     :param multistage_opt_settings: set by mode unless mode=MANUAL (ignores additional settings if set)
     :param solvent: solvent to use
     :param xtb_preopt: pre-optimize with xtb (sets based on mode when None)
@@ -60,7 +61,6 @@ class SpinStatesWorkflow(Workflow, MultiStageOptMixin):
     :param mso_mode: Mode for MultiStageOptSettings
     New:
-    :param mode: Mode for workflow
     :param states: multiplicities of the spin state targetted
     :param spin_states: resulting spin states data
@@ -72,16 +72,12 @@ class SpinStatesWorkflow(Workflow, MultiStageOptMixin):
     '<SpinStatesWorkflow [1, 3, 5] RAPID>'
     """
-    mode: Mode
     mso_mode: Mode = _sentinel_mso_mode  # type: ignore [assignment]
     states: list[PositiveInt]
     # Results
     spin_states: list[SpinState] = Field(default_factory=list)
-    def __str__(self) -> str:
-        return repr(self)
     def __repr__(self) -> str:
         if self.mode != Mode.MANUAL:
             return f"<{type(self).__name__} {self.states} {self.mode.name}>"
@@ -121,14 +117,6 @@ class SpinStatesWorkflow(Workflow, MultiStageOptMixin):
         values["mso_mode"] = values["mode"]
         return values
-    @field_validator("mode")
-    @classmethod
-    def set_mode_auto(cls, mode: Mode) -> Mode:
-        if mode == Mode.AUTO:
-            return Mode.RAPID
-        return mode
     @field_validator("spin_states")
     @classmethod
     def validate_spin_states(cls, spin_states: list[SpinState]) -> list[SpinState]:

{stjames-0.0.42 → stjames-0.0.44}/stjames/workflows/workflow.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from pydantic import BaseModel, ConfigDict
+from pydantic import field_validator
 from ..base import Base
 from ..message import Message
@@ -8,30 +8,17 @@ from ..types import UUID
 class Workflow(Base):
-    """All workflows should have these properties."""
-    initial_molecule: Molecule
-    messages: list[Message] = []
-class DBCalculation(Base):
-    """Encodes a calculation that's in the database. This isn't terribly useful by itself."""
-    uuid: UUID
-class WorkflowInput(BaseModel):
     """
-    Input for a workflow.
+    Base class for Workflows.
     :param initial_molecule: Molecule of interest
-    :param mode: Mode for workflow
+    :param mode: Mode to use
+    :param messages: messages to display
     """
-    model_config = ConfigDict(extra="forbid")
     initial_molecule: Molecule
-    mode: Mode
+    mode: Mode = Mode.AUTO
+    messages: list[Message] = []
     def __str__(self) -> str:
         return repr(self)
@@ -39,8 +26,17 @@ class WorkflowInput(BaseModel):
     def __repr__(self) -> str:
         return f"<{type(self).__name__} {self.mode.name}>"
+    @field_validator("mode")
+    @classmethod
+    def set_mode_auto(cls, mode: Mode) -> Mode:
+        """Set the mode to RAPID if AUTO is selected."""
+        if mode == Mode.AUTO:
+            return Mode.RAPID
+        return mode
-class WorkflowResults(BaseModel):
-    """Results of a workflow."""
-    model_config = ConfigDict(extra="forbid", frozen=True)
+class DBCalculation(Base):
+    """Encodes a calculation that's in the database. This isn't terribly useful by itself."""
+    uuid: UUID

{stjames-0.0.42 → stjames-0.0.44/stjames.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: stjames
-Version: 0.0.42
+Version: 0.0.44
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames

{stjames-0.0.42 → stjames-0.0.44}/stjames.egg-info/SOURCES.txt RENAMED Viewed

@@ -53,4 +53,6 @@ stjames/workflows/scan.py
 stjames/workflows/spin_states.py
 stjames/workflows/tautomer.py
 stjames/workflows/workflow.py
-tests/test_molecule.py
+tests/test_from_extxyz.py
+tests/test_molecule.py
+tests/test_settings.py

stjames-0.0.44/tests/test_from_extxyz.py ADDED Viewed

@@ -0,0 +1,231 @@
+import pytest
+from stjames import Atom, Molecule, MoleculeReadError, PeriodicCell
+valid_extxyz = """
+5
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_num_atoms = """
+6
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+not_digit_num_atoms = """
+v
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+many_num_atoms = """
+6 9
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+no_num_atoms = """
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+xyz_style = """
+5
+Comment
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+missing_lattice = """
+5
+Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+missing_properties = """
+5
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0"
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_properites = """
+5
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3foo:1
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_extra = """
+5
+Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0 3.14" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_equals = """
+5
+Lattice="6.0 0.0 =0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_str = """
+5
+Lattice="6.0 0.0 0.0 hi 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_extra_string = """
+5
+Lattice="6.0 0.0 0.0 0.0 sup 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_single_quote = """
+5
+Lattice="6.0 0.0 0.0 0.0 6.0 '0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_double_quote = """
+5
+Lattice="6.0 0.0 0.0 0.0 "6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_double_single_quote = """
+5
+Lattice="6.0 0.0 0.0 0.0 '6.0 0.0 0.0 '0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+incorrect_lattice_double_double_quote = """
+5
+Lattice="6.0 0.0 "0.0 0.0 6.0 0.0 0.0 "0.0 6.0" Properties=species:S:1:pos:R:3
+C        0.0        0.0        0.0
+H        0.0        0.0        1.0
+H        1.0        0.0        0.0
+H        0.0        1.0        0.0
+H        1.0        1.0        1.0
+"""
+expected_cell = (
+    (6.0, 0.0, 0.0),
+    (0.0, 6.0, 0.0),
+    (0.0, 0.0, 6.0),
+)
+expected_atoms = [
+    Atom(atomic_number=6, position=(0.0, 0.0, 0.0)),  # C
+    Atom(atomic_number=1, position=(0.0, 0.0, 1.0)),  # H
+    Atom(atomic_number=1, position=(1.0, 0.0, 0.0)),  # H
+    Atom(atomic_number=1, position=(0.0, 1.0, 0.0)),  # H
+    Atom(atomic_number=1, position=(1.0, 1.0, 1.0)),  # H
+]
+expected_molecule = Molecule(
+    charge=0,
+    multiplicity=1,
+    atoms=expected_atoms,
+    cell=PeriodicCell(lattice_vectors=expected_cell),
+)
+def test_molecule_from_extxyz_valid() -> None:
+    """
+    Test case for valid extxyz string.
+    """
+    molecule = Molecule.from_extxyz(valid_extxyz)
+    assert molecule == expected_molecule, f"Valid case failed: got {molecule}, expected {expected_molecule}"
+@pytest.mark.parametrize(
+    "invalid_extxyz",
+    [
+        incorrect_num_atoms,
+        no_num_atoms,
+        not_digit_num_atoms,
+        many_num_atoms,
+        xyz_style,
+        missing_lattice,
+        missing_properties,
+        incorrect_properites,
+        incorrect_lattice_extra,
+        incorrect_lattice_equals,
+        incorrect_lattice_str,
+        incorrect_lattice_extra_string,
+        incorrect_lattice_single_quote,
+        incorrect_lattice_double_quote,
+        incorrect_lattice_double_single_quote,
+        incorrect_lattice_double_double_quote,
+    ],
+)
+def test_molecule_from_extxyz_invalid(invalid_extxyz: str) -> None:
+    """
+    Test case for invalid extxyz strings, ensuring they raise MoleculeReadError.
+    """
+    with pytest.raises(MoleculeReadError):
+        Molecule.from_extxyz(invalid_extxyz)

stjames-0.0.44/tests/test_settings.py ADDED Viewed

@@ -0,0 +1,34 @@
+from stjames import Constraint, Mode, OptimizationSettings, Settings
+def test_set_mode_auto() -> None:
+    Settings()
+    assert Settings().mode == Mode.RAPID
+def test_opt_settings() -> None:
+    settings_rapid = Settings(mode=Mode.RAPID)
+    settings_meticulous = Settings(mode=Mode.METICULOUS)
+    cons = [Constraint(atoms=[1, 2], constraint_type="bond")]
+    settings_careful = Settings(mode=Mode.CAREFUL, opt_settings=OptimizationSettings(constraints=cons))
+    rap_opt_set = settings_rapid.opt_settings
+    car_opt_set = settings_careful.opt_settings
+    met_opt_set = settings_meticulous.opt_settings
+    assert not rap_opt_set.constraints
+    assert not met_opt_set.constraints
+    assert car_opt_set.constraints == cons
+    assert rap_opt_set.energy_threshold == 5e-5
+    assert rap_opt_set.max_gradient_threshold == 5e-3
+    assert rap_opt_set.rms_gradient_threshold == 3.5e-3
+    assert car_opt_set.energy_threshold == 1e-6
+    assert car_opt_set.max_gradient_threshold == 9e-4
+    assert car_opt_set.rms_gradient_threshold == 6e-4
+    assert met_opt_set.energy_threshold == 1e-6
+    assert met_opt_set.max_gradient_threshold == 3e-5
+    assert met_opt_set.rms_gradient_threshold == 2e-5

stjames-0.0.42/stjames/settings.py DELETED Viewed

@@ -1,209 +0,0 @@
-from typing import Any, Optional, TypeVar
-import pydantic
-from .base import Base, UniqueList
-from .basis_set import BasisSet
-from .correction import Correction
-from .method import METHODS_WITH_CORRECTION, PREPACKAGED_METHODS, Method
-from .mode import Mode
-from .opt_settings import OptimizationSettings
-from .scf_settings import SCFSettings
-from .solvent import SolventSettings
-from .task import Task
-from .thermochem_settings import ThermochemistrySettings
-_T = TypeVar("_T")
-class Settings(Base):
-    method: Method = Method.HARTREE_FOCK
-    basis_set: Optional[BasisSet] = None
-    tasks: UniqueList[Task] = [Task.ENERGY, Task.CHARGE, Task.DIPOLE]
-    corrections: UniqueList[Correction] = []
-    mode: Mode = Mode.AUTO
-    solvent_settings: Optional[SolventSettings] = None
-    # scf/opt settings will be set automatically based on mode, but can be overridden manually
-    scf_settings: SCFSettings = SCFSettings()
-    opt_settings: OptimizationSettings = OptimizationSettings()
-    thermochem_settings: ThermochemistrySettings = ThermochemistrySettings()
-    # mypy has this dead wrong (https://docs.pydantic.dev/2.0/usage/computed_fields/)
-    # Python 3.12 narrows the reason for the ignore to prop-decorator
-    @pydantic.computed_field  # type: ignore[misc, prop-decorator, unused-ignore]
-    @property
-    def level_of_theory(self) -> str:
-        corrections = list(filter(lambda x: x not in (None, ""), self.corrections))
-        if self.method in PREPACKAGED_METHODS or self.basis_set is None:
-            method = self.method.value
-        elif self.method in METHODS_WITH_CORRECTION or len(corrections) == 0:
-            method = f"{self.method.value}/{self.basis_set.name.lower()}"
-        else:
-            method = f"{self.method.value}-{'-'.join([c.value for c in corrections])}/{self.basis_set.name.lower()}"
-        if self.solvent_settings is not None:
-            method += f"/{self.solvent_settings.model.value}({self.solvent_settings.solvent.value})"
-        return method
-    def model_post_init(self, __context: Any) -> None:
-        _assign_settings_by_mode(self)
-        # figure out `optimize_ts`
-        if Task.OPTIMIZE_TS in self.tasks:
-            self.tasks.pop(self.tasks.index(Task.OPTIMIZE_TS))
-            self.tasks.append(Task.OPTIMIZE)
-            self.opt_settings.transition_state = True
-        # composite methods have their own basis sets, so overwrite user stuff
-        if self.method == Method.HF3C:
-            self.basis_set = BasisSet(name="minix")
-        elif self.method == Method.B973C:
-            self.basis_set = BasisSet(name="def2-mTZVP")
-        elif self.method == Method.R2SCAN3C:
-            self.basis_set = BasisSet(name="def2-mTZVPP")
-        elif self.method == Method.WB97X3C:
-            self.basis_set = BasisSet(name="vDZP")
-    @pydantic.field_validator("basis_set", mode="before")
-    @classmethod
-    def parse_basis_set(cls, v: Any) -> BasisSet | dict[str, Any] | None:
-        """Turn a string into a ``BasisSet`` object. (This is a little crude.)"""
-        if isinstance(v, BasisSet):
-            return None if v.name is None else v
-        elif isinstance(v, dict):
-            return None if v.get("name") is None else v
-        elif isinstance(v, str):
-            if len(v):
-                return BasisSet(name=v)
-            # "" is basically None, let's be real here...
-            return None
-        elif v is None:
-            return None
-        else:
-            raise ValueError(f"invalid value ``{v}`` for ``basis_set``")
-    @pydantic.field_validator("corrections", mode="before")
-    @classmethod
-    def remove_empty_string(cls, v: list[_T]) -> list[_T]:
-        """Remove empty string values."""
-        return [c for c in v if c] if v is not None else v
-def _assign_settings_by_mode(settings: Settings) -> None:
-    """
-    Modifies ``scf_settings`` and ``opt_settings`` based on preset ``mode``.
-    ``Mode.AUTO`` is resolved to ``CAREFUL`` or ``RAPID`` from the requested tasks and method; the
-    resolved value is kept in a local variable only, ``settings.mode`` is not reassigned here.
-    ``Mode.MANUAL`` returns immediately and leaves both settings objects untouched.
-    Raises ``ValueError`` if the mode matches none of the handled presets.
-    """
-    mode = settings.mode
-    if mode == Mode.AUTO:
-        # anything that needs gradients or second derivatives gets the tighter preset
-        if (Task.OPTIMIZE in settings.tasks) or (Task.GRADIENT in settings.tasks) or (Task.FREQUENCIES in settings.tasks) or (Task.HESSIAN in settings.tasks):
-            # noisy gradient! struggles to converge
-            if settings.method == Method.AIMNET2_WB97MD3:
-                mode = Mode.RAPID
-            else:
-                mode = Mode.CAREFUL
-        else:
-            mode = Mode.RAPID
-    elif mode == Mode.MANUAL:
-        # manual mode: the caller's scf/opt settings are used exactly as given
-        return
-    # modify scf settings!
-    #
-    # values based off of the following sources:
-    # qchem:
-    #   https://manual.q-chem.com/5.2/Ch4.S3.SS2.html
-    #   https://manual.q-chem.com/5.2/Ch4.S5.SS2.html
-    #
-    # gaussian:
-    #   https://gaussian.com/integral/
-    #   https://gaussian.com/overlay5/
-    #
-    # orca:
-    #   manual 4.2.1, §9.6.1 and §9.7.3
-    #
-    # psi4:
-    #   https://psicode.org/psi4manual/master/autodir_options_c/module__scf.html
-    #   https://psicode.org/psi4manual/master/autodoc_glossary_options_c.html
-    #
-    # terachem:
-    #   manual, it's easy to locate everything.
-    #
-    # the below values are my best attempt at homogenizing various sources.
-    # in general, eri_threshold should be 3 OOM lower than scf convergence
-    scf_settings = settings.scf_settings
-    if mode == Mode.RECKLESS:
-        scf_settings.energy_threshold = 1e-5
-        scf_settings.rms_error_threshold = 1e-7
-        scf_settings.max_error_threshold = 1e-5
-        scf_settings.rebuild_frequency = 100
-        scf_settings.int_settings.eri_threshold = 1e-8
-        scf_settings.int_settings.csam_multiplier = 3.0
-        scf_settings.int_settings.pair_overlap_threshold = 1e-8
-    elif mode == Mode.RAPID:
-        scf_settings.energy_threshold = 5e-5
-        scf_settings.rms_error_threshold = 1e-8
-        scf_settings.max_error_threshold = 1e-6
-        scf_settings.rebuild_frequency = 20
-        scf_settings.int_settings.eri_threshold = 1e-9
-        scf_settings.int_settings.csam_multiplier = 1.0
-        scf_settings.int_settings.pair_overlap_threshold = 1e-9
-    elif mode == Mode.CAREFUL:
-        scf_settings.energy_threshold = 1e-6
-        scf_settings.rms_error_threshold = 1e-9
-        scf_settings.max_error_threshold = 1e-7
-        scf_settings.rebuild_frequency = 10
-        scf_settings.int_settings.eri_threshold = 1e-10
-        scf_settings.int_settings.csam_multiplier = 1.0
-        scf_settings.int_settings.pair_overlap_threshold = 1e-10
-    elif mode == Mode.METICULOUS:
-        scf_settings.energy_threshold = 1e-8
-        scf_settings.rms_error_threshold = 1e-9
-        scf_settings.max_error_threshold = 1e-7
-        scf_settings.rebuild_frequency = 5
-        scf_settings.int_settings.eri_threshold = 1e-12
-        scf_settings.int_settings.csam_multiplier = 1.0
-        scf_settings.int_settings.pair_overlap_threshold = 1e-12
-    elif mode == Mode.DEBUG:
-        scf_settings.energy_threshold = 1e-9
-        scf_settings.rms_error_threshold = 1e-10
-        scf_settings.max_error_threshold = 1e-9
-        scf_settings.rebuild_frequency = 1
-        scf_settings.int_settings.eri_threshold = 1e-14
-        scf_settings.int_settings.csam_multiplier = 1e10  # in other words, disable CSAM
-        scf_settings.int_settings.pair_overlap_threshold = 1e-14
-    else:
-        raise ValueError(f"Unknown mode ``{mode.value}``!")
-    opt_settings = settings.opt_settings
-    # constrained optimizations warrant loosening the settings a bit
-    has_constraints = len(opt_settings.constraints) > 0
-    # cf. DLFIND manual, and https://www.cup.uni-muenchen.de/ch/compchem/geom/basic.html
-    # and the discussion at https://geometric.readthedocs.io/en/latest/how-it-works.html
-    # in periodic systems, "normal" is 0.05 eV/Å ~= 2e-3 Hartree/Å, and "careful" is 0.01 ~= 4e-4
-    # branch order matters below: a constrained CAREFUL (or METICULOUS) run is caught by the
-    # branch of the next-looser preset before its own unconstrained branch is tested
-    if mode == Mode.RECKLESS:
-        opt_settings.energy_threshold = 2e-5
-        opt_settings.max_gradient_threshold = 7e-3
-        opt_settings.rms_gradient_threshold = 6e-3
-    elif mode == Mode.RAPID or (mode == Mode.CAREFUL and has_constraints):
-        opt_settings.energy_threshold = 5e-5
-        opt_settings.max_gradient_threshold = 5e-3
-        opt_settings.rms_gradient_threshold = 3.5e-3
-    elif mode == Mode.CAREFUL or (mode == Mode.METICULOUS and has_constraints):
-        opt_settings.energy_threshold = 1e-6
-        opt_settings.max_gradient_threshold = 9e-4
-        opt_settings.rms_gradient_threshold = 6e-4
-    elif mode == Mode.METICULOUS:
-        opt_settings.energy_threshold = 1e-6
-        opt_settings.max_gradient_threshold = 3e-5
-        opt_settings.rms_gradient_threshold = 2e-5
-    elif mode == Mode.DEBUG:
-        opt_settings.energy_threshold = 1e-6
-        opt_settings.max_gradient_threshold = 4e-6
-        opt_settings.rms_gradient_threshold = 2e-6
-    else:
-        raise ValueError(f"Unknown mode ``{mode.value}``!")