PyPI - stjames - Versions diffs - 0.0.65__py3-none-any.whl → 0.0.67__py3-none-any.whl - Mend

stjames 0.0.65__py3-none-any.whl → 0.0.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of stjames might be problematic. Click here for more details.

Files changed (7) hide show

stjames/molecule.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from typing import Annotated, Iterable, Optional, Self, TypeAlias
+from typing import Annotated, Any, Iterable, Optional, Self, Sequence, TypeAlias, TypedDict, TypeVar
 import pydantic
 from pydantic import AfterValidator, NonNegativeInt, PositiveInt, ValidationError
@@ -9,6 +9,7 @@ from rdkit.Chem import AllChem
 from .atom import Atom
 from .base import Base, round_float, round_optional_float
+from .data import SYMBOL_ELEMENT
 from .periodic_cell import PeriodicCell
 from .types import (
     FloatPerAtom,
@@ -246,28 +247,102 @@ class Molecule(Base):
         return cls.from_extxyz_lines(extxyz.strip().splitlines(), charge=charge, multiplicity=multiplicity)
     @classmethod
-    def from_extxyz_lines(cls: type[Self], lines: Iterable[str], charge: int = 0, multiplicity: PositiveInt = 1) -> Self:
-        # ensure first line is number of atoms
-        lines = list(lines)
+    def from_extxyz_lines(
+        cls: type[Self],
+        lines: Iterable[str],
+        charge: int | None = None,
+        multiplicity: PositiveInt | None = None,
+        cell: PeriodicCell | None = None,
+    ) -> Self:
+        """
+        Parses an EXTXYZ file, extracting atom positions, forces (if present), and metadata.
+        Supports:
+        - Lattice vectors (cell information)
+        - Properties field (species, positions, forces, etc.)
+        - Other metadata like charge, multiplicity, energy, etc.
+        :param lines: Iterable of lines from an EXTXYZ file
+        :param charge: total charge of the molecule (default: 0 if not found)
+        :param multiplicity: spin multiplicity of the molecule (default: 1 if not found)
+        :param cell: PeriodicCell containing lattice vectors
+        :return: Molecule
+        :raises MoleculeReadError: if the file is not in the correct format
+        """
+        if not isinstance(lines, Sequence):
+            lines = list(lines)
+        # Ensure first line contains number of atoms
         if len(lines[0].split()) == 1:
             natoms = lines[0].strip()
-            if not natoms.isdigit() or (int(lines[0]) != len(lines) - 2):
-                raise MoleculeReadError(f"First line of EXTXYZ file should be the number of atoms, got: {lines[0]} != {len(lines) - 2}")
-            lines = lines[1:]
+            if not natoms.isdigit() or (int(natoms) != len(lines) - 2):
+                raise MoleculeReadError(f"First line should be number of atoms, got: {lines[0]} != {len(lines) - 2}")
+            data_line, *lines = lines[1:]
         else:
-            raise MoleculeReadError(f"First line of EXTXYZ should be only an int denoting number of atoms. Got {lines[0].split()}")
+            raise MoleculeReadError(f"First line should be an integer denoting atom count. Got {lines[0].split()}")
-        # ensure second line contains key-value pairs
-        if "=" not in lines[0]:
-            raise MoleculeReadError(f"Invalid property line, got {lines[0]}")
+        metadata = parse_extxyz_comment_line(data_line)
-        cell = parse_comment_line(lines[0])
-        lines = lines[1:]
+        T = TypeVar("T")
-        try:
-            return cls(atoms=[Atom.from_xyz(line) for line in lines], cell=cell, charge=charge, multiplicity=multiplicity)
-        except (ValueError, ValidationError) as e:
-            raise MoleculeReadError("Error reading molecule from extxyz") from e
+        def metadata_optional_get(key: str, value: T | None, default: T) -> T:
+            """Set key to default if not found in metadata"""
+            if value is None:
+                return metadata.get(key, default)  # type: ignore [return-value]
+            return value
+        charge = metadata_optional_get("total_charge", charge, 0)
+        multiplicity = metadata_optional_get("multiplicity", multiplicity, 1)
+        cell = cell or metadata.get("cell")
+        energy = metadata.get("energy", None)
+        force_idx = None
+        if properties := metadata.get("properties", "").split(":"):
+            if properties[0].lower() != "species":
+                raise MoleculeReadError(f"Invalid or missing 'Properties' field in EXTXYZ, got: {properties}")
+            # Identify column indices for position and force data
+            pos_idx = None
+            current_idx = 0  # Start after 'species:S'
+            while current_idx < len(properties):
+                if properties[current_idx].lower() == "pos" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
+                    pos_idx = current_idx
+                elif properties[current_idx].lower() == "forces" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
+                    force_idx = current_idx
+                current_idx += 3
+            if pos_idx is None:
+                raise MoleculeReadError("No position data ('pos:R:3') found in Properties field.")
+        def parse_line_atoms(line: str) -> Atom:
+            symbol, sx, sy, sz, *_ = line.split()
+            atomic_number = SYMBOL_ELEMENT[symbol.title()]
+            x, y, z = map(float, (sx, sy, sz))
+            return Atom(atomic_number=atomic_number, position=(x, y, z))
+        def parse_line_with_grad(line: str) -> tuple[Atom, Vector3D]:
+            symbol, sx, sy, sz, sgx, sgy, sgz, *_ = line.split()
+            atomic_number = SYMBOL_ELEMENT[symbol.title()]
+            x, y, z = map(float, (sx, sy, sz))
+            gx, gy, gz = map(float, (sgx, sgy, sgz))
+            return (
+                Atom(atomic_number=atomic_number, position=(x, y, z)),
+                (-gx, -gy, -gz),
+            )
+        atoms: list[Atom]
+        gradients: list[Vector3D] | None
+        if force_idx is not None:
+            atoms, gradients = zip(*map(parse_line_with_grad, lines), strict=True)  # type: ignore [assignment]
+        else:
+            atoms = [parse_line_atoms(line) for line in lines]
+            gradients = None
+        return cls(atoms=atoms, cell=cell, charge=charge, multiplicity=multiplicity, energy=energy, gradient=gradients)
     @classmethod
     def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0) -> Self:
@@ -313,43 +388,62 @@ def _embed_rdkit_mol(rdkm: RdkitMol) -> RdkitMol:
     return rdkm
-def parse_comment_line(line: str) -> PeriodicCell:
-    """
-    currently only supporting lattice and porperites fields from comment line
-    modify in future to support other fields from comment from_xyz_lines
-    ex: name, mulitplicity, charge, etc.
+class EXTXYZMetadata(TypedDict, total=False):
+    properties: Any
+    total_charge: int
+    multiplicity: int
+    energy: float
+    cell: PeriodicCell
+def parse_extxyz_comment_line(line: str) -> EXTXYZMetadata:
     """
-    cell = None
+    Parse the comment line of an EXTXYZ file, extracting lattice, properties, and metadata.
+    Supports:
+    - Lattice vectors (cell information)
+    - Properties field (species, positions, forces, etc.)
+    - Other metadata fields like charge, multiplicity, energy, etc.
+    :param line: comment line from an EXTXYZ file
+    :return: parsed properties
+    >>> parse_extxyz_comment_line('Lattice="6.0 0.0 0.0 6.0 0.0 0.0 6.0 0.0 0.0"Properties=species:S:1:pos:R:3')
+    {'cell': PeriodicCell(lattice_vectors=((6.0, 0.0, 0.0), (6.0, 0.0, 0.0), (6.0, 0.0, 0.0)), is_periodic=(True, True, True), volume=0.0), 'properties': 'species:S:1:pos:R:3'}
+    """  # noqa: E501
     # Regular expression to match key="value", key='value', or key=value
     pattern = r"(\S+?=(?:\".*?\"|\'.*?\'|\S+))"
     pairs = re.findall(pattern, line)
-    prop_dict = {}
+    prop_dict: EXTXYZMetadata = {}
     for pair in pairs:
         key, value = pair.split("=", 1)
-        if key.lower() == "lattice":
-            value = value.strip("'\"").split()
-            if len(value) != 9:
-                raise MoleculeReadError(f"Lattice should have 9 entries got {len(value)}")
+        key = key.lower().strip()
+        value = value.strip("'\"")
+        if key == "lattice":
+            lattice_values = value.split()
+            if len(lattice_values) != 9:
+                raise MoleculeReadError(f"Lattice should have 9 entries, got {len(lattice_values)}")
-            # Convert the value to a 3x3 tuple of tuples of floats
             try:
-                cell = tuple(tuple(map(float, value[i : i + 3])) for i in range(0, 9, 3))
+                cell = tuple(tuple(map(float, lattice_values[i : i + 3])) for i in range(0, 9, 3))
             except ValueError:
-                raise MoleculeReadError(f"Lattice should be floats, got {value}")
+                raise MoleculeReadError(f"Lattice should be floats, got {lattice_values}")
-            prop_dict[key] = value
+            prop_dict["cell"] = PeriodicCell(lattice_vectors=cell)
-        elif key.lower() == "properties":
-            if value.lower() != "species:s:1:pos:r:3":
-                raise MoleculeReadError(f"Only accepting properties of form species:S:1:pos:R:3, got {value}")
-            prop_dict[key] = value
-        else:
-            raise MoleculeReadError(f"Currently only accepting lattice and propery keys. Got {key}")
+        elif key == "properties":
+            prop_dict["properties"] = value
-    if cell is None:
-        raise MoleculeReadError("Lattice field is required but missing.")
+        elif key == "total_charge":
+            prop_dict["total_charge"] = int(value)
+        elif key == "multiplicity":
+            prop_dict["multiplicity"] = int(value)
+        elif key == "energy":
+            prop_dict["energy"] = float(value)
+        else:
+            prop_dict[key] = value  # type: ignore [literal-required]
-    if "properties" not in [key.lower() for key in prop_dict.keys()]:
-        raise MoleculeReadError(f"Property field is required, got keys {prop_dict.keys()}")
-    return PeriodicCell(lattice_vectors=cell)
+    return prop_dict

stjames/pdb.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import re
 from datetime import date, datetime
 from pathlib import Path
 from typing import Any, Literal
@@ -276,7 +277,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id=this_chain_id,
                         res_name=residue.name,
-                        res_num=int(_residue_id[2:]),
+                        res_num=_residue_id[2:],
                         alt_loc=atom.alt_loc or "",
                     )
                     pdb_lines.append(line)
@@ -286,12 +287,12 @@ def pdb_object_to_pdb_filestring(
                             atom=atom,
                             chain_id=this_chain_id,
                             res_name=residue.name,
-                            res_num=int(_residue_id[2:]),
+                            res_num=_residue_id[2:],
                             alt_loc=atom.alt_loc or "",
                         )
                         pdb_lines.append(line)
-            pdb_lines.append(f"TER   {_atom_id + 1:>5}      {residue.name:>3} {this_chain_id}{int(_residue_id[2:]):>4}")
+            pdb_lines.append(f"TER   {_atom_id + 1:>5}      {residue.name:>3} {this_chain_id}{_residue_id[2:]:>4}")
         # === 2) Non-polymers (e.g. ligands, ions) ===
         for _np_id, nonpoly in model.non_polymer.items():
@@ -308,7 +309,7 @@ def pdb_object_to_pdb_filestring(
                     atom=atom,
                     chain_id=chain_id_for_np,
                     res_name=nonpoly.name,
-                    res_num=int(_np_id[2:]),
+                    res_num=_np_id[2:],
                 )
                 pdb_lines.append(line)
                 if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -317,7 +318,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id=chain_id_for_np,
                         res_name=nonpoly.name,
-                        res_num=int(_np_id[2:]),
+                        res_num=_np_id[2:],
                     )
                     pdb_lines.append(line)
@@ -330,7 +331,7 @@ def pdb_object_to_pdb_filestring(
                     atom=atom,
                     chain_id=_w_id[0],  # Or you can use water.polymer if set
                     res_name="HOH",
-                    res_num=int(_w_id[2:]),  # or an incrementing value
+                    res_num=_w_id[2:],  # or an incrementing value
                 )
                 pdb_lines.append(line)
                 if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -339,7 +340,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id=_w_id[0],
                         res_name="HOH",
-                        res_num=int(_w_id[2:]),
+                        res_num=_w_id[2:],
                     )
                     pdb_lines.append(line)
@@ -357,7 +358,7 @@ def pdb_object_to_pdb_filestring(
                         atom=atom,
                         chain_id="B",
                         res_name="BRN",  # or branched_obj.get("name", "BRN")
-                        res_num=1,
+                        res_num="1",
                     )
                     pdb_lines.append(line)
                     if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -366,7 +367,7 @@ def pdb_object_to_pdb_filestring(
                             atom=atom,
                             chain_id="B",
                             res_name="BRN",
-                            res_num=1,
+                            res_num="1",
                         )
                         pdb_lines.append(line)
@@ -407,7 +408,7 @@ def _format_atom_line(
     atom: PDBAtom,
     chain_id: str,
     res_name: str,
-    res_num: int | None,
+    res_num: str | None,
     alt_loc: str = "",
 ) -> str:
     """
@@ -423,7 +424,15 @@ def _format_atom_line(
     alt_loc_char = alt_loc if alt_loc else " "
     residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
     chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
-    residue_num = res_num if res_num is not None else 1
+    residue_num_str = "1"
+    insertion_code = " "
+    if res_num:
+        match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
+        if match:
+            residue_num_str, insertion_code = match.groups()
+            insertion_code = insertion_code if insertion_code != "" else " "
+    residue_num = int(residue_num_str)
     # Format charge: PDB uses e.g. " 2-", " 1+" in columns 79-80
     # If your model stores charges differently, adapt as needed.
@@ -451,7 +460,8 @@ def _format_atom_line(
         f"{residue_name:>3}"  # residue name (columns 18-20)
         f" {chain_char}"  # chain ID (column 22)
         f"{residue_num:4d}"  # residue sequence number (columns 23-26)
-        f"    "  # columns 27-30 (insertion code plus spacing)
+        f"{insertion_code}"
+        f"   "  # columns 27-30 (spacing)
         f"{atom.x:8.3f}"  # x (columns 31-38)
         f"{atom.y:8.3f}"  # y (columns 39-46)
         f"{atom.z:8.3f}"  # z (columns 47-54)
@@ -469,7 +479,7 @@ def _format_anisou_line(
     atom: PDBAtom,
     chain_id: str,
     res_name: str,
-    res_num: int | None,
+    res_num: str | None,
     alt_loc: str = "",
 ) -> str:
     """
@@ -485,7 +495,15 @@ def _format_anisou_line(
     alt_loc_char = alt_loc if alt_loc else " "
     residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
     chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
-    residue_num = res_num if res_num is not None else 1
+    residue_num_str = "1"
+    insertion_code = " "
+    if res_num:
+        match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
+        if match:
+            residue_num_str, insertion_code = match.groups()
+            insertion_code = insertion_code if insertion_code != "" else " "
+    residue_num = int(residue_num_str)
     chg = ""
     if atom.charge and abs(atom.charge) > 0:
@@ -528,7 +546,8 @@ def _format_anisou_line(
         f"{residue_name:>3}"  # residue name (columns 18-20)
         f" {chain_char}"  # chain ID (column 22)
         f"{residue_num:4d}"  # residue sequence number (columns 23-26)
-        f"  "  # columns 27-28 (insertion code plus spacing)
+        f"{insertion_code}"
+        f" "  # columns 27-28 (plus spacing)
         f"{aniso_lines}"
         f"      "  # columns 70-76 (padding)
         f"{atom.element:>2}"  # element (columns 77-78)

{stjames-0.0.65.dist-info → stjames-0.0.67.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: stjames
-Version: 0.0.65
+Version: 0.0.67
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
 Requires-Dist: numpy
 Requires-Dist: requests
 Requires-Dist: rdkit
+Dynamic: license-file
 # stjames

{stjames-0.0.65.dist-info → stjames-0.0.67.dist-info}/RECORD RENAMED Viewed

@@ -13,9 +13,9 @@ stjames/int_settings.py,sha256=5HXp8opt5ZyY1UpmfaK7NVloWVLM5jkG0elEEqpVLUo,896
 stjames/message.py,sha256=Rq6QqmHZKecWxYH8fVyXmuoCCPZv8YinvgykSeorXSU,216
 stjames/method.py,sha256=5hBHk2xQLpxZ52LwJ9FHWaqQMdFKnsbQEOxaVe6O4Go,2321
 stjames/mode.py,sha256=xw46Cc7f3eTS8i35qECi-8DocAlANhayK3w4akD4HBU,496
-stjames/molecule.py,sha256=R0BalcXdvyhuyffoH11Nml_49qiVl4mD0dcGcLqfQYM,14371
+stjames/molecule.py,sha256=4dakMkn-_I5bSWsijLLY0tn5NkBEuZhmtYDj-MDSJE0,17987
 stjames/opt_settings.py,sha256=gxXGtjy9l-Q5Wen9eO6T6HHRCuS8rfOofdVQIJj0JcI,550
-stjames/pdb.py,sha256=YxJgheU37H74cAPjRcYS16Z_-fZo2Yel9V_trluYq9Q,25938
+stjames/pdb.py,sha256=_pIdJCMhIzS4t2HWQa_susDWjZEl0oLn4Njb1QoKvKw,26460
 stjames/periodic_cell.py,sha256=eV_mArsY_MPEFSrFEsTC-CyCc6V8ITAXdk7yhjjNI7M,1080
 stjames/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 stjames/scf_settings.py,sha256=WotVgVrayQ_8PUHP39zVtG7iLT9PV41lpzruttFACP8,2356
@@ -60,8 +60,8 @@ stjames/workflows/solubility.py,sha256=kGfVyPPGDLRpf2j6dSY7woCkfsoXSbUzdSImA4mcM
 stjames/workflows/spin_states.py,sha256=0degmE-frovgoXweshZyjfjqL7nkbaFoO9YoJhvQnaI,4748
 stjames/workflows/tautomer.py,sha256=7eYKziGPg8Km6lfowTzSkgJfJ4SHUPrAmnTf8Bi-SB0,1164
 stjames/workflows/workflow.py,sha256=sk2BUz59wdIkT_EyKOnMt5woNrjo3aHVK38cU8x8I7Q,1423
-stjames-0.0.65.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
-stjames-0.0.65.dist-info/METADATA,sha256=OQsAzZU4ZxSwkHhpyaHaiI-3SvcHB5VS_nDKKlv-3FM,1672
-stjames-0.0.65.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
-stjames-0.0.65.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
-stjames-0.0.65.dist-info/RECORD,,
+stjames-0.0.67.dist-info/licenses/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
+stjames-0.0.67.dist-info/METADATA,sha256=6njJyhDxM_XVsH0R-2HzU98usLv4FbAcJw9GI20Kono,1694
+stjames-0.0.67.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+stjames-0.0.67.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
+stjames-0.0.67.dist-info/RECORD,,

{stjames-0.0.65.dist-info → stjames-0.0.67.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (76.1.0)
+Generator: setuptools (77.0.3)
 Root-Is-Purelib: true
 Tag: py3-none-any

{stjames-0.0.65.dist-info → stjames-0.0.67.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{stjames-0.0.65.dist-info → stjames-0.0.67.dist-info}/top_level.txt RENAMED Viewed

File without changes

stjames 0.0.65__py3-none-any.whl → 0.0.67__py3-none-any.whl

Potentially problematic release.

stjames 0.0.65__py3-none-any.whl → 0.0.67__py3-none-any.whl