PyPI - stjames - Versions diffs - 0.0.52__py3-none-any.whl → 0.0.53__py3-none-any.whl - Mend

stjames 0.0.52__py3-none-any.whl → 0.0.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of stjames might be problematic. Click here for more details.

Files changed (12) hide show

stjames/atomium_stjames/__init__.py +5 -0
stjames/atomium_stjames/data.py +377 -0
stjames/atomium_stjames/mmcif.py +651 -0
stjames/atomium_stjames/pdb.py +572 -0
stjames/atomium_stjames/utilities.py +125 -0
stjames/pdb.py +482 -10
stjames/workflows/irc.py +14 -7
{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/METADATA +2 -1
{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/RECORD +12 -7
{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/LICENSE +0 -0
{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/WHEEL +0 -0
{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/top_level.txt +0 -0

stjames/pdb.py CHANGED Viewed

@@ -1,11 +1,12 @@
-from datetime import date
+from datetime import date, datetime
 from pathlib import Path
 from typing import Any, Literal
-import atomium  # type: ignore [import-untyped]
-from atomium.pdb import pdb_dict_to_data_dict, pdb_string_to_pdb_dict  # type: ignore [import-untyped]
 from pydantic import BaseModel, ConfigDict, Field, field_validator
+import stjames.atomium_stjames as astj
+from stjames.atomium_stjames.mmcif import mmcif_dict_to_data_dict, mmcif_string_to_mmcif_dict
+from stjames.atomium_stjames.pdb import inverse_make_sequences, pdb_dict_to_data_dict, pdb_string_to_pdb_dict
 from stjames.types import Matrix3x3, Vector3D
 # Mostly for testing purposes
@@ -22,12 +23,12 @@ class PDBAtom(BaseModel):
     z: float
     element: str
     name: str
-    charge: float
-    occupancy: float
+    charge: float | None
+    occupancy: float | None
     alt_loc: str | None
-    anisotropy: list[float]
+    anisotropy: list[float] | None
     bvalue: float
-    is_hetatm: bool
+    is_hetatm: bool | None
 class PDBWater(BaseModel):
@@ -175,6 +176,9 @@ class PDBDescription(BaseModel):
         if v is None:
             return v
+        if isinstance(v, date):
+            return v.isoformat()
         return str(date)
@@ -192,14 +196,482 @@ class PDB(BaseModel):
 def read_pdb(path: Path | str) -> PDB:
     """Read a pdb located at path."""
-    return PDB.model_validate(atomium.open(str(path), data_dict=True))
+    # The path is stringified and opened through stjames.atomium_stjames (imported as astj)
+    # with data_dict=True; the returned object is then validated into a PDB model.
+    return PDB.model_validate(astj.open(str(path), data_dict=True))
 def fetch_pdb(code: str) -> PDB:
     """Fetch a pdb from the Protein Data Bank."""
-    return PDB.model_validate(atomium.fetch(code, data_dict=True))
+    # Retrieval is delegated to stjames.atomium_stjames (imported as astj) with data_dict=True;
+    # the returned object is then validated into a PDB model.
+    return PDB.model_validate(astj.fetch(code, data_dict=True))
-def pdb_from_string(pdb: str) -> PDB:
+def pdb_from_pdb_filestring(pdb: str) -> PDB:
     """Read a PDB from a string."""
+    # NOTE(review): this hunk renames pdb_from_string; no alias for the old name is visible here.
+    # Parse in two steps (string -> pdb dict -> data dict), then validate into a PDB model.
     return PDB.model_validate(pdb_dict_to_data_dict(pdb_string_to_pdb_dict(pdb)))
+def pdb_from_mmcif_filestring(pdb: str) -> PDB:
+    """
+    Build a PDB model from mmCIF text.
+    Args:
+        pdb (str): The mmCIF content as a single string.
+    Returns:
+        PDB: The validated model.
+    """
+    mmcif_dict = mmcif_string_to_mmcif_dict(pdb)
+    data_dict = mmcif_dict_to_data_dict(mmcif_dict)
+    return PDB.model_validate(data_dict)
+def _atom_record_lines(
+    serial: int,
+    atom: PDBAtom,
+    chain_id: str,
+    res_name: str,
+    res_num: int,
+    alt_loc: str = "",
+) -> list[str]:
+    """Return the ATOM/HETATM line for one atom, followed by an ANISOU line if it has non-zero anisotropy."""
+    lines = [
+        _format_atom_line(
+            serial=serial,
+            atom=atom,
+            chain_id=chain_id,
+            res_name=res_name,
+            res_num=res_num,
+            alt_loc=alt_loc,
+        )
+    ]
+    # A missing tensor and an all-zero tensor are treated alike: no ANISOU record is written.
+    if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
+        lines.append(
+            _format_anisou_line(
+                serial=serial,
+                atom=atom,
+                chain_id=chain_id,
+                res_name=res_name,
+                res_num=res_num,
+                alt_loc=alt_loc,
+            )
+        )
+    return lines
+def pdb_object_to_pdb_filestring(pdb: PDB) -> str:
+    """
+    Serialize a PDB object to the text of a PDB-format file.
+    The output is, in order: header, source, keyword, secondary-structure/SEQRES, HETNAM,
+    REMARK and CRYST1 sections, then the coordinate records of every model (polymers,
+    non-polymers, waters, branched entries), and a final END record. MODEL/ENDMDL lines
+    are written only when there is more than one model.
+    Args:
+        pdb (PDB): The structure to serialize.
+    Returns:
+        str: The file content, newline-terminated.
+    Raises:
+        ValueError: If the part of a residue/non-polymer/water ID after its first two
+            characters is not an integer.
+    """
+    pdb_lines: list[str] = []
+    # Header
+    pdb_lines.extend(_build_header_section(pdb))
+    pdb_lines.extend(_build_source_section(pdb))
+    pdb_lines.extend(_build_keyword_section(pdb))
+    # full_name_dict is filled in by the SEQRES builder and consumed by the HETNAM builder.
+    full_name_dict: dict[str, str] = {}
+    seqres_lines, chains = _build_secondary_structure_and_seqres(pdb, full_name_dict)
+    pdb_lines.extend(seqres_lines)
+    pdb_lines.extend(_build_hetname_section(full_name_dict))
+    pdb_lines.extend(_build_remark_section(pdb, chains))
+    pdb_lines.extend(_build_crystallography_section(pdb))
+    multiple_models = len(pdb.models) > 1
+    for model_index, model in enumerate(pdb.models, start=1):
+        if multiple_models:
+            pdb_lines.append(f"MODEL     {model_index:>4}")
+        # === 1) Polymers (protein, DNA, etc.) ===
+        for chain_id, polymer in model.polymer.items():
+            # Prefer the polymer's internal_id as the chain ID; fall back to the dictionary key.
+            this_chain_id = polymer.internal_id or chain_id
+            # Track what the TER record needs explicitly, so that an empty chain cannot pick up
+            # unbound or stale loop variables left over from a previous chain.
+            last_serial: int | None = None
+            last_res_name = ""
+            last_res_num = 0
+            for residue_id, residue in polymer.residues.items():
+                assert residue.name is not None
+                # Residue IDs are read as "<chain char><separator><number>": text from index 2 is the number.
+                res_num = int(residue_id[2:])
+                last_res_name, last_res_num = residue.name, res_num
+                for atom_id, atom in residue.atoms.items():
+                    pdb_lines.extend(
+                        _atom_record_lines(atom_id, atom, this_chain_id, residue.name, res_num, atom.alt_loc or "")
+                    )
+                    last_serial = atom_id
+            # TER takes the serial after the last atom written; a chain without atoms gets no TER.
+            if last_serial is not None:
+                pdb_lines.append(f"TER   {last_serial + 1:>5}      {last_res_name:>3} {this_chain_id}{last_res_num:>4}")
+        # === 2) Non-polymers (e.g. ligands, ions) ===
+        for np_id, nonpoly in model.non_polymer.items():
+            # The chain ID is the polymer the molecule is associated with, or "Z" if there is none.
+            chain_id_for_np = nonpoly.polymer or "Z"
+            np_res_num = int(np_id[2:])
+            for atom_id, atom in nonpoly.atoms.items():
+                # alt_loc is forwarded here as it is for polymer atoms, so alternate locations are not lost.
+                pdb_lines.extend(
+                    _atom_record_lines(atom_id, atom, chain_id_for_np, nonpoly.name, np_res_num, atom.alt_loc or "")
+                )
+        # === 3) Water ===
+        for w_id, water in model.water.items():
+            water_res_num = int(w_id[2:])
+            for atom_id, atom in water.atoms.items():
+                # The chain ID is the first character of the water's ID; the residue name is always "HOH".
+                pdb_lines.extend(_atom_record_lines(atom_id, atom, w_id[0], "HOH", water_res_num, atom.alt_loc or ""))
+        # === 4) Branched ===
+        for _b_id, branched_obj in model.branched.items():
+            # Only dict entries that carry an "atoms" mapping are written; they get the
+            # placeholder chain "B", residue name "BRN" and residue number 1.
+            if isinstance(branched_obj, dict) and "atoms" in branched_obj:
+                for atom_id, atom in branched_obj["atoms"].items():
+                    pdb_lines.extend(_atom_record_lines(atom_id, atom, "B", "BRN", 1))
+        if multiple_models:
+            pdb_lines.append("ENDMDL")
+    # Finally, the PDB standard ends with an END record
+    pdb_lines.append("END")
+    return _create_filestring(pdb_lines)
+def _create_filestring(lines: list[str]) -> str:
+    """Join lines with newlines and append one trailing newline (an empty list gives a single newline)."""
+    body = "\n".join(lines)
+    return f"{body}\n"
+def _format_date(date_str: str | None) -> str | None:
+    """
+    Formats a date string from "YYYY-MM-DD" to "DD-MMM-YY" (e.g. "05-JAN-20").
+    The month abbreviation is taken from a fixed English table instead of strftime("%b"),
+    whose output depends on the process locale.
+    Args:
+        date_str (str | None): Date string in "YYYY-MM-DD" format, or None.
+    Returns:
+        str | None: Upper-case date string in "DD-MMM-YY" format, or None if date_str is None.
+    Raises:
+        ValueError: If date_str does not match "%Y-%m-%d".
+    """
+    if date_str is None:
+        return None
+    months = ("JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC")
+    date_obj = datetime.strptime(date_str, "%Y-%m-%d").date()
+    return f"{date_obj.day:02d}-{months[date_obj.month - 1]}-{date_obj.year % 100:02d}"
+def _format_atom_line(
+    serial: int,
+    atom: PDBAtom,
+    chain_id: str,
+    res_name: str,
+    res_num: int | None,
+    alt_loc: str = "",
+) -> str:
+    """
+    Return a single 80-column PDB ATOM/HETATM record line for one atom.
+    See https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf for details
+    Args:
+        serial (int): Atom serial number (columns 7-11).
+        atom (PDBAtom): Atom supplying name, coordinates, occupancy, bvalue, element and charge.
+        chain_id (str): Chain identifier; only the first character is used, "A" if empty.
+        res_name (str): Residue name; truncated to 3 characters, "UNK" if empty.
+        res_num (int | None): Residue sequence number; 1 if None.
+        alt_loc (str): Alternate location indicator; blank if empty.
+    Returns:
+        str: The formatted record line.
+    """
+    record_type = "HETATM" if atom.is_hetatm else "ATOM  "
+    alt_loc_char = alt_loc or " "
+    residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
+    chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
+    residue_num = res_num if res_num is not None else 1
+    # occupancy is optional on the model; formatting None with ":6.2f" would raise, so use full occupancy.
+    occupancy = 1.0 if atom.occupancy is None else atom.occupancy
+    # Columns 79-80 hold the charge as digit-then-sign ("2-", "1+"); a zero or missing charge is blank.
+    # The charge is rounded to an integer; magnitudes that do not fit in one digit are left blank.
+    chg = "  "
+    if atom.charge:
+        magnitude = round(abs(atom.charge))
+        if 0 < magnitude <= 9:
+            chg = f"{magnitude}{'+' if atom.charge > 0 else '-'}"
+    # Use exact spacing & field widths to match PDB guidelines.
+    return (
+        f"{record_type}"  # record name (columns 1-6)
+        f"{serial:5d} "  # atom serial number (columns 7-11)
+        f"{atom.name:<4}"  # atom name (columns 13-16, left-justified)
+        f"{alt_loc_char}"  # altLoc (column 17)
+        f"{residue_name:>3}"  # residue name (columns 18-20)
+        f" {chain_char}"  # chain ID (column 22)
+        f"{residue_num:4d}"  # residue sequence number (columns 23-26)
+        f"    "  # columns 27-30 (insertion code plus spacing)
+        f"{atom.x:8.3f}"  # x (columns 31-38)
+        f"{atom.y:8.3f}"  # y (columns 39-46)
+        f"{atom.z:8.3f}"  # z (columns 47-54)
+        f"{occupancy:6.2f}"  # occupancy (columns 55-60)
+        f"{atom.bvalue:6.2f}"  # temp factor (columns 61-66)
+        f"          "  # columns 67-76 (padding)
+        f"{atom.element:>2}"  # element (columns 77-78)
+        f"{chg}"  # charge (columns 79-80)
+    )
+def _format_anisou_line(
+    serial: int,
+    atom: PDBAtom,
+    chain_id: str,
+    res_name: str,
+    res_num: int | None,
+    alt_loc: str = "",
+) -> str:
+    """
+    Return a single 80-column PDB ANISOU record line for one atom.
+    See https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_Letter.pdf for details
+    Args:
+        serial (int): Atom serial number (columns 7-11).
+        atom (PDBAtom): Atom supplying name, anisotropy, element and charge.
+        chain_id (str): Chain identifier; only the first character is used, "A" if empty.
+        res_name (str): Residue name; truncated to 3 characters, "UNK" if empty.
+        res_num (int | None): Residue sequence number; 1 if None.
+        alt_loc (str): Alternate location indicator; blank if empty.
+    Returns:
+        str: The formatted record line.
+    Raises:
+        IndexError: If atom.anisotropy is non-empty but has fewer than six values.
+    """
+    alt_loc_char = alt_loc or " "
+    residue_name = (res_name or "UNK")[:3]  # limit to 3 chars
+    chain_char = (chain_id or "A")[:1]  # PDB chain ID is 1 char
+    residue_num = res_num if res_num is not None else 1
+    # Columns 79-80 hold the charge as digit-then-sign ("2-", "1+"); a zero or missing charge is blank.
+    # The charge is rounded to an integer; magnitudes that do not fit in one digit are left blank.
+    chg = "  "
+    if atom.charge:
+        magnitude = round(abs(atom.charge))
+        if 0 < magnitude <= 9:
+            chg = f"{magnitude}{'+' if atom.charge > 0 else '-'}"
+    # Six right-justified 7-character fields fill columns 29-70; they stay blank without a tensor.
+    if atom.anisotropy:
+        aniso_fields = "".join(f"{_float_to_pdb_string(atom.anisotropy[k]):>7}" for k in range(6))
+    else:
+        aniso_fields = " " * 42
+    # Use exact spacing & field widths to match PDB guidelines.
+    return (
+        f"ANISOU"  # record name (columns 1-6)
+        f"{serial:5d} "  # atom serial number (columns 7-11)
+        f"{atom.name:<4}"  # atom name (columns 13-16, left-justified)
+        f"{alt_loc_char}"  # altLoc (column 17)
+        f"{residue_name:>3}"  # residue name (columns 18-20)
+        f" {chain_char}"  # chain ID (column 22)
+        f"{residue_num:4d}"  # residue sequence number (columns 23-26)
+        f"  "  # columns 27-28 (insertion code plus spacing)
+        f"{aniso_fields}"  # anisotropy values (columns 29-70)
+        f"      "  # columns 71-76 (padding)
+        f"{atom.element:>2}"  # element (columns 77-78)
+        f"{chg}"  # charge (columns 79-80)
+    )
+def _float_to_pdb_string(x: float) -> str:
+    """
+    Return x scaled by 10**4 as an integer string, e.g. 0.0044 -> "44" and -1.131 -> "-11310".
+    Args:
+        x (float): Value to convert (used for the anisotropy fields of ANISOU lines).
+    Returns:
+        str: The scaled integer, with a leading "-" for negative non-zero results; zero gives "0".
+    """
+    # Round to 4 decimals through string formatting, then drop the decimal point:
+    # "12.5000" -> "125000". int() strips leading zeros and keeps every integer digit.
+    scaled = int(f"{abs(x):.4f}".replace(".", ""))
+    if scaled == 0:
+        # Covers 0.0 and values that round to zero, so the result is never "" or a bare "-".
+        return "0"
+    sign = "-" if x < 0 else ""
+    return f"{sign}{scaled}"
+def _helix_list_to_pdb_helix(polymer_dict: dict[str, PDBPolymer], helices: list[list[str]]) -> list[str]:
+    """
+    Build one HELIX line per helix.
+    Args:
+        polymer_dict (dict[str, PDBPolymer]): Polymers keyed by the first character of a residue ID.
+        helices (list[list[str]]): Residue IDs of each helix; the first and last ID give the end points.
+    Returns:
+        list[str]: HELIX lines, numbered from 1 in input order.
+    """
+    records: list[str] = []
+    serial = 0
+    for residue_ids in helices:
+        serial += 1
+        first_id, last_id = residue_ids[0], residue_ids[-1]
+        # A residue ID is read as "<chain char><separator><number>": [0] is the chain, [2:] the number.
+        first_name = polymer_dict[first_id[0]].residues[first_id].name
+        last_name = polymer_dict[last_id[0]].residues[last_id].name
+        records.append(
+            f"HELIX  {serial:>3} {serial:>3} {first_name} {first_id[0]} {first_id[2:]:>4}  "
+            f"{last_name} {last_id[0]} {last_id[2:]:>4}  1{len(residue_ids):>36}"
+        )
+    return records
+def _strand_list_to_pdb_sheets(polymer_dict: dict[str, PDBPolymer], strands: list[list[str]]) -> list[str]:
+    """
+    Build one SHEET line per strand.
+    Args:
+        polymer_dict (dict[str, PDBPolymer]): Polymers keyed by the first character of a residue ID.
+        strands (list[list[str]]): Residue IDs of each strand; the first and last ID give the end points.
+    Returns:
+        list[str]: SHEET lines, numbered from 1 in input order.
+    """
+    strand_count = len(strands)
+    records: list[str] = []
+    for number, residue_ids in enumerate(strands, start=1):
+        first_id, last_id = residue_ids[0], residue_ids[-1]
+        first_name = polymer_dict[first_id[0]].residues[first_id].name
+        last_name = polymer_dict[last_id[0]].residues[last_id].name
+        # The last field is 0 for the first strand and -1 for every later one.
+        sense = 0 if number == 1 else -1
+        records.append(
+            f"SHEET  {number:>3} {first_id[0]:>3}{strand_count:>2} {first_name} {first_id[0]}{first_id[2:]:>4}  "
+            f"{last_name} {last_id[0]}{last_id[2:]:>4} {sense:>2}"
+        )
+    return records
+def _build_header_section(pdb: PDB) -> list[str]:
+    """Build the HEADER, TITLE, EXPDTA and AUTHOR lines, in that order; missing values become blanks."""
+    description = pdb.description
+    classification = description.classification or ""
+    deposited = _format_date(description.deposition_date) or ""
+    code = description.code or ""
+    title = description.title or ""
+    technique = pdb.experiment.technique or ""
+    # Author names are comma-joined without spaces and upper-cased.
+    author_field = ",".join(description.authors).upper()
+    return [
+        f"HEADER    {classification:<40}{deposited:<10}  {code:<5}",
+        f"TITLE     {title:<70}",
+        f"EXPDTA    {technique:<69}",
+        f"AUTHOR    {author_field:<69}",
+    ]
+def _build_source_section(pdb: PDB) -> list[str]:
+    """Build the two SOURCE lines: source organism first, then expression system."""
+    experiment = pdb.experiment
+    # Each value gets a trailing ";" when present; a missing value leaves the field blank.
+    if experiment.source_organism:
+        organism_field = experiment.source_organism + ";"
+    else:
+        organism_field = ""
+    if experiment.expression_system:
+        expression_field = experiment.expression_system + ";"
+    else:
+        expression_field = ""
+    return [
+        f"SOURCE    ORGANISM_SCIENTIFIC: {organism_field:<69}",
+        f"SOURCE    EXPRESSION_SYSTEM: {expression_field:<69}",
+    ]
+def _build_keyword_section(pdb: PDB) -> list[str]:
+    """Build one KEYWDS line per keyword; every keyword except the last is followed by a comma."""
+    keywords = pdb.description.keywords
+    last_index = len(keywords) - 1
+    return [
+        f"KEYWDS    {(keyword if index == last_index else keyword + ','):<79}"
+        for index, keyword in enumerate(keywords)
+    ]
+def _build_secondary_structure_and_seqres(pdb: PDB, full_name_dict: dict[str, str]) -> tuple[list[str], list[str]]:
+    """
+    Build the SHEET, HELIX and SEQRES lines and collect residue full names.
+    Each chain ID contributes its records once, from the first model that contains it, so a
+    structure with several models does not repeat the same records and chain IDs per model.
+    Args:
+        pdb (PDB): The structure being serialized.
+        full_name_dict (dict[str, str]): Filled in place with residue/non-polymer name -> full name.
+    Returns:
+        tuple[list[str], list[str]]: (SHEET/HELIX/SEQRES lines, chain IDs in first-seen order).
+    """
+    seqres_lines: list[str] = []
+    chains: list[str] = []
+    seen_chains: set[str] = set()
+    for model in pdb.models:
+        for chain_id, polymer in model.polymer.items():
+            if chain_id not in seen_chains:
+                seen_chains.add(chain_id)
+                chains.append(chain_id)
+                # Per chain: sheet records first, then helix records, then SEQRES.
+                seqres_lines.extend(_strand_list_to_pdb_sheets(model.polymer, polymer.strands))
+                seqres_lines.extend(_helix_list_to_pdb_helix(model.polymer, polymer.helices))
+                if polymer.sequence:
+                    seqres_lines.extend(inverse_make_sequences(polymer.sequence, chain_id))
+            # Full names are still collected from every model; later entries overwrite earlier ones.
+            for residue in polymer.residues.values():
+                if residue.full_name and residue.name:
+                    full_name_dict[residue.name] = residue.full_name
+        # Also collect full names for non-polymer molecules
+        for non_polymer in model.non_polymer.values():
+            if non_polymer.full_name and non_polymer.name:
+                full_name_dict[non_polymer.name] = non_polymer.full_name
+    return seqres_lines, chains
+def _build_hetname_section(full_name_dict: dict[str, str]) -> list[str]:
+    """
+    Build the HETNAM lines, splitting full names longer than 55 characters across lines.
+    Args:
+        full_name_dict (dict[str, str]): Mapping of residue name -> full name.
+    Returns:
+        list[str]: One line per 55-character chunk of each full name.
+    """
+    width = 55
+    lines: list[str] = []
+    for name, full_name in full_name_dict.items():
+        # An empty full name still yields one (blank) line.
+        chunks = [full_name[start : start + width] for start in range(0, len(full_name), width)] or [""]
+        for number, chunk in enumerate(chunks, start=1):
+            # The continuation field (columns 9-10) is blank on the first line and 2, 3, ... afterwards.
+            continuation = "  " if number == 1 else f"{number:>2}"
+            lines.append(f"HETNAM  {continuation} {name:<3} {chunk:<55}")
+    return lines
+def _build_remark_section(pdb: PDB, chains: list[str]) -> list[str]:
+    """
+    Build the REMARK lines: resolution (2), R factors (3), biomolecule (350) and missing residues (465).
+    Args:
+        pdb (PDB): Structure supplying quality values and the missing-residue list.
+        chains (list[str]): Chain IDs listed in the REMARK 350 "APPLY THE FOLLOWING TO CHAINS" line.
+    Returns:
+        list[str]: The REMARK lines in the order above.
+    """
+    lines: list[str] = []
+    resolution = pdb.quality.resolution
+    if resolution is None:
+        # Formatting None with ":>7" raises TypeError; emit the standard "not applicable" remark instead.
+        lines.append("REMARK   2 RESOLUTION. NOT APPLICABLE.")
+    else:
+        lines.append(f"REMARK   2 RESOLUTION. {resolution:>7} ANGSTROMS.")
+    # R factors are written only when truthy (a value of 0 is skipped as well as None).
+    if pdb.quality.rfree:
+        lines.append(f"REMARK   3   FREE R VALUE                     : {pdb.quality.rfree}")
+    if pdb.quality.rvalue:
+        lines.append(f"REMARK   3   R VALUE            (WORKING SET) : {pdb.quality.rvalue}")
+    # REMARK 350: fixed text describing a single biomolecule with an identity transformation.
+    lines.append("REMARK 350")
+    lines.append("REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN")
+    lines.append("REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE")
+    lines.append("REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS")
+    lines.append("REMARK 350 GIVEN BELOW.  BOTH NON-CRYSTALLOGRAPHIC AND")
+    lines.append("REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN.")
+    lines.append("REMARK 350")
+    lines.append("REMARK 350 BIOMOLECULE: 1")
+    lines.append("REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: MONOMERIC")
+    lines.append(f"REMARK 350 APPLY THE FOLLOWING TO CHAINS: {', '.join(chains)}")
+    lines.append("REMARK 350   BIOMT1   1  1.000000  0.000000  0.000000        0.00000")
+    lines.append("REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000")
+    lines.append("REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000        0.00000")
+    # REMARK 465: the header is always written, followed by one line per missing residue.
+    lines.append("REMARK 465 MISSING RESIDUES")
+    lines.append("REMARK 465 THE FOLLOWING RESIDUES WERE NOT LOCATED IN THE")
+    lines.append("REMARK 465 EXPERIMENT. (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN")
+    lines.append("REMARK 465 IDENTIFIER; SSSEQ=SEQUENCE NUMBER; I=INSERTION CODE.)")
+    lines.append("REMARK 465")
+    lines.append("REMARK 465   M RES C SSSEQI")
+    for missing_residue in pdb.experiment.missing_residues:
+        # The ID is read as "<chain char><separator><number>": [0] is the chain, [2:] the number.
+        lines.append(f"REMARK 465     {missing_residue.name} {missing_residue.id[0]}   {missing_residue.id[2:]}")
+    return lines
+def _build_crystallography_section(pdb: PDB) -> list[str]:
+    """
+    Build the CRYST1 line if unit cell data is provided.
+    Cell lengths are written with 3 decimals in 9-character fields and angles with 2 decimals
+    in 7-character fields, so values always occupy their fixed columns.
+    Args:
+        pdb (PDB): Structure whose geometry.crystallography holds unit_cell and space_group.
+    Returns:
+        list[str]: A single CRYST1 line, or an empty list when unit_cell is empty or None.
+    Raises:
+        IndexError: If unit_cell has fewer than six values.
+    """
+    crystallography = pdb.geometry.crystallography
+    unit_cell = crystallography.unit_cell
+    if not unit_cell:
+        return []
+    # unit_cell is read as three lengths followed by three angles — presumably (a, b, c, alpha, beta, gamma).
+    lengths = "".join(f"{unit_cell[k]:9.3f}" for k in range(3))
+    angles = "".join(f"{unit_cell[k]:7.2f}" for k in range(3, 6))
+    return [f"CRYST1{lengths}{angles} {crystallography.space_group or '':<11}"]

stjames/workflows/irc.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from typing import Self
-from pydantic import Field, model_validator
+from pydantic import Field, PositiveFloat, field_validator, model_validator
 from ..method import XTB_METHODS, Method
 from ..mode import Mode
@@ -24,28 +24,26 @@ class IRCWorkflow(Workflow):
     :param settings: Settings for running the IRC (only for manual mode)
     :param solvent: Solvent for the calculation (non-Manual mode only)
     :param preopt: whether to optimize the geometry before starting the IRC
-    :param final_opt: whether to optimize the final IRC geometry to a minimum
+    :param max_irc_steps: maximum number of steps for the IRC
+    :param step_size: step size for the IRC (Å)
     Results:
     :param starting_TS: optimized TS before the IRC (==initial_molecule if preopt=False)
     :param irc_forward: forward calculations
     :param irc_backward: reverse calculations
-    :param opt_forward: optimization steps after the forward IRC
-    :param opt_backward: optimization steps after the reverse IRC
     """
     settings: Settings = _sentinel_settings
     solvent: Solvent | None = None
     preopt: bool = False
-    final_opt: bool = False
+    max_irc_steps: int = 10
+    step_size: PositiveFloat = 0.05
     starting_TS: UUID | None = None
     irc_forward: list[UUID] = Field(default_factory=list)
     irc_backward: list[UUID] = Field(default_factory=list)
-    opt_forward: list[UUID] = Field(default_factory=list)
-    opt_backward: list[UUID] = Field(default_factory=list)
     def __str__(self) -> str:
         return repr(self)
@@ -62,6 +60,15 @@ class IRCWorkflow(Workflow):
         """Level of theory for the workflow."""
         return self.settings.level_of_theory
+    @field_validator("step_size", mode="after")
+    @classmethod
+    def validate_step_size(cls, step_size: float) -> float:
+        """Return step_size unchanged if it lies within 0.001-0.1 Å; raise ValueError otherwise."""
+        too_small = step_size < 1e-3
+        too_large = step_size > 0.1
+        if too_small or too_large:
+            raise ValueError(f"Step size must be between 0.001 and 0.1 Å, got: {step_size}")
+        return step_size
     @model_validator(mode="after")
     def validate_mode(self) -> Self:
         """Convert the mode to settings."""

{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: stjames
-Version: 0.0.52
+Version: 0.0.53
 Summary: standardized JSON atom/molecule encoding scheme
 Author-email: Corin Wagen <corin@rowansci.com>
 Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: atomium<2,>=1
 Requires-Dist: pydantic>=2.4
 Requires-Dist: numpy
 Requires-Dist: atomium<2.0,>=1.0
+Requires-Dist: requests
 # stjames

{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/RECORD RENAMED Viewed

@@ -15,7 +15,7 @@ stjames/method.py,sha256=5hBHk2xQLpxZ52LwJ9FHWaqQMdFKnsbQEOxaVe6O4Go,2321
 stjames/mode.py,sha256=xw46Cc7f3eTS8i35qECi-8DocAlANhayK3w4akD4HBU,496
 stjames/molecule.py,sha256=DeNYmFdvbuKeXvLqlu-UxHMyZVK6y4j-Lw3HITGMnHw,12406
 stjames/opt_settings.py,sha256=gxXGtjy9l-Q5Wen9eO6T6HHRCuS8rfOofdVQIJj0JcI,550
-stjames/pdb.py,sha256=LDAxBLwd_xAzpEmls5G3fdvq77O7Cf7nMY8_ApDPfH8,4742
+stjames/pdb.py,sha256=yAEqFV2BxStd-G1PDNqtB8Qy_8x4sWZDiaSk8ifM1U0,25130
 stjames/periodic_cell.py,sha256=eV_mArsY_MPEFSrFEsTC-CyCc6V8ITAXdk7yhjjNI7M,1080
 stjames/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 stjames/scf_settings.py,sha256=WotVgVrayQ_8PUHP39zVtG7iLT9PV41lpzruttFACP8,2356
@@ -25,6 +25,11 @@ stjames/status.py,sha256=wTKNcNxStoEHrxxgr_zTyN90NITa3rxMQZzOgrCifEw,332
 stjames/task.py,sha256=OLINRqe66o7t8arffilwmggrF_7TH0L79u6DhGruxV8,329
 stjames/thermochem_settings.py,sha256=ZTLz31v8Ltutde5Nfm0vH5YahWjcfFWfr_R856KffxE,517
 stjames/types.py,sha256=hw-3UBikESvN3DzfK5doZB030kIEfx9gC3yBkIbebsI,3764
+stjames/atomium_stjames/__init__.py,sha256=gZkzC7i9D_fmWUTN55gtygITo3-qvJUda5CXLR0jyCQ,306
+stjames/atomium_stjames/data.py,sha256=-hzwBpTHq5JetsOVyopUJswKnKAkMtJ_XkONxjXVupU,5675
+stjames/atomium_stjames/mmcif.py,sha256=16LNhQW7GkwEmRAG2lDEnhQaeBabtzIiEbzjjBnLhNg,27108
+stjames/atomium_stjames/pdb.py,sha256=nkCqdc6fy6rKNcIZZDysDLTdlPJWWRmTYBYEFr1wcAQ,22365
+stjames/atomium_stjames/utilities.py,sha256=B_TNLTrsiGaEPBG5-4mhTcj0v4VgYUi55ICF9IR_LG0,4776
 stjames/data/__init__.py,sha256=O59Ksp7AIqwOELCWymfCx7YeBzwNOGCMlGQi7tNLqiE,24
 stjames/data/bragg_radii.json,sha256=hhbn-xyZNSdmnULIjN2Cvq-_BGIZIqG243Ls_mey61w,1350
 stjames/data/elements.py,sha256=9BW01LZlyJ0H5s7Q26vUmjZIST41fwOYYrGvmPd7q0w,858
@@ -43,7 +48,7 @@ stjames/workflows/docking.py,sha256=K6zy4lo1XfrrMd7ZmKAe_Fd9wvKhtCMoK66gp-TsuJA,
 stjames/workflows/electronic_properties.py,sha256=uAIcGKKLhqoHyDgcOZulEXwTU2EjidyvOndZDYyeJEk,4003
 stjames/workflows/fukui.py,sha256=2J23RjkSOZ-40AM3AdnbJkRBGaCevkjkhnV3pVfa6lo,738
 stjames/workflows/hydrogen_bond_basicity.py,sha256=Luvov2DlDvZN06W-mU6YaN7wcIrTLwzdoWww-jNE3x4,517
-stjames/workflows/irc.py,sha256=y4KXeHPfPoT9jsroABxWyFdg6k5y3r99hdtvo8g26_A,3148
+stjames/workflows/irc.py,sha256=3lA3EOXju6d0vETs-PEnRBnGCkshTXDtjw4dVDj0N5A,3333
 stjames/workflows/molecular_dynamics.py,sha256=4HmYETU1VT2BA4-PqAayRZLjnj1WuYxd5bqpIyH9g5k,2465
 stjames/workflows/multistage_opt.py,sha256=0ou-UYMGIrewZIg3QZIgwS_eweYdsh2pRplxgRCqLcE,13572
 stjames/workflows/pka.py,sha256=vSbMc7wuUKATNLq2kQyfCyX6aUthCj-XGSoXnuk4GMo,1031
@@ -52,8 +57,8 @@ stjames/workflows/scan.py,sha256=uNSuUmVMAV4exNvcv1viVe7930i7GZMn7RtEimnwEE8,100
 stjames/workflows/spin_states.py,sha256=b-uCf-pHjF_JHbExeb5GdRToE0pIxP0JTd50U130ckI,4693
 stjames/workflows/tautomer.py,sha256=x3TC8hkMs87ZUodLyhce5EUzYoV276ePfPMi7ISWyNU,651
 stjames/workflows/workflow.py,sha256=tIu5naADYgYS7kdW8quvGEWHWosBcrIdcD7L86v-uMQ,976
-stjames-0.0.52.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
-stjames-0.0.52.dist-info/METADATA,sha256=4vbKOo5yexzEQRYM3x6_fYAyAt72lhpU-UKSn_hdOwE,1689
-stjames-0.0.52.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-stjames-0.0.52.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
-stjames-0.0.52.dist-info/RECORD,,
+stjames-0.0.53.dist-info/LICENSE,sha256=i7ehYBS-6gGmbTcgU4mgk28pyOx2kScJ0kcx8n7bWLM,1084
+stjames-0.0.53.dist-info/METADATA,sha256=rXyJcZw2ooRmEXl7y7YNoca93EyheOn780kZPISC-Aw,1713
+stjames-0.0.53.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+stjames-0.0.53.dist-info/top_level.txt,sha256=FYCwxl6quhYOAgG-mnPQcCK8vsVM7B8rIUrO-WrQ_PI,8
+stjames-0.0.53.dist-info/RECORD,,

{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/LICENSE RENAMED Viewed

File without changes

{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/WHEEL RENAMED Viewed

File without changes

{stjames-0.0.52.dist-info → stjames-0.0.53.dist-info}/top_level.txt RENAMED Viewed

File without changes

stjames 0.0.52__py3-none-any.whl → 0.0.53__py3-none-any.whl

Potentially problematic release.

stjames 0.0.52__py3-none-any.whl → 0.0.53__py3-none-any.whl