stjames 0.0.65__tar.gz → 0.0.67__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stjames might be problematic. Click here for more details.
- {stjames-0.0.65/stjames.egg-info → stjames-0.0.67}/PKG-INFO +3 -2
- {stjames-0.0.65 → stjames-0.0.67}/pyproject.toml +1 -1
- {stjames-0.0.65 → stjames-0.0.67}/stjames/molecule.py +137 -43
- {stjames-0.0.65 → stjames-0.0.67}/stjames/pdb.py +34 -15
- {stjames-0.0.65 → stjames-0.0.67/stjames.egg-info}/PKG-INFO +3 -2
- stjames-0.0.67/tests/test_from_extxyz.py +322 -0
- {stjames-0.0.65 → stjames-0.0.67}/tests/test_pdb.py +15 -0
- stjames-0.0.65/tests/test_from_extxyz.py +0 -231
- {stjames-0.0.65 → stjames-0.0.67}/LICENSE +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/README.md +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/setup.cfg +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/__init__.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/_deprecated_solvent_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/atom.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/__init__.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/data.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/mmcif.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/pdb.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/utilities.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/base.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/basis_set.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/calculation.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/compute_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/constraint.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/correction.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/data/__init__.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/data/bragg_radii.json +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/data/elements.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/data/isotopes.json +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/data/nist_isotopes.json +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/data/read_nist_isotopes.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/data/symbol_element.json +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/diis_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/grid_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/int_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/message.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/method.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/mode.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/opt_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/periodic_cell.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/py.typed +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/scf_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/solvent.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/status.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/task.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/thermochem_settings.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/types.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/__init__.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/admet.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/basic_calculation.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/bde.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/conformer.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/conformer_search.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/descriptors.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/docking.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/electronic_properties.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/fukui.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/hydrogen_bond_basicity.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/ion_mobility.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/irc.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/macropka.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/molecular_dynamics.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/multistage_opt.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/pka.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/redox_potential.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/scan.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/solubility.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/spin_states.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/tautomer.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/workflow.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/SOURCES.txt +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/dependency_links.txt +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/requires.txt +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/top_level.txt +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/tests/test_constraints.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/tests/test_molecule.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/tests/test_rounding.py +0 -0
- {stjames-0.0.65 → stjames-0.0.67}/tests/test_settings.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: stjames
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.67
|
|
4
4
|
Summary: standardized JSON atom/molecule encoding scheme
|
|
5
5
|
Author-email: Corin Wagen <corin@rowansci.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/rowansci/stjames
|
|
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
|
|
|
12
12
|
Requires-Dist: numpy
|
|
13
13
|
Requires-Dist: requests
|
|
14
14
|
Requires-Dist: rdkit
|
|
15
|
+
Dynamic: license-file
|
|
15
16
|
|
|
16
17
|
# stjames
|
|
17
18
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Annotated, Iterable, Optional, Self, TypeAlias
|
|
3
|
+
from typing import Annotated, Any, Iterable, Optional, Self, Sequence, TypeAlias, TypedDict, TypeVar
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
6
|
from pydantic import AfterValidator, NonNegativeInt, PositiveInt, ValidationError
|
|
@@ -9,6 +9,7 @@ from rdkit.Chem import AllChem
|
|
|
9
9
|
|
|
10
10
|
from .atom import Atom
|
|
11
11
|
from .base import Base, round_float, round_optional_float
|
|
12
|
+
from .data import SYMBOL_ELEMENT
|
|
12
13
|
from .periodic_cell import PeriodicCell
|
|
13
14
|
from .types import (
|
|
14
15
|
FloatPerAtom,
|
|
@@ -246,28 +247,102 @@ class Molecule(Base):
|
|
|
246
247
|
return cls.from_extxyz_lines(extxyz.strip().splitlines(), charge=charge, multiplicity=multiplicity)
|
|
247
248
|
|
|
248
249
|
@classmethod
|
|
249
|
-
def from_extxyz_lines(
|
|
250
|
-
|
|
251
|
-
lines
|
|
250
|
+
def from_extxyz_lines(
|
|
251
|
+
cls: type[Self],
|
|
252
|
+
lines: Iterable[str],
|
|
253
|
+
charge: int | None = None,
|
|
254
|
+
multiplicity: PositiveInt | None = None,
|
|
255
|
+
cell: PeriodicCell | None = None,
|
|
256
|
+
) -> Self:
|
|
257
|
+
"""
|
|
258
|
+
Parses an EXTXYZ file, extracting atom positions, forces (if present), and metadata.
|
|
259
|
+
|
|
260
|
+
Supports:
|
|
261
|
+
- Lattice vectors (cell information)
|
|
262
|
+
- Properties field (species, positions, forces, etc.)
|
|
263
|
+
- Other metadata like charge, multiplicity, energy, etc.
|
|
264
|
+
|
|
265
|
+
:param lines: Iterable of lines from an EXTXYZ file
|
|
266
|
+
:param charge: total charge of the molecule (default: 0 if not found)
|
|
267
|
+
:param multiplicity: spin multiplicity of the molecule (default: 1 if not found)
|
|
268
|
+
:param cell: PeriodicCell containing lattice vectors
|
|
269
|
+
:return: Molecule
|
|
270
|
+
:raises MoleculeReadError: if the file is not in the correct format
|
|
271
|
+
"""
|
|
272
|
+
if not isinstance(lines, Sequence):
|
|
273
|
+
lines = list(lines)
|
|
274
|
+
|
|
275
|
+
# Ensure first line contains number of atoms
|
|
252
276
|
if len(lines[0].split()) == 1:
|
|
253
277
|
natoms = lines[0].strip()
|
|
254
|
-
if not natoms.isdigit() or (int(
|
|
255
|
-
raise MoleculeReadError(f"First line
|
|
256
|
-
lines = lines[1:]
|
|
278
|
+
if not natoms.isdigit() or (int(natoms) != len(lines) - 2):
|
|
279
|
+
raise MoleculeReadError(f"First line should be number of atoms, got: {lines[0]} != {len(lines) - 2}")
|
|
280
|
+
data_line, *lines = lines[1:]
|
|
257
281
|
else:
|
|
258
|
-
raise MoleculeReadError(f"First line
|
|
282
|
+
raise MoleculeReadError(f"First line should be an integer denoting atom count. Got {lines[0].split()}")
|
|
259
283
|
|
|
260
|
-
|
|
261
|
-
if "=" not in lines[0]:
|
|
262
|
-
raise MoleculeReadError(f"Invalid property line, got {lines[0]}")
|
|
284
|
+
metadata = parse_extxyz_comment_line(data_line)
|
|
263
285
|
|
|
264
|
-
|
|
265
|
-
lines = lines[1:]
|
|
286
|
+
T = TypeVar("T")
|
|
266
287
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
288
|
+
def metadata_optional_get(key: str, value: T | None, default: T) -> T:
|
|
289
|
+
"""Set key to default if not found in metadata"""
|
|
290
|
+
if value is None:
|
|
291
|
+
return metadata.get(key, default) # type: ignore [return-value]
|
|
292
|
+
|
|
293
|
+
return value
|
|
294
|
+
|
|
295
|
+
charge = metadata_optional_get("total_charge", charge, 0)
|
|
296
|
+
multiplicity = metadata_optional_get("multiplicity", multiplicity, 1)
|
|
297
|
+
cell = cell or metadata.get("cell")
|
|
298
|
+
energy = metadata.get("energy", None)
|
|
299
|
+
|
|
300
|
+
force_idx = None
|
|
301
|
+
if properties := metadata.get("properties", "").split(":"):
|
|
302
|
+
if properties[0].lower() != "species":
|
|
303
|
+
raise MoleculeReadError(f"Invalid or missing 'Properties' field in EXTXYZ, got: {properties}")
|
|
304
|
+
|
|
305
|
+
# Identify column indices for position and force data
|
|
306
|
+
pos_idx = None
|
|
307
|
+
current_idx = 0 # Start after 'species:S'
|
|
308
|
+
|
|
309
|
+
while current_idx < len(properties):
|
|
310
|
+
if properties[current_idx].lower() == "pos" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
|
|
311
|
+
pos_idx = current_idx
|
|
312
|
+
elif properties[current_idx].lower() == "forces" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
|
|
313
|
+
force_idx = current_idx
|
|
314
|
+
current_idx += 3
|
|
315
|
+
|
|
316
|
+
if pos_idx is None:
|
|
317
|
+
raise MoleculeReadError("No position data ('pos:R:3') found in Properties field.")
|
|
318
|
+
|
|
319
|
+
def parse_line_atoms(line: str) -> Atom:
|
|
320
|
+
symbol, sx, sy, sz, *_ = line.split()
|
|
321
|
+
atomic_number = SYMBOL_ELEMENT[symbol.title()]
|
|
322
|
+
x, y, z = map(float, (sx, sy, sz))
|
|
323
|
+
|
|
324
|
+
return Atom(atomic_number=atomic_number, position=(x, y, z))
|
|
325
|
+
|
|
326
|
+
def parse_line_with_grad(line: str) -> tuple[Atom, Vector3D]:
|
|
327
|
+
symbol, sx, sy, sz, sgx, sgy, sgz, *_ = line.split()
|
|
328
|
+
atomic_number = SYMBOL_ELEMENT[symbol.title()]
|
|
329
|
+
x, y, z = map(float, (sx, sy, sz))
|
|
330
|
+
gx, gy, gz = map(float, (sgx, sgy, sgz))
|
|
331
|
+
|
|
332
|
+
return (
|
|
333
|
+
Atom(atomic_number=atomic_number, position=(x, y, z)),
|
|
334
|
+
(-gx, -gy, -gz),
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
atoms: list[Atom]
|
|
338
|
+
gradients: list[Vector3D] | None
|
|
339
|
+
if force_idx is not None:
|
|
340
|
+
atoms, gradients = zip(*map(parse_line_with_grad, lines), strict=True) # type: ignore [assignment]
|
|
341
|
+
else:
|
|
342
|
+
atoms = [parse_line_atoms(line) for line in lines]
|
|
343
|
+
gradients = None
|
|
344
|
+
|
|
345
|
+
return cls(atoms=atoms, cell=cell, charge=charge, multiplicity=multiplicity, energy=energy, gradient=gradients)
|
|
271
346
|
|
|
272
347
|
@classmethod
|
|
273
348
|
def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0) -> Self:
|
|
@@ -313,43 +388,62 @@ def _embed_rdkit_mol(rdkm: RdkitMol) -> RdkitMol:
|
|
|
313
388
|
return rdkm
|
|
314
389
|
|
|
315
390
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
391
|
+
class EXTXYZMetadata(TypedDict, total=False):
|
|
392
|
+
properties: Any
|
|
393
|
+
total_charge: int
|
|
394
|
+
multiplicity: int
|
|
395
|
+
energy: float
|
|
396
|
+
cell: PeriodicCell
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def parse_extxyz_comment_line(line: str) -> EXTXYZMetadata:
|
|
321
400
|
"""
|
|
322
|
-
|
|
401
|
+
Parse the comment line of an EXTXYZ file, extracting lattice, properties, and metadata.
|
|
402
|
+
|
|
403
|
+
Supports:
|
|
404
|
+
- Lattice vectors (cell information)
|
|
405
|
+
- Properties field (species, positions, forces, etc.)
|
|
406
|
+
- Other metadata fields like charge, multiplicity, energy, etc.
|
|
407
|
+
|
|
408
|
+
:param line: comment line from an EXTXYZ file
|
|
409
|
+
:return: parsed properties
|
|
410
|
+
|
|
411
|
+
>>> parse_extxyz_comment_line('Lattice="6.0 0.0 0.0 6.0 0.0 0.0 6.0 0.0 0.0"Properties=species:S:1:pos:R:3')
|
|
412
|
+
{'cell': PeriodicCell(lattice_vectors=((6.0, 0.0, 0.0), (6.0, 0.0, 0.0), (6.0, 0.0, 0.0)), is_periodic=(True, True, True), volume=0.0), 'properties': 'species:S:1:pos:R:3'}
|
|
413
|
+
""" # noqa: E501
|
|
414
|
+
|
|
323
415
|
# Regular expression to match key="value", key='value', or key=value
|
|
324
416
|
pattern = r"(\S+?=(?:\".*?\"|\'.*?\'|\S+))"
|
|
325
417
|
pairs = re.findall(pattern, line)
|
|
326
418
|
|
|
327
|
-
prop_dict = {}
|
|
419
|
+
prop_dict: EXTXYZMetadata = {}
|
|
328
420
|
for pair in pairs:
|
|
329
421
|
key, value = pair.split("=", 1)
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
422
|
+
key = key.lower().strip()
|
|
423
|
+
value = value.strip("'\"")
|
|
424
|
+
|
|
425
|
+
if key == "lattice":
|
|
426
|
+
lattice_values = value.split()
|
|
427
|
+
if len(lattice_values) != 9:
|
|
428
|
+
raise MoleculeReadError(f"Lattice should have 9 entries, got {len(lattice_values)}")
|
|
334
429
|
|
|
335
|
-
# Convert the value to a 3x3 tuple of tuples of floats
|
|
336
430
|
try:
|
|
337
|
-
cell = tuple(tuple(map(float,
|
|
431
|
+
cell = tuple(tuple(map(float, lattice_values[i : i + 3])) for i in range(0, 9, 3))
|
|
338
432
|
except ValueError:
|
|
339
|
-
raise MoleculeReadError(f"Lattice should be floats, got {
|
|
433
|
+
raise MoleculeReadError(f"Lattice should be floats, got {lattice_values}")
|
|
340
434
|
|
|
341
|
-
prop_dict[
|
|
435
|
+
prop_dict["cell"] = PeriodicCell(lattice_vectors=cell)
|
|
342
436
|
|
|
343
|
-
elif key
|
|
344
|
-
|
|
345
|
-
raise MoleculeReadError(f"Only accepting properties of form species:S:1:pos:R:3, got {value}")
|
|
346
|
-
prop_dict[key] = value
|
|
347
|
-
else:
|
|
348
|
-
raise MoleculeReadError(f"Currently only accepting lattice and propery keys. Got {key}")
|
|
437
|
+
elif key == "properties":
|
|
438
|
+
prop_dict["properties"] = value
|
|
349
439
|
|
|
350
|
-
|
|
351
|
-
|
|
440
|
+
elif key == "total_charge":
|
|
441
|
+
prop_dict["total_charge"] = int(value)
|
|
442
|
+
elif key == "multiplicity":
|
|
443
|
+
prop_dict["multiplicity"] = int(value)
|
|
444
|
+
elif key == "energy":
|
|
445
|
+
prop_dict["energy"] = float(value)
|
|
446
|
+
else:
|
|
447
|
+
prop_dict[key] = value # type: ignore [literal-required]
|
|
352
448
|
|
|
353
|
-
|
|
354
|
-
raise MoleculeReadError(f"Property field is required, got keys {prop_dict.keys()}")
|
|
355
|
-
return PeriodicCell(lattice_vectors=cell)
|
|
449
|
+
return prop_dict
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from datetime import date, datetime
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from typing import Any, Literal
|
|
@@ -276,7 +277,7 @@ def pdb_object_to_pdb_filestring(
|
|
|
276
277
|
atom=atom,
|
|
277
278
|
chain_id=this_chain_id,
|
|
278
279
|
res_name=residue.name,
|
|
279
|
-
res_num=
|
|
280
|
+
res_num=_residue_id[2:],
|
|
280
281
|
alt_loc=atom.alt_loc or "",
|
|
281
282
|
)
|
|
282
283
|
pdb_lines.append(line)
|
|
@@ -286,12 +287,12 @@ def pdb_object_to_pdb_filestring(
|
|
|
286
287
|
atom=atom,
|
|
287
288
|
chain_id=this_chain_id,
|
|
288
289
|
res_name=residue.name,
|
|
289
|
-
res_num=
|
|
290
|
+
res_num=_residue_id[2:],
|
|
290
291
|
alt_loc=atom.alt_loc or "",
|
|
291
292
|
)
|
|
292
293
|
pdb_lines.append(line)
|
|
293
294
|
|
|
294
|
-
pdb_lines.append(f"TER {_atom_id + 1:>5} {residue.name:>3} {this_chain_id}{
|
|
295
|
+
pdb_lines.append(f"TER {_atom_id + 1:>5} {residue.name:>3} {this_chain_id}{_residue_id[2:]:>4}")
|
|
295
296
|
|
|
296
297
|
# === 2) Non-polymers (e.g. ligands, ions) ===
|
|
297
298
|
for _np_id, nonpoly in model.non_polymer.items():
|
|
@@ -308,7 +309,7 @@ def pdb_object_to_pdb_filestring(
|
|
|
308
309
|
atom=atom,
|
|
309
310
|
chain_id=chain_id_for_np,
|
|
310
311
|
res_name=nonpoly.name,
|
|
311
|
-
res_num=
|
|
312
|
+
res_num=_np_id[2:],
|
|
312
313
|
)
|
|
313
314
|
pdb_lines.append(line)
|
|
314
315
|
if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
|
|
@@ -317,7 +318,7 @@ def pdb_object_to_pdb_filestring(
|
|
|
317
318
|
atom=atom,
|
|
318
319
|
chain_id=chain_id_for_np,
|
|
319
320
|
res_name=nonpoly.name,
|
|
320
|
-
res_num=
|
|
321
|
+
res_num=_np_id[2:],
|
|
321
322
|
)
|
|
322
323
|
pdb_lines.append(line)
|
|
323
324
|
|
|
@@ -330,7 +331,7 @@ def pdb_object_to_pdb_filestring(
|
|
|
330
331
|
atom=atom,
|
|
331
332
|
chain_id=_w_id[0], # Or you can use water.polymer if set
|
|
332
333
|
res_name="HOH",
|
|
333
|
-
res_num=
|
|
334
|
+
res_num=_w_id[2:], # or an incrementing value
|
|
334
335
|
)
|
|
335
336
|
pdb_lines.append(line)
|
|
336
337
|
if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
|
|
@@ -339,7 +340,7 @@ def pdb_object_to_pdb_filestring(
|
|
|
339
340
|
atom=atom,
|
|
340
341
|
chain_id=_w_id[0],
|
|
341
342
|
res_name="HOH",
|
|
342
|
-
res_num=
|
|
343
|
+
res_num=_w_id[2:],
|
|
343
344
|
)
|
|
344
345
|
pdb_lines.append(line)
|
|
345
346
|
|
|
@@ -357,7 +358,7 @@ def pdb_object_to_pdb_filestring(
|
|
|
357
358
|
atom=atom,
|
|
358
359
|
chain_id="B",
|
|
359
360
|
res_name="BRN", # or branched_obj.get("name", "BRN")
|
|
360
|
-
res_num=1,
|
|
361
|
+
res_num="1",
|
|
361
362
|
)
|
|
362
363
|
pdb_lines.append(line)
|
|
363
364
|
if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
|
|
@@ -366,7 +367,7 @@ def pdb_object_to_pdb_filestring(
|
|
|
366
367
|
atom=atom,
|
|
367
368
|
chain_id="B",
|
|
368
369
|
res_name="BRN",
|
|
369
|
-
res_num=1,
|
|
370
|
+
res_num="1",
|
|
370
371
|
)
|
|
371
372
|
pdb_lines.append(line)
|
|
372
373
|
|
|
@@ -407,7 +408,7 @@ def _format_atom_line(
|
|
|
407
408
|
atom: PDBAtom,
|
|
408
409
|
chain_id: str,
|
|
409
410
|
res_name: str,
|
|
410
|
-
res_num:
|
|
411
|
+
res_num: str | None,
|
|
411
412
|
alt_loc: str = "",
|
|
412
413
|
) -> str:
|
|
413
414
|
"""
|
|
@@ -423,7 +424,15 @@ def _format_atom_line(
|
|
|
423
424
|
alt_loc_char = alt_loc if alt_loc else " "
|
|
424
425
|
residue_name = (res_name or "UNK")[:3] # limit to 3 chars
|
|
425
426
|
chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
|
|
426
|
-
|
|
427
|
+
residue_num_str = "1"
|
|
428
|
+
insertion_code = " "
|
|
429
|
+
if res_num:
|
|
430
|
+
match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
|
|
431
|
+
if match:
|
|
432
|
+
residue_num_str, insertion_code = match.groups()
|
|
433
|
+
insertion_code = insertion_code if insertion_code != "" else " "
|
|
434
|
+
|
|
435
|
+
residue_num = int(residue_num_str)
|
|
427
436
|
|
|
428
437
|
# Format charge: PDB uses e.g. " 2-", " 1+" in columns 79-80
|
|
429
438
|
# If your model stores charges differently, adapt as needed.
|
|
@@ -451,7 +460,8 @@ def _format_atom_line(
|
|
|
451
460
|
f"{residue_name:>3}" # residue name (columns 18-20)
|
|
452
461
|
f" {chain_char}" # chain ID (column 22)
|
|
453
462
|
f"{residue_num:4d}" # residue sequence number (columns 23-26)
|
|
454
|
-
f"
|
|
463
|
+
f"{insertion_code}"
|
|
464
|
+
f" " # columns 27-30 (spacing)
|
|
455
465
|
f"{atom.x:8.3f}" # x (columns 31-38)
|
|
456
466
|
f"{atom.y:8.3f}" # y (columns 39-46)
|
|
457
467
|
f"{atom.z:8.3f}" # z (columns 47-54)
|
|
@@ -469,7 +479,7 @@ def _format_anisou_line(
|
|
|
469
479
|
atom: PDBAtom,
|
|
470
480
|
chain_id: str,
|
|
471
481
|
res_name: str,
|
|
472
|
-
res_num:
|
|
482
|
+
res_num: str | None,
|
|
473
483
|
alt_loc: str = "",
|
|
474
484
|
) -> str:
|
|
475
485
|
"""
|
|
@@ -485,7 +495,15 @@ def _format_anisou_line(
|
|
|
485
495
|
alt_loc_char = alt_loc if alt_loc else " "
|
|
486
496
|
residue_name = (res_name or "UNK")[:3] # limit to 3 chars
|
|
487
497
|
chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
|
|
488
|
-
|
|
498
|
+
residue_num_str = "1"
|
|
499
|
+
insertion_code = " "
|
|
500
|
+
if res_num:
|
|
501
|
+
match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
|
|
502
|
+
if match:
|
|
503
|
+
residue_num_str, insertion_code = match.groups()
|
|
504
|
+
insertion_code = insertion_code if insertion_code != "" else " "
|
|
505
|
+
|
|
506
|
+
residue_num = int(residue_num_str)
|
|
489
507
|
|
|
490
508
|
chg = ""
|
|
491
509
|
if atom.charge and abs(atom.charge) > 0:
|
|
@@ -528,7 +546,8 @@ def _format_anisou_line(
|
|
|
528
546
|
f"{residue_name:>3}" # residue name (columns 18-20)
|
|
529
547
|
f" {chain_char}" # chain ID (column 22)
|
|
530
548
|
f"{residue_num:4d}" # residue sequence number (columns 23-26)
|
|
531
|
-
f"
|
|
549
|
+
f"{insertion_code}"
|
|
550
|
+
f" " # columns 27-28 (plus spacing)
|
|
532
551
|
f"{aniso_lines}"
|
|
533
552
|
f" " # columns 70-76 (padding)
|
|
534
553
|
f"{atom.element:>2}" # element (columns 77-78)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: stjames
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.67
|
|
4
4
|
Summary: standardized JSON atom/molecule encoding scheme
|
|
5
5
|
Author-email: Corin Wagen <corin@rowansci.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/rowansci/stjames
|
|
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
|
|
|
12
12
|
Requires-Dist: numpy
|
|
13
13
|
Requires-Dist: requests
|
|
14
14
|
Requires-Dist: rdkit
|
|
15
|
+
Dynamic: license-file
|
|
15
16
|
|
|
16
17
|
# stjames
|
|
17
18
|
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
from pytest import mark, raises
|
|
2
|
+
|
|
3
|
+
from stjames import Atom, Molecule, MoleculeReadError, PeriodicCell
|
|
4
|
+
|
|
5
|
+
# Valid EXTXYZ without forces (only positions)
|
|
6
|
+
valid_extxyz = """
|
|
7
|
+
5
|
|
8
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
9
|
+
C 0.0 0.0 0.0
|
|
10
|
+
H 0.0 0.0 1.0
|
|
11
|
+
H 1.0 0.0 0.0
|
|
12
|
+
H 0.0 1.0 0.0
|
|
13
|
+
H 1.0 1.0 1.0
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
# Valid EXTXYZ with forces: forces are provided after positions.
|
|
17
|
+
# For each atom, the forces are given and the molecule's gradient should be -forces.
|
|
18
|
+
valid_extxyz_with_forces_and_energy = """
|
|
19
|
+
22
|
|
20
|
+
Properties=species:S:1:pos:R:3:forces:R:3:MACE_forces:R:3 smiles="[H:13][C:5]([H:14])([H:15])[C:7]([H:18])([H:19])[C:6]([H:16])([H:17])[S:2][C:4]([H:11])([H:12])[S:1][C:3]([H:8])([H:9])[H:10].[H:21][O:20][H:22]" total_charge=0 energy=-29139.55729148458 config_type="DES370K Dimers" MACE_energy=-29139.53445598132 pbc="F F F"
|
|
21
|
+
S 2.75637007 2.70758009 4.82925987 1.55959951 -0.22801913 -0.54136018 1.57411973 -0.23083920 -0.56575688
|
|
22
|
+
S 2.84554004 0.22606000 3.09241009 -0.21334177 -1.76227773 0.33161567 -0.21053821 -1.76733183 0.31476973
|
|
23
|
+
C 1.01258004 2.49340009 4.93010997 -1.31854381 -0.37388875 -0.24927582 -1.32686027 -0.41468579 -0.24419847
|
|
24
|
+
C 3.17725992 1.94638002 3.18563008 -0.09910375 1.52775439 -0.55371287 -0.09844947 1.52902410 -0.49063227
|
|
25
|
+
C 0.00000000 0.00000000 0.00000000 0.54047842 -1.02932160 -0.37462899 0.55420524 -1.04859973 -0.38165141
|
|
26
|
+
C 2.22169995 -0.06399999 1.41622007 -0.74959058 0.13849016 -0.66947629 -0.74189889 0.14272211 -0.65926828
|
|
27
|
+
C 0.72196996 0.27478000 1.31911993 -0.88825228 1.14448169 0.10462363 -0.88035930 1.15971257 0.11942591
|
|
28
|
+
H 0.73436999 1.43289995 4.80946970 0.11871475 0.45471041 0.08255662 0.12983545 0.47803891 0.09119142
|
|
29
|
+
H 0.46778000 3.08784985 4.17749977 0.24911941 -0.20360289 0.32966949 0.24533754 -0.20850576 0.33744960
|
|
30
|
+
H 0.70124000 2.82674027 5.93438053 0.20534666 -0.13940533 -0.46775994 0.20706729 -0.12047003 -0.47116810
|
|
31
|
+
H 2.59819007 2.49752998 2.41988992 0.29845053 -0.31965051 0.34195845 0.29323885 -0.33151445 0.32609726
|
|
32
|
+
H 4.25116014 2.06114006 2.94268989 -0.62592601 0.53239934 0.63500463 -0.63988801 0.53433737 0.61779540
|
|
33
|
+
H 0.01781000 -1.10166001 -0.05143000 -0.31652062 0.33261293 -0.62734778 -0.32052002 0.33247610 -0.62978105
|
|
34
|
+
H 0.50296998 0.40217999 -0.89547998 -0.14484048 -0.10780390 0.42454441 -0.15022920 -0.10846825 0.42265847
|
|
35
|
+
H -1.05070996 0.33554000 -0.00612000 0.55084403 -0.10031390 0.18450139 0.55970312 -0.09816881 0.19243150
|
|
36
|
+
H 2.77840995 0.53621000 0.67106002 -0.14551747 -0.35971650 0.43068238 -0.14826788 -0.34813759 0.42951233
|
|
37
|
+
H 2.39210987 -1.13260996 1.18280005 0.04825265 0.56114332 0.26057253 0.04712810 0.55862479 0.26730085
|
|
38
|
+
H 0.59988999 1.35992002 1.50082004 0.18014249 -0.45468505 0.36480505 0.17898520 -0.45518615 0.36105601
|
|
39
|
+
H 0.10381000 -0.26278001 2.06366992 0.77321355 0.35561432 0.04327458 0.76235138 0.36555290 0.03043141
|
|
40
|
+
O 5.42839002 0.27989000 7.42625999 -0.71867266 0.97644353 0.67783188 -0.72326306 0.98088507 0.67544292
|
|
41
|
+
H 4.56538010 0.65290999 7.13586997 1.00844588 -0.37843304 0.30550308 0.99937753 -0.37383106 0.29632339
|
|
42
|
+
H 5.64198017 0.76508999 8.25524997 -0.31126251 -0.56856677 -1.03720163 -0.31107513 -0.57563526 -1.03942973
|
|
43
|
+
""" # noqa: E501
|
|
44
|
+
|
|
45
|
+
# Other invalid cases (these remain unchanged)
|
|
46
|
+
incorrect_num_atoms = """
|
|
47
|
+
6
|
|
48
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
49
|
+
C 0.0 0.0 0.0
|
|
50
|
+
H 0.0 0.0 1.0
|
|
51
|
+
H 1.0 0.0 0.0
|
|
52
|
+
H 0.0 1.0 0.0
|
|
53
|
+
H 1.0 1.0 1.0
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
not_digit_num_atoms = """
|
|
57
|
+
v
|
|
58
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
59
|
+
C 0.0 0.0 0.0
|
|
60
|
+
H 0.0 0.0 1.0
|
|
61
|
+
H 1.0 0.0 0.0
|
|
62
|
+
H 0.0 1.0 0.0
|
|
63
|
+
H 1.0 1.0 1.0
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
many_num_atoms = """
|
|
67
|
+
6 9
|
|
68
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
69
|
+
C 0.0 0.0 0.0
|
|
70
|
+
H 0.0 0.0 1.0
|
|
71
|
+
H 1.0 0.0 0.0
|
|
72
|
+
H 0.0 1.0 0.0
|
|
73
|
+
H 1.0 1.0 1.0
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
no_num_atoms = """
|
|
77
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
78
|
+
C 0.0 0.0 0.0
|
|
79
|
+
H 0.0 0.0 1.0
|
|
80
|
+
H 1.0 0.0 0.0
|
|
81
|
+
H 0.0 1.0 0.0
|
|
82
|
+
H 1.0 1.0 1.0
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
xyz_style = """
|
|
86
|
+
5
|
|
87
|
+
Comment
|
|
88
|
+
C 0.0 0.0 0.0
|
|
89
|
+
H 0.0 0.0 1.0
|
|
90
|
+
H 1.0 0.0 0.0
|
|
91
|
+
H 0.0 1.0 0.0
|
|
92
|
+
H 1.0 1.0 1.0
|
|
93
|
+
"""
|
|
94
|
+
|
|
95
|
+
missing_properties = """
|
|
96
|
+
5
|
|
97
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0"
|
|
98
|
+
C 0.0 0.0 0.0
|
|
99
|
+
H 0.0 0.0 1.0
|
|
100
|
+
H 1.0 0.0 0.0
|
|
101
|
+
H 0.0 1.0 0.0
|
|
102
|
+
H 1.0 1.0 1.0
|
|
103
|
+
"""
|
|
104
|
+
|
|
105
|
+
incorrect_properites = """
|
|
106
|
+
5
|
|
107
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3foo:1
|
|
108
|
+
C 0.0 0.0 0.0
|
|
109
|
+
H 0.0 0.0 1.0
|
|
110
|
+
H 1.0 0.0 0.0
|
|
111
|
+
H 0.0 1.0 0.0
|
|
112
|
+
H 1.0 1.0 1.0
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
incorrect_lattice_extra = """
|
|
116
|
+
5
|
|
117
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0 3.14" Properties=species:S:1:pos:R:3
|
|
118
|
+
C 0.0 0.0 0.0
|
|
119
|
+
H 0.0 0.0 1.0
|
|
120
|
+
H 1.0 0.0 0.0
|
|
121
|
+
H 0.0 1.0 0.0
|
|
122
|
+
H 1.0 1.0 1.0
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
incorrect_lattice_equals = """
|
|
126
|
+
5
|
|
127
|
+
Lattice="6.0 0.0 =0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
128
|
+
C 0.0 0.0 0.0
|
|
129
|
+
H 0.0 0.0 1.0
|
|
130
|
+
H 1.0 0.0 0.0
|
|
131
|
+
H 0.0 1.0 0.0
|
|
132
|
+
H 1.0 1.0 1.0
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
incorrect_lattice_str = """
|
|
136
|
+
5
|
|
137
|
+
Lattice="6.0 0.0 0.0 hi 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
138
|
+
C 0.0 0.0 0.0
|
|
139
|
+
H 0.0 0.0 1.0
|
|
140
|
+
H 1.0 0.0 0.0
|
|
141
|
+
H 0.0 1.0 0.0
|
|
142
|
+
H 1.0 1.0 1.0
|
|
143
|
+
"""
|
|
144
|
+
|
|
145
|
+
incorrect_lattice_extra_string = """
|
|
146
|
+
5
|
|
147
|
+
Lattice="6.0 0.0 0.0 0.0 sup 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
148
|
+
C 0.0 0.0 0.0
|
|
149
|
+
H 0.0 0.0 1.0
|
|
150
|
+
H 1.0 0.0 0.0
|
|
151
|
+
H 0.0 1.0 0.0
|
|
152
|
+
H 1.0 1.0 1.0
|
|
153
|
+
"""
|
|
154
|
+
|
|
155
|
+
incorrect_lattice_single_quote = """
|
|
156
|
+
5
|
|
157
|
+
Lattice="6.0 0.0 0.0 0.0 6.0 '0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
158
|
+
C 0.0 0.0 0.0
|
|
159
|
+
H 0.0 0.0 1.0
|
|
160
|
+
H 1.0 0.0 0.0
|
|
161
|
+
H 0.0 1.0 0.0
|
|
162
|
+
H 1.0 1.0 1.0
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
incorrect_lattice_double_quote = """
|
|
166
|
+
5
|
|
167
|
+
Lattice="6.0 0.0 0.0 0.0 "6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
168
|
+
C 0.0 0.0 0.0
|
|
169
|
+
H 0.0 0.0 1.0
|
|
170
|
+
H 1.0 0.0 0.0
|
|
171
|
+
H 0.0 1.0 0.0
|
|
172
|
+
H 1.0 1.0 1.0
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
incorrect_lattice_double_single_quote = """
|
|
176
|
+
5
|
|
177
|
+
Lattice="6.0 0.0 0.0 0.0 '6.0 0.0 0.0 '0.0 6.0" Properties=species:S:1:pos:R:3
|
|
178
|
+
C 0.0 0.0 0.0
|
|
179
|
+
H 0.0 0.0 1.0
|
|
180
|
+
H 1.0 0.0 0.0
|
|
181
|
+
H 0.0 1.0 0.0
|
|
182
|
+
H 1.0 1.0 1.0
|
|
183
|
+
"""
|
|
184
|
+
|
|
185
|
+
incorrect_lattice_double_double_quote = """
|
|
186
|
+
5
|
|
187
|
+
Lattice="6.0 0.0 "0.0 0.0 6.0 0.0 0.0 "0.0 6.0" Properties=species:S:1:pos:R:3
|
|
188
|
+
C 0.0 0.0 0.0
|
|
189
|
+
H 0.0 0.0 1.0
|
|
190
|
+
H 1.0 0.0 0.0
|
|
191
|
+
H 0.0 1.0 0.0
|
|
192
|
+
H 1.0 1.0 1.0
|
|
193
|
+
"""
|
|
194
|
+
|
|
195
|
+
expected_cell = (
|
|
196
|
+
(6.0, 0.0, 0.0),
|
|
197
|
+
(0.0, 6.0, 0.0),
|
|
198
|
+
(0.0, 0.0, 6.0),
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
expected_atoms = [
|
|
202
|
+
Atom(atomic_number=6, position=(0.0, 0.0, 0.0)), # C
|
|
203
|
+
Atom(atomic_number=1, position=(0.0, 0.0, 1.0)), # H
|
|
204
|
+
Atom(atomic_number=1, position=(1.0, 0.0, 0.0)), # H
|
|
205
|
+
Atom(atomic_number=1, position=(0.0, 1.0, 0.0)), # H
|
|
206
|
+
Atom(atomic_number=1, position=(1.0, 1.0, 1.0)), # H
|
|
207
|
+
]
|
|
208
|
+
|
|
209
|
+
expected_atoms_force_energy = [
|
|
210
|
+
Atom(atomic_number=16, position=(2.75637007, 2.70758009, 4.82925987)), # S
|
|
211
|
+
Atom(atomic_number=16, position=(2.84554004, 0.22606000, 3.09241009)), # S
|
|
212
|
+
Atom(atomic_number=6, position=(1.01258004, 2.49340009, 4.93010997)), # C
|
|
213
|
+
Atom(atomic_number=6, position=(3.17725992, 1.94638002, 3.18563008)), # C
|
|
214
|
+
Atom(atomic_number=6, position=(0.00000000, 0.00000000, 0.00000000)), # C
|
|
215
|
+
Atom(atomic_number=6, position=(2.22169995, -0.06399999, 1.41622007)), # C
|
|
216
|
+
Atom(atomic_number=6, position=(0.72196996, 0.27478000, 1.31911993)), # C
|
|
217
|
+
Atom(atomic_number=1, position=(0.73436999, 1.43289995, 4.80946970)), # H
|
|
218
|
+
Atom(atomic_number=1, position=(0.46778000, 3.08784985, 4.17749977)), # H
|
|
219
|
+
Atom(atomic_number=1, position=(0.70124000, 2.82674027, 5.93438053)), # H
|
|
220
|
+
Atom(atomic_number=1, position=(2.59819007, 2.49752998, 2.41988992)), # H
|
|
221
|
+
Atom(atomic_number=1, position=(4.25116014, 2.06114006, 2.94268989)), # H
|
|
222
|
+
Atom(atomic_number=1, position=(0.01781000, -1.10166001, -0.05143000)), # H
|
|
223
|
+
Atom(atomic_number=1, position=(0.50296998, 0.40217999, -0.89547998)), # H
|
|
224
|
+
Atom(atomic_number=1, position=(-1.05070996, 0.33554000, -0.00612000)), # H
|
|
225
|
+
Atom(atomic_number=1, position=(2.77840995, 0.53621000, 0.67106002)), # H
|
|
226
|
+
Atom(atomic_number=1, position=(2.39210987, -1.13260996, 1.18280005)), # H
|
|
227
|
+
Atom(atomic_number=1, position=(0.59988999, 1.35992002, 1.50082004)), # H
|
|
228
|
+
Atom(atomic_number=1, position=(0.10381000, -0.26278001, 2.06366992)), # H
|
|
229
|
+
Atom(atomic_number=8, position=(5.42839002, 0.27989000, 7.42625999)), # O
|
|
230
|
+
Atom(atomic_number=1, position=(4.56538010, 0.65290999, 7.13586997)), # H
|
|
231
|
+
Atom(atomic_number=1, position=(5.64198017, 0.76508999, 8.25524997)), # H
|
|
232
|
+
]
|
|
233
|
+
|
|
234
|
+
expected_gradient = [
|
|
235
|
+
(-1.55959951, 0.22801913, 0.54136018),
|
|
236
|
+
(0.21334177, 1.76227773, -0.33161567),
|
|
237
|
+
(1.31854381, 0.37388875, 0.24927582),
|
|
238
|
+
(0.09910375, -1.52775439, 0.55371287),
|
|
239
|
+
(-0.54047842, 1.02932160, 0.37462899),
|
|
240
|
+
(0.74959058, -0.13849016, 0.66947629),
|
|
241
|
+
(0.88825228, -1.14448169, -0.10462363),
|
|
242
|
+
(-0.11871475, -0.45471041, -0.08255662),
|
|
243
|
+
(-0.24911941, 0.20360289, -0.32966949),
|
|
244
|
+
(-0.20534666, 0.13940533, 0.46775994),
|
|
245
|
+
(-0.29845053, 0.31965051, -0.34195845),
|
|
246
|
+
(0.62592601, -0.53239934, -0.63500463),
|
|
247
|
+
(0.31652062, -0.33261293, 0.62734778),
|
|
248
|
+
(0.14484048, 0.10780390, -0.42454441),
|
|
249
|
+
(-0.55084403, 0.10031390, -0.18450139),
|
|
250
|
+
(0.14551747, 0.35971650, -0.43068238),
|
|
251
|
+
(-0.04825265, -0.56114332, -0.26057253),
|
|
252
|
+
(-0.18014249, 0.45468505, -0.36480505),
|
|
253
|
+
(-0.77321355, -0.35561432, -0.04327458),
|
|
254
|
+
(0.71867266, -0.97644353, -0.67783188),
|
|
255
|
+
(-1.00844588, 0.37843304, -0.30550308),
|
|
256
|
+
(0.31126251, 0.56856677, 1.03720163),
|
|
257
|
+
]
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
expected_molecule = Molecule(
|
|
261
|
+
charge=0,
|
|
262
|
+
multiplicity=1,
|
|
263
|
+
atoms=expected_atoms,
|
|
264
|
+
cell=PeriodicCell(lattice_vectors=expected_cell),
|
|
265
|
+
gradient=None,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
expected_molecule_with_forces_and_energy = Molecule(
|
|
269
|
+
charge=0,
|
|
270
|
+
multiplicity=1,
|
|
271
|
+
atoms=expected_atoms_force_energy,
|
|
272
|
+
cell=None,
|
|
273
|
+
gradient=expected_gradient,
|
|
274
|
+
energy=-29139.55729148458,
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def test_molecule_from_extxyz_valid() -> None:
|
|
279
|
+
"""
|
|
280
|
+
Test a valid extxyz string (without forces).
|
|
281
|
+
"""
|
|
282
|
+
molecule = Molecule.from_extxyz(valid_extxyz)
|
|
283
|
+
assert molecule == expected_molecule, f"Valid case failed:\nGot {molecule}\nExpected {expected_molecule}"
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def test_molecule_from_extxyz_valid_with_forces() -> None:
|
|
287
|
+
"""
|
|
288
|
+
Test a valid extxyz string that includes forces.
|
|
289
|
+
The forces should be converted to gradients (as the negative of forces).
|
|
290
|
+
"""
|
|
291
|
+
molecule = Molecule.from_extxyz(valid_extxyz_with_forces_and_energy)
|
|
292
|
+
assert molecule == expected_molecule_with_forces_and_energy, (
|
|
293
|
+
f"Valid forces case failed:\nGot {molecule}\nExpected {expected_molecule_with_forces_and_energy}"
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
@mark.parametrize(
|
|
298
|
+
"invalid_extxyz",
|
|
299
|
+
[
|
|
300
|
+
incorrect_num_atoms,
|
|
301
|
+
no_num_atoms,
|
|
302
|
+
not_digit_num_atoms,
|
|
303
|
+
many_num_atoms,
|
|
304
|
+
xyz_style,
|
|
305
|
+
missing_properties,
|
|
306
|
+
incorrect_properites,
|
|
307
|
+
incorrect_lattice_extra,
|
|
308
|
+
incorrect_lattice_equals,
|
|
309
|
+
incorrect_lattice_str,
|
|
310
|
+
incorrect_lattice_extra_string,
|
|
311
|
+
incorrect_lattice_single_quote,
|
|
312
|
+
incorrect_lattice_double_quote,
|
|
313
|
+
incorrect_lattice_double_single_quote,
|
|
314
|
+
incorrect_lattice_double_double_quote,
|
|
315
|
+
],
|
|
316
|
+
)
|
|
317
|
+
def test_molecule_from_extxyz_invalid(invalid_extxyz: str) -> None:
|
|
318
|
+
"""
|
|
319
|
+
Test that invalid extxyz strings raise MoleculeReadError.
|
|
320
|
+
"""
|
|
321
|
+
with raises(MoleculeReadError):
|
|
322
|
+
Molecule.from_extxyz(invalid_extxyz)
|
|
@@ -96,6 +96,21 @@ def test_from_pdb_to_pdb_1ema() -> None:
|
|
|
96
96
|
|
|
97
97
|
assert pdb == pdb2
|
|
98
98
|
|
|
99
|
+
def test_from_pdb_to_pdb_2hu4() -> None:
|
|
100
|
+
with open("tests/data/2HU4.pdb") as f:
|
|
101
|
+
data = f.read()
|
|
102
|
+
pdb = pdb_from_pdb_filestring(data)
|
|
103
|
+
filestring = pdb_object_to_pdb_filestring(pdb, header=True, source=True, keyword=True, crystallography=True)
|
|
104
|
+
|
|
105
|
+
pdb2 = pdb_from_pdb_filestring(filestring)
|
|
106
|
+
|
|
107
|
+
assert pdb.description == pdb2.description
|
|
108
|
+
assert pdb.experiment == pdb2.experiment
|
|
109
|
+
# not true but doesn't matter
|
|
110
|
+
print(pdb.geometry == pdb2.geometry)
|
|
111
|
+
assert pdb.models == pdb2.models
|
|
112
|
+
assert pdb.quality == pdb2.quality
|
|
113
|
+
|
|
99
114
|
def mmcif_author_format_to_pdb_format(authors: list[str]) -> list[str]:
|
|
100
115
|
return [f"{last.upper()}{first.upper()}" for first, last in
|
|
101
116
|
(author.split(", ") for author in authors)]
|
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
from stjames import Atom, Molecule, MoleculeReadError, PeriodicCell
|
|
4
|
-
|
|
5
|
-
valid_extxyz = """
|
|
6
|
-
5
|
|
7
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
8
|
-
C 0.0 0.0 0.0
|
|
9
|
-
H 0.0 0.0 1.0
|
|
10
|
-
H 1.0 0.0 0.0
|
|
11
|
-
H 0.0 1.0 0.0
|
|
12
|
-
H 1.0 1.0 1.0
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
incorrect_num_atoms = """
|
|
16
|
-
6
|
|
17
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
18
|
-
C 0.0 0.0 0.0
|
|
19
|
-
H 0.0 0.0 1.0
|
|
20
|
-
H 1.0 0.0 0.0
|
|
21
|
-
H 0.0 1.0 0.0
|
|
22
|
-
H 1.0 1.0 1.0
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
not_digit_num_atoms = """
|
|
26
|
-
v
|
|
27
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
28
|
-
C 0.0 0.0 0.0
|
|
29
|
-
H 0.0 0.0 1.0
|
|
30
|
-
H 1.0 0.0 0.0
|
|
31
|
-
H 0.0 1.0 0.0
|
|
32
|
-
H 1.0 1.0 1.0
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
many_num_atoms = """
|
|
36
|
-
6 9
|
|
37
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
38
|
-
C 0.0 0.0 0.0
|
|
39
|
-
H 0.0 0.0 1.0
|
|
40
|
-
H 1.0 0.0 0.0
|
|
41
|
-
H 0.0 1.0 0.0
|
|
42
|
-
H 1.0 1.0 1.0
|
|
43
|
-
"""
|
|
44
|
-
no_num_atoms = """
|
|
45
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
46
|
-
C 0.0 0.0 0.0
|
|
47
|
-
H 0.0 0.0 1.0
|
|
48
|
-
H 1.0 0.0 0.0
|
|
49
|
-
H 0.0 1.0 0.0
|
|
50
|
-
H 1.0 1.0 1.0
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
xyz_style = """
|
|
54
|
-
5
|
|
55
|
-
Comment
|
|
56
|
-
C 0.0 0.0 0.0
|
|
57
|
-
H 0.0 0.0 1.0
|
|
58
|
-
H 1.0 0.0 0.0
|
|
59
|
-
H 0.0 1.0 0.0
|
|
60
|
-
H 1.0 1.0 1.0
|
|
61
|
-
"""
|
|
62
|
-
|
|
63
|
-
missing_lattice = """
|
|
64
|
-
5
|
|
65
|
-
Properties=species:S:1:pos:R:3
|
|
66
|
-
C 0.0 0.0 0.0
|
|
67
|
-
H 0.0 0.0 1.0
|
|
68
|
-
H 1.0 0.0 0.0
|
|
69
|
-
H 0.0 1.0 0.0
|
|
70
|
-
H 1.0 1.0 1.0
|
|
71
|
-
"""
|
|
72
|
-
|
|
73
|
-
missing_properties = """
|
|
74
|
-
5
|
|
75
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0"
|
|
76
|
-
C 0.0 0.0 0.0
|
|
77
|
-
H 0.0 0.0 1.0
|
|
78
|
-
H 1.0 0.0 0.0
|
|
79
|
-
H 0.0 1.0 0.0
|
|
80
|
-
H 1.0 1.0 1.0
|
|
81
|
-
"""
|
|
82
|
-
|
|
83
|
-
incorrect_properites = """
|
|
84
|
-
5
|
|
85
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3foo:1
|
|
86
|
-
C 0.0 0.0 0.0
|
|
87
|
-
H 0.0 0.0 1.0
|
|
88
|
-
H 1.0 0.0 0.0
|
|
89
|
-
H 0.0 1.0 0.0
|
|
90
|
-
H 1.0 1.0 1.0
|
|
91
|
-
"""
|
|
92
|
-
|
|
93
|
-
incorrect_lattice_extra = """
|
|
94
|
-
5
|
|
95
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0 3.14" Properties=species:S:1:pos:R:3
|
|
96
|
-
C 0.0 0.0 0.0
|
|
97
|
-
H 0.0 0.0 1.0
|
|
98
|
-
H 1.0 0.0 0.0
|
|
99
|
-
H 0.0 1.0 0.0
|
|
100
|
-
H 1.0 1.0 1.0
|
|
101
|
-
"""
|
|
102
|
-
|
|
103
|
-
incorrect_lattice_equals = """
|
|
104
|
-
5
|
|
105
|
-
Lattice="6.0 0.0 =0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
106
|
-
C 0.0 0.0 0.0
|
|
107
|
-
H 0.0 0.0 1.0
|
|
108
|
-
H 1.0 0.0 0.0
|
|
109
|
-
H 0.0 1.0 0.0
|
|
110
|
-
H 1.0 1.0 1.0
|
|
111
|
-
"""
|
|
112
|
-
|
|
113
|
-
incorrect_lattice_str = """
|
|
114
|
-
5
|
|
115
|
-
Lattice="6.0 0.0 0.0 hi 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
116
|
-
C 0.0 0.0 0.0
|
|
117
|
-
H 0.0 0.0 1.0
|
|
118
|
-
H 1.0 0.0 0.0
|
|
119
|
-
H 0.0 1.0 0.0
|
|
120
|
-
H 1.0 1.0 1.0
|
|
121
|
-
"""
|
|
122
|
-
|
|
123
|
-
incorrect_lattice_extra_string = """
|
|
124
|
-
5
|
|
125
|
-
Lattice="6.0 0.0 0.0 0.0 sup 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
126
|
-
C 0.0 0.0 0.0
|
|
127
|
-
H 0.0 0.0 1.0
|
|
128
|
-
H 1.0 0.0 0.0
|
|
129
|
-
H 0.0 1.0 0.0
|
|
130
|
-
H 1.0 1.0 1.0
|
|
131
|
-
"""
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
incorrect_lattice_single_quote = """
|
|
135
|
-
5
|
|
136
|
-
Lattice="6.0 0.0 0.0 0.0 6.0 '0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
137
|
-
C 0.0 0.0 0.0
|
|
138
|
-
H 0.0 0.0 1.0
|
|
139
|
-
H 1.0 0.0 0.0
|
|
140
|
-
H 0.0 1.0 0.0
|
|
141
|
-
H 1.0 1.0 1.0
|
|
142
|
-
"""
|
|
143
|
-
|
|
144
|
-
incorrect_lattice_double_quote = """
|
|
145
|
-
5
|
|
146
|
-
Lattice="6.0 0.0 0.0 0.0 "6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
|
|
147
|
-
C 0.0 0.0 0.0
|
|
148
|
-
H 0.0 0.0 1.0
|
|
149
|
-
H 1.0 0.0 0.0
|
|
150
|
-
H 0.0 1.0 0.0
|
|
151
|
-
H 1.0 1.0 1.0
|
|
152
|
-
"""
|
|
153
|
-
|
|
154
|
-
incorrect_lattice_double_single_quote = """
|
|
155
|
-
5
|
|
156
|
-
Lattice="6.0 0.0 0.0 0.0 '6.0 0.0 0.0 '0.0 6.0" Properties=species:S:1:pos:R:3
|
|
157
|
-
C 0.0 0.0 0.0
|
|
158
|
-
H 0.0 0.0 1.0
|
|
159
|
-
H 1.0 0.0 0.0
|
|
160
|
-
H 0.0 1.0 0.0
|
|
161
|
-
H 1.0 1.0 1.0
|
|
162
|
-
"""
|
|
163
|
-
|
|
164
|
-
incorrect_lattice_double_double_quote = """
|
|
165
|
-
5
|
|
166
|
-
Lattice="6.0 0.0 "0.0 0.0 6.0 0.0 0.0 "0.0 6.0" Properties=species:S:1:pos:R:3
|
|
167
|
-
C 0.0 0.0 0.0
|
|
168
|
-
H 0.0 0.0 1.0
|
|
169
|
-
H 1.0 0.0 0.0
|
|
170
|
-
H 0.0 1.0 0.0
|
|
171
|
-
H 1.0 1.0 1.0
|
|
172
|
-
"""
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
expected_cell = (
|
|
176
|
-
(6.0, 0.0, 0.0),
|
|
177
|
-
(0.0, 6.0, 0.0),
|
|
178
|
-
(0.0, 0.0, 6.0),
|
|
179
|
-
)
|
|
180
|
-
|
|
181
|
-
expected_atoms = [
|
|
182
|
-
Atom(atomic_number=6, position=(0.0, 0.0, 0.0)), # C
|
|
183
|
-
Atom(atomic_number=1, position=(0.0, 0.0, 1.0)), # H
|
|
184
|
-
Atom(atomic_number=1, position=(1.0, 0.0, 0.0)), # H
|
|
185
|
-
Atom(atomic_number=1, position=(0.0, 1.0, 0.0)), # H
|
|
186
|
-
Atom(atomic_number=1, position=(1.0, 1.0, 1.0)), # H
|
|
187
|
-
]
|
|
188
|
-
|
|
189
|
-
expected_molecule = Molecule(
|
|
190
|
-
charge=0,
|
|
191
|
-
multiplicity=1,
|
|
192
|
-
atoms=expected_atoms,
|
|
193
|
-
cell=PeriodicCell(lattice_vectors=expected_cell),
|
|
194
|
-
)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
def test_molecule_from_extxyz_valid() -> None:
|
|
198
|
-
"""
|
|
199
|
-
Test case for valid extxyz string.
|
|
200
|
-
"""
|
|
201
|
-
molecule = Molecule.from_extxyz(valid_extxyz)
|
|
202
|
-
assert molecule == expected_molecule, f"Valid case failed: got {molecule}, expected {expected_molecule}"
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
@pytest.mark.parametrize(
|
|
206
|
-
"invalid_extxyz",
|
|
207
|
-
[
|
|
208
|
-
incorrect_num_atoms,
|
|
209
|
-
no_num_atoms,
|
|
210
|
-
not_digit_num_atoms,
|
|
211
|
-
many_num_atoms,
|
|
212
|
-
xyz_style,
|
|
213
|
-
missing_lattice,
|
|
214
|
-
missing_properties,
|
|
215
|
-
incorrect_properites,
|
|
216
|
-
incorrect_lattice_extra,
|
|
217
|
-
incorrect_lattice_equals,
|
|
218
|
-
incorrect_lattice_str,
|
|
219
|
-
incorrect_lattice_extra_string,
|
|
220
|
-
incorrect_lattice_single_quote,
|
|
221
|
-
incorrect_lattice_double_quote,
|
|
222
|
-
incorrect_lattice_double_single_quote,
|
|
223
|
-
incorrect_lattice_double_double_quote,
|
|
224
|
-
],
|
|
225
|
-
)
|
|
226
|
-
def test_molecule_from_extxyz_invalid(invalid_extxyz: str) -> None:
|
|
227
|
-
"""
|
|
228
|
-
Test case for invalid extxyz strings, ensuring they raise MoleculeReadError.
|
|
229
|
-
"""
|
|
230
|
-
with pytest.raises(MoleculeReadError):
|
|
231
|
-
Molecule.from_extxyz(invalid_extxyz)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|