stjames 0.0.65__tar.gz → 0.0.67__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stjames might be problematic. Click here for more details.

Files changed (79)
  1. {stjames-0.0.65/stjames.egg-info → stjames-0.0.67}/PKG-INFO +3 -2
  2. {stjames-0.0.65 → stjames-0.0.67}/pyproject.toml +1 -1
  3. {stjames-0.0.65 → stjames-0.0.67}/stjames/molecule.py +137 -43
  4. {stjames-0.0.65 → stjames-0.0.67}/stjames/pdb.py +34 -15
  5. {stjames-0.0.65 → stjames-0.0.67/stjames.egg-info}/PKG-INFO +3 -2
  6. stjames-0.0.67/tests/test_from_extxyz.py +322 -0
  7. {stjames-0.0.65 → stjames-0.0.67}/tests/test_pdb.py +15 -0
  8. stjames-0.0.65/tests/test_from_extxyz.py +0 -231
  9. {stjames-0.0.65 → stjames-0.0.67}/LICENSE +0 -0
  10. {stjames-0.0.65 → stjames-0.0.67}/README.md +0 -0
  11. {stjames-0.0.65 → stjames-0.0.67}/setup.cfg +0 -0
  12. {stjames-0.0.65 → stjames-0.0.67}/stjames/__init__.py +0 -0
  13. {stjames-0.0.65 → stjames-0.0.67}/stjames/_deprecated_solvent_settings.py +0 -0
  14. {stjames-0.0.65 → stjames-0.0.67}/stjames/atom.py +0 -0
  15. {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/__init__.py +0 -0
  16. {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/data.py +0 -0
  17. {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/mmcif.py +0 -0
  18. {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/pdb.py +0 -0
  19. {stjames-0.0.65 → stjames-0.0.67}/stjames/atomium_stjames/utilities.py +0 -0
  20. {stjames-0.0.65 → stjames-0.0.67}/stjames/base.py +0 -0
  21. {stjames-0.0.65 → stjames-0.0.67}/stjames/basis_set.py +0 -0
  22. {stjames-0.0.65 → stjames-0.0.67}/stjames/calculation.py +0 -0
  23. {stjames-0.0.65 → stjames-0.0.67}/stjames/compute_settings.py +0 -0
  24. {stjames-0.0.65 → stjames-0.0.67}/stjames/constraint.py +0 -0
  25. {stjames-0.0.65 → stjames-0.0.67}/stjames/correction.py +0 -0
  26. {stjames-0.0.65 → stjames-0.0.67}/stjames/data/__init__.py +0 -0
  27. {stjames-0.0.65 → stjames-0.0.67}/stjames/data/bragg_radii.json +0 -0
  28. {stjames-0.0.65 → stjames-0.0.67}/stjames/data/elements.py +0 -0
  29. {stjames-0.0.65 → stjames-0.0.67}/stjames/data/isotopes.json +0 -0
  30. {stjames-0.0.65 → stjames-0.0.67}/stjames/data/nist_isotopes.json +0 -0
  31. {stjames-0.0.65 → stjames-0.0.67}/stjames/data/read_nist_isotopes.py +0 -0
  32. {stjames-0.0.65 → stjames-0.0.67}/stjames/data/symbol_element.json +0 -0
  33. {stjames-0.0.65 → stjames-0.0.67}/stjames/diis_settings.py +0 -0
  34. {stjames-0.0.65 → stjames-0.0.67}/stjames/grid_settings.py +0 -0
  35. {stjames-0.0.65 → stjames-0.0.67}/stjames/int_settings.py +0 -0
  36. {stjames-0.0.65 → stjames-0.0.67}/stjames/message.py +0 -0
  37. {stjames-0.0.65 → stjames-0.0.67}/stjames/method.py +0 -0
  38. {stjames-0.0.65 → stjames-0.0.67}/stjames/mode.py +0 -0
  39. {stjames-0.0.65 → stjames-0.0.67}/stjames/opt_settings.py +0 -0
  40. {stjames-0.0.65 → stjames-0.0.67}/stjames/periodic_cell.py +0 -0
  41. {stjames-0.0.65 → stjames-0.0.67}/stjames/py.typed +0 -0
  42. {stjames-0.0.65 → stjames-0.0.67}/stjames/scf_settings.py +0 -0
  43. {stjames-0.0.65 → stjames-0.0.67}/stjames/settings.py +0 -0
  44. {stjames-0.0.65 → stjames-0.0.67}/stjames/solvent.py +0 -0
  45. {stjames-0.0.65 → stjames-0.0.67}/stjames/status.py +0 -0
  46. {stjames-0.0.65 → stjames-0.0.67}/stjames/task.py +0 -0
  47. {stjames-0.0.65 → stjames-0.0.67}/stjames/thermochem_settings.py +0 -0
  48. {stjames-0.0.65 → stjames-0.0.67}/stjames/types.py +0 -0
  49. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/__init__.py +0 -0
  50. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/admet.py +0 -0
  51. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/basic_calculation.py +0 -0
  52. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/bde.py +0 -0
  53. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/conformer.py +0 -0
  54. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/conformer_search.py +0 -0
  55. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/descriptors.py +0 -0
  56. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/docking.py +0 -0
  57. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/electronic_properties.py +0 -0
  58. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/fukui.py +0 -0
  59. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/hydrogen_bond_basicity.py +0 -0
  60. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/ion_mobility.py +0 -0
  61. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/irc.py +0 -0
  62. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/macropka.py +0 -0
  63. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/molecular_dynamics.py +0 -0
  64. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/multistage_opt.py +0 -0
  65. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/pka.py +0 -0
  66. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/redox_potential.py +0 -0
  67. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/scan.py +0 -0
  68. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/solubility.py +0 -0
  69. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/spin_states.py +0 -0
  70. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/tautomer.py +0 -0
  71. {stjames-0.0.65 → stjames-0.0.67}/stjames/workflows/workflow.py +0 -0
  72. {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/SOURCES.txt +0 -0
  73. {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/dependency_links.txt +0 -0
  74. {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/requires.txt +0 -0
  75. {stjames-0.0.65 → stjames-0.0.67}/stjames.egg-info/top_level.txt +0 -0
  76. {stjames-0.0.65 → stjames-0.0.67}/tests/test_constraints.py +0 -0
  77. {stjames-0.0.65 → stjames-0.0.67}/tests/test_molecule.py +0 -0
  78. {stjames-0.0.65 → stjames-0.0.67}/tests/test_rounding.py +0 -0
  79. {stjames-0.0.65 → stjames-0.0.67}/tests/test_settings.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: stjames
3
- Version: 0.0.65
3
+ Version: 0.0.67
4
4
  Summary: standardized JSON atom/molecule encoding scheme
5
5
  Author-email: Corin Wagen <corin@rowansci.com>
6
6
  Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
12
12
  Requires-Dist: numpy
13
13
  Requires-Dist: requests
14
14
  Requires-Dist: rdkit
15
+ Dynamic: license-file
15
16
 
16
17
  # stjames
17
18
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "stjames"
3
- version = "0.0.65"
3
+ version = "0.0.67"
4
4
  description = "standardized JSON atom/molecule encoding scheme"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  from pathlib import Path
3
- from typing import Annotated, Iterable, Optional, Self, TypeAlias
3
+ from typing import Annotated, Any, Iterable, Optional, Self, Sequence, TypeAlias, TypedDict, TypeVar
4
4
 
5
5
  import pydantic
6
6
  from pydantic import AfterValidator, NonNegativeInt, PositiveInt, ValidationError
@@ -9,6 +9,7 @@ from rdkit.Chem import AllChem
9
9
 
10
10
  from .atom import Atom
11
11
  from .base import Base, round_float, round_optional_float
12
+ from .data import SYMBOL_ELEMENT
12
13
  from .periodic_cell import PeriodicCell
13
14
  from .types import (
14
15
  FloatPerAtom,
@@ -246,28 +247,102 @@ class Molecule(Base):
246
247
  return cls.from_extxyz_lines(extxyz.strip().splitlines(), charge=charge, multiplicity=multiplicity)
247
248
 
248
249
  @classmethod
249
- def from_extxyz_lines(cls: type[Self], lines: Iterable[str], charge: int = 0, multiplicity: PositiveInt = 1) -> Self:
250
- # ensure first line is number of atoms
251
- lines = list(lines)
250
+ def from_extxyz_lines(
251
+ cls: type[Self],
252
+ lines: Iterable[str],
253
+ charge: int | None = None,
254
+ multiplicity: PositiveInt | None = None,
255
+ cell: PeriodicCell | None = None,
256
+ ) -> Self:
257
+ """
258
+ Parses an EXTXYZ file, extracting atom positions, forces (if present), and metadata.
259
+
260
+ Supports:
261
+ - Lattice vectors (cell information)
262
+ - Properties field (species, positions, forces, etc.)
263
+ - Other metadata like charge, multiplicity, energy, etc.
264
+
265
+ :param lines: Iterable of lines from an EXTXYZ file
266
+ :param charge: total charge of the molecule (default: 0 if not found)
267
+ :param multiplicity: spin multiplicity of the molecule (default: 1 if not found)
268
+ :param cell: PeriodicCell containing lattice vectors
269
+ :return: Molecule
270
+ :raises MoleculeReadError: if the file is not in the correct format
271
+ """
272
+ if not isinstance(lines, Sequence):
273
+ lines = list(lines)
274
+
275
+ # Ensure first line contains number of atoms
252
276
  if len(lines[0].split()) == 1:
253
277
  natoms = lines[0].strip()
254
- if not natoms.isdigit() or (int(lines[0]) != len(lines) - 2):
255
- raise MoleculeReadError(f"First line of EXTXYZ file should be the number of atoms, got: {lines[0]} != {len(lines) - 2}")
256
- lines = lines[1:]
278
+ if not natoms.isdigit() or (int(natoms) != len(lines) - 2):
279
+ raise MoleculeReadError(f"First line should be number of atoms, got: {lines[0]} != {len(lines) - 2}")
280
+ data_line, *lines = lines[1:]
257
281
  else:
258
- raise MoleculeReadError(f"First line of EXTXYZ should be only an int denoting number of atoms. Got {lines[0].split()}")
282
+ raise MoleculeReadError(f"First line should be an integer denoting atom count. Got {lines[0].split()}")
259
283
 
260
- # ensure second line contains key-value pairs
261
- if "=" not in lines[0]:
262
- raise MoleculeReadError(f"Invalid property line, got {lines[0]}")
284
+ metadata = parse_extxyz_comment_line(data_line)
263
285
 
264
- cell = parse_comment_line(lines[0])
265
- lines = lines[1:]
286
+ T = TypeVar("T")
266
287
 
267
- try:
268
- return cls(atoms=[Atom.from_xyz(line) for line in lines], cell=cell, charge=charge, multiplicity=multiplicity)
269
- except (ValueError, ValidationError) as e:
270
- raise MoleculeReadError("Error reading molecule from extxyz") from e
288
+ def metadata_optional_get(key: str, value: T | None, default: T) -> T:
289
+ """Set key to default if not found in metadata"""
290
+ if value is None:
291
+ return metadata.get(key, default) # type: ignore [return-value]
292
+
293
+ return value
294
+
295
+ charge = metadata_optional_get("total_charge", charge, 0)
296
+ multiplicity = metadata_optional_get("multiplicity", multiplicity, 1)
297
+ cell = cell or metadata.get("cell")
298
+ energy = metadata.get("energy", None)
299
+
300
+ force_idx = None
301
+ if properties := metadata.get("properties", "").split(":"):
302
+ if properties[0].lower() != "species":
303
+ raise MoleculeReadError(f"Invalid or missing 'Properties' field in EXTXYZ, got: {properties}")
304
+
305
+ # Identify column indices for position and force data
306
+ pos_idx = None
307
+ current_idx = 0 # Start after 'species:S'
308
+
309
+ while current_idx < len(properties):
310
+ if properties[current_idx].lower() == "pos" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
311
+ pos_idx = current_idx
312
+ elif properties[current_idx].lower() == "forces" and properties[current_idx + 1].lower() == "r" and properties[current_idx + 2] == "3":
313
+ force_idx = current_idx
314
+ current_idx += 3
315
+
316
+ if pos_idx is None:
317
+ raise MoleculeReadError("No position data ('pos:R:3') found in Properties field.")
318
+
319
+ def parse_line_atoms(line: str) -> Atom:
320
+ symbol, sx, sy, sz, *_ = line.split()
321
+ atomic_number = SYMBOL_ELEMENT[symbol.title()]
322
+ x, y, z = map(float, (sx, sy, sz))
323
+
324
+ return Atom(atomic_number=atomic_number, position=(x, y, z))
325
+
326
+ def parse_line_with_grad(line: str) -> tuple[Atom, Vector3D]:
327
+ symbol, sx, sy, sz, sgx, sgy, sgz, *_ = line.split()
328
+ atomic_number = SYMBOL_ELEMENT[symbol.title()]
329
+ x, y, z = map(float, (sx, sy, sz))
330
+ gx, gy, gz = map(float, (sgx, sgy, sgz))
331
+
332
+ return (
333
+ Atom(atomic_number=atomic_number, position=(x, y, z)),
334
+ (-gx, -gy, -gz),
335
+ )
336
+
337
+ atoms: list[Atom]
338
+ gradients: list[Vector3D] | None
339
+ if force_idx is not None:
340
+ atoms, gradients = zip(*map(parse_line_with_grad, lines), strict=True) # type: ignore [assignment]
341
+ else:
342
+ atoms = [parse_line_atoms(line) for line in lines]
343
+ gradients = None
344
+
345
+ return cls(atoms=atoms, cell=cell, charge=charge, multiplicity=multiplicity, energy=energy, gradient=gradients)
271
346
 
272
347
  @classmethod
273
348
  def from_rdkit(cls: type[Self], rdkm: RdkitMol, cid: int = 0) -> Self:
@@ -313,43 +388,62 @@ def _embed_rdkit_mol(rdkm: RdkitMol) -> RdkitMol:
313
388
  return rdkm
314
389
 
315
390
 
316
- def parse_comment_line(line: str) -> PeriodicCell:
317
- """
318
- currently only supporting lattice and porperites fields from comment line
319
- modify in future to support other fields from comment from_xyz_lines
320
- ex: name, mulitplicity, charge, etc.
391
+ class EXTXYZMetadata(TypedDict, total=False):
392
+ properties: Any
393
+ total_charge: int
394
+ multiplicity: int
395
+ energy: float
396
+ cell: PeriodicCell
397
+
398
+
399
+ def parse_extxyz_comment_line(line: str) -> EXTXYZMetadata:
321
400
  """
322
- cell = None
401
+ Parse the comment line of an EXTXYZ file, extracting lattice, properties, and metadata.
402
+
403
+ Supports:
404
+ - Lattice vectors (cell information)
405
+ - Properties field (species, positions, forces, etc.)
406
+ - Other metadata fields like charge, multiplicity, energy, etc.
407
+
408
+ :param line: comment line from an EXTXYZ file
409
+ :return: parsed properties
410
+
411
+ >>> parse_extxyz_comment_line('Lattice="6.0 0.0 0.0 6.0 0.0 0.0 6.0 0.0 0.0"Properties=species:S:1:pos:R:3')
412
+ {'cell': PeriodicCell(lattice_vectors=((6.0, 0.0, 0.0), (6.0, 0.0, 0.0), (6.0, 0.0, 0.0)), is_periodic=(True, True, True), volume=0.0), 'properties': 'species:S:1:pos:R:3'}
413
+ """ # noqa: E501
414
+
323
415
  # Regular expression to match key="value", key='value', or key=value
324
416
  pattern = r"(\S+?=(?:\".*?\"|\'.*?\'|\S+))"
325
417
  pairs = re.findall(pattern, line)
326
418
 
327
- prop_dict = {}
419
+ prop_dict: EXTXYZMetadata = {}
328
420
  for pair in pairs:
329
421
  key, value = pair.split("=", 1)
330
- if key.lower() == "lattice":
331
- value = value.strip("'\"").split()
332
- if len(value) != 9:
333
- raise MoleculeReadError(f"Lattice should have 9 entries got {len(value)}")
422
+ key = key.lower().strip()
423
+ value = value.strip("'\"")
424
+
425
+ if key == "lattice":
426
+ lattice_values = value.split()
427
+ if len(lattice_values) != 9:
428
+ raise MoleculeReadError(f"Lattice should have 9 entries, got {len(lattice_values)}")
334
429
 
335
- # Convert the value to a 3x3 tuple of tuples of floats
336
430
  try:
337
- cell = tuple(tuple(map(float, value[i : i + 3])) for i in range(0, 9, 3))
431
+ cell = tuple(tuple(map(float, lattice_values[i : i + 3])) for i in range(0, 9, 3))
338
432
  except ValueError:
339
- raise MoleculeReadError(f"Lattice should be floats, got {value}")
433
+ raise MoleculeReadError(f"Lattice should be floats, got {lattice_values}")
340
434
 
341
- prop_dict[key] = value
435
+ prop_dict["cell"] = PeriodicCell(lattice_vectors=cell)
342
436
 
343
- elif key.lower() == "properties":
344
- if value.lower() != "species:s:1:pos:r:3":
345
- raise MoleculeReadError(f"Only accepting properties of form species:S:1:pos:R:3, got {value}")
346
- prop_dict[key] = value
347
- else:
348
- raise MoleculeReadError(f"Currently only accepting lattice and propery keys. Got {key}")
437
+ elif key == "properties":
438
+ prop_dict["properties"] = value
349
439
 
350
- if cell is None:
351
- raise MoleculeReadError("Lattice field is required but missing.")
440
+ elif key == "total_charge":
441
+ prop_dict["total_charge"] = int(value)
442
+ elif key == "multiplicity":
443
+ prop_dict["multiplicity"] = int(value)
444
+ elif key == "energy":
445
+ prop_dict["energy"] = float(value)
446
+ else:
447
+ prop_dict[key] = value # type: ignore [literal-required]
352
448
 
353
- if "properties" not in [key.lower() for key in prop_dict.keys()]:
354
- raise MoleculeReadError(f"Property field is required, got keys {prop_dict.keys()}")
355
- return PeriodicCell(lattice_vectors=cell)
449
+ return prop_dict
@@ -1,3 +1,4 @@
1
+ import re
1
2
  from datetime import date, datetime
2
3
  from pathlib import Path
3
4
  from typing import Any, Literal
@@ -276,7 +277,7 @@ def pdb_object_to_pdb_filestring(
276
277
  atom=atom,
277
278
  chain_id=this_chain_id,
278
279
  res_name=residue.name,
279
- res_num=int(_residue_id[2:]),
280
+ res_num=_residue_id[2:],
280
281
  alt_loc=atom.alt_loc or "",
281
282
  )
282
283
  pdb_lines.append(line)
@@ -286,12 +287,12 @@ def pdb_object_to_pdb_filestring(
286
287
  atom=atom,
287
288
  chain_id=this_chain_id,
288
289
  res_name=residue.name,
289
- res_num=int(_residue_id[2:]),
290
+ res_num=_residue_id[2:],
290
291
  alt_loc=atom.alt_loc or "",
291
292
  )
292
293
  pdb_lines.append(line)
293
294
 
294
- pdb_lines.append(f"TER {_atom_id + 1:>5} {residue.name:>3} {this_chain_id}{int(_residue_id[2:]):>4}")
295
+ pdb_lines.append(f"TER {_atom_id + 1:>5} {residue.name:>3} {this_chain_id}{_residue_id[2:]:>4}")
295
296
 
296
297
  # === 2) Non-polymers (e.g. ligands, ions) ===
297
298
  for _np_id, nonpoly in model.non_polymer.items():
@@ -308,7 +309,7 @@ def pdb_object_to_pdb_filestring(
308
309
  atom=atom,
309
310
  chain_id=chain_id_for_np,
310
311
  res_name=nonpoly.name,
311
- res_num=int(_np_id[2:]),
312
+ res_num=_np_id[2:],
312
313
  )
313
314
  pdb_lines.append(line)
314
315
  if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -317,7 +318,7 @@ def pdb_object_to_pdb_filestring(
317
318
  atom=atom,
318
319
  chain_id=chain_id_for_np,
319
320
  res_name=nonpoly.name,
320
- res_num=int(_np_id[2:]),
321
+ res_num=_np_id[2:],
321
322
  )
322
323
  pdb_lines.append(line)
323
324
 
@@ -330,7 +331,7 @@ def pdb_object_to_pdb_filestring(
330
331
  atom=atom,
331
332
  chain_id=_w_id[0], # Or you can use water.polymer if set
332
333
  res_name="HOH",
333
- res_num=int(_w_id[2:]), # or an incrementing value
334
+ res_num=_w_id[2:], # or an incrementing value
334
335
  )
335
336
  pdb_lines.append(line)
336
337
  if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -339,7 +340,7 @@ def pdb_object_to_pdb_filestring(
339
340
  atom=atom,
340
341
  chain_id=_w_id[0],
341
342
  res_name="HOH",
342
- res_num=int(_w_id[2:]),
343
+ res_num=_w_id[2:],
343
344
  )
344
345
  pdb_lines.append(line)
345
346
 
@@ -357,7 +358,7 @@ def pdb_object_to_pdb_filestring(
357
358
  atom=atom,
358
359
  chain_id="B",
359
360
  res_name="BRN", # or branched_obj.get("name", "BRN")
360
- res_num=1,
361
+ res_num="1",
361
362
  )
362
363
  pdb_lines.append(line)
363
364
  if atom.anisotropy and atom.anisotropy != [0, 0, 0, 0, 0, 0]:
@@ -366,7 +367,7 @@ def pdb_object_to_pdb_filestring(
366
367
  atom=atom,
367
368
  chain_id="B",
368
369
  res_name="BRN",
369
- res_num=1,
370
+ res_num="1",
370
371
  )
371
372
  pdb_lines.append(line)
372
373
 
@@ -407,7 +408,7 @@ def _format_atom_line(
407
408
  atom: PDBAtom,
408
409
  chain_id: str,
409
410
  res_name: str,
410
- res_num: int | None,
411
+ res_num: str | None,
411
412
  alt_loc: str = "",
412
413
  ) -> str:
413
414
  """
@@ -423,7 +424,15 @@ def _format_atom_line(
423
424
  alt_loc_char = alt_loc if alt_loc else " "
424
425
  residue_name = (res_name or "UNK")[:3] # limit to 3 chars
425
426
  chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
426
- residue_num = res_num if res_num is not None else 1
427
+ residue_num_str = "1"
428
+ insertion_code = " "
429
+ if res_num:
430
+ match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
431
+ if match:
432
+ residue_num_str, insertion_code = match.groups()
433
+ insertion_code = insertion_code if insertion_code != "" else " "
434
+
435
+ residue_num = int(residue_num_str)
427
436
 
428
437
  # Format charge: PDB uses e.g. " 2-", " 1+" in columns 79-80
429
438
  # If your model stores charges differently, adapt as needed.
@@ -451,7 +460,8 @@ def _format_atom_line(
451
460
  f"{residue_name:>3}" # residue name (columns 18-20)
452
461
  f" {chain_char}" # chain ID (column 22)
453
462
  f"{residue_num:4d}" # residue sequence number (columns 23-26)
454
- f" " # columns 27-30 (insertion code plus spacing)
463
+ f"{insertion_code}"
464
+ f" " # columns 27-30 (spacing)
455
465
  f"{atom.x:8.3f}" # x (columns 31-38)
456
466
  f"{atom.y:8.3f}" # y (columns 39-46)
457
467
  f"{atom.z:8.3f}" # z (columns 47-54)
@@ -469,7 +479,7 @@ def _format_anisou_line(
469
479
  atom: PDBAtom,
470
480
  chain_id: str,
471
481
  res_name: str,
472
- res_num: int | None,
482
+ res_num: str | None,
473
483
  alt_loc: str = "",
474
484
  ) -> str:
475
485
  """
@@ -485,7 +495,15 @@ def _format_anisou_line(
485
495
  alt_loc_char = alt_loc if alt_loc else " "
486
496
  residue_name = (res_name or "UNK")[:3] # limit to 3 chars
487
497
  chain_char = (chain_id or "A")[:1] # PDB chain ID is 1 char
488
- residue_num = res_num if res_num is not None else 1
498
+ residue_num_str = "1"
499
+ insertion_code = " "
500
+ if res_num:
501
+ match = re.match(r"(\d+)([a-zA-Z]*)", res_num)
502
+ if match:
503
+ residue_num_str, insertion_code = match.groups()
504
+ insertion_code = insertion_code if insertion_code != "" else " "
505
+
506
+ residue_num = int(residue_num_str)
489
507
 
490
508
  chg = ""
491
509
  if atom.charge and abs(atom.charge) > 0:
@@ -528,7 +546,8 @@ def _format_anisou_line(
528
546
  f"{residue_name:>3}" # residue name (columns 18-20)
529
547
  f" {chain_char}" # chain ID (column 22)
530
548
  f"{residue_num:4d}" # residue sequence number (columns 23-26)
531
- f" " # columns 27-28 (insertion code plus spacing)
549
+ f"{insertion_code}"
550
+ f" " # columns 27-28 (plus spacing)
532
551
  f"{aniso_lines}"
533
552
  f" " # columns 70-76 (padding)
534
553
  f"{atom.element:>2}" # element (columns 77-78)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: stjames
3
- Version: 0.0.65
3
+ Version: 0.0.67
4
4
  Summary: standardized JSON atom/molecule encoding scheme
5
5
  Author-email: Corin Wagen <corin@rowansci.com>
6
6
  Project-URL: Homepage, https://github.com/rowansci/stjames
@@ -12,6 +12,7 @@ Requires-Dist: pydantic>=2.4
12
12
  Requires-Dist: numpy
13
13
  Requires-Dist: requests
14
14
  Requires-Dist: rdkit
15
+ Dynamic: license-file
15
16
 
16
17
  # stjames
17
18
 
@@ -0,0 +1,322 @@
1
+ from pytest import mark, raises
2
+
3
+ from stjames import Atom, Molecule, MoleculeReadError, PeriodicCell
4
+
5
+ # Valid EXTXYZ without forces (only positions)
6
+ valid_extxyz = """
7
+ 5
8
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
9
+ C 0.0 0.0 0.0
10
+ H 0.0 0.0 1.0
11
+ H 1.0 0.0 0.0
12
+ H 0.0 1.0 0.0
13
+ H 1.0 1.0 1.0
14
+ """
15
+
16
+ # Valid EXTXYZ with forces: forces are provided after positions.
17
+ # For each atom, the forces are given and the molecule's gradient should be -forces.
18
+ valid_extxyz_with_forces_and_energy = """
19
+ 22
20
+ Properties=species:S:1:pos:R:3:forces:R:3:MACE_forces:R:3 smiles="[H:13][C:5]([H:14])([H:15])[C:7]([H:18])([H:19])[C:6]([H:16])([H:17])[S:2][C:4]([H:11])([H:12])[S:1][C:3]([H:8])([H:9])[H:10].[H:21][O:20][H:22]" total_charge=0 energy=-29139.55729148458 config_type="DES370K Dimers" MACE_energy=-29139.53445598132 pbc="F F F"
21
+ S 2.75637007 2.70758009 4.82925987 1.55959951 -0.22801913 -0.54136018 1.57411973 -0.23083920 -0.56575688
22
+ S 2.84554004 0.22606000 3.09241009 -0.21334177 -1.76227773 0.33161567 -0.21053821 -1.76733183 0.31476973
23
+ C 1.01258004 2.49340009 4.93010997 -1.31854381 -0.37388875 -0.24927582 -1.32686027 -0.41468579 -0.24419847
24
+ C 3.17725992 1.94638002 3.18563008 -0.09910375 1.52775439 -0.55371287 -0.09844947 1.52902410 -0.49063227
25
+ C 0.00000000 0.00000000 0.00000000 0.54047842 -1.02932160 -0.37462899 0.55420524 -1.04859973 -0.38165141
26
+ C 2.22169995 -0.06399999 1.41622007 -0.74959058 0.13849016 -0.66947629 -0.74189889 0.14272211 -0.65926828
27
+ C 0.72196996 0.27478000 1.31911993 -0.88825228 1.14448169 0.10462363 -0.88035930 1.15971257 0.11942591
28
+ H 0.73436999 1.43289995 4.80946970 0.11871475 0.45471041 0.08255662 0.12983545 0.47803891 0.09119142
29
+ H 0.46778000 3.08784985 4.17749977 0.24911941 -0.20360289 0.32966949 0.24533754 -0.20850576 0.33744960
30
+ H 0.70124000 2.82674027 5.93438053 0.20534666 -0.13940533 -0.46775994 0.20706729 -0.12047003 -0.47116810
31
+ H 2.59819007 2.49752998 2.41988992 0.29845053 -0.31965051 0.34195845 0.29323885 -0.33151445 0.32609726
32
+ H 4.25116014 2.06114006 2.94268989 -0.62592601 0.53239934 0.63500463 -0.63988801 0.53433737 0.61779540
33
+ H 0.01781000 -1.10166001 -0.05143000 -0.31652062 0.33261293 -0.62734778 -0.32052002 0.33247610 -0.62978105
34
+ H 0.50296998 0.40217999 -0.89547998 -0.14484048 -0.10780390 0.42454441 -0.15022920 -0.10846825 0.42265847
35
+ H -1.05070996 0.33554000 -0.00612000 0.55084403 -0.10031390 0.18450139 0.55970312 -0.09816881 0.19243150
36
+ H 2.77840995 0.53621000 0.67106002 -0.14551747 -0.35971650 0.43068238 -0.14826788 -0.34813759 0.42951233
37
+ H 2.39210987 -1.13260996 1.18280005 0.04825265 0.56114332 0.26057253 0.04712810 0.55862479 0.26730085
38
+ H 0.59988999 1.35992002 1.50082004 0.18014249 -0.45468505 0.36480505 0.17898520 -0.45518615 0.36105601
39
+ H 0.10381000 -0.26278001 2.06366992 0.77321355 0.35561432 0.04327458 0.76235138 0.36555290 0.03043141
40
+ O 5.42839002 0.27989000 7.42625999 -0.71867266 0.97644353 0.67783188 -0.72326306 0.98088507 0.67544292
41
+ H 4.56538010 0.65290999 7.13586997 1.00844588 -0.37843304 0.30550308 0.99937753 -0.37383106 0.29632339
42
+ H 5.64198017 0.76508999 8.25524997 -0.31126251 -0.56856677 -1.03720163 -0.31107513 -0.57563526 -1.03942973
43
+ """ # noqa: E501
44
+
45
+ # Other invalid cases (these remain unchanged)
46
+ incorrect_num_atoms = """
47
+ 6
48
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
49
+ C 0.0 0.0 0.0
50
+ H 0.0 0.0 1.0
51
+ H 1.0 0.0 0.0
52
+ H 0.0 1.0 0.0
53
+ H 1.0 1.0 1.0
54
+ """
55
+
56
+ not_digit_num_atoms = """
57
+ v
58
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
59
+ C 0.0 0.0 0.0
60
+ H 0.0 0.0 1.0
61
+ H 1.0 0.0 0.0
62
+ H 0.0 1.0 0.0
63
+ H 1.0 1.0 1.0
64
+ """
65
+
66
+ many_num_atoms = """
67
+ 6 9
68
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
69
+ C 0.0 0.0 0.0
70
+ H 0.0 0.0 1.0
71
+ H 1.0 0.0 0.0
72
+ H 0.0 1.0 0.0
73
+ H 1.0 1.0 1.0
74
+ """
75
+
76
+ no_num_atoms = """
77
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
78
+ C 0.0 0.0 0.0
79
+ H 0.0 0.0 1.0
80
+ H 1.0 0.0 0.0
81
+ H 0.0 1.0 0.0
82
+ H 1.0 1.0 1.0
83
+ """
84
+
85
+ xyz_style = """
86
+ 5
87
+ Comment
88
+ C 0.0 0.0 0.0
89
+ H 0.0 0.0 1.0
90
+ H 1.0 0.0 0.0
91
+ H 0.0 1.0 0.0
92
+ H 1.0 1.0 1.0
93
+ """
94
+
95
+ missing_properties = """
96
+ 5
97
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0"
98
+ C 0.0 0.0 0.0
99
+ H 0.0 0.0 1.0
100
+ H 1.0 0.0 0.0
101
+ H 0.0 1.0 0.0
102
+ H 1.0 1.0 1.0
103
+ """
104
+
105
+ incorrect_properites = """
106
+ 5
107
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3foo:1
108
+ C 0.0 0.0 0.0
109
+ H 0.0 0.0 1.0
110
+ H 1.0 0.0 0.0
111
+ H 0.0 1.0 0.0
112
+ H 1.0 1.0 1.0
113
+ """
114
+
115
+ incorrect_lattice_extra = """
116
+ 5
117
+ Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0 3.14" Properties=species:S:1:pos:R:3
118
+ C 0.0 0.0 0.0
119
+ H 0.0 0.0 1.0
120
+ H 1.0 0.0 0.0
121
+ H 0.0 1.0 0.0
122
+ H 1.0 1.0 1.0
123
+ """
124
+
125
+ incorrect_lattice_equals = """
126
+ 5
127
+ Lattice="6.0 0.0 =0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
128
+ C 0.0 0.0 0.0
129
+ H 0.0 0.0 1.0
130
+ H 1.0 0.0 0.0
131
+ H 0.0 1.0 0.0
132
+ H 1.0 1.0 1.0
133
+ """
134
+
135
+ incorrect_lattice_str = """
136
+ 5
137
+ Lattice="6.0 0.0 0.0 hi 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
138
+ C 0.0 0.0 0.0
139
+ H 0.0 0.0 1.0
140
+ H 1.0 0.0 0.0
141
+ H 0.0 1.0 0.0
142
+ H 1.0 1.0 1.0
143
+ """
144
+
145
+ incorrect_lattice_extra_string = """
146
+ 5
147
+ Lattice="6.0 0.0 0.0 0.0 sup 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
148
+ C 0.0 0.0 0.0
149
+ H 0.0 0.0 1.0
150
+ H 1.0 0.0 0.0
151
+ H 0.0 1.0 0.0
152
+ H 1.0 1.0 1.0
153
+ """
154
+
155
+ incorrect_lattice_single_quote = """
156
+ 5
157
+ Lattice="6.0 0.0 0.0 0.0 6.0 '0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
158
+ C 0.0 0.0 0.0
159
+ H 0.0 0.0 1.0
160
+ H 1.0 0.0 0.0
161
+ H 0.0 1.0 0.0
162
+ H 1.0 1.0 1.0
163
+ """
164
+
165
+ incorrect_lattice_double_quote = """
166
+ 5
167
+ Lattice="6.0 0.0 0.0 0.0 "6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
168
+ C 0.0 0.0 0.0
169
+ H 0.0 0.0 1.0
170
+ H 1.0 0.0 0.0
171
+ H 0.0 1.0 0.0
172
+ H 1.0 1.0 1.0
173
+ """
174
+
175
+ incorrect_lattice_double_single_quote = """
176
+ 5
177
+ Lattice="6.0 0.0 0.0 0.0 '6.0 0.0 0.0 '0.0 6.0" Properties=species:S:1:pos:R:3
178
+ C 0.0 0.0 0.0
179
+ H 0.0 0.0 1.0
180
+ H 1.0 0.0 0.0
181
+ H 0.0 1.0 0.0
182
+ H 1.0 1.0 1.0
183
+ """
184
+
185
+ incorrect_lattice_double_double_quote = """
186
+ 5
187
+ Lattice="6.0 0.0 "0.0 0.0 6.0 0.0 0.0 "0.0 6.0" Properties=species:S:1:pos:R:3
188
+ C 0.0 0.0 0.0
189
+ H 0.0 0.0 1.0
190
+ H 1.0 0.0 0.0
191
+ H 0.0 1.0 0.0
192
+ H 1.0 1.0 1.0
193
+ """
194
+
195
+ expected_cell = (
196
+ (6.0, 0.0, 0.0),
197
+ (0.0, 6.0, 0.0),
198
+ (0.0, 0.0, 6.0),
199
+ )
200
+
201
+ expected_atoms = [
202
+ Atom(atomic_number=6, position=(0.0, 0.0, 0.0)), # C
203
+ Atom(atomic_number=1, position=(0.0, 0.0, 1.0)), # H
204
+ Atom(atomic_number=1, position=(1.0, 0.0, 0.0)), # H
205
+ Atom(atomic_number=1, position=(0.0, 1.0, 0.0)), # H
206
+ Atom(atomic_number=1, position=(1.0, 1.0, 1.0)), # H
207
+ ]
208
+
209
+ expected_atoms_force_energy = [
210
+ Atom(atomic_number=16, position=(2.75637007, 2.70758009, 4.82925987)), # S
211
+ Atom(atomic_number=16, position=(2.84554004, 0.22606000, 3.09241009)), # S
212
+ Atom(atomic_number=6, position=(1.01258004, 2.49340009, 4.93010997)), # C
213
+ Atom(atomic_number=6, position=(3.17725992, 1.94638002, 3.18563008)), # C
214
+ Atom(atomic_number=6, position=(0.00000000, 0.00000000, 0.00000000)), # C
215
+ Atom(atomic_number=6, position=(2.22169995, -0.06399999, 1.41622007)), # C
216
+ Atom(atomic_number=6, position=(0.72196996, 0.27478000, 1.31911993)), # C
217
+ Atom(atomic_number=1, position=(0.73436999, 1.43289995, 4.80946970)), # H
218
+ Atom(atomic_number=1, position=(0.46778000, 3.08784985, 4.17749977)), # H
219
+ Atom(atomic_number=1, position=(0.70124000, 2.82674027, 5.93438053)), # H
220
+ Atom(atomic_number=1, position=(2.59819007, 2.49752998, 2.41988992)), # H
221
+ Atom(atomic_number=1, position=(4.25116014, 2.06114006, 2.94268989)), # H
222
+ Atom(atomic_number=1, position=(0.01781000, -1.10166001, -0.05143000)), # H
223
+ Atom(atomic_number=1, position=(0.50296998, 0.40217999, -0.89547998)), # H
224
+ Atom(atomic_number=1, position=(-1.05070996, 0.33554000, -0.00612000)), # H
225
+ Atom(atomic_number=1, position=(2.77840995, 0.53621000, 0.67106002)), # H
226
+ Atom(atomic_number=1, position=(2.39210987, -1.13260996, 1.18280005)), # H
227
+ Atom(atomic_number=1, position=(0.59988999, 1.35992002, 1.50082004)), # H
228
+ Atom(atomic_number=1, position=(0.10381000, -0.26278001, 2.06366992)), # H
229
+ Atom(atomic_number=8, position=(5.42839002, 0.27989000, 7.42625999)), # O
230
+ Atom(atomic_number=1, position=(4.56538010, 0.65290999, 7.13586997)), # H
231
+ Atom(atomic_number=1, position=(5.64198017, 0.76508999, 8.25524997)), # H
232
+ ]
233
+
234
+ expected_gradient = [
235
+ (-1.55959951, 0.22801913, 0.54136018),
236
+ (0.21334177, 1.76227773, -0.33161567),
237
+ (1.31854381, 0.37388875, 0.24927582),
238
+ (0.09910375, -1.52775439, 0.55371287),
239
+ (-0.54047842, 1.02932160, 0.37462899),
240
+ (0.74959058, -0.13849016, 0.66947629),
241
+ (0.88825228, -1.14448169, -0.10462363),
242
+ (-0.11871475, -0.45471041, -0.08255662),
243
+ (-0.24911941, 0.20360289, -0.32966949),
244
+ (-0.20534666, 0.13940533, 0.46775994),
245
+ (-0.29845053, 0.31965051, -0.34195845),
246
+ (0.62592601, -0.53239934, -0.63500463),
247
+ (0.31652062, -0.33261293, 0.62734778),
248
+ (0.14484048, 0.10780390, -0.42454441),
249
+ (-0.55084403, 0.10031390, -0.18450139),
250
+ (0.14551747, 0.35971650, -0.43068238),
251
+ (-0.04825265, -0.56114332, -0.26057253),
252
+ (-0.18014249, 0.45468505, -0.36480505),
253
+ (-0.77321355, -0.35561432, -0.04327458),
254
+ (0.71867266, -0.97644353, -0.67783188),
255
+ (-1.00844588, 0.37843304, -0.30550308),
256
+ (0.31126251, 0.56856677, 1.03720163),
257
+ ]
258
+
259
+
260
+ expected_molecule = Molecule(
261
+ charge=0,
262
+ multiplicity=1,
263
+ atoms=expected_atoms,
264
+ cell=PeriodicCell(lattice_vectors=expected_cell),
265
+ gradient=None,
266
+ )
267
+
268
+ expected_molecule_with_forces_and_energy = Molecule(
269
+ charge=0,
270
+ multiplicity=1,
271
+ atoms=expected_atoms_force_energy,
272
+ cell=None,
273
+ gradient=expected_gradient,
274
+ energy=-29139.55729148458,
275
+ )
276
+
277
+
278
+ def test_molecule_from_extxyz_valid() -> None:
279
+ """
280
+ Test a valid extxyz string (without forces).
281
+ """
282
+ molecule = Molecule.from_extxyz(valid_extxyz)
283
+ assert molecule == expected_molecule, f"Valid case failed:\nGot {molecule}\nExpected {expected_molecule}"
284
+
285
+
286
+ def test_molecule_from_extxyz_valid_with_forces() -> None:
287
+ """
288
+ Test a valid extxyz string that includes forces.
289
+ The forces should be converted to gradients (as the negative of forces).
290
+ """
291
+ molecule = Molecule.from_extxyz(valid_extxyz_with_forces_and_energy)
292
+ assert molecule == expected_molecule_with_forces_and_energy, (
293
+ f"Valid forces case failed:\nGot {molecule}\nExpected {expected_molecule_with_forces_and_energy}"
294
+ )
295
+
296
+
297
+ @mark.parametrize(
298
+ "invalid_extxyz",
299
+ [
300
+ incorrect_num_atoms,
301
+ no_num_atoms,
302
+ not_digit_num_atoms,
303
+ many_num_atoms,
304
+ xyz_style,
305
+ missing_properties,
306
+ incorrect_properites,
307
+ incorrect_lattice_extra,
308
+ incorrect_lattice_equals,
309
+ incorrect_lattice_str,
310
+ incorrect_lattice_extra_string,
311
+ incorrect_lattice_single_quote,
312
+ incorrect_lattice_double_quote,
313
+ incorrect_lattice_double_single_quote,
314
+ incorrect_lattice_double_double_quote,
315
+ ],
316
+ )
317
+ def test_molecule_from_extxyz_invalid(invalid_extxyz: str) -> None:
318
+ """
319
+ Test that invalid extxyz strings raise MoleculeReadError.
320
+ """
321
+ with raises(MoleculeReadError):
322
+ Molecule.from_extxyz(invalid_extxyz)
@@ -96,6 +96,21 @@ def test_from_pdb_to_pdb_1ema() -> None:
96
96
 
97
97
  assert pdb == pdb2
98
98
 
99
+ def test_from_pdb_to_pdb_2hu4() -> None:
100
+ with open("tests/data/2HU4.pdb") as f:
101
+ data = f.read()
102
+ pdb = pdb_from_pdb_filestring(data)
103
+ filestring = pdb_object_to_pdb_filestring(pdb, header=True, source=True, keyword=True, crystallography=True)
104
+
105
+ pdb2 = pdb_from_pdb_filestring(filestring)
106
+
107
+ assert pdb.description == pdb2.description
108
+ assert pdb.experiment == pdb2.experiment
109
+ # not true but doesn't matter
110
+ print(pdb.geometry == pdb2.geometry)
111
+ assert pdb.models == pdb2.models
112
+ assert pdb.quality == pdb2.quality
113
+
99
114
  def mmcif_author_format_to_pdb_format(authors: list[str]) -> list[str]:
100
115
  return [f"{last.upper()}{first.upper()}" for first, last in
101
116
  (author.split(", ") for author in authors)]
@@ -1,231 +0,0 @@
1
- import pytest
2
-
3
- from stjames import Atom, Molecule, MoleculeReadError, PeriodicCell
4
-
5
- valid_extxyz = """
6
- 5
7
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
8
- C 0.0 0.0 0.0
9
- H 0.0 0.0 1.0
10
- H 1.0 0.0 0.0
11
- H 0.0 1.0 0.0
12
- H 1.0 1.0 1.0
13
- """
14
-
15
- incorrect_num_atoms = """
16
- 6
17
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
18
- C 0.0 0.0 0.0
19
- H 0.0 0.0 1.0
20
- H 1.0 0.0 0.0
21
- H 0.0 1.0 0.0
22
- H 1.0 1.0 1.0
23
- """
24
-
25
- not_digit_num_atoms = """
26
- v
27
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
28
- C 0.0 0.0 0.0
29
- H 0.0 0.0 1.0
30
- H 1.0 0.0 0.0
31
- H 0.0 1.0 0.0
32
- H 1.0 1.0 1.0
33
- """
34
-
35
- many_num_atoms = """
36
- 6 9
37
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
38
- C 0.0 0.0 0.0
39
- H 0.0 0.0 1.0
40
- H 1.0 0.0 0.0
41
- H 0.0 1.0 0.0
42
- H 1.0 1.0 1.0
43
- """
44
- no_num_atoms = """
45
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
46
- C 0.0 0.0 0.0
47
- H 0.0 0.0 1.0
48
- H 1.0 0.0 0.0
49
- H 0.0 1.0 0.0
50
- H 1.0 1.0 1.0
51
- """
52
-
53
- xyz_style = """
54
- 5
55
- Comment
56
- C 0.0 0.0 0.0
57
- H 0.0 0.0 1.0
58
- H 1.0 0.0 0.0
59
- H 0.0 1.0 0.0
60
- H 1.0 1.0 1.0
61
- """
62
-
63
- missing_lattice = """
64
- 5
65
- Properties=species:S:1:pos:R:3
66
- C 0.0 0.0 0.0
67
- H 0.0 0.0 1.0
68
- H 1.0 0.0 0.0
69
- H 0.0 1.0 0.0
70
- H 1.0 1.0 1.0
71
- """
72
-
73
- missing_properties = """
74
- 5
75
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0"
76
- C 0.0 0.0 0.0
77
- H 0.0 0.0 1.0
78
- H 1.0 0.0 0.0
79
- H 0.0 1.0 0.0
80
- H 1.0 1.0 1.0
81
- """
82
-
83
- incorrect_properites = """
84
- 5
85
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3foo:1
86
- C 0.0 0.0 0.0
87
- H 0.0 0.0 1.0
88
- H 1.0 0.0 0.0
89
- H 0.0 1.0 0.0
90
- H 1.0 1.0 1.0
91
- """
92
-
93
- incorrect_lattice_extra = """
94
- 5
95
- Lattice="6.0 0.0 0.0 0.0 6.0 0.0 0.0 0.0 6.0 3.14" Properties=species:S:1:pos:R:3
96
- C 0.0 0.0 0.0
97
- H 0.0 0.0 1.0
98
- H 1.0 0.0 0.0
99
- H 0.0 1.0 0.0
100
- H 1.0 1.0 1.0
101
- """
102
-
103
- incorrect_lattice_equals = """
104
- 5
105
- Lattice="6.0 0.0 =0.0 0.0 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
106
- C 0.0 0.0 0.0
107
- H 0.0 0.0 1.0
108
- H 1.0 0.0 0.0
109
- H 0.0 1.0 0.0
110
- H 1.0 1.0 1.0
111
- """
112
-
113
- incorrect_lattice_str = """
114
- 5
115
- Lattice="6.0 0.0 0.0 hi 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
116
- C 0.0 0.0 0.0
117
- H 0.0 0.0 1.0
118
- H 1.0 0.0 0.0
119
- H 0.0 1.0 0.0
120
- H 1.0 1.0 1.0
121
- """
122
-
123
- incorrect_lattice_extra_string = """
124
- 5
125
- Lattice="6.0 0.0 0.0 0.0 sup 6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
126
- C 0.0 0.0 0.0
127
- H 0.0 0.0 1.0
128
- H 1.0 0.0 0.0
129
- H 0.0 1.0 0.0
130
- H 1.0 1.0 1.0
131
- """
132
-
133
-
134
- incorrect_lattice_single_quote = """
135
- 5
136
- Lattice="6.0 0.0 0.0 0.0 6.0 '0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
137
- C 0.0 0.0 0.0
138
- H 0.0 0.0 1.0
139
- H 1.0 0.0 0.0
140
- H 0.0 1.0 0.0
141
- H 1.0 1.0 1.0
142
- """
143
-
144
- incorrect_lattice_double_quote = """
145
- 5
146
- Lattice="6.0 0.0 0.0 0.0 "6.0 0.0 0.0 0.0 6.0" Properties=species:S:1:pos:R:3
147
- C 0.0 0.0 0.0
148
- H 0.0 0.0 1.0
149
- H 1.0 0.0 0.0
150
- H 0.0 1.0 0.0
151
- H 1.0 1.0 1.0
152
- """
153
-
154
- incorrect_lattice_double_single_quote = """
155
- 5
156
- Lattice="6.0 0.0 0.0 0.0 '6.0 0.0 0.0 '0.0 6.0" Properties=species:S:1:pos:R:3
157
- C 0.0 0.0 0.0
158
- H 0.0 0.0 1.0
159
- H 1.0 0.0 0.0
160
- H 0.0 1.0 0.0
161
- H 1.0 1.0 1.0
162
- """
163
-
164
- incorrect_lattice_double_double_quote = """
165
- 5
166
- Lattice="6.0 0.0 "0.0 0.0 6.0 0.0 0.0 "0.0 6.0" Properties=species:S:1:pos:R:3
167
- C 0.0 0.0 0.0
168
- H 0.0 0.0 1.0
169
- H 1.0 0.0 0.0
170
- H 0.0 1.0 0.0
171
- H 1.0 1.0 1.0
172
- """
173
-
174
-
175
- expected_cell = (
176
- (6.0, 0.0, 0.0),
177
- (0.0, 6.0, 0.0),
178
- (0.0, 0.0, 6.0),
179
- )
180
-
181
- expected_atoms = [
182
- Atom(atomic_number=6, position=(0.0, 0.0, 0.0)), # C
183
- Atom(atomic_number=1, position=(0.0, 0.0, 1.0)), # H
184
- Atom(atomic_number=1, position=(1.0, 0.0, 0.0)), # H
185
- Atom(atomic_number=1, position=(0.0, 1.0, 0.0)), # H
186
- Atom(atomic_number=1, position=(1.0, 1.0, 1.0)), # H
187
- ]
188
-
189
- expected_molecule = Molecule(
190
- charge=0,
191
- multiplicity=1,
192
- atoms=expected_atoms,
193
- cell=PeriodicCell(lattice_vectors=expected_cell),
194
- )
195
-
196
-
197
- def test_molecule_from_extxyz_valid() -> None:
198
- """
199
- Test case for valid extxyz string.
200
- """
201
- molecule = Molecule.from_extxyz(valid_extxyz)
202
- assert molecule == expected_molecule, f"Valid case failed: got {molecule}, expected {expected_molecule}"
203
-
204
-
205
- @pytest.mark.parametrize(
206
- "invalid_extxyz",
207
- [
208
- incorrect_num_atoms,
209
- no_num_atoms,
210
- not_digit_num_atoms,
211
- many_num_atoms,
212
- xyz_style,
213
- missing_lattice,
214
- missing_properties,
215
- incorrect_properites,
216
- incorrect_lattice_extra,
217
- incorrect_lattice_equals,
218
- incorrect_lattice_str,
219
- incorrect_lattice_extra_string,
220
- incorrect_lattice_single_quote,
221
- incorrect_lattice_double_quote,
222
- incorrect_lattice_double_single_quote,
223
- incorrect_lattice_double_double_quote,
224
- ],
225
- )
226
- def test_molecule_from_extxyz_invalid(invalid_extxyz: str) -> None:
227
- """
228
- Test case for invalid extxyz strings, ensuring they raise MoleculeReadError.
229
- """
230
- with pytest.raises(MoleculeReadError):
231
- Molecule.from_extxyz(invalid_extxyz)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes