PyPI - smoldynutils - Versions diffs - 0.1.0__tar.gz - Mend

smoldynutils 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

smoldynutils-0.1.0/LICENSE.md +21 -0
smoldynutils-0.1.0/PKG-INFO +57 -0
smoldynutils-0.1.0/README.md +34 -0
smoldynutils-0.1.0/pyproject.toml +73 -0
smoldynutils-0.1.0/src/smoldynutils/__init__.py +7 -0
smoldynutils-0.1.0/src/smoldynutils/data_objects.py +190 -0
smoldynutils-0.1.0/src/smoldynutils/metrics.py +147 -0
smoldynutils-0.1.0/src/smoldynutils/parsing.py +93 -0
smoldynutils-0.1.0/src/smoldynutils/plots.py +220 -0
smoldynutils-0.1.0/src/smoldynutils/utils.py +27 -0
smoldynutils-0.1.0/src/smoldynutils/workflows.py +97 -0

smoldynutils-0.1.0/LICENSE.md ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 rgrosseholz
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

smoldynutils-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,57 @@
+Metadata-Version: 2.4
+Name: smoldynutils
+Version: 0.1.0
+Summary: A small collection of utility tools to process the output of Smoldyn simulations.
+License: MIT
+License-File: LICENSE.md
+Keywords: smoldyn,simulation,modeling
+Author: Fabian Ormersbach
+Author-email: fabian.ormersbach@maastrichtuniversity.nl
+Requires-Python: >=3.12
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Requires-Dist: matplotlib (>=3.10.8,<4.0.0)
+Requires-Dist: numpy (>=2.4.2,<3.0.0)
+Requires-Dist: pandas (>=3.0.0,<4.0.0)
+Requires-Dist: scipy (>=1.17.0,<2.0.0)
+Requires-Dist: seaborn (>=0.13.2,<0.14.0)
+Project-URL: Homepage, https://github.com/rgrosseholz/smoldynutils
+Project-URL: Issues, https://github.com/rgrosseholz/smoldynutils/issues
+Project-URL: Repository, https://github.com/rgrosseholz/smoldynutils
+Description-Content-Type: text/markdown
+# smoldynutils
+Utilities for parsing, analyzing, and visualizing Smoldyn simulation outputs.
+## Installation
+From GitHub with poetry:
+```bash
+git clone https://github.com/rgrosseholz/smoldynutils.git
+cd smoldynutils
+poetry install
+poetry shell
+```
+Or via pip:
+```bash
+pip install smoldynutils
+```
+## Quickstart
+```python
+from smoldynutils.parsing import SmoldynParser
+parser = SmoldynParser(path="molpos_output.txt", delimiter=",")
+trajectories = parser.parse_fixed_grid()
+for traj in trajectories:
+    print(traj.x, traj.y)
+```
+## Authors
+Fabian Ormersbach, Maastricht Centre for Systems Biology and Bioinformatics, Maastricht University
+Ruth Grosseholz, Maastricht Centre for Systems Biology and Bioinformatics, Maastricht University
+## License
+This project is licensed under the MIT License. See `LICENSE.md` for details.

smoldynutils-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,34 @@
+# smoldynutils
+Utilities for parsing, analyzing, and visualizing Smoldyn simulation outputs.
+## Installation
+From GitHub with poetry:
+```bash
+git clone https://github.com/rgrosseholz/smoldynutils.git
+cd smoldynutils
+poetry install
+poetry shell
+```
+Or via pip:
+```bash
+pip install smoldynutils
+```
+## Quickstart
+```python
+from smoldynutils.parsing import SmoldynParser
+parser = SmoldynParser(path="molpos_output.txt", delimiter=",")
+trajectories = parser.parse_fixed_grid()
+for traj in trajectories:
+    print(traj.x, traj.y)
+```
+## Authors
+Fabian Ormersbach, Maastricht Centre for Systems Biology and Bioinformatics, Maastricht University
+Ruth Grosseholz, Maastricht Centre for Systems Biology and Bioinformatics, Maastricht University
+## License
+This project is licensed under the MIT License. See `LICENSE.md` for details.

smoldynutils-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,73 @@
+[project]
+name = "smoldynutils"
+version = "0.1.0"
+description = "A small collection of utility tools to process the output of Smoldyn simulations."
+authors = [
+    {name = "Fabian Ormersbach",email = "fabian.ormersbach@maastrichtuniversity.nl"},
+	{name = "Ruth Grosseholz",email = "ruth.grosseholz@maastrichtuniversity.nl"},
+]
+license = {text = "MIT"}
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "numpy (>=2.4.2,<3.0.0)",
+    "pandas (>=3.0.0,<4.0.0)",
+    "matplotlib (>=3.10.8,<4.0.0)",
+    "scipy (>=1.17.0,<2.0.0)",
+    "seaborn (>=0.13.2,<0.14.0)"
+]
+keywords = ["smoldyn", "simulation", "modeling"]
+classifiers = [
+	"License :: OSI Approved :: MIT License",
+	"Programming Language :: Python :: 3",
+]
+packages = [{ include = "smoldynutils", from = "src" }]
+[project.urls]
+Homepage = "https://github.com/rgrosseholz/smoldynutils"
+Repository = "https://github.com/rgrosseholz/smoldynutils"
+Issues = "https://github.com/rgrosseholz/smoldynutils/issues"
+[dependency-groups]
+dev = [
+    "flake8 (>=7.3.0,<8.0.0)",
+    "pytest (>=9.0.2,<10.0.0)",
+    "pytest-cov (>=7.0.0,<8.0.0)",
+    "black (>=26.1.0,<27.0.0)",
+    "pre-commit (>=4.5.1,<5.0.0)",
+    "ruff (>=0.15.0,<0.16.0)",
+    "isort (>=7.0.0,<8.0.0)",
+    "mypy (>=1.19.1,<2.0.0)"
+]
+[tool.poetry]
+packages = [{include = "smoldynutils", from = "src"}]
+[tool.black]
+line-length = 100
+target-version = ["py312"]
+[tool.ruff]
+line-length = 100
+lint.select = ["E", "F", "W", "C90", "N"]
+lint.ignore = ["E501"]
+extend-exclude = ["tests"]
+[tool.ruff.lint.pep8-naming]
+ignore-names = ["D", "MSD"]
+[tool.isort]
+profile = "black"
+src_paths = ["src", "tests"]
+[tool.mypy]
+python_version = "3.12"
+strict = true
+ignore_missing_imports = true
+mypy_path = ["src"]
+explicit_package_bases = true
+exclude = ["^tests/"]
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"

smoldynutils-0.1.0/src/smoldynutils/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+from importlib.metadata import version, PackageNotFoundError
# Take the version from the installed distribution metadata; fall back to a
# placeholder when no distribution is installed (e.g. local development).
try:
    _resolved_version = version("smoldynutils")
except PackageNotFoundError:
    _resolved_version = "0.0.0"
__version__ = _resolved_version
del _resolved_version

smoldynutils-0.1.0/src/smoldynutils/data_objects.py ADDED Viewed

@@ -0,0 +1,190 @@
+import warnings
+from dataclasses import dataclass
+from typing import Iterator, Optional, Sequence, Type, Union, overload
+import numpy as np
+@dataclass(frozen=True, slots=True)
+class Trajectory:
+    """Immutable container for trajectory."""
+    serialnumber: int
+    t: np.ndarray
+    x: np.ndarray
+    y: np.ndarray
+    species: np.ndarray
+    def __post_init__(self) -> None:
+        """Performs sensibility checks.
+        Raises:
+            ValueError: Differing lenghts of t, x, y, or species
+            ValueError: >1D for t, x, y, or species
+            TypeError: Species is not integer
+        """
+        n = len(self.t)
+        if not (len(self.x) == len(self.y) == len(self.species) == n):
+            raise ValueError("t, x, y, and species must have the same length")
+        if self.t.ndim != 1 or self.x.ndim != 1 or self.y.ndim != 1 or self.species.ndim != 1:
+            raise ValueError("t, x, y, species must be 1D arrays")
+        if not np.issubdtype(self.species.dtype, np.integer):
+            raise TypeError("Species must be integer-coded")
+        self._check_jumps(self.x)
+        self._check_jumps(self.y)
+    def _check_jumps(self, positions: np.ndarray) -> None:
+        jump_sensitivity = 0.5
+        max_pos = np.max(np.abs(positions))
+        forward_diff = np.diff(positions)
+        upper_jumps = forward_diff < jump_sensitivity * max_pos * -1
+        lower_jumps = forward_diff > jump_sensitivity * max_pos
+        def user_format_warning(
+            message: Warning | str,
+            category: Type[Warning],
+            filename: str,
+            lineno: int,
+            line: Optional[str] = None,
+        ) -> str:
+            return f"Warning: {message}\n"
+        if (upper_jumps + lower_jumps).sum() != 0:
+            warnings.formatwarning = user_format_warning
+            warnings.warn(f"Large jumps in trajectory {self.serialnumber} detected.", UserWarning)
+    def __len__(self) -> int:
+        """Returns number of points in trajectory
+        Returns:
+            int: Number of timepoints in trajectory
+        """
+        return len(self.t)
+    def __eq__(self, other: object) -> bool:
+        """Checks for equality.
+        Args:
+            other (object): Can be Trajectory or dict containing data.
+        Returns:
+            bool: True if t, x, y, and species match. False otherwise. NotImplemented if other is not dict or Trajectory.
+        """
+        if isinstance(other, Trajectory):
+            serial_bool = self.serialnumber == other.serialnumber
+            t_bool = np.allclose(self.t, other.t)
+            x_bool = np.allclose(self.x, other.x)
+            y_bool = np.allclose(self.y, other.y)
+            species_bool = np.allclose(self.species, other.species)
+            return serial_bool and t_bool and x_bool and y_bool and species_bool
+        if isinstance(other, dict):
+            if len(other["t"]) != len(self):
+                return False
+            serial_bool = self.serialnumber == other["serialnum"]
+            t_bool = np.allclose(self.t, other["t"])
+            x_bool = np.allclose(self.x, other["x"])
+            y_bool = np.allclose(self.y, other["y"])
+            species_bool = np.allclose(self.species, other["species"])
+            return serial_bool and t_bool and x_bool and y_bool and species_bool
+        return NotImplemented
+    def __getitem__(self, i: int) -> tuple[int, float, float, float, int]:
+        return (
+            self.serialnumber,
+            self.t[i],
+            self.x[i],
+            self.y[i],
+            self.species[i],
+        )
+    @staticmethod
+    def adjust_for_periodic_boundaries(
+        position: np.ndarray, min_pos: float, max_pos: float
+    ) -> np.ndarray:
+        size = max_pos - min_pos
+        half_delta = 0.5 * (size)
+        forward_diff = np.diff(position, prepend=position[0])
+        upper_jumps = forward_diff < -1 * half_delta
+        lower_jumps = forward_diff > half_delta
+        if (upper_jumps + lower_jumps).sum() == 0:
+            return position
+        upper_jumps_cumsum = upper_jumps.cumsum() * size
+        lower_jumps_cumsum = lower_jumps.cumsum() * size * -1
+        position_mask = upper_jumps_cumsum + lower_jumps_cumsum
+        return position + position_mask
@dataclass(frozen=True, slots=True)
class TrajectorySet:
    """Immutable container for set of trajectories."""

    trajectories: tuple[Trajectory, ...]

    @classmethod
    def from_list(cls, trajectories: Sequence[Trajectory]) -> "TrajectorySet":
        """Create TrajectorySet from sequence of trajectories

        Args:
            trajectories (Sequence[Trajectory]): Sequence of `Trajectory` objects.

        Returns:
            TrajectorySet: Contains provided Trajectories
        """
        return cls(tuple(trajectories))

    def __len__(self) -> int:
        """Returns the number of trajectories in the set

        Returns:
            int: Number of stored trajectories
        """
        return len(self.trajectories)

    def __getitem__(self, key: int) -> Trajectory:
        """Return trajectory by index.

        Args:
            key (int): Index of trajectory to retrieve

        Returns:
            Trajectory: Trajectory at given index
        """
        return self.trajectories[key]

    @overload
    def __add__(self, other: "TrajectorySet") -> "TrajectorySet": ...
    @overload
    def __add__(self, other: Trajectory) -> "TrajectorySet": ...
    def __add__(self, other: Union["TrajectorySet", Trajectory]) -> "TrajectorySet":
        """Combines given trajectories

        Args:
            other (Union[TrajectorySet, Trajectory]): TrajectorySet or Trajectory to combine with current

        Returns:
            TrajectorySet: New TrajectorySet containing the combined trajectories.
        """
        if isinstance(other, TrajectorySet):
            return TrajectorySet(self.trajectories + other.trajectories)
        if isinstance(other, Trajectory):
            return TrajectorySet(self.trajectories + (other,))
        return NotImplemented

    def __iter__(self) -> Iterator[Trajectory]:
        """Iterate over trajectories

        Yields:
            Trajectory: Trajectory object
        """
        return iter(self.trajectories)

    @property
    def serialnums(self) -> np.ndarray:
        """Serial numbers of all stored trajectories, in storage order.

        Returns:
            np.ndarray: 1D integer array with one serial number per trajectory.
        """
        # Serial numbers are integers; an integer dtype keeps them exact instead of
        # routing them through float64.
        return np.fromiter(
            (traj.serialnumber for traj in self.trajectories),
            dtype=np.int64,
            count=len(self.trajectories),
        )

    # TODO: Methods .t, .x, ... that return array of values of all trajectories

smoldynutils-0.1.0/src/smoldynutils/metrics.py ADDED Viewed

@@ -0,0 +1,147 @@
+import warnings
+from typing import cast
+import numpy as np
+from scipy.optimize import curve_fit
+from smoldynutils.data_objects import Trajectory
+from smoldynutils.utils import theoretical_msd, theoretical_msd_residue
+FloatArray = np.typing.NDArray[np.floating]
def calc_displacements(traj_values: FloatArray, lag: int = 1) -> FloatArray:
    """Calculates the displacement depending on time lag.

    Eq: x(t+lag) - x(t)

    Args:
        traj_values (np.ndarray): x or y values
        lag (int, optional): Controls the shift of the window. Must be >= 1. Defaults to 1.

    Raises:
        ValueError: Chosen timelag is smaller than 1
        ValueError: Chosen timelag is bigger than the length of x/y

    Returns:
        np.ndarray: Timelag displacement values
    """
    if lag < 1:
        # lag == 0 would slice with [:-0] (an empty array) and a negative lag would
        # pair up unrelated samples; both give meaningless results.
        raise ValueError("Timelag must be a positive integer.")
    if lag > len(traj_values) - 1:
        raise ValueError("Timelag is bigger than length of trajectory.")
    return traj_values[lag:] - traj_values[:-lag]
def calc_xy_displacement(traj: Trajectory, lag: int = 1) -> tuple[np.ndarray, np.ndarray]:
    """Compute the time-lag displacements of a trajectory along x and y.

    Args:
        traj (Trajectory): Trajectory whose x and y positions are used.
        lag (int, optional): Number of samples between the compared positions. Defaults to 1.

    Raises:
        ValueError: The lag exceeds the number of available steps in x or y.

    Returns:
        tuple[np.ndarray, np.ndarray]: Displacements along x, then along y.
    """
    largest_usable_lag = min(len(traj.x), len(traj.y)) - 1
    if lag > largest_usable_lag:
        raise ValueError("Timelag is bigger than number of datapoints in x or y")
    return (calc_displacements(traj.x, lag), calc_displacements(traj.y, lag))
def calc_msd(displacment: np.ndarray) -> np.ndarray:
    """Compute the mean squared displacement.

    Equation: mean(dx**2)

    Args:
        displacment (np.ndarray): Displacement values.

    Returns:
        np.ndarray: Mean of the squared displacements, wrapped in an array.
    """
    return np.array(np.mean(displacment**2))
def calc_xy_msd(displacements: tuple[np.ndarray, np.ndarray]) -> tuple[np.ndarray, np.ndarray]:
    """Apply calc_msd to the x and the y displacements.

    Args:
        displacements (tuple[np.ndarray, np.ndarray]): x displacement followed by y displacement

    Returns:
        tuple[np.ndarray, np.ndarray]: MSD of x, then MSD of y
    """
    x_displacement = displacements[0]
    y_displacement = displacements[1]
    return (calc_msd(x_displacement), calc_msd(y_displacement))
def calc_sq_displacement_from_zero(traj_values: FloatArray) -> FloatArray:
    """Compute the squared displacement relative to the first position.

    Args:
        traj_values (np.ndarray): Position values of a trajectory along one axis.

    Returns:
        np.ndarray: Squared distance of every position from the first one.
    """
    start = float(traj_values[0])
    offsets = traj_values - start
    return offsets**2
def calc_combined_msd(msds: tuple[np.ndarray, np.ndarray]) -> np.ndarray:
    """Add the x and y contributions into a single 2D value.

    Args:
        msds (tuple[np.ndarray, np.ndarray]): x values followed by y values.

    Returns:
        np.ndarray: Element-wise sum of the two entries.
    """
    x_part = msds[0]
    y_part = msds[1]
    return np.array(x_part + y_part)
def estimate_diffcoff_fullinfo(
    msds: np.ndarray, timepoints: np.ndarray, add_epsilon: bool = False
) -> tuple[FloatArray, FloatArray]:
    """Fits a line to MSD values and returns the full curve_fit result.

    Fitted equation is MSD = 4*D*t, or MSD = 4*D*t + epsilon if add_epsilon is set.

    Args:
        msds (np.ndarray): Array of MSD values
        timepoints (np.ndarray): Array of timelag or time values
        add_epsilon (bool, optional): Use equation MSD = 4*D*t + epsilon for fitting.
            Switched off with a warning if fewer than two timepoints are given.
            Defaults to False.

    Returns:
        tuple[FloatArray, FloatArray]: Result of scipy.optimize.curve_fit: the fitted
            parameters (D first, then epsilon if fitted) and their covariance.
    """
    # Plain truthiness instead of `is True`, so numpy booleans are honoured as well.
    if add_epsilon and len(timepoints) < 2:
        warnings.warn(
            "Cannot fit with epsilon if only one timelag given. Setting add_epsilon to False.",
            UserWarning,
        )
        add_epsilon = False
    model = theoretical_msd_residue if add_epsilon else theoretical_msd
    line_fit = curve_fit(model, timepoints, msds)
    return cast(tuple[FloatArray, FloatArray], line_fit)
def estimate_diffcoff(msds: np.ndarray, timepoints: np.ndarray, add_epsilon: bool = False) -> float:
    """Estimate the diffusion coefficient from MSD values.

    Delegates the fit to estimate_diffcoff_fullinfo and keeps only the first fitted
    parameter.

    Args:
        msds (np.ndarray): Array of MSD values
        timepoints (np.ndarray): Array of timelag or time values
        add_epsilon (bool, optional): Use equation MSD = 4*D*t + epsilon for fitting. Defaults to False.

    Returns:
        float: First fitted parameter, i.e. D in MSD = 4*D*t.
    """
    fitted_parameters, _covariance = estimate_diffcoff_fullinfo(msds, timepoints, add_epsilon)
    diffusion_coefficient = fitted_parameters[0]
    return float(diffusion_coefficient)

smoldynutils-0.1.0/src/smoldynutils/parsing.py ADDED Viewed

@@ -0,0 +1,93 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Optional
+import numpy as np
+import numpy.typing as npt
+from smoldynutils.data_objects import Trajectory, TrajectorySet
@dataclass
class SmoldynParser:
    """Reads delimited Smoldyn molecule-position output into trajectories.

    Attributes:
        path: Path to the Smoldyn data file.
        delimiter: Column delimiter of the file.
        dt: Time step; not used by ``parse_fixed_grid``.
        min_val: Lower box boundary for the periodic-boundary correction.
        max_val: Upper box boundary for the periodic-boundary correction.
    """

    path: str
    delimiter: str = ","
    dt: float = 0.5
    min_val: Optional[float] = None
    max_val: Optional[float] = None

    def parse_fixed_grid(
        self,
        dtype_xy: npt.DTypeLike = np.float64,
        dtype_t: npt.DTypeLike = np.float32,
        dtype_species: npt.DTypeLike = np.uint16,
        dtype_serialnum: npt.DTypeLike = np.uint32,
    ) -> TrajectorySet:
        """Parser based on numpy loadtxt assuming equal size of all trajectories.

        Reads ``self.path`` (Smoldyn data, assuming the listmols2 command) with
        ``self.delimiter``, using column 0 as time, 1 as species, 3 as x, 4 as y and
        5 as serial number. Rows are sorted by serial number and then time, and one
        Trajectory is built per serial number. The periodic-boundary correction is
        applied to x and y only when both ``min_val`` and ``max_val`` are set.

        Args:
            dtype_xy (npt.DTypeLike, optional): xy data type. Defaults to np.float64.
            dtype_t (npt.DTypeLike, optional): t data type. Defaults to np.float32.
            dtype_species (npt.DTypeLike, optional): Species data type. Defaults to np.uint16.
            dtype_serialnum (npt.DTypeLike, optional): Serial number data type.
                Defaults to np.uint32.

        Raises:
            ValueError: The data file contains no values.
            NotImplementedError: Serial numbers have differing numbers of timepoints.

        Returns:
            TrajectorySet: Set of read trajectories.
        """
        # Load in double precision: float32 cannot hold serial numbers above 2**24
        # exactly and would truncate coordinates before the requested casts.
        # ndmin=2 keeps the column indexing valid for a single-row file.
        file_content = np.loadtxt(
            self.path, delimiter=self.delimiter, dtype=np.float64, ndmin=2
        )
        if file_content.size == 0:
            raise ValueError("Data file appears to be empty.")

        t = file_content[:, 0].astype(dtype_t, copy=False)
        serial_number = file_content[:, 5].astype(dtype_serialnum, copy=False)
        # lexsort uses the last key as primary key: serial number first, then time.
        order = np.lexsort((t, serial_number))
        t = t[order]
        serial_number = serial_number[order]
        species = file_content[:, 1].astype(dtype_species, copy=False)[order]
        x = file_content[:, 3].astype(dtype_xy, copy=False)[order]
        y = file_content[:, 4].astype(dtype_xy, copy=False)[order]

        serial_ids, serial_start, serial_counts = np.unique(
            serial_number, return_index=True, return_counts=True
        )
        expected = int(serial_counts[0])
        if not np.all(serial_counts == expected):
            raise NotImplementedError(
                "Not a fixed grid. Serials have different number of timepoints."
            )

        min_val, max_val = self.min_val, self.max_val
        trajs: list[Trajectory] = []
        for sid, start in zip(serial_ids, serial_start):
            end = start + expected
            x_values = x[start:end]
            y_values = y[start:end]
            if min_val is not None and max_val is not None:
                x_values = Trajectory.adjust_for_periodic_boundaries(x_values, min_val, max_val)
                y_values = Trajectory.adjust_for_periodic_boundaries(y_values, min_val, max_val)
            trajs.append(
                Trajectory(
                    int(sid),
                    t=t[start:end],
                    x=x_values,
                    y=y_values,
                    species=species[start:end],
                )
            )
        return TrajectorySet(tuple(trajs))

smoldynutils-0.1.0/src/smoldynutils/plots.py ADDED Viewed

@@ -0,0 +1,220 @@
+from typing import Optional, Sequence, Union, Dict
+import numpy as np
+from matplotlib.axes import Axes
+from smoldynutils.data_objects import Trajectory, TrajectorySet
+import seaborn as sns
+FloatArray = np.typing.NDArray[np.floating]
def plot_gauss_comparison(
    displacement: np.ndarray,
    gauss_vals: np.ndarray,
    ax: Axes,
    bins: Union[str, Sequence[float]] = "fd",
    title: str = "Title",
) -> Axes:
    """Draw density histograms of measured and theoretical displacements on one axis.

    Args:
        displacement (np.ndarray): Measured displacement
        gauss_vals (np.ndarray): Theoretical expectation
        ax (Axes): Axis to plot onto.
        bins (Union[str, Sequence[float]], optional): Binning rule name or explicit bin
            edges, passed to both histograms. Defaults to "fd".
        title (str, optional): Title for the plot. Defaults to "Title".

    Returns:
        Axes: Axis that contains the histograms.
    """
    # Measured data first, theoretical values second.
    for samples in (displacement, gauss_vals):
        ax.hist(samples, bins=bins, density=True)
    ax.set_xlabel("Δx")
    ax.set_ylabel("density")
    ax.set_title(title)
    return ax
def plot_trajectorie(traj: Trajectory, ax: Axes, title: str = "Title") -> Axes:
    """Draw one trajectory as a black line with points coloured by time.

    Args:
        traj (Trajectory): Trajectory to plot
        ax (Axes): Axis onto which the trajectory will be plotted
        title (str, optional): Figure title. Defaults to "Title".

    Returns:
        Axes: Axis that contains the xy plot
    """
    xs, ys = traj.x, traj.y
    ax.plot(xs, ys, color="black")
    ax.scatter(xs, ys, c=traj.t)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_title(title)
    return ax
def plot_trajectories(trajs: TrajectorySet, ax: Axes, title: str = "Title") -> Axes:
    """Draw every trajectory of a set onto the same axis.

    Args:
        trajs (TrajectorySet): Set of trajectories
        ax (Axes): Axis onto which the trajectories will be plotted
        title (str, optional): Figure title. Defaults to "Title".

    Returns:
        Axes: Axis that contains the xy plot
    """
    current_ax = ax
    for single_traj in trajs:
        current_ax = plot_trajectorie(single_traj, current_ax, title)
    return current_ax
def plot_msd(
    msd: np.ndarray,
    ax: Axes,
    time: Optional[np.ndarray] = None,
    title: str = "Title",
    color: Optional[str] = None,
) -> Axes:
    """Line plot of MSD values against time.

    Args:
        msd (np.ndarray): MSD values, at most 2D. Transposed if only its second
            dimension matches the length of ``time``.
        ax (Axes): Axis onto which will be plotted
        time (Optional[np.ndarray], optional): Time values. Defaults to the indices
            0..len(msd)-1.
        title (str, optional): Figure title. Defaults to "Title".
        color (Optional[str], optional): Line colour. Defaults to "blue".

    Raises:
        ValueError: ``msd`` has more than two dimensions.
        ValueError: Neither orientation of ``msd`` matches the length of ``time``.

    Returns:
        Axes: Axis that contains the MSD plot.
    """
    time_axis = np.arange(len(msd)) if time is None else time
    line_color = "blue" if color is None else color
    if msd.ndim > 2:
        raise ValueError("Input MSD array is > 2D")
    if len(msd) != len(time_axis):
        # Try the other orientation before giving up.
        msd = msd.T
        if len(msd) != len(time_axis):
            raise ValueError("Input MSD array and time array have no shape in common.")
    ax.plot(time_axis, msd, color=line_color)
    ax.set_xlabel("time")
    ax.set_ylabel("msd")
    ax.set_title(title)
    return ax
def plot_msd_comparison(
    msd: np.ndarray,
    theoretical_msd: np.ndarray,
    ax: Axes,
    time: Optional[np.ndarray] = None,
    title: str = "Title",
) -> Axes:
    """Plot calculated MSD values and the theoretical expectation on one axis.

    The calculated values use the default colour of plot_msd, the theoretical values
    are drawn in red.

    Args:
        msd (np.ndarray): Calculated MSD values
        theoretical_msd (np.ndarray): Theoretically expected MSD values
        ax (Axes): Axis onto which will be plotted
        time (Optional[np.ndarray], optional): Time values shared by both curves.
            Defaults to the indices 0..len(msd)-1.
        title (str, optional): Figure title. Defaults to "Title".

    Raises:
        ValueError: The two MSD arrays differ in length.

    Returns:
        Axes: Axis that contains the msd comparison.
    """
    if len(msd) != len(theoretical_msd):
        raise ValueError(
            f"Mismatch in MSD arrays: input array length={len(msd)}, theoretical array length={len(theoretical_msd)}"
        )
    shared_time = np.arange(len(msd)) if time is None else time
    ax = plot_msd(msd, ax, time=shared_time)
    ax = plot_msd(theoretical_msd, ax, time=shared_time, color="red")
    # plot_msd applied its own default title; set the labels and requested title last.
    ax.set_xlabel("time")
    ax.set_ylabel("msd")
    ax.set_title(title)
    return ax
def plot_diffconst_hist(
    diffcoffs: np.ndarray, reference_diffcoff: float, ax: Axes, title: str = "Title"
) -> Axes:
    """Histogram of diffusion coefficients with mean and reference markers.

    The histogram uses 20 linearly spaced bin edges between the smallest and largest
    value, the x axis is switched to a log scale, and vertical lines mark the mean of
    the data and the reference value.

    Args:
        diffcoffs (np.ndarray): Array of diffusion coefficients
        reference_diffcoff (float): Expected diffusion coefficient
        ax (Axes): Axes onto which will be plotted
        title (str, optional): Plot title. Defaults to "Title".

    Returns:
        Axes: Axes with histogram
    """
    bin_edges = list(np.linspace(min(diffcoffs), max(diffcoffs), 20))
    ax.hist(diffcoffs, bins=bin_edges)
    ax.set_xscale("log")
    observed_mean = float(np.mean(diffcoffs))
    ax.axvline(observed_mean)
    ax.axvline(reference_diffcoff)
    ax.set_xlabel("Diffusion coefficient")
    ax.set_ylabel("Count")
    ax.set_title(title)
    return ax
def plot_violin_with_mean(
    diffcoff: Dict[float, FloatArray],
    reference_diffcoffs: Sequence[float],
    permeability: Sequence[float],
    ax: Axes,
    title: str = "Title",
) -> Axes:
    """Violin plot of diffusion coefficients per dict entry, with means and references.

    Args:
        diffcoff (Dict[float, FloatArray]): Permeability vs diffusion coefficients
        reference_diffcoffs (Sequence[float]): Expected diffusion coefficients; the
            first entry is drawn as the "WT" line, the second as the "PHSD" line.
        permeability (Sequence[float]): Permeabilities; only their number is checked
            against the dict.
        ax (Axes): Axes onto which will be plotted
        title (str, optional): Title of plot. Defaults to "Title".

    Raises:
        ValueError: Number of entries in diffcoff does not match number of permeabilities

    Returns:
        Axes: Axis that contains violin plots
    """
    categories = list(diffcoff.keys())
    if len(categories) != len(permeability):
        raise ValueError(
            "Number of entries in diffcoff dict does not match number of permeabilites."
        )
    sns.violinplot(diffcoff, ax=ax, order=categories, color="skyblue", inner=None)

    # One horizontal marker per violin at the mean of its values.
    mean_ds = [np.mean(vals) for vals in diffcoff.values()]
    violin_positions = np.arange(0, len(categories))
    ax.scatter(violin_positions, mean_ds, color="black", marker="_", zorder=10, alpha=1, s=100)

    wt_reference = reference_diffcoffs[0]
    ax.axhline(
        wt_reference,
        color="red",
        linestyle="--",
        linewidth=1,
        label=f"WT D={reference_diffcoffs[0]}",
    )
    phsd_reference = reference_diffcoffs[1]
    ax.axhline(
        phsd_reference,
        color="blue",
        linestyle=":",
        linewidth=1,
        label=f"PHSD D={reference_diffcoffs[1]}",
    )
    ax.set_xlabel("Permeability")
    ax.set_ylabel("Diffusion coefficient")
    ax.set_title(title)
    return ax

smoldynutils-0.1.0/src/smoldynutils/utils.py ADDED Viewed

@@ -0,0 +1,27 @@
+import numpy as np
def gauss_probability_density(x: float, mu: float, sigma: float) -> float:
    """Evaluate the normal probability density at ``x``.

    Args:
        x (float): Point at which the density is evaluated.
        mu (float): Mean of the distribution.
        sigma (float): Standard deviation of the distribution.

    Raises:
        ValueError: ``sigma`` is not strictly positive.

    Returns:
        float: Density value at ``x``.
    """
    if sigma <= 0:
        raise ValueError("sigma must be > 0")
    normalisation = 1 / (np.sqrt(2 * np.pi) * sigma)
    exponent = -np.square(x - mu) / (2 * sigma**2)
    return float(normalisation * np.exp(exponent))
def theoretical_brownian_motion_pdf(x: float, D: float, t: float) -> float:
    """Evaluate the displacement density of Brownian motion along one axis.

    Uses a zero-mean normal density with sigma = sqrt(2*D*t).

    Args:
        x (float): Displacement at which the density is evaluated.
        D (float): Diffusion coefficient, must be > 0.
        t (float): Elapsed time, must be > 0.

    Raises:
        ValueError: ``D`` or ``t`` is not strictly positive.

    Returns:
        float: Density value at ``x``.
    """
    if D <= 0:
        raise ValueError("D must be > 0")
    if t <= 0:
        # t == 0 gives sigma == 0, for which the Gaussian density is undefined;
        # report the real cause instead of a misleading sigma error downstream.
        raise ValueError("t must be > 0")
    sigma = np.sqrt(2 * D * t)
    mu = 0
    return gauss_probability_density(x, mu, sigma)
def theoretical_msd(t: float, D: float) -> float:
    """Return the MSD expected from MSD = 4*D*t.

    Args:
        t (float): Time or time lag.
        D (float): Diffusion coefficient.

    Returns:
        float: Expected mean squared displacement.
    """
    slope = 4 * D
    return slope * t
def theoretical_msd_residue(t: float, D: float, epsilon: float) -> float:
    """Return the MSD expected from MSD = 4*D*t + epsilon.

    Args:
        t (float): Time or time lag.
        D (float): Diffusion coefficient.
        epsilon (float): Constant offset added to the linear term.

    Returns:
        float: Expected mean squared displacement including the offset.
    """
    diffusive_part = 4 * D * t
    return diffusive_part + epsilon

smoldynutils-0.1.0/src/smoldynutils/workflows.py ADDED Viewed

@@ -0,0 +1,97 @@
+from typing import Dict, Sequence
+import numpy as np
+from smoldynutils.data_objects import Trajectory, TrajectorySet
+from smoldynutils.metrics import (
+    calc_combined_msd,
+    calc_sq_displacement_from_zero,
+    calc_xy_displacement,
+    calc_xy_msd,
+    estimate_diffcoff,
+)
def estimate_timelag_msd_from_traj(traj: Trajectory, timelags: Sequence[int]) -> Dict[int, float]:
    """Compute the combined x/y MSD of a trajectory for each requested time lag.

    Args:
        traj (Trajectory): Trajectory for which MSD will be calculated
        timelags (Sequence[int]): Sequence of timelags that will be used

    Returns:
        Dict[int, float]: Keys are timelags, values the corresponding MSD
    """
    msd_by_lag = {}
    for lag in timelags:
        per_axis_msd = calc_xy_msd(calc_xy_displacement(traj, lag))
        msd_by_lag[lag] = float(calc_combined_msd(per_axis_msd))
    return msd_by_lag
def estimate_timelag_diffcoff_from_trajset(
    trajs: TrajectorySet, timelags: Sequence[int] = (1, 2, 3, 4), add_epsilon: bool = False
) -> Dict[int, float]:
    """Calculates observed diffusion coefficient based on MSD(timelag) for set of trajectories

    Args:
        trajs (TrajectorySet): Set of trajectories for which diff coff will be calculated
        timelags (Sequence[int], optional): Sequence of timelags for MSD calculation.
            Repeated timelags are used once. Defaults to (1, 2, 3, 4).
        add_epsilon (bool, optional): Fit MSD = 4*D*t + epsilon instead of MSD = 4*D*t.
            Defaults to False.

    Returns:
        Dict[int, float]: Keys are trajectory serialnums, or the trajectory index if the
            serialnums are not unique; values the corresponding diff coff.
    """
    # Serial numbers are only usable as keys when no two trajectories share one.
    use_index_for_dict = len(np.unique(trajs.serialnums)) < len(trajs)
    diffcoffs = {}
    for index, traj in enumerate(trajs):
        msd_dict = estimate_timelag_msd_from_traj(traj, timelags)
        # Take lags and MSDs from the same dict so both arrays have the same length
        # even if `timelags` contains repeated values.
        lag_array = np.array(list(msd_dict.keys()))
        msds = np.array(list(msd_dict.values()))
        key = index if use_index_for_dict else traj.serialnumber
        diffcoffs[key] = estimate_diffcoff(msds, lag_array, add_epsilon=add_epsilon)
    return diffcoffs
def estimate_time_msd_from_traj(traj: Trajectory) -> np.ndarray:
    """Compute the squared displacement from the start position over time.

    The squared x and y displacements from the first position are computed
    separately and then added.

    Args:
        traj (Trajectory): Trajectory for which MSD will be calculated

    Returns:
        np.ndarray: One value per time point of the trajectory.
    """
    per_axis_sq_displacement = (
        calc_sq_displacement_from_zero(traj.x),
        calc_sq_displacement_from_zero(traj.y),
    )
    return calc_combined_msd(per_axis_sq_displacement)
def estimate_time_diffcoff_from_trajset(trajs: TrajectorySet) -> Dict[int, float]:
    """Estimate a diffusion coefficient per trajectory from MSD(time).

    Args:
        trajs (TrajectorySet): Set of trajectories for which diffusion coefficient will be estimated.

    Returns:
        Dict[int, float]: Keys are serialnums, or the trajectory index if the serialnums
            are not unique; values are diffcoffs
    """
    key_by_index = len(np.unique(trajs.serialnums)) < len(trajs)
    estimates = {}
    for position, single_traj in enumerate(trajs):
        msd_over_time = estimate_time_msd_from_traj(single_traj)
        key = position if key_by_index else single_traj.serialnumber
        estimates[key] = estimate_diffcoff(msd_over_time, single_traj.t)
    return estimates