PyPI - torchdtw - Versions diffs - 0.0.1__tar.gz - Mend

torchdtw 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

torchdtw-0.0.1/PKG-INFO +15 -0
torchdtw-0.0.1/README.md +2 -0
torchdtw-0.0.1/pyproject.toml +68 -0
torchdtw-0.0.1/setup.cfg +4 -0
torchdtw-0.0.1/setup.py +58 -0
torchdtw-0.0.1/src/torchdtw/__init__.py +49 -0
torchdtw-0.0.1/src/torchdtw/csrc/dtw.cpp +140 -0
torchdtw-0.0.1/src/torchdtw/py.typed +0 -0
torchdtw-0.0.1/src/torchdtw.egg-info/PKG-INFO +15 -0
torchdtw-0.0.1/src/torchdtw.egg-info/SOURCES.txt +13 -0
torchdtw-0.0.1/src/torchdtw.egg-info/dependency_links.txt +1 -0
torchdtw-0.0.1/src/torchdtw.egg-info/requires.txt +2 -0
torchdtw-0.0.1/src/torchdtw.egg-info/top_level.txt +1 -0
torchdtw-0.0.1/tests/test_dtw.py +63 -0
torchdtw-0.0.1/tests/test_opcheck.py +54 -0

torchdtw-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,15 @@
+Metadata-Version: 2.4
+Name: torchdtw
+Version: 0.0.1
+Summary: Add your description here
+Author: Maxime Poli
+Author-email: CoML <dev@cognitive-ml.fr>
+License-Expression: MIT
+Keywords: machine learning
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+Requires-Dist: numpy>=1.26.4
+Requires-Dist: torch>=2.9.0
+# PyTorch DTW C++ extension

torchdtw-0.0.1/README.md ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # PyTorch DTW C++ extension
2	+

torchdtw-0.0.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,68 @@
+[build-system]
+requires = [
+    "setuptools>=80.9.0",
+    "torch>=2.9.0",
+    "numpy>=1.26.4",
+    "ninja>=1.11",
+]
+build-backend = "setuptools.build_meta"
+[project]
+name = "torchdtw"
+version = "0.0.1"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+authors = [
+    { name = "Maxime Poli" },
+    { name = "CoML", email = "dev@cognitive-ml.fr" },
+]
+keywords = ["machine learning"]
+license = "MIT"
+dependencies = [
+    "numpy>=1.26.4",
+    "torch>=2.9.0",
+]
+[dependency-groups]
+dev = [
+    "ipykernel>=7.1.0",
+    "ruff>=0.14.2",
+    "typos>=1.36.2",
+]
+test = [
+    "hypothesis>=6.142.5",
+    "pytest>=8.4.2",
+]
+[tool.ruff]
+line-length = 119
+[tool.ruff.lint]
+select = ["ALL"]
+ignore = [
+    "COM812",  # missing-trailing-comma
+    "D105",    # undocumented-magic-method
+    "D107",    # undocumented-public-init
+    "D203",    # incorrect-blank-line-before-class
+    "D213",    # multi-line-summary-second-line
+    "N803",    # invalid-argument-name
+    "N806",    # non-lowercase-variable-in-function
+    "PLR0913", # too-many-arguments
+]
+[tool.ruff.lint.pylint]
+allow-magic-value-types = ["int"]
+[tool.ruff.lint.flake8-self]
+ignore-names = ["_check"]
+[tool.ruff.lint.pep8-naming]
+ignore-names = ["F"]
+[tool.uv.workspace]
+members = [
+    "benchmark",
+    "torchdtw",
+]

torchdtw-0.0.1/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

torchdtw-0.0.1/setup.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""Build the DTW PyTorch C++ extension."""
+import os
+import sys
+import torch
+from setuptools import Extension, setup
+from torch.torch_version import Version
+from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CppExtension, CUDAExtension
+def get_openmp_flags() -> tuple[list[str], list[str]]:
+    """Return the compiler and linker flags for OpenMP.
+    :return: A ``(compile_flags, link_flags)`` pair chosen from ``sys.platform``. Both lists are empty on any
+        platform other than ``linux`` and ``win32``.
+    """
+    match sys.platform:
+        case "linux":
+            compile_flags, link_flags = ["-fopenmp"], ["-fopenmp"]
+        case "win32":
+            # Only a compile flag is set on Windows; no extra link flag is added.
+            compile_flags, link_flags = ["-openmp"], []
+        case _:  # On MacOS, we use the OpenMP version vendored by PyTorch
+            return [], []
+    return compile_flags, link_flags
+def get_cuda_arch_list() -> str:
+    """Supported CUDA architectures. Volta is not supported by CUDA 13.0.
+    :return: A semicolon-separated list of architecture names. Volta is listed when ``torch.version.cuda`` is
+        ``None`` or older than 13.0, and left out otherwise.
+    """
+    if torch.version.cuda is None or Version(torch.version.cuda) < Version("13.0"):
+        return "Volta;Turing;Ampere;Ada;Hopper"
+    return "Turing;Ampere;Ada;Hopper"
+def get_extension() -> Extension:
+    """Either CUDA or CPU extension."""
+    use_cuda = CUDA_HOME is not None
+    extension = CUDAExtension if use_cuda else CppExtension
+    openmp_flags = get_openmp_flags()
+    extra_compile_args = {
+        "cxx": ["-fdiagnostics-color=always", "-O3"] + openmp_flags[0],
+        "nvcc": ["-O3"],
+    }
+    sources = ["src/torchdtw/csrc/dtw.cpp"]
+    if use_cuda:
+        os.environ["TORCH_CUDA_ARCH_LIST"] = get_cuda_arch_list()
+        sources.append("src/torchdtw/csrc/cuda/dtw.cu")
+    return extension(
+        "torchdtw._C",
+        sources,
+        extra_compile_args=extra_compile_args,
+        extra_link_args=openmp_flags[1],
+        py_limited_api=True,
+    )
+# Script entry point: build the single extension returned by get_extension() with PyTorch's build_ext command.
+if __name__ == "__main__":
+    setup(
+        ext_modules=[get_extension()],
+        cmdclass={"build_ext": BuildExtension},
+        # Wheel tag for the limited API; goes together with py_limited_api=True on the extension.
+        options={"bdist_wheel": {"py_limited_api": "cp312"}},
+    )

torchdtw-0.0.1/src/torchdtw/__init__.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""DTW implementation using PyTorch C++ extensions, with CPU and CUDA backends."""
+import torch
+from . import _C  # noqa: F401 # ty: ignore[unresolved-import]
+__all__ = ["dtw", "dtw_batch"]
+def dtw(distances: torch.Tensor) -> torch.Tensor:
+    """Compute the DTW of the given ``distances`` 2D tensor.
+    :param distances: A 2D tensor of shape (n, m) representing the pairwise distances between two sequences.
+    :return: The output of the ``torchdtw::dtw`` operator, a 0-dimensional tensor.
+    """
+    return torch.ops.torchdtw.dtw.default(distances)
+def dtw_batch(distances: torch.Tensor, sx: torch.Tensor, sy: torch.Tensor, *, symmetric: bool) -> torch.Tensor:
+    """Compute the batched DTW on the ``distances`` 4D tensor.
+    :param distances: A 4D tensor of shape (n1, n2, s1, s2) representing the pairwise distances between two
+        batches of sequences.
+    :param sx: A 1D tensor of shape (n1,) representing the lengths of the sequences in the first batch.
+    :param sy: A 1D tensor of shape (n2,) representing the lengths of the sequences in the second batch.
+    :param symmetric: Whether or not the DTW is symmetric (i.e., the two batches are the same).
+    :return: The output of the ``torchdtw::dtw_batch`` operator, a 2D tensor of shape (n1, n2).
+    """
+    # The operator takes ``symmetric`` positionally; it is keyword-only on this Python wrapper only.
+    return torch.ops.torchdtw.dtw_batch.default(distances, sx, sy, symmetric)
+@torch.library.register_fake("torchdtw::dtw")
+def _(distances: torch.Tensor) -> torch.Tensor:
+    """Register the FakeTensor kernel for dtw, for compatibility with torch.compile.
+    :param distances: The input of ``torchdtw::dtw``; checked to be a 2D float32 tensor.
+    :return: An uninitialized 0-dimensional float32 tensor with the layout and device of ``distances``.
+    """
+    torch._check(distances.ndim == 2)
+    torch._check(distances.dtype == torch.float32)
+    return torch.empty((), dtype=torch.float32, layout=distances.layout, device=distances.device)
+@torch.library.register_fake("torchdtw::dtw_batch")
+def _(distances: torch.Tensor, sx: torch.Tensor, sy: torch.Tensor, symmetric: bool) -> torch.Tensor:  # noqa: FBT001
+    """Register the FakeTensor kernel for dtw_batch, for compatibility with torch.compile.
+    :param distances: Checked to be a 4D float32 tensor.
+    :param sx: Checked to be a 1D int64 tensor.
+    :param sy: Checked to be a 1D int64 tensor.
+    :param symmetric: Checked to be a ``bool``.
+    :return: An uninitialized float32 tensor whose shape is the first two dimensions of ``distances``, with
+        the layout and device of ``distances``.
+    """
+    torch._check(distances.ndim == 4)
+    torch._check(sx.ndim == 1)
+    torch._check(sy.ndim == 1)
+    torch._check(distances.dtype == torch.float32)
+    torch._check(sx.dtype == torch.long)
+    torch._check(sy.dtype == torch.long)
+    torch._check(isinstance(symmetric, bool))
+    # Only the two batch dimensions matter for the output shape; the sequence dimensions are dropped.
+    nx, ny, _, _ = distances.shape
+    return torch.empty((nx, ny), dtype=torch.float32, layout=distances.layout, device=distances.device)

torchdtw-0.0.1/src/torchdtw/csrc/dtw.cpp ADDED Viewed

@@ -0,0 +1,140 @@
+#include <Python.h>
+#include <omp.h>
+#include <torch/csrc/stable/library.h>
+#include <torch/csrc/stable/ops.h>
+#include <torch/csrc/stable/tensor.h>
+#include <torch/headeronly/util/Exception.h>
+#include <algorithm>
+#include <vector>
+extern "C" {
+/* Creates a dummy empty _C module that can be imported from Python.
+   The import from Python will load the .so consisting of this file
+   in this extension, so that the STABLE_TORCH_LIBRARY static initializers
+   below are run. */
+PyObject* PyInit__C(void) {
+  // The definition is static because it is handed to PyModule_Create by pointer.
+  static struct PyModuleDef module_def = {
+      PyModuleDef_HEAD_INIT,
+      "_C", /* name of module */
+      NULL, /* module documentation, may be NULL */
+      -1, /* size of per-interpreter state of the module,
+             or -1 if the module keeps state in global variables. */
+      NULL, /* methods */
+  };
+  return PyModule_Create(&module_def);
+}
+}
+namespace torchdtw {
+using torch::stable::Tensor;
+// Dynamic time warping between two sequences, given their pairwise distances.
+// `distances` is read at distances[i * stride_x + j * stride_y] for 0 <= i < N and 0 <= j < M.
+// Returns the accumulated cost of the last cell divided by the length of the backtracked path.
+// Fails through STD_TORCH_CHECK when N or M is not positive, or when a stride is not positive.
+inline float dtw(
+    const float* distances,
+    const int64_t N,
+    const int64_t M,
+    const int64_t stride_x,
+    const int64_t stride_y) {
+  STD_TORCH_CHECK(N > 0 && M > 0, "Empty input tensor");
+  STD_TORCH_CHECK(stride_x > 0 && stride_y > 0, "Strides must be positive");
+  // Dense row-major N x M table: cost[i * M + j] is the accumulated cost of cell (i, j).
+  std::vector<float> cost(N * M);
+  cost[0] = distances[0];
+  // First column: the only predecessor is the cell above.
+  for (int64_t i = 1; i < N; i++) {
+    cost[i * M] = distances[i * stride_x] + cost[(i - 1) * M];
+  }
+  // First row: the only predecessor is the cell on the left.
+  for (int64_t j = 1; j < M; j++) {
+    cost[j] = distances[j * stride_y] + cost[j - 1];
+  }
+  // Interior: local distance plus the cheapest of the up, diagonal and left predecessors.
+  for (int64_t i = 1; i < N; i++) {
+    for (int64_t j = 1; j < M; j++) {
+      cost[i * M + j] = distances[i * stride_x + j * stride_y] +
+          std::min({cost[(i - 1) * M + j], cost[(i - 1) * M + j - 1], cost[i * M + j - 1]});
+    }
+  }
+  // Walk back from (N - 1, M - 1) along the cheapest predecessor, counting the cells on the path.
+  // On ties the diagonal move is preferred, then the left move.
+  int64_t path_len = 1;
+  int64_t i = N - 1;
+  int64_t j = M - 1;
+  while (i > 0 && j > 0) {
+    const float c_up = cost[(i - 1) * M + j];
+    const float c_left = cost[i * M + j - 1];
+    const float c_diag = cost[(i - 1) * M + j - 1];
+    if (c_diag <= c_left && c_diag <= c_up) {
+      i--;
+      j--;
+    } else if (c_left <= c_up) {
+      j--;
+    } else {
+      i--;
+    }
+    path_len++;
+  }
+  // Once a border is reached, the rest of the path runs straight along it to (0, 0).
+  if (i == 0)
+    path_len += j;
+  if (j == 0)
+    path_len += i;
+  // Normalize the total cost by the number of cells on the path.
+  return cost[(N - 1) * M + M - 1] / path_len;
+}
+// CPU kernel of torchdtw::dtw: runs dtw() over a 2D tensor and returns the result in a 0-dim tensor
+// created from `distances` with new_empty.
+// NOTE(review): the data is read as float without a dtype check here; only the Python fake kernel
+// checks for float32. Confirm callers always pass float32.
+Tensor dtw_cpu(const Tensor distances) {
+  // size(1) and stride(1) below only make sense for a 2D input; reject anything else up front.
+  STD_TORCH_CHECK(distances.dim() == 2, "distances must be a 2D tensor");
+  float result =
+      dtw(reinterpret_cast<const float*>(distances.data_ptr()),
+          distances.size(0),
+          distances.size(1),
+          distances.stride(0),
+          distances.stride(1));
+  Tensor out = torch::stable::new_empty(distances, {});
+  torch::stable::fill_(out, result);
+  return out;
+}
+// CPU kernel of torchdtw::dtw_batch.
+// For every pair (i, j), runs dtw() on the top-left sx[i] x sy[j] corner of distances[i][j] and
+// stores the result in out[i][j]. With `symmetric`, only pairs with i < j are computed and mirrored
+// into out[j][i]; the diagonal keeps the zeros written by new_zeros.
+// All inputs are validated before the parallel loop, because an exception must not leave an OpenMP
+// parallel region.
+// NOTE(review): `distances` is read as float and sx/sy as int64_t without a dtype check here; only
+// the Python fake kernel checks dtypes. Confirm callers always pass float32 / int64.
+Tensor dtw_batch_cpu(const Tensor distances, const Tensor sx, const Tensor sy, bool symmetric) {
+  STD_TORCH_CHECK(distances.dim() == 4, "distances must be a 4D tensor");
+  STD_TORCH_CHECK(sx.dim() == 1 && sy.dim() == 1, "sx and sy must be 1D tensors");
+  const int64_t nx = distances.size(0);
+  const int64_t ny = distances.size(1);
+  STD_TORCH_CHECK(sx.size(0) == nx && sy.size(0) == ny, "sx and sy must hold one length per sequence");
+  // The mirrored write out[j][i] is only in bounds for a square output.
+  STD_TORCH_CHECK(!symmetric || nx == ny, "symmetric DTW requires distances.size(0) == distances.size(1)");
+  Tensor out = torch::stable::new_zeros(distances, {nx, ny});
+  if (nx == 0 || ny == 0) {
+    return out;
+  }
+  const int64_t batch_stride_x = distances.stride(0);
+  const int64_t batch_stride_y = distances.stride(1);
+  const int64_t stride_x = distances.stride(2);
+  const int64_t stride_y = distances.stride(3);
+  STD_TORCH_CHECK(stride_x > 0 && stride_y > 0, "Strides must be positive");
+  const float* distances_ptr = reinterpret_cast<const float*>(distances.data_ptr());
+  const int64_t* sx_ptr = reinterpret_cast<const int64_t*>(sx.data_ptr());
+  const int64_t* sy_ptr = reinterpret_cast<const int64_t*>(sy.data_ptr());
+  // sx and sy may be non-contiguous views: step through them with their own stride.
+  const int64_t sx_stride = sx.stride(0);
+  const int64_t sy_stride = sy.stride(0);
+  // Every length must select a non-empty corner that fits inside the distance matrices.
+  const int64_t max_x = distances.size(2);
+  const int64_t max_y = distances.size(3);
+  for (int64_t i = 0; i < nx; i++) {
+    const int64_t len = sx_ptr[i * sx_stride];
+    STD_TORCH_CHECK(len > 0 && len <= max_x, "sx values must be in [1, distances.size(2)]");
+  }
+  for (int64_t j = 0; j < ny; j++) {
+    const int64_t len = sy_ptr[j * sy_stride];
+    STD_TORCH_CHECK(len > 0 && len <= max_y, "sy values must be in [1, distances.size(3)]");
+  }
+  float* out_ptr = reinterpret_cast<float*>(out.data_ptr());
+  // Each (i, j) pair, and its mirror in the symmetric case, is written by exactly one iteration.
+  // The checks inside dtw() cannot fail here since the same conditions were verified above.
+#pragma omp parallel for schedule(dynamic)
+  for (int64_t i = 0; i < nx; i++) {
+    const int64_t start_j = symmetric ? i : 0;
+    for (int64_t j = start_j; j < ny; j++) {
+      if (symmetric && i == j)
+        continue;
+      out_ptr[i * ny + j] =
+          dtw(distances_ptr + i * batch_stride_x + j * batch_stride_y,
+              sx_ptr[i * sx_stride],
+              sy_ptr[j * sy_stride],
+              stride_x,
+              stride_y);
+      if (symmetric && i != j) {
+        out_ptr[j * ny + i] = out_ptr[i * ny + j];
+      }
+    }
+  }
+  return out;
+}
+// Boxed wrapper around dtw_cpu: reads the tensor argument from stack[0] and writes the result back
+// to stack[0]. num_args and num_outputs are not used.
+void boxed_dtw_cpu(StableIValue* stack, uint64_t num_args, uint64_t num_outputs) {
+  stack[0] = from(dtw_cpu(to<Tensor>(stack[0])));
+}
+// Boxed wrapper around dtw_batch_cpu: reads (distances, sx, sy, symmetric) from stack[0..3] and
+// writes the result back to stack[0]. num_args and num_outputs are not used.
+void boxed_dtw_batch_cpu(StableIValue* stack, uint64_t num_args, uint64_t num_outputs) {
+  stack[0] = from(dtw_batch_cpu(to<Tensor>(stack[0]), to<Tensor>(stack[1]), to<Tensor>(stack[2]), to<bool>(stack[3])));
+}
+// Declares the schemas of the two operators of the `torchdtw` library.
+STABLE_TORCH_LIBRARY(torchdtw, m) {
+  m.def("dtw(Tensor distances) -> Tensor");
+  m.def("dtw_batch(Tensor distances, Tensor sx, Tensor sy, bool symmetric) -> Tensor");
+}
+// Registers the boxed CPU kernels as the CPU implementations of both operators.
+STABLE_TORCH_LIBRARY_IMPL(torchdtw, CPU, m) {
+  m.impl("dtw", &boxed_dtw_cpu);
+  m.impl("dtw_batch", &boxed_dtw_batch_cpu);
+}
+} // namespace torchdtw

torchdtw-0.0.1/src/torchdtw/py.typed ADDED Viewed

File without changes

torchdtw-0.0.1/src/torchdtw.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,15 @@
+Metadata-Version: 2.4
+Name: torchdtw
+Version: 0.0.1
+Summary: Add your description here
+Author: Maxime Poli
+Author-email: CoML <dev@cognitive-ml.fr>
+License-Expression: MIT
+Keywords: machine learning
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+Requires-Dist: numpy>=1.26.4
+Requires-Dist: torch>=2.9.0
+# PyTorch DTW C++ extension

torchdtw-0.0.1/src/torchdtw.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,13 @@
+README.md
+pyproject.toml
+setup.py
+src/torchdtw/__init__.py
+src/torchdtw/py.typed
+src/torchdtw.egg-info/PKG-INFO
+src/torchdtw.egg-info/SOURCES.txt
+src/torchdtw.egg-info/dependency_links.txt
+src/torchdtw.egg-info/requires.txt
+src/torchdtw.egg-info/top_level.txt
+src/torchdtw/csrc/dtw.cpp
+tests/test_dtw.py
+tests/test_opcheck.py

torchdtw-0.0.1/src/torchdtw.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

torchdtw-0.0.1/src/torchdtw.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ numpy>=1.26.4
2	+ torch>=2.9.0

torchdtw-0.0.1/src/torchdtw.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ torchdtw

torchdtw-0.0.1/tests/test_dtw.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""Compare CPU and CUDA dtw implementations."""
+import pytest
+import torch
+from hypothesis import given, settings
+from hypothesis import strategies as st
+from torchdtw import dtw, dtw_batch
+# Tolerances given to torch.testing.assert_close when comparing CPU and CUDA outputs.
+rtol, atol = 0, 1e-9
+skipifnogpu = pytest.mark.skipif(not torch.cuda.is_available(), reason="No GPU available")
+# Hypothesis strategies: sequence length, batch size, lower bound of the values, and width of the value range.
+DIM, BATCH = st.integers(1, 1280), st.integers(1, 3)
+LOW, HIGH_MINUS_LOW = st.floats(-100, 100), st.floats(0.1, 100)
+def make_tensor(shape: tuple[int, ...], *, dtype: torch.dtype, low: float, high: float) -> torch.Tensor:
+    """Build a tensor for testing.
+    :param shape: Shape of the tensor to create.
+    :param dtype: Data type of the tensor to create.
+    :param low: Lower bound given to ``torch.testing.make_tensor``.
+    :param high: Upper bound given to ``torch.testing.make_tensor``.
+    :return: A CPU tensor. When ``low == high`` with an integer dtype, a tensor of ones is returned instead of
+        calling ``torch.testing.make_tensor``.
+    """
+    # NOTE(review): presumably torch.testing.make_tensor rejects an empty integer range; the callers in this
+    # file only hit this case with low == high == 1, for which ones is the only valid value.
+    if low == high and dtype == torch.long:
+        return torch.ones(shape, dtype=torch.long, device="cpu")
+    return torch.testing.make_tensor(shape, dtype=dtype, device="cpu", low=low, high=high)
+@skipifnogpu
+@given(x=DIM, y=DIM, low=LOW, high_minus_low=HIGH_MINUS_LOW)
+@settings(deadline=None)
+def test_dtw(x: int, y: int, low: float, high_minus_low: float) -> None:
+    """Compare the output of dtw between CPU and GPU implementations."""
+    # The upper bound is built as low + width so that it is always strictly above the lower bound.
+    d = make_tensor((x, y), dtype=torch.float32, low=low, high=high_minus_low + low)
+    torch.testing.assert_close(dtw(d), dtw(d.cuda()).cpu(), rtol=rtol, atol=atol)
+@skipifnogpu
+@given(n=BATCH, x=DIM, low=LOW, high_minus_low=HIGH_MINUS_LOW)
+@settings(deadline=None)
+def test_dtw_batch_symmetric(n: int, x: int, low: float, high_minus_low: float) -> None:
+    """Compare the output of dtw_batch between CPU and GPU implementations, symmetric case."""
+    d = make_tensor((n, n, x, x), dtype=torch.float32, low=low, high=high_minus_low + low)
+    sx = make_tensor((n,), dtype=torch.long, low=1, high=x)
+    # Overwrite the upper-triangular blocks with the corresponding lower-triangular ones so that
+    # d[i, j] and d[j, i] hold the same matrix.
+    i, j = torch.triu_indices(n, n)
+    d[i, j] = d[j, i]
+    # The same lengths are used for both batches.
+    torch.testing.assert_close(
+        dtw_batch(d, sx, sx, symmetric=True),
+        dtw_batch(d.cuda(), sx.cuda(), sx.cuda(), symmetric=True).cpu(),
+        rtol=rtol,
+        atol=atol,
+    )
+@skipifnogpu
+@given(n=BATCH, m=BATCH, x=DIM, y=DIM, low=LOW, high_minus_low=HIGH_MINUS_LOW)
+@settings(deadline=None)
+def test_dtw_batch_not_symmetric(n: int, m: int, x: int, y: int, low: float, high_minus_low: float) -> None:
+    """Compare the output of dtw_batch between CPU and GPU implementations, non symmetric case."""
+    d = make_tensor((n, m, x, y), dtype=torch.float32, low=low, high=high_minus_low + low)
+    # One length per sequence of each batch, drawn independently for the two batches.
+    sx = make_tensor((n,), dtype=torch.long, low=1, high=x)
+    sy = make_tensor((m,), dtype=torch.long, low=1, high=y)
+    torch.testing.assert_close(
+        dtw_batch(d, sx, sy, symmetric=False),
+        dtw_batch(d.cuda(), sx.cuda(), sy.cuda(), symmetric=False).cpu(),
+        rtol=rtol,
+        atol=atol,
+    )

torchdtw-0.0.1/tests/test_opcheck.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""Check for compatibility with torch.compile."""
+import torch
+from hypothesis import given, settings
+from hypothesis import strategies as st
+from torch.library import opcheck
+import torchdtw  # noqa: F401 # Need to import it to register dtw operation
+# Hypothesis strategies: sequence length, batch size, lower bound of the values, and width of the value range.
+DIM, BATCH = st.integers(1, 1280), st.integers(1, 3)
+LOW, HIGH_MINUS_LOW = st.floats(-100, 100), st.floats(0.1, 100)
+# The CUDA variants of the checks below only run when a GPU is available.
+CUDA_AVAILABLE = torch.cuda.is_available()
+def make_tensor(shape: tuple[int, ...], *, dtype: torch.dtype, low: float, high: float) -> torch.Tensor:
+    """Build a tensor for testing.
+    :param shape: Shape of the tensor to create.
+    :param dtype: Data type of the tensor to create.
+    :param low: Lower bound given to ``torch.testing.make_tensor``.
+    :param high: Upper bound given to ``torch.testing.make_tensor``.
+    :return: A CPU tensor. When ``low == high`` with an integer dtype, a tensor of ones is returned instead of
+        calling ``torch.testing.make_tensor``.
+    """
+    # NOTE(review): presumably torch.testing.make_tensor rejects an empty integer range; the callers in this
+    # file only hit this case with low == high == 1, for which ones is the only valid value.
+    if low == high and dtype == torch.long:
+        return torch.ones(shape, dtype=torch.long, device="cpu")
+    return torch.testing.make_tensor(shape, dtype=dtype, device="cpu", low=low, high=high)
+@given(x=DIM, y=DIM, low=LOW, high_minus_low=HIGH_MINUS_LOW)
+@settings(deadline=None)
+def test_opcheck_dtw(x: int, y: int, low: float, high_minus_low: float) -> None:
+    """Verify that dtw can be torch compiled."""
+    sample = make_tensor((x, y), dtype=torch.float32, low=low, high=high_minus_low + low)
+    # Run opcheck on the CPU sample, then on its CUDA copy when a GPU is available.
+    opcheck(torch.ops.torchdtw.dtw.default, (sample,))
+    if CUDA_AVAILABLE:
+        opcheck(torch.ops.torchdtw.dtw.default, (sample.cuda(),))
+@given(n=BATCH, x=DIM, low=LOW, high_minus_low=HIGH_MINUS_LOW)
+@settings(deadline=None)
+def test_opcheck_dtw_batch_symmetric(n: int, x: int, low: float, high_minus_low: float) -> None:
+    """Verify that dtw_batch can be torch compiled, with symmetric input."""
+    sample = make_tensor((n, n, x, x), dtype=torch.float32, low=low, high=high_minus_low + low)
+    sx = make_tensor((n,), dtype=torch.long, low=1, high=x)
+    # Overwrite the upper-triangular blocks with the corresponding lower-triangular ones so that
+    # sample[i, j] and sample[j, i] hold the same matrix.
+    i, j = torch.triu_indices(n, n)
+    sample[i, j] = sample[j, i]
+    opcheck(torch.ops.torchdtw.dtw_batch.default, (sample, sx, sx), {"symmetric": True})
+    if CUDA_AVAILABLE:
+        opcheck(torch.ops.torchdtw.dtw_batch.default, (sample.cuda(), sx.cuda(), sx.cuda()), {"symmetric": True})
+@given(n=BATCH, m=BATCH, x=DIM, y=DIM, low=LOW, high_minus_low=HIGH_MINUS_LOW)
+@settings(deadline=None)
+def test_opcheck_dtw_batch_not_symmetric(n: int, m: int, x: int, y: int, low: float, high_minus_low: float) -> None:
+    """Verify that dtw_batch can be torch compiled, with non symmetric input."""
+    sample = make_tensor((n, m, x, y), dtype=torch.float32, low=low, high=high_minus_low + low)
+    # One length per sequence of each batch, drawn independently for the two batches.
+    sx = make_tensor((n,), dtype=torch.long, low=1, high=x)
+    sy = make_tensor((m,), dtype=torch.long, low=1, high=y)
+    opcheck(torch.ops.torchdtw.dtw_batch.default, (sample, sx, sy), {"symmetric": False})
+    if CUDA_AVAILABLE:
+        opcheck(torch.ops.torchdtw.dtw_batch.default, (sample.cuda(), sx.cuda(), sy.cuda()), {"symmetric": False})