PyPI - typeseg - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

typeseg 0.2.0 tar.gz → 0.2.2 tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{typeseg-0.2.0 → typeseg-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: typeseg
-Version: 0.2.0
+Version: 0.2.2
 Summary: Fine-grained, character-level content-type segmentation for textual inputs (U-Net + Mamba).
 Author: Martin Dallinger
 License-Expression: Apache-2.0
@@ -17,8 +17,8 @@ License-File: LICENSE
 Requires-Dist: numpy>=1.21
 Requires-Dist: onnxruntime>=1.17
 Provides-Extra: gpu
-Requires-Dist: onnxruntime-gpu>=1.17; extra == "gpu"
-Requires-Dist: cupy-cuda12x>=13; extra == "gpu"
+Requires-Dist: onnxruntime-gpu>=1.17; platform_system != "Darwin" and extra == "gpu"
+Requires-Dist: cupy-cuda12x[ctk]>=13; platform_system != "Darwin" and extra == "gpu"
 Dynamic: license-file
 # TypeSeg

{typeseg-0.2.0 → typeseg-0.2.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "typeseg"
-version = "0.2.0"
+version = "0.2.2"
 description = "Fine-grained, character-level content-type segmentation for textual inputs (U-Net + Mamba)."
 readme = "README.md"
 license = "Apache-2.0"
@@ -32,7 +32,20 @@ segcat = "typeseg._cli:main"
 # as a custom CUDA kernel (one thread per channel; the ONNX `Scan` op is
 # launch-bound and slow on GPU). The base install already ships the fast ONNX CPU
 # backend, so this extra only adds the GPU pieces.
-gpu = ["onnxruntime-gpu>=1.17", "cupy-cuda12x>=13"]
+#
+# cupy-cuda12x[ctk]: CuPy JIT-compiles its kernels via nvrtc at runtime, which needs
+# the CUDA toolkit *headers*. The plain cupy wheel ships nvrtc but not the headers,
+# so on a box without a system CUDA install the Mamba kernel fails to compile at
+# first use; the `[ctk]` extra pulls the nvidia-*-cu12 header/lib wheels so it works
+# out of the box. (The runtime also degrades to CPU + warns if headers are missing.)
+#
+# Gated to non-macOS: neither onnxruntime-gpu nor cupy-cuda12x publishes macOS
+# wheels (and Apple has no NVIDIA CUDA), so on a Mac `typeseg[gpu]` resolves to the
+# base package instead of failing with ResolutionImpossible.
+gpu = [
+    "onnxruntime-gpu>=1.17; platform_system != 'Darwin'",
+    "cupy-cuda12x[ctk]>=13; platform_system != 'Darwin'",
+]
 [tool.setuptools]
 packages = ["typeseg"]

{typeseg-0.2.0 → typeseg-0.2.2}/typeseg/__init__.py RENAMED Viewed

@@ -17,7 +17,13 @@ from ._runtime import backend_info, run as _run
 from ._segmentation import Segment, Segmentation
 __all__ = ["fast", "precise", "Options", "Segment", "Segmentation", "backend_info", "__version__"]
-__version__ = "0.1.0"
+try:
+    from importlib.metadata import PackageNotFoundError, version as _pkg_version
+    __version__ = _pkg_version("typeseg")
+except (ImportError, PackageNotFoundError):  # not installed / running from source tree
+    __version__ = "0.0.0"
 def fast(text: str, options: Optional[Options] = None) -> Segmentation:

{typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_cupy_backend.py RENAMED Viewed

@@ -14,6 +14,7 @@ follows the same contract as ``_onnx_backend``: ``numpy`` forces it off,
 from __future__ import annotations
 import json
+import warnings
 from functools import lru_cache
 from typing import Optional
@@ -121,33 +122,81 @@ def _has_device() -> bool:
     return int(cp.cuda.runtime.getDeviceCount()) > 0
+@lru_cache(maxsize=1)
+def _probe_compile():
+    """Confirm CuPy can JIT-compile a kernel. Returns ``(ok, error_or_None)``.
+    CuPy compiles every elementwise/raw kernel at runtime via nvrtc, which needs the
+    CUDA toolkit headers. The pip ``cupy-cuda12x`` wheel ships nvrtc but NOT those
+    headers, so on a machine without a system CUDA toolkit (or the ``[ctk]`` header
+    wheels) compilation raises at first use -- e.g. ``RuntimeError: Failed to find
+    CUDA headers``. A bare device check passes there, so without this probe the auto
+    router picks CuPy and then crashes mid-inference instead of falling back. The
+    ``astype`` forces a real nvrtc compile; the result is cached so a broken box pays
+    it once. Never raises -- callers branch on the returned flag.
+    """
+    try:
+        cp = _import_cupy()
+        cp.arange(4, dtype=cp.int32).astype(cp.float32).sum().item()  # forces nvrtc compile
+        return True, None
+    except Exception as exc:  # nvrtc/header/driver init failure
+        return False, exc
+_warned_compile_fail = False
+def _warn_compile_fail_once(exc: Exception) -> None:
+    global _warned_compile_fail
+    if _warned_compile_fail:
+        return
+    _warned_compile_fail = True
+    warnings.warn(
+        "typeseg: a CUDA device was found but CuPy could not compile its GPU kernels "
+        f"({type(exc).__name__}: {exc}); falling back to the CPU (ONNX) backend for "
+        "precise(). CuPy JIT-compiles kernels and needs the CUDA toolkit headers -- "
+        "install them with: pip install \"cupy-cuda12x[ctk]\" (or set the CUDA_PATH "
+        "environment variable to a system CUDA 12.x install). Silence with "
+        "TYPESEG_BACKEND=numpy or Python's warnings filters.",
+        RuntimeWarning,
+        stacklevel=3,
+    )
 def available() -> bool:
     """True if the CuPy GPU Mamba path should be used.
-    Auto mode: True when cupy imports and a CUDA device is present. With
-    ``TYPESEG_BACKEND=gpu``/``cuda`` a missing CuPy or device is a hard error.
-    With ``TYPESEG_BACKEND=numpy`` this is always off.
+    Auto mode: True when cupy imports, a CUDA device is present, AND CuPy can
+    actually compile a kernel (headers available). Any of those failing falls back
+    to ONNX/numpy (a one-time warning if a device was present but kernels won't
+    compile). With ``TYPESEG_BACKEND=gpu``/``cuda`` any failure is a hard error;
+    with ``TYPESEG_BACKEND=numpy`` this is always off.
     """
     mode = _mode()
     if mode == "numpy":
         return False
+    stage = "device"
     try:
         if not _has_device():
             raise RuntimeError("no CUDA device visible to CuPy")
+        if not _data("mamba_al.npz").is_file():
+            raise RuntimeError("bundled Mamba weights are missing")
+        stage = "compile"
+        ok, perr = _probe_compile()
+        if not ok:
+            raise perr if perr is not None else RuntimeError("CuPy kernel compilation failed")
     except Exception as exc:
         if _require_gpu():
             raise RuntimeError(
-                f"TYPESEG_BACKEND={mode} requires the GPU backend, but CuPy could not "
-                f"initialise a CUDA device ({exc}). Install with: pip install \"typeseg[gpu]\" "
-                "and ensure CUDA 12.x is on the library path."
+                f"TYPESEG_BACKEND={mode} requires the CuPy GPU backend, but it could not "
+                f"initialise ({exc}). Install with: pip install \"typeseg[gpu]\" (which bundles "
+                "the CUDA toolkit headers CuPy needs to JIT its kernels); with a system CUDA "
+                "install, set CUDA_PATH."
             ) from exc
+        if stage == "compile":  # device present but headers missing -> the actionable case
+            _warn_compile_fail_once(exc)
         return False
-    try:
-        return _data("mamba_al.npz").is_file()
-    except Exception:
-        if _require_gpu():
-            raise
-        return False
+    return True
 @lru_cache(maxsize=1)

{typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_runtime.py RENAMED Viewed

@@ -2,6 +2,8 @@
 from __future__ import annotations
 import json
+import platform
+import warnings
 from functools import lru_cache
 from typing import Dict, List, Optional, Tuple
@@ -30,6 +32,36 @@ except ImportError:  # pragma: no cover
 DEFAULT_CHUNK = 1536
+_warned_no_gpu = False
+def _maybe_warn_no_gpu() -> None:
+    """On macOS, warn once that GPU backends are unavailable (CPU/ONNX only).
+    Wheel installs run no install-time hook, so we cannot print during
+    ``pip install``; this is the first runtime opportunity to tell a Mac user why
+    ``typeseg[gpu]`` resolved to the plain CPU package. Only fires in auto mode —
+    an explicit ``TYPESEG_BACKEND`` (numpy forced, or gpu/cuda fail-fast) is the
+    user's own choice and is left to the backend to honour. Suppressible via the
+    standard ``warnings`` filters or ``TYPESEG_BACKEND=numpy``.
+    """
+    global _warned_no_gpu
+    if _warned_no_gpu or platform.system() != "Darwin":
+        return
+    if ob._mode():  # explicit backend choice — don't second-guess it
+        _warned_no_gpu = True
+        return
+    _warned_no_gpu = True
+    warnings.warn(
+        "typeseg: running on CPU (ONNX). GPU backends are unavailable on macOS — "
+        "neither onnxruntime-gpu nor cupy-cuda12x publishes macOS wheels and Apple "
+        "has no NVIDIA CUDA, so 'typeseg[gpu]' installs the same CPU package. "
+        "This is expected; the CPU ONNX backend is fast. Silence this warning with "
+        "TYPESEG_BACKEND=numpy or Python's warnings filters.",
+        RuntimeWarning,
+        stacklevel=2,
+    )
 @lru_cache(maxsize=1)
 def _manifest() -> dict:
@@ -165,6 +197,7 @@ def _empty(text: str) -> Segmentation:
 def run(model: str, text: str, options: Optional[Options]) -> Segmentation:
     if options is None:
         options = Options()
+    _maybe_warn_no_gpu()
     if not text:
         return _empty(text)
     byte_tokens = text_to_bytes(text)

{typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: typeseg
-Version: 0.2.0
+Version: 0.2.2
 Summary: Fine-grained, character-level content-type segmentation for textual inputs (U-Net + Mamba).
 Author: Martin Dallinger
 License-Expression: Apache-2.0
@@ -17,8 +17,8 @@ License-File: LICENSE
 Requires-Dist: numpy>=1.21
 Requires-Dist: onnxruntime>=1.17
 Provides-Extra: gpu
-Requires-Dist: onnxruntime-gpu>=1.17; extra == "gpu"
-Requires-Dist: cupy-cuda12x>=13; extra == "gpu"
+Requires-Dist: onnxruntime-gpu>=1.17; platform_system != "Darwin" and extra == "gpu"
+Requires-Dist: cupy-cuda12x[ctk]>=13; platform_system != "Darwin" and extra == "gpu"
 Dynamic: license-file
 # TypeSeg

{typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/requires.txt RENAMED Viewed

@@ -2,5 +2,7 @@ numpy>=1.21
 onnxruntime>=1.17
 [gpu]
+[gpu:platform_system != "Darwin"]
 onnxruntime-gpu>=1.17
-cupy-cuda12x>=13
+cupy-cuda12x[ctk]>=13