typeseg 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {typeseg-0.2.0 → typeseg-0.2.2}/PKG-INFO +3 -3
  2. {typeseg-0.2.0 → typeseg-0.2.2}/pyproject.toml +15 -2
  3. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/__init__.py +7 -1
  4. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_cupy_backend.py +61 -12
  5. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_runtime.py +33 -0
  6. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/PKG-INFO +3 -3
  7. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/requires.txt +3 -1
  8. {typeseg-0.2.0 → typeseg-0.2.2}/LICENSE +0 -0
  9. {typeseg-0.2.0 → typeseg-0.2.2}/README.md +0 -0
  10. {typeseg-0.2.0 → typeseg-0.2.2}/setup.cfg +0 -0
  11. {typeseg-0.2.0 → typeseg-0.2.2}/tests/test_cupy_parity.py +0 -0
  12. {typeseg-0.2.0 → typeseg-0.2.2}/tests/test_distribution.py +0 -0
  13. {typeseg-0.2.0 → typeseg-0.2.2}/tests/test_postprocess_perf.py +0 -0
  14. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/__main__.py +0 -0
  15. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_cli.py +0 -0
  16. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_color.py +0 -0
  17. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_mamba_kernel.py +0 -0
  18. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_numpy_backend.py +0 -0
  19. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_onnx_backend.py +0 -0
  20. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_options.py +0 -0
  21. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_postprocess.py +0 -0
  22. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_segmentation.py +0 -0
  23. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/_tokenize.py +0 -0
  24. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/data/mamba_al.npz +0 -0
  25. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/data/mamba_al.onnx +0 -0
  26. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/data/manifest.json +0 -0
  27. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/data/unet_al.npz +0 -0
  28. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg/data/unet_al.onnx +0 -0
  29. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/SOURCES.txt +0 -0
  30. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/dependency_links.txt +0 -0
  31. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/entry_points.txt +0 -0
  32. {typeseg-0.2.0 → typeseg-0.2.2}/typeseg.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: typeseg
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Fine-grained, character-level content-type segmentation for textual inputs (U-Net + Mamba).
5
5
  Author: Martin Dallinger
6
6
  License-Expression: Apache-2.0
@@ -17,8 +17,8 @@ License-File: LICENSE
17
17
  Requires-Dist: numpy>=1.21
18
18
  Requires-Dist: onnxruntime>=1.17
19
19
  Provides-Extra: gpu
20
- Requires-Dist: onnxruntime-gpu>=1.17; extra == "gpu"
21
- Requires-Dist: cupy-cuda12x>=13; extra == "gpu"
20
+ Requires-Dist: onnxruntime-gpu>=1.17; platform_system != "Darwin" and extra == "gpu"
21
+ Requires-Dist: cupy-cuda12x[ctk]>=13; platform_system != "Darwin" and extra == "gpu"
22
22
  Dynamic: license-file
23
23
 
24
24
  # TypeSeg
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "typeseg"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Fine-grained, character-level content-type segmentation for textual inputs (U-Net + Mamba)."
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -32,7 +32,20 @@ segcat = "typeseg._cli:main"
32
32
  # as a custom CUDA kernel (one thread per channel; the ONNX `Scan` op is
33
33
  # launch-bound and slow on GPU). The base install already ships the fast ONNX CPU
34
34
  # backend, so this extra only adds the GPU pieces.
35
- gpu = ["onnxruntime-gpu>=1.17", "cupy-cuda12x>=13"]
35
+ #
36
+ # cupy-cuda12x[ctk]: CuPy JIT-compiles its kernels via nvrtc at runtime, which needs
37
+ # the CUDA toolkit *headers*. The plain cupy wheel ships nvrtc but not the headers,
38
+ # so on a box without a system CUDA install the Mamba kernel fails to compile at
39
+ # first use; the `[ctk]` extra pulls the nvidia-*-cu12 header/lib wheels so it works
40
+ # out of the box. (The runtime also degrades to CPU + warns if headers are missing.)
41
+ #
42
+ # Gated to non-macOS: neither onnxruntime-gpu nor cupy-cuda12x publishes macOS
43
+ # wheels (and Apple has no NVIDIA CUDA), so on a Mac `typeseg[gpu]` resolves to the
44
+ # base package instead of failing with ResolutionImpossible.
45
+ gpu = [
46
+ "onnxruntime-gpu>=1.17; platform_system != 'Darwin'",
47
+ "cupy-cuda12x[ctk]>=13; platform_system != 'Darwin'",
48
+ ]
36
49
 
37
50
  [tool.setuptools]
38
51
  packages = ["typeseg"]
@@ -17,7 +17,13 @@ from ._runtime import backend_info, run as _run
17
17
  from ._segmentation import Segment, Segmentation
18
18
 
19
19
  __all__ = ["fast", "precise", "Options", "Segment", "Segmentation", "backend_info", "__version__"]
20
- __version__ = "0.1.0"
20
+
21
+ try:
22
+ from importlib.metadata import PackageNotFoundError, version as _pkg_version
23
+
24
+ __version__ = _pkg_version("typeseg")
25
+ except (ImportError, PackageNotFoundError): # not installed / running from source tree
26
+ __version__ = "0.0.0"
21
27
 
22
28
 
23
29
  def fast(text: str, options: Optional[Options] = None) -> Segmentation:
@@ -14,6 +14,7 @@ follows the same contract as ``_onnx_backend``: ``numpy`` forces it off,
14
14
  from __future__ import annotations
15
15
 
16
16
  import json
17
+ import warnings
17
18
  from functools import lru_cache
18
19
  from typing import Optional
19
20
 
@@ -121,33 +122,81 @@ def _has_device() -> bool:
121
122
  return int(cp.cuda.runtime.getDeviceCount()) > 0
122
123
 
123
124
 
125
+ @lru_cache(maxsize=1)
126
+ def _probe_compile():
127
+ """Confirm CuPy can JIT-compile a kernel. Returns ``(ok, error_or_None)``.
128
+
129
+ CuPy compiles every elementwise/raw kernel at runtime via nvrtc, which needs the
130
+ CUDA toolkit headers. The pip ``cupy-cuda12x`` wheel ships nvrtc but NOT those
131
+ headers, so on a machine without a system CUDA toolkit (or the ``[ctk]`` header
132
+ wheels) compilation raises at first use -- e.g. ``RuntimeError: Failed to find
133
+ CUDA headers``. A bare device check passes there, so without this probe the auto
134
+ router picks CuPy and then crashes mid-inference instead of falling back. The
135
+ ``astype`` forces a real nvrtc compile; the result is cached so a broken box pays
136
+ it once. Never raises -- callers branch on the returned flag.
137
+ """
138
+ try:
139
+ cp = _import_cupy()
140
+ cp.arange(4, dtype=cp.int32).astype(cp.float32).sum().item() # forces nvrtc compile
141
+ return True, None
142
+ except Exception as exc: # nvrtc/header/driver init failure
143
+ return False, exc
144
+
145
+
146
+ _warned_compile_fail = False
147
+
148
+
149
+ def _warn_compile_fail_once(exc: Exception) -> None:
150
+ global _warned_compile_fail
151
+ if _warned_compile_fail:
152
+ return
153
+ _warned_compile_fail = True
154
+ warnings.warn(
155
+ "typeseg: a CUDA device was found but CuPy could not compile its GPU kernels "
156
+ f"({type(exc).__name__}: {exc}); falling back to the CPU (ONNX) backend for "
157
+ "precise(). CuPy JIT-compiles kernels and needs the CUDA toolkit headers -- "
158
+ "install them with: pip install \"cupy-cuda12x[ctk]\" (or set the CUDA_PATH "
159
+ "environment variable to a system CUDA 12.x install). Silence with "
160
+ "TYPESEG_BACKEND=numpy or Python's warnings filters.",
161
+ RuntimeWarning,
162
+ stacklevel=3,
163
+ )
164
+
165
+
124
166
  def available() -> bool:
125
167
  """True if the CuPy GPU Mamba path should be used.
126
168
 
127
- Auto mode: True when cupy imports and a CUDA device is present. With
128
- ``TYPESEG_BACKEND=gpu``/``cuda`` a missing CuPy or device is a hard error.
129
- With ``TYPESEG_BACKEND=numpy`` this is always off.
169
+ Auto mode: True when cupy imports, a CUDA device is present, AND CuPy can
170
+ actually compile a kernel (headers available). Any of those failing falls back
171
+ to ONNX/numpy (a one-time warning if a device was present but kernels won't
172
+ compile). With ``TYPESEG_BACKEND=gpu``/``cuda`` any failure is a hard error;
173
+ with ``TYPESEG_BACKEND=numpy`` this is always off.
130
174
  """
131
175
  mode = _mode()
132
176
  if mode == "numpy":
133
177
  return False
178
+ stage = "device"
134
179
  try:
135
180
  if not _has_device():
136
181
  raise RuntimeError("no CUDA device visible to CuPy")
182
+ if not _data("mamba_al.npz").is_file():
183
+ raise RuntimeError("bundled Mamba weights are missing")
184
+ stage = "compile"
185
+ ok, perr = _probe_compile()
186
+ if not ok:
187
+ raise perr if perr is not None else RuntimeError("CuPy kernel compilation failed")
137
188
  except Exception as exc:
138
189
  if _require_gpu():
139
190
  raise RuntimeError(
140
- f"TYPESEG_BACKEND={mode} requires the GPU backend, but CuPy could not "
141
- f"initialise a CUDA device ({exc}). Install with: pip install \"typeseg[gpu]\" "
142
- "and ensure CUDA 12.x is on the library path."
191
+ f"TYPESEG_BACKEND={mode} requires the CuPy GPU backend, but it could not "
192
+ f"initialise ({exc}). Install with: pip install \"typeseg[gpu]\" (which bundles "
193
+ "the CUDA toolkit headers CuPy needs to JIT its kernels); with a system CUDA "
194
+ "install, set CUDA_PATH."
143
195
  ) from exc
196
+ if stage == "compile": # device present but headers missing -> the actionable case
197
+ _warn_compile_fail_once(exc)
144
198
  return False
145
- try:
146
- return _data("mamba_al.npz").is_file()
147
- except Exception:
148
- if _require_gpu():
149
- raise
150
- return False
199
+ return True
151
200
 
152
201
 
153
202
  @lru_cache(maxsize=1)
@@ -2,6 +2,8 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import json
5
+ import platform
6
+ import warnings
5
7
  from functools import lru_cache
6
8
  from typing import Dict, List, Optional, Tuple
7
9
 
@@ -30,6 +32,36 @@ except ImportError: # pragma: no cover
30
32
 
31
33
  DEFAULT_CHUNK = 1536
32
34
 
35
+ _warned_no_gpu = False
36
+
37
+
38
+ def _maybe_warn_no_gpu() -> None:
39
+ """On macOS, warn once that GPU backends are unavailable (CPU/ONNX only).
40
+
41
+ Wheel installs run no install-time hook, so we cannot print during
42
+ ``pip install``; this is the first runtime opportunity to tell a Mac user why
43
+ ``typeseg[gpu]`` resolved to the plain CPU package. Only fires in auto mode —
44
+ an explicit ``TYPESEG_BACKEND`` (numpy forced, or gpu/cuda fail-fast) is the
45
+ user's own choice and is left to the backend to honour. Suppressible via the
46
+ standard ``warnings`` filters or ``TYPESEG_BACKEND=numpy``.
47
+ """
48
+ global _warned_no_gpu
49
+ if _warned_no_gpu or platform.system() != "Darwin":
50
+ return
51
+ if ob._mode(): # explicit backend choice — don't second-guess it
52
+ _warned_no_gpu = True
53
+ return
54
+ _warned_no_gpu = True
55
+ warnings.warn(
56
+ "typeseg: running on CPU (ONNX). GPU backends are unavailable on macOS — "
57
+ "neither onnxruntime-gpu nor cupy-cuda12x publishes macOS wheels and Apple "
58
+ "has no NVIDIA CUDA, so 'typeseg[gpu]' installs the same CPU package. "
59
+ "This is expected; the CPU ONNX backend is fast. Silence this warning with "
60
+ "TYPESEG_BACKEND=numpy or Python's warnings filters.",
61
+ RuntimeWarning,
62
+ stacklevel=2,
63
+ )
64
+
33
65
 
34
66
  @lru_cache(maxsize=1)
35
67
  def _manifest() -> dict:
@@ -165,6 +197,7 @@ def _empty(text: str) -> Segmentation:
165
197
  def run(model: str, text: str, options: Optional[Options]) -> Segmentation:
166
198
  if options is None:
167
199
  options = Options()
200
+ _maybe_warn_no_gpu()
168
201
  if not text:
169
202
  return _empty(text)
170
203
  byte_tokens = text_to_bytes(text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: typeseg
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Fine-grained, character-level content-type segmentation for textual inputs (U-Net + Mamba).
5
5
  Author: Martin Dallinger
6
6
  License-Expression: Apache-2.0
@@ -17,8 +17,8 @@ License-File: LICENSE
17
17
  Requires-Dist: numpy>=1.21
18
18
  Requires-Dist: onnxruntime>=1.17
19
19
  Provides-Extra: gpu
20
- Requires-Dist: onnxruntime-gpu>=1.17; extra == "gpu"
21
- Requires-Dist: cupy-cuda12x>=13; extra == "gpu"
20
+ Requires-Dist: onnxruntime-gpu>=1.17; platform_system != "Darwin" and extra == "gpu"
21
+ Requires-Dist: cupy-cuda12x[ctk]>=13; platform_system != "Darwin" and extra == "gpu"
22
22
  Dynamic: license-file
23
23
 
24
24
  # TypeSeg
@@ -2,5 +2,7 @@ numpy>=1.21
2
2
  onnxruntime>=1.17
3
3
 
4
4
  [gpu]
5
+
6
+ [gpu:platform_system != "Darwin"]
5
7
  onnxruntime-gpu>=1.17
6
- cupy-cuda12x>=13
8
+ cupy-cuda12x[ctk]>=13
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes