spacy-accelerate 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. spacy_accelerate-0.3.0/.claude/settings.local.json +11 -0
  2. spacy_accelerate-0.3.0/.dockerignore +20 -0
  3. spacy_accelerate-0.3.0/.gitignore +88 -0
  4. spacy_accelerate-0.3.0/LICENSE +21 -0
  5. spacy_accelerate-0.3.0/Makefile +52 -0
  6. spacy_accelerate-0.3.0/PKG-INFO +341 -0
  7. spacy_accelerate-0.3.0/README.md +296 -0
  8. spacy_accelerate-0.3.0/benchmarks/README.md +109 -0
  9. spacy_accelerate-0.3.0/benchmarks/benchmark.py +806 -0
  10. spacy_accelerate-0.3.0/benchmarks/check_tensorrt.py +128 -0
  11. spacy_accelerate-0.3.0/benchmarks/dataset_loader.py +394 -0
  12. spacy_accelerate-0.3.0/benchmarks/docker/Dockerfile +31 -0
  13. spacy_accelerate-0.3.0/benchmarks/docker/run.sh +21 -0
  14. spacy_accelerate-0.3.0/benchmarks/requirements.txt +28 -0
  15. spacy_accelerate-0.3.0/docs/article.html +1385 -0
  16. spacy_accelerate-0.3.0/docs/article.md +479 -0
  17. spacy_accelerate-0.3.0/docs/article_en.html +1481 -0
  18. spacy_accelerate-0.3.0/docs/article_en.md +442 -0
  19. spacy_accelerate-0.3.0/pyproject.toml +101 -0
  20. spacy_accelerate-0.3.0/requirements.txt +36 -0
  21. spacy_accelerate-0.3.0/spacy_accelerate/__init__.py +35 -0
  22. spacy_accelerate-0.3.0/spacy_accelerate/__main__.py +95 -0
  23. spacy_accelerate-0.3.0/spacy_accelerate/_logging.py +44 -0
  24. spacy_accelerate-0.3.0/spacy_accelerate/_version.py +2 -0
  25. spacy_accelerate-0.3.0/spacy_accelerate/api.py +292 -0
  26. spacy_accelerate-0.3.0/spacy_accelerate/cache/__init__.py +5 -0
  27. spacy_accelerate-0.3.0/spacy_accelerate/cache/manager.py +262 -0
  28. spacy_accelerate-0.3.0/spacy_accelerate/config.py +150 -0
  29. spacy_accelerate-0.3.0/spacy_accelerate/conversion/__init__.py +5 -0
  30. spacy_accelerate-0.3.0/spacy_accelerate/conversion/exporter.py +254 -0
  31. spacy_accelerate-0.3.0/spacy_accelerate/conversion/fp16_converter.py +80 -0
  32. spacy_accelerate-0.3.0/spacy_accelerate/conversion/weight_mapper.py +268 -0
  33. spacy_accelerate-0.3.0/spacy_accelerate/core/__init__.py +12 -0
  34. spacy_accelerate-0.3.0/spacy_accelerate/core/discovery.py +143 -0
  35. spacy_accelerate-0.3.0/spacy_accelerate/core/patcher.py +55 -0
  36. spacy_accelerate-0.3.0/spacy_accelerate/core/validation.py +40 -0
  37. spacy_accelerate-0.3.0/spacy_accelerate/exceptions.py +37 -0
  38. spacy_accelerate-0.3.0/spacy_accelerate/runtime/__init__.py +9 -0
  39. spacy_accelerate-0.3.0/spacy_accelerate/runtime/cpu_proxy.py +133 -0
  40. spacy_accelerate-0.3.0/spacy_accelerate/runtime/io_binding_proxy.py +369 -0
  41. spacy_accelerate-0.3.0/spacy_accelerate/runtime/ort_proxy.py +142 -0
  42. spacy_accelerate-0.3.0/spacy_accelerate/runtime/providers.py +264 -0
  43. spacy_accelerate-0.3.0/spacy_accelerate/runtime/proxy_base.py +115 -0
  44. spacy_accelerate-0.3.0/tests/__init__.py +1 -0
  45. spacy_accelerate-0.3.0/tests/conftest.py +66 -0
  46. spacy_accelerate-0.3.0/tests/test_api.py +207 -0
  47. spacy_accelerate-0.3.0/tests/test_cache.py +134 -0
  48. spacy_accelerate-0.3.0/tests/test_discovery.py +36 -0
  49. spacy_accelerate-0.3.0/tests/test_patcher.py +45 -0
  50. spacy_accelerate-0.3.0/tests/test_providers.py +33 -0
  51. spacy_accelerate-0.3.0/tests/test_validation.py +49 -0
@@ -0,0 +1,11 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(git add:*)",
5
+ "Bash(python3:*)",
6
+ "Bash(git commit:*)",
7
+ "Bash(grep:*)",
8
+ "Bash(echo:*)"
9
+ ]
10
+ }
11
+ }
@@ -0,0 +1,20 @@
1
+ .git
2
+ .vscode
3
+ .agent
4
+ .cadence
5
+ .idea
6
+ .jupyter
7
+ .mypy_cache
8
+ .pytest_cache
9
+ .venv
10
+ venv
11
+ __pycache__
12
+ *.pyc
13
+ *.pyo
14
+ *.pyd
15
+ .DS_Store
16
+ data
17
+ notebooks
18
+ static
19
+ artifacts
20
+ benchmarks/results
@@ -0,0 +1,88 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # PyInstaller
28
+ *.manifest
29
+ *.spec
30
+
31
+ # Installer logs
32
+ pip-log.txt
33
+ pip-delete-this-directory.txt
34
+
35
+ # Unit test / coverage reports
36
+ htmlcov/
37
+ .tox/
38
+ .nox/
39
+ .coverage
40
+ .coverage.*
41
+ .cache
42
+ nosetests.xml
43
+ coverage.xml
44
+ *.cover
45
+ *.py,cover
46
+ .hypothesis/
47
+ .pytest_cache/
48
+
49
+ # Translations
50
+ *.mo
51
+ *.pot
52
+
53
+ # Environments
54
+ .env
55
+ .venv
56
+ env/
57
+ venv/
58
+ ENV/
59
+ env.bak/
60
+ venv.bak/
61
+
62
+ # IDE
63
+ .idea/
64
+ .vscode/
65
+ *.swp
66
+ *.swo
67
+ *~
68
+
69
+ # OS
70
+ .DS_Store
71
+ Thumbs.db
72
+
73
+ # ONNX models
74
+ *.onnx
75
+
76
+ # TensorRT engines
77
+ *.engine
78
+ *.plan
79
+ trt_engines/
80
+
81
+ # Cache
82
+ .cache/
83
+
84
+ # PyPI
85
+ .pypirc
86
+
87
+ # Benchmark artifacts
88
+ artifacts/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Siarhei Niaverau
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,52 @@
1
+ IMAGE_NAME ?= spacy-accelerate-bench
2
+ ARTIFACTS_DIR ?= $(CURDIR)/artifacts/benchmarks/docker
3
+ DOCKERFILE ?= benchmarks/docker/Dockerfile
4
+ MODELS ?=
5
+ BENCHMARK_ARGS ?=
6
+ PYTHON ?= .venv/bin/python
7
+ TEST_PYPI_REPOSITORY_URL ?= https://test.pypi.org/legacy/
8
+
9
+ .PHONY: build benchmark benchmark-full benchmark-ner-only run docker-build docker-benchmark docker-benchmark-full docker-benchmark-ner-only docker-benchmark-shell package package-check publish-testpypi
10
+
11
+ docker-build:
12
+ docker build -f $(DOCKERFILE) -t $(IMAGE_NAME) .
13
+
14
+ docker-benchmark:
15
+ mkdir -p $(ARTIFACTS_DIR)
16
+ docker run --rm --gpus all \
17
+ -v "$(ARTIFACTS_DIR):/artifacts" \
18
+ $(IMAGE_NAME) \
19
+ $(if $(strip $(MODELS)),--models $(MODELS),) \
20
+ $(BENCHMARK_ARGS)
21
+
22
+ docker-benchmark-full:
23
+ $(MAKE) docker-benchmark BENCHMARK_ARGS="$(BENCHMARK_ARGS)"
24
+
25
+ docker-benchmark-ner-only:
26
+ $(MAKE) docker-benchmark BENCHMARK_ARGS="--ner-only $(BENCHMARK_ARGS)"
27
+
28
+ docker-benchmark-shell:
29
+ mkdir -p $(ARTIFACTS_DIR)
30
+ docker run --rm -it --gpus all --entrypoint bash \
31
+ -v "$(ARTIFACTS_DIR):/artifacts" \
32
+ $(IMAGE_NAME)
33
+
34
+ build: docker-build
35
+
36
+ benchmark: docker-benchmark-full
37
+
38
+ benchmark-full: docker-benchmark-full
39
+
40
+ benchmark-ner-only: docker-benchmark-ner-only
41
+
42
+ run: docker-benchmark-full
43
+
44
+ package:
45
+ rm -rf dist/
46
+ $(PYTHON) -m build
47
+
48
+ package-check: package
49
+ $(PYTHON) -m twine check dist/*
50
+
51
+ publish-testpypi: package-check
52
+ $(PYTHON) -m twine upload --repository-url $(TEST_PYPI_REPOSITORY_URL) dist/*
@@ -0,0 +1,341 @@
1
+ Metadata-Version: 2.4
2
+ Name: spacy-accelerate
3
+ Version: 0.3.0
4
+ Summary: Accelerate spaCy transformers with TensorRT/ONNX Runtime
5
+ Project-URL: Homepage, https://github.com/nesergey/spacy-accelerate
6
+ Project-URL: Documentation, https://github.com/nesergey/spacy-accelerate#readme
7
+ Project-URL: Repository, https://github.com/nesergey/spacy-accelerate
8
+ Project-URL: Issues, https://github.com/nesergey/spacy-accelerate/issues
9
+ Author-email: Siarhei Niaverau <nesergey@gmail.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: acceleration,nlp,onnx,spacy,tensorrt,transformer
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Text Processing :: Linguistic
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: cupy-cuda12x==13.6.0
24
+ Requires-Dist: numpy==2.4.1
25
+ Requires-Dist: onnx==1.20.1
26
+ Requires-Dist: onnxruntime-gpu==1.23.2
27
+ Requires-Dist: onnxscript<0.2.0,>=0.1.0
28
+ Requires-Dist: spacy-transformers==1.3.9
29
+ Requires-Dist: spacy==3.8.2
30
+ Requires-Dist: tensorrt-cu12-bindings==10.15.1.29
31
+ Requires-Dist: tensorrt-cu12-libs==10.15.1.29
32
+ Requires-Dist: tensorrt-cu12==10.15.1.29
33
+ Requires-Dist: tensorrt==10.15.1.29
34
+ Requires-Dist: thinc==8.3.10
35
+ Requires-Dist: torch==2.5.1
36
+ Requires-Dist: transformers==4.41.2
37
+ Provides-Extra: dev
38
+ Requires-Dist: black>=24.0.0; extra == 'dev'
39
+ Requires-Dist: datasets>=2.0.0; extra == 'dev'
40
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
41
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
42
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
43
+ Requires-Dist: ruff>=0.3.0; extra == 'dev'
44
+ Description-Content-Type: text/markdown
45
+
46
+ # spacy-accelerate
47
+
48
+ Accelerate spaCy transformers with TensorRT/ONNX Runtime. Drop-in replacement for transformer-based spaCy pipelines with Docker-verified GPU benchmark workflows.
49
+
50
+ ## Installation
51
+
52
+ `spacy-accelerate` depends on a CUDA/TensorRT stack that must stay version-aligned.
53
+ The two failure modes we hit in practice were:
54
+
55
+ - a second dependency resolution pass upgrading parts of the stack to different CUDA majors;
56
+ - CUDA/TensorRT shared libraries from pip wheels not being visible to CuPy / ONNX Runtime.
57
+
58
+ The package now pins the runtime versions in `pyproject.toml`, and it configures
59
+ the pip-installed native libraries automatically on import.
60
+
61
+ Benchmark Docker files live under `benchmarks/docker/`, and canonical benchmark
62
+ artifacts are saved under `artifacts/benchmarks/docker/`. The root
63
+ `.dockerignore` is kept at repository level because Docker build context
64
+ filtering applies to the whole repo root.
65
+
66
+ ### PyPI install
67
+
68
+ ```bash
69
+ pip install spacy-accelerate
70
+ pip install --force-reinstall \
71
+ --extra-index-url https://pypi.nvidia.com \
72
+ onnxruntime-gpu==1.23.2
73
+ ```
74
+
75
+ The second command is still required to guarantee the TensorRT-enabled
76
+ `onnxruntime-gpu` build from NVIDIA.
77
+
78
+ ### Source / editable install
79
+
80
+ ```bash
81
+ pip install -r requirements.txt
82
+ pip install -e . --no-deps
83
+ ```
84
+
85
+ Do not run plain `pip install -e .` after that. It can trigger a second resolver
86
+ pass and replace the pinned CUDA 12 stack with newer incompatible packages.
87
+
88
+ **Verify the installation:**
89
+ ```bash
90
+ python -m spacy_accelerate
91
+ ```
92
+
93
+ You should see `TensorRT EP : OK` and `CUDA EP : OK` in the output.
94
+
95
+ **Requirements:**
96
+ - Python 3.11+
97
+ - CUDA 12.x
98
+ - NVIDIA GPU with TensorRT support (Ampere / Ada Lovelace recommended)
99
+ - spaCy 3.8+ with spacy-transformers
100
+
101
+ ## Quick Start
102
+
103
+ ```python
104
+ import spacy
105
+ import spacy_accelerate
106
+
107
+ # Load your spaCy transformer model
108
+ nlp = spacy.load("en_core_web_trf")
109
+
110
+ # Optimize with one line!
111
+ nlp = spacy_accelerate.optimize(nlp, precision="fp16")
112
+
113
+ # Use as normal - same API, faster inference
114
+ doc = nlp("Apple Inc. was founded by Steve Jobs in Cupertino.")
115
+ print([(ent.text, ent.label_) for ent in doc.ents])
116
+ # [('Apple Inc.', 'ORG'), ('Steve Jobs', 'PERSON'), ('Cupertino', 'GPE')]
117
+
118
+ # Batch processing works too
119
+ texts = ["Text one.", "Text two.", "Text three."]
120
+ docs = list(nlp.pipe(texts, batch_size=32))
121
+ ```
122
+
123
+ ## API Reference
124
+
125
+ ### `optimize(nlp, **kwargs)`
126
+
127
+ Optimize a spaCy transformer pipeline with ONNX Runtime / TensorRT.
128
+
129
+ **Parameters:**
130
+
131
+ | Parameter | Type | Default | Description |
132
+ |-----------|------|---------|-------------|
133
+ | `nlp` | `spacy.Language` | required | spaCy pipeline with transformer |
134
+ | `precision` | `"fp32"` \| `"fp16"` | `"fp16"` | Model precision |
135
+ | `provider` | `"tensorrt"` \| `"cuda"` \| `"cpu"` | `"cuda"` | Execution provider |
136
+ | `cache_dir` | `Path` \| `str` | `~/.cache/spacy-accelerate` | ONNX model cache directory |
137
+ | `warmup` | `bool` | `True` | Run warmup inference |
138
+ | `device_id` | `int` | `0` | CUDA device ID |
139
+ | `max_batch_size` | `int` | `128` | Max batch size for IO Binding |
140
+ | `max_seq_length` | `int` | `512` | Max sequence length for IO Binding |
141
+ | `use_io_binding` | `bool` | `True` | Use zero-copy IO Binding |
142
+ | `verbose` | `bool` | `False` | Enable verbose logging |
143
+
144
+ **TensorRT-specific parameters:**
145
+
146
+ | Parameter | Type | Default | Description |
147
+ |-----------|------|---------|-------------|
148
+ | `trt_max_workspace_size` | `int` | `4GB` | TensorRT workspace size |
149
+ | `trt_builder_optimization_level` | `int` | `3` | Optimization level (0-5) |
150
+ | `trt_timing_cache` | `bool` | `True` | Enable timing cache |
151
+
152
+ **Returns:** The optimized `spacy.Language` object (modified in-place).
153
+
154
+ ### Cache Management
155
+
156
+ ```python
157
+ import spacy_accelerate
158
+
159
+ # List cached models
160
+ cached = spacy_accelerate.list_cached()
161
+ print(f"Cached models: {cached}")
162
+
163
+ # Get cache size
164
+ size_bytes = spacy_accelerate.get_cache_size()
165
+ print(f"Cache size: {size_bytes / 1024**2:.1f} MB")
166
+
167
+ # Clear cache
168
+ cleared = spacy_accelerate.clear_cache()
169
+ print(f"Cleared {cleared} cache entries")
170
+ ```
171
+
172
+ ## Performance
173
+
174
+ Canonical benchmark results are the Docker runs under [artifacts/benchmarks/docker](artifacts/benchmarks/docker).
175
+
176
+ Benchmark commands and runner details are maintained in [benchmarks/README.md](benchmarks/README.md).
177
+
178
+ Latest full-pipeline Docker measurement for `en_core_web_trf` on **NVIDIA RTX 4000 SFF Ada Generation**, **CoNLL-2003** test set, `batch_size=128`, `1` discarded prime pass and `3` measured passes averaged:
179
+
180
+ | Execution Provider | Speed (WPS) | Speedup vs PyTorch | Accuracy |
181
+ |--------------------|-------------|--------------------|----------|
182
+ | PyTorch Baseline (FP32) | 6,241 | 1.00x | 100.00% |
183
+ | PyTorch Baseline (FP16) | 6,166 | 0.99x | 100.00% |
184
+ | CUDA FP32 | 9,910 | 1.59x | 99.90% |
185
+ | CUDA FP16 | 15,763 | 2.53x | 99.75% |
186
+ | TensorRT FP32 | 10,552 | 1.69x | 99.95% |
187
+ | **TensorRT FP16** | **16,935** | **2.71x** | **99.50%** |
188
+
189
+ Latest Docker NER-only measurement for `en_core_web_trf` with `tagger`, `parser`, `attribute_ruler`, and `lemmatizer` disabled:
190
+
191
+ | Execution Provider | Speed (WPS) | Speedup vs PyTorch | Accuracy |
192
+ |--------------------|-------------|--------------------|----------|
193
+ | PyTorch Baseline (FP32) | 7,066 | 1.00x | 100.00% |
194
+ | PyTorch Baseline (FP16) | 6,859 | 0.97x | 100.00% |
195
+ | CUDA FP32 | 11,972 | 1.69x | 99.90% |
196
+ | CUDA FP16 | 22,394 | 3.17x | 99.75% |
197
+ | TensorRT FP32 | 13,138 | 1.86x | 99.95% |
198
+ | **TensorRT FP16** | **24,823** | **3.51x** | **99.65%** |
199
+
200
+
201
+ ## Examples
202
+
203
+ ### Using TensorRT for Maximum Performance
204
+
205
+ ```python
206
+ import spacy
207
+ import spacy_accelerate
208
+
209
+ nlp = spacy.load("en_core_web_trf")
210
+
211
+ nlp = spacy_accelerate.optimize(
212
+ nlp,
213
+ provider="tensorrt",
214
+ precision="fp16",
215
+ trt_max_workspace_size=8 * 1024**3, # 8GB
216
+ trt_builder_optimization_level=5, # Maximum optimization
217
+ )
218
+
219
+ # First inference builds TensorRT engine (cached for subsequent runs)
220
+ doc = nlp("TensorRT provides maximum inference speed.")
221
+ ```
222
+
223
+
224
+
225
+ ### Custom Cache Directory
226
+
227
+ ```python
228
+ import spacy
229
+ import spacy_accelerate
230
+
231
+ nlp = spacy.load("en_core_web_trf")
232
+
233
+ nlp = spacy_accelerate.optimize(
234
+ nlp,
235
+ cache_dir="/path/to/custom/cache",
236
+ precision="fp16",
237
+ )
238
+ ```
239
+
240
+ ### Verbose Mode for Debugging
241
+
242
+ ```python
243
+ import spacy
244
+ import spacy_accelerate
245
+
246
+ nlp = spacy.load("en_core_web_trf")
247
+
248
+ nlp = spacy_accelerate.optimize(
249
+ nlp,
250
+ verbose=True, # Print detailed logs
251
+ )
252
+ ```
253
+
254
+ ## Supported Models
255
+
256
+ Right now the confirmed spaCy model support is:
257
+
258
+ - `en_core_web_trf`
259
+
260
+ The earlier wording here listed transformer architecture families, not actual
261
+ published spaCy package names. Internally, the exporter and architecture
262
+ detection logic currently target curated-transformer / RoBERTa-style backbones,
263
+ with partial code paths for BERT and XLM-RoBERTa families, but those are not yet
264
+ claimed here as generally supported spaCy packages.
265
+
266
+ ## How It Works
267
+
268
+ 1. **Weight Mapping**: Extracts transformer weights from spaCy's internal format and maps them to HuggingFace format.
269
+
270
+ 2. **ONNX Export**: Exports the mapped model to ONNX format with dynamic batch and sequence dimensions.
271
+
272
+ 3. **FP16 Optimization** (optional): Applies BERT-style optimizations and converts to FP16 for faster inference.
273
+
274
+ 4. **Runtime Patching**: Replaces the PyTorch transformer with an ONNX Runtime proxy that provides the same interface.
275
+
276
+ 5. **Caching**: Converted models are cached to avoid re-conversion on subsequent loads.
277
+
278
+ ## Troubleshooting
279
+
280
+ ### TensorRT provider not available
281
+
282
+ Run the diagnostic tool first:
283
+ ```bash
284
+ python -m spacy_accelerate
285
+ ```
286
+
287
+ If you see `TensorRT EP : MISSING`, the NVIDIA build of onnxruntime-gpu is not installed.
288
+ Fix with step 2 from the installation instructions:
289
+ ```bash
290
+ pip install --force-reinstall \
291
+ --extra-index-url https://pypi.nvidia.com \
292
+ onnxruntime-gpu==1.23.2
293
+ ```
294
+
295
+ ### libnvinfer.so / libcublas.so / libcublasLt.so not found
296
+
297
+ If you see errors like `libnvinfer.so.10`, `libcublas.so.12`, or
298
+ `libcublasLt.so.12: cannot open shared object file`:
299
+
300
+ **Automatic fix:** `spacy-accelerate` automatically configures both TensorRT
301
+ libraries and the CUDA libraries installed under `site-packages/nvidia/*/lib`.
302
+ Import `spacy_accelerate` before creating ONNX Runtime sessions or calling
303
+ `spacy.require_gpu()`.
304
+
305
+ **Manual fix:** If the automatic configuration doesn't work (e.g., running scripts directly):
306
+ ```bash
307
+ SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
308
+ export LD_LIBRARY_PATH="$SITE_PACKAGES/tensorrt_libs:$SITE_PACKAGES/nvidia/cublas/lib:$SITE_PACKAGES/nvidia/cuda_runtime/lib:$SITE_PACKAGES/nvidia/cudnn/lib:$LD_LIBRARY_PATH"
309
+ ```
310
+
311
+ ### CUDA out of memory
312
+
313
+ Reduce workspace size or batch size:
314
+
315
+ ```python
316
+ nlp = spacy_accelerate.optimize(
317
+ nlp,
318
+ trt_max_workspace_size=2 * 1024**3, # 2GB instead of 4GB
319
+ max_batch_size=16, # Smaller batches
320
+ )
321
+ ```
322
+
323
+ ### First inference is slow
324
+
325
+ TensorRT builds optimized engines on first run. Enable caching:
326
+
327
+ ```python
328
+ nlp = spacy_accelerate.optimize(
329
+ nlp,
330
+ provider="tensorrt",
331
+ trt_timing_cache=True, # Cache timing data
332
+ )
333
+ ```
334
+
335
+ ## License
336
+
337
+ MIT License
338
+
339
+ ## Contributing
340
+
341
+ Contributions are welcome! Please open an issue or submit a pull request.