turboocr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. turboocr-0.1.0/.gitignore +14 -0
  2. turboocr-0.1.0/LICENSE +21 -0
  3. turboocr-0.1.0/PKG-INFO +242 -0
  4. turboocr-0.1.0/README.md +191 -0
  5. turboocr-0.1.0/docs/api/cli.md +68 -0
  6. turboocr-0.1.0/docs/api/clients.md +17 -0
  7. turboocr-0.1.0/docs/api/errors.md +61 -0
  8. turboocr-0.1.0/docs/api/grpc.md +22 -0
  9. turboocr-0.1.0/docs/api/layout.md +43 -0
  10. turboocr-0.1.0/docs/api/markdown.md +36 -0
  11. turboocr-0.1.0/docs/api/models.md +39 -0
  12. turboocr-0.1.0/docs/api/retry.md +53 -0
  13. turboocr-0.1.0/docs/api/searchable_pdf.md +35 -0
  14. turboocr-0.1.0/docs/examples.md +99 -0
  15. turboocr-0.1.0/docs/explanation/http_vs_grpc.md +46 -0
  16. turboocr-0.1.0/docs/explanation/index.md +25 -0
  17. turboocr-0.1.0/docs/explanation/layout_and_reading_order.md +76 -0
  18. turboocr-0.1.0/docs/explanation/searchable_pdf_internals.md +64 -0
  19. turboocr-0.1.0/docs/how-tos/batch_with_partial_failures.md +42 -0
  20. turboocr-0.1.0/docs/how-tos/configure_retries.md +41 -0
  21. turboocr-0.1.0/docs/how-tos/handle_non_latin_pdfs.md +61 -0
  22. turboocr-0.1.0/docs/how-tos/index.md +22 -0
  23. turboocr-0.1.0/docs/how-tos/use_custom_httpx_client.md +44 -0
  24. turboocr-0.1.0/docs/index.md +60 -0
  25. turboocr-0.1.0/docs/mkdocs.yml +115 -0
  26. turboocr-0.1.0/docs/tutorials/01_first_pdf_to_markdown.md +145 -0
  27. turboocr-0.1.0/docs/tutorials/02_async_folder_pipeline.md +102 -0
  28. turboocr-0.1.0/docs/tutorials/index.md +23 -0
  29. turboocr-0.1.0/examples/00_quickstart.py +19 -0
  30. turboocr-0.1.0/examples/01_image_ocr_with_layout.py +41 -0
  31. turboocr-0.1.0/examples/02_pdf_to_markdown.py +46 -0
  32. turboocr-0.1.0/examples/03_searchable_pdf.py +28 -0
  33. turboocr-0.1.0/examples/04_async_client.py +25 -0
  34. turboocr-0.1.0/examples/05_batch.py +21 -0
  35. turboocr-0.1.0/examples/06_grpc.py +17 -0
  36. turboocr-0.1.0/examples/07_retry_and_timeout.py +27 -0
  37. turboocr-0.1.0/examples/08_custom_httpx_client.py +25 -0
  38. turboocr-0.1.0/examples/09_markdown_style.py +54 -0
  39. turboocr-0.1.0/examples/10_tables_and_formulas.py +39 -0
  40. turboocr-0.1.0/examples/11_folder_pipeline.py +34 -0
  41. turboocr-0.1.0/examples/12_hooks_and_logging.py +32 -0
  42. turboocr-0.1.0/examples/README.md +49 -0
  43. turboocr-0.1.0/examples/sample/acme_invoice.pdf +93 -0
  44. turboocr-0.1.0/examples/sample/acme_invoice.png +0 -0
  45. turboocr-0.1.0/examples/sample/generate.py +202 -0
  46. turboocr-0.1.0/pyproject.toml +144 -0
  47. turboocr-0.1.0/src/turboocr/__init__.py +122 -0
  48. turboocr-0.1.0/src/turboocr/_core/__init__.py +0 -0
  49. turboocr-0.1.0/src/turboocr/_core/auth.py +36 -0
  50. turboocr-0.1.0/src/turboocr/_core/content.py +89 -0
  51. turboocr-0.1.0/src/turboocr/_core/env.py +35 -0
  52. turboocr-0.1.0/src/turboocr/_core/ids.py +23 -0
  53. turboocr-0.1.0/src/turboocr/_core/options.py +37 -0
  54. turboocr-0.1.0/src/turboocr/_core/retry.py +132 -0
  55. turboocr-0.1.0/src/turboocr/_grpc/__init__.py +19 -0
  56. turboocr-0.1.0/src/turboocr/_grpc/_stubs/__init__.py +1 -0
  57. turboocr-0.1.0/src/turboocr/_grpc/_stubs/ocr_pb2.py +58 -0
  58. turboocr-0.1.0/src/turboocr/_grpc/_stubs/ocr_pb2.pyi +129 -0
  59. turboocr-0.1.0/src/turboocr/_grpc/_stubs/ocr_pb2_grpc.py +277 -0
  60. turboocr-0.1.0/src/turboocr/_grpc/channel.py +84 -0
  61. turboocr-0.1.0/src/turboocr/_grpc/client.py +586 -0
  62. turboocr-0.1.0/src/turboocr/_grpc/errors.py +131 -0
  63. turboocr-0.1.0/src/turboocr/_grpc/metadata.py +31 -0
  64. turboocr-0.1.0/src/turboocr/_grpc/parse.py +96 -0
  65. turboocr-0.1.0/src/turboocr/_grpc/requests.py +77 -0
  66. turboocr-0.1.0/src/turboocr/_grpc/retry.py +68 -0
  67. turboocr-0.1.0/src/turboocr/_http/__init__.py +3 -0
  68. turboocr-0.1.0/src/turboocr/_http/_kwargs.py +42 -0
  69. turboocr-0.1.0/src/turboocr/_http/client.py +978 -0
  70. turboocr-0.1.0/src/turboocr/_http/retry.py +93 -0
  71. turboocr-0.1.0/src/turboocr/_http/specs.py +112 -0
  72. turboocr-0.1.0/src/turboocr/_http/transport.py +63 -0
  73. turboocr-0.1.0/src/turboocr/cli.py +192 -0
  74. turboocr-0.1.0/src/turboocr/errors.py +106 -0
  75. turboocr-0.1.0/src/turboocr/markdown/__init__.py +41 -0
  76. turboocr-0.1.0/src/turboocr/markdown/render.py +179 -0
  77. turboocr-0.1.0/src/turboocr/markdown/style.py +243 -0
  78. turboocr-0.1.0/src/turboocr/models.py +367 -0
  79. turboocr-0.1.0/src/turboocr/py.typed +0 -0
  80. turboocr-0.1.0/src/turboocr/searchable_pdf.py +283 -0
  81. turboocr-0.1.0/tests/__init__.py +0 -0
  82. turboocr-0.1.0/tests/_grpc/__init__.py +0 -0
  83. turboocr-0.1.0/tests/_grpc/conftest.py +159 -0
  84. turboocr-0.1.0/tests/_grpc/test_client.py +110 -0
  85. turboocr-0.1.0/tests/_grpc/test_errors.py +97 -0
  86. turboocr-0.1.0/tests/_grpc/test_metadata.py +69 -0
  87. turboocr-0.1.0/tests/_grpc/test_parse.py +35 -0
  88. turboocr-0.1.0/tests/_grpc/test_retry.py +98 -0
  89. turboocr-0.1.0/tests/integration/__init__.py +0 -0
  90. turboocr-0.1.0/tests/integration/conftest.py +95 -0
  91. turboocr-0.1.0/tests/integration/test_smoke.py +55 -0
  92. turboocr-0.1.0/tests/test_client.py +407 -0
  93. turboocr-0.1.0/tests/test_markdown.py +205 -0
  94. turboocr-0.1.0/tests/test_models.py +186 -0
  95. turboocr-0.1.0/tests/test_production_features.py +124 -0
  96. turboocr-0.1.0/tests/test_retry_and_errors.py +76 -0
  97. turboocr-0.1.0/tests/test_searchable_pdf.py +178 -0
@@ -0,0 +1,14 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ .mypy_cache/
8
+ dist/
9
+ build/
10
+ *.so
11
+ .DS_Store
12
+ .coverage
13
+ htmlcov/
14
+ site/
turboocr-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 aiptimizer
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: turboocr
3
+ Version: 0.1.0
4
+ Summary: Python SDK for TurboOCR — fast GPU OCR server (HTTP + gRPC)
5
+ Project-URL: Homepage, https://turboocr.com
6
+ Project-URL: Repository, https://github.com/aiptimizer/turboocr-python
7
+ Project-URL: Issues, https://github.com/aiptimizer/turboocr-python/issues
8
+ Project-URL: Server, https://github.com/aiptimizer/TurboOCR
9
+ Author: turbo-ocr contributors
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: layout,markdown,ocr,paddleocr,pdf,tensorrt
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.12
23
+ Requires-Dist: httpx>=0.28
24
+ Requires-Dist: pydantic>=2.10
25
+ Requires-Dist: pypdf>=5.0
26
+ Requires-Dist: reportlab>=4.2
27
+ Requires-Dist: rich>=14
28
+ Requires-Dist: typer>=0.15
29
+ Provides-Extra: all
30
+ Requires-Dist: grpcio>=1.66; extra == 'all'
31
+ Requires-Dist: protobuf>=5.27; extra == 'all'
32
+ Provides-Extra: dev
33
+ Requires-Dist: grpcio-tools>=1.66; extra == 'dev'
34
+ Requires-Dist: grpcio>=1.66; extra == 'dev'
35
+ Requires-Dist: mypy>=1.13; extra == 'dev'
36
+ Requires-Dist: protobuf>=5.27; extra == 'dev'
37
+ Requires-Dist: pypdfium2>=4.30; extra == 'dev'
38
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
39
+ Requires-Dist: pytest>=8; extra == 'dev'
40
+ Requires-Dist: respx>=0.22; extra == 'dev'
41
+ Requires-Dist: ruff>=0.8; extra == 'dev'
42
+ Provides-Extra: docs
43
+ Requires-Dist: mike>=2.1; extra == 'docs'
44
+ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
45
+ Requires-Dist: mkdocs>=1.6; extra == 'docs'
46
+ Requires-Dist: mkdocstrings[python]>=0.27; extra == 'docs'
47
+ Provides-Extra: grpc
48
+ Requires-Dist: grpcio>=1.66; extra == 'grpc'
49
+ Requires-Dist: protobuf>=5.27; extra == 'grpc'
50
+ Description-Content-Type: text/markdown
51
+
52
+ # turboocr
53
+
54
+ Typed Python client for the [TurboOCR](https://github.com/aiptimizer/TurboOCR) server.
55
+ Sync + async, HTTP + gRPC, layout-aware Markdown rendering, searchable-PDF generation.
56
+
57
+ [![PyPI](https://img.shields.io/pypi/v/turboocr.svg)](https://pypi.org/project/turboocr/)
58
+ [![Python](https://img.shields.io/pypi/pyversions/turboocr.svg)](https://pypi.org/project/turboocr/)
59
+ [![Typed](https://img.shields.io/badge/typed-PEP_561-blue.svg)](https://peps.python.org/pep-0561/)
60
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
61
+
62
+ - [Install](#install) · [Quickstart](#quickstart) · [What you get](#what-you-get)
63
+ - [Examples](examples/) · [API reference](docs/) · [CLI](#cli) · [Errors](#errors)
64
+
65
+ ## Install
66
+
67
+ ```bash
68
+ pip install turboocr # HTTP client + CLI + searchable-PDF
69
+ pip install 'turboocr[grpc]' # add the gRPC transport
70
+ pip install 'turboocr[all]' # everything optional (currently == [grpc])
71
+ ```
72
+
73
+ Requires Python 3.12+.
74
+
75
+ ## Quickstart
76
+
77
+ Start a [TurboOCR](https://github.com/aiptimizer/TurboOCR) server (the C++/CUDA
78
+ OCR engine — this repo is just the Python client):
79
+
80
+ ```bash
81
+ docker run --gpus all -p 8000:8000 -p 50051:50051 \
82
+ -v trt-cache:/home/ocr/.cache/turbo-ocr \
83
+ -e OCR_LANG=latin \
84
+ ghcr.io/aiptimizer/turboocr:v2.2.3
85
+ ```
86
+
87
+ `OCR_LANG=latin` (default) covers English, French, German, Spanish, and more. Swap it for
88
+ `chinese`, `greek`, `eslav`, `arabic`, `korean`, or `thai` — all are baked in.
89
+ See the [TurboOCR repo](https://github.com/aiptimizer/TurboOCR) for build-from-source,
90
+ benchmarks, and the full set of server env vars.
91
+
92
+ Then recognise an image and turn a PDF into Markdown:
93
+
94
+ ```python
95
+ from turboocr import Client, render_to_markdown
96
+
97
+ with Client(base_url="http://localhost:8000") as client:
98
+ # Image OCR
99
+ img = client.recognize_image("page.png", layout=True, include_blocks=True)
100
+ print(f"{len(img.results)} text items, {len(img.blocks)} blocks")
101
+ print(img.text)
102
+
103
+ # PDF → Markdown
104
+ pdf = client.recognize_pdf("paper.pdf", dpi=150, include_blocks=True)
105
+ print(render_to_markdown(pdf).markdown)
106
+
107
+ # Searchable PDF (invisible text overlay)
108
+ overlay = client.make_searchable_pdf("scan.pdf", dpi=200)
109
+ open("scan.searchable.pdf", "wb").write(overlay)
110
+ ```
111
+
112
+ That's the 80% case. Full runnable examples for async, gRPC, batch, retries,
113
+ custom `httpx.Client`, hooks, Markdown styling, folder pipelines, and more live
114
+ in [`examples/`](examples/) — every script runs end-to-end against the bundled
115
+ ACME invoice fixture.
116
+
117
+ ## What you get
118
+
119
+ - **Sync + async, HTTP + gRPC.** Four clients (`Client`, `AsyncClient`,
120
+ `GrpcClient`, `AsyncGrpcClient`) with identical method surfaces.
121
+ - **Typed, immutable responses (pydantic v2).** IDE autocomplete, and if a newer
122
+ server adds a field your SDK doesn't know about, parsing still succeeds — the
123
+ extra lands on `.model_extra` instead of crashing.
124
+ - **Layout-aware Markdown.** `render_to_markdown(...)` walks the reading order
125
+ and maps each layout class (`doc_title`, `display_formula`, `table`, …) to a
126
+ Markdown construct. Pluggable via `MarkdownStyle`.
127
+ - **Searchable PDFs.** `make_searchable_pdf(...)` overlays an invisible text
128
+ layer aligned to the page geometry. Auto-discovers a Unicode font for
129
+ non-Latin scripts, or pass `font_path=`.
130
+ - **Production-friendly.** Configurable retry policy (HTTP status + gRPC status
131
+ + `Retry-After`), per-request timeouts, custom `httpx.Client`, `on_request` /
132
+ `on_response` event hooks, uuid7 `X-Request-ID` per call.
133
+ - **Precise exception hierarchy.** Maps the server's `error_code` to typed
134
+ exceptions — see [Errors](#errors).
135
+ - **`turbo-ocr` CLI** included in the default install.
136
+
137
+ Today's server does plain OCR + layout classification. Table-structure and
138
+ LaTeX-formula source are **not** yet emitted; the SDK exposes `page.tables` /
139
+ `page.formulas` as a forward-compatible surface that populates automatically
140
+ when those server features ship.
141
+
142
+ ## Configuration
143
+
144
+ ```python
145
+ from turboocr import Client, RetryPolicy
146
+
147
+ client = Client(
148
+ base_url="http://localhost:8000", # or TURBO_OCR_BASE_URL env
149
+ api_key="sk-...", # or TURBO_OCR_API_KEY env
150
+ auth_scheme="bearer", # "bearer" | "x-api-key"
151
+ timeout=30.0,
152
+ default_headers={"X-Tenant": "acme"},
153
+ retry=RetryPolicy(attempts=5, backoff=0.5),
154
+ )
155
+ ```
156
+
157
+ Pass `http_client=httpx.Client(...)` for custom TLS, connection limits, or
158
+ proxies — see [`examples/08_custom_httpx_client.py`](examples/08_custom_httpx_client.py).
159
+
160
+ Retry defaults: HTTP `{429, 502, 503, 504}`, gRPC
161
+ `{UNAVAILABLE, DEADLINE_EXCEEDED, RESOURCE_EXHAUSTED}`, 3 attempts, exponential
162
+ backoff + jitter, `Retry-After` honoured. Tune via `RetryPolicy(...)` — see
163
+ [`examples/07_retry_and_timeout.py`](examples/07_retry_and_timeout.py).
164
+
165
+ ## Errors
166
+
167
+ ```
168
+ TurboOcrError
169
+ ├── APIConnectionError # transport-level
170
+ │ ├── Timeout
171
+ │ ├── NetworkError
172
+ │ └── ProtocolError
173
+ ├── InvalidParameter # 4xx: bad params / headers / dims
174
+ ├── EmptyBody # 4xx: empty body / batch / PDF
175
+ ├── LayoutDisabled # asked for layout when server has it off
176
+ ├── ImageDecodeError # bad bytes / bad base64
177
+ ├── DimensionsTooLarge # image / PDF over server limits
178
+ ├── PoolExhausted # "Server at capacity"
179
+ ├── PdfRenderError # PDF rasterization failed
180
+ └── ServerError # 5xx, no specific code
181
+ ```
182
+
183
+ Server-side exceptions carry `.code`, `.status_code`, and `.payload`. Transport
184
+ exceptions inherit from `APIConnectionError`.
185
+
186
+ | Symptom | Cause | Fix |
187
+ |---|---|---|
188
+ | `NetworkError: Connection refused` | server not running | start the docker container (above) |
189
+ | `DimensionsTooLarge` | image > `MAX_IMAGE_DIM` (default 16384) | downscale, or raise the server limit |
190
+ | `LayoutDisabled` | server started with `DISABLE_LAYOUT=1` | restart without that env var |
191
+ | `UnicodeFontRequired` | non-Latin text, no Unicode font found | pass `font_path=` or set `TURBO_OCR_FONT` |
192
+ | `PoolExhausted` | server queue full | retry with backoff, or scale `PIPELINE_POOL_SIZE` |
193
+ | `Timeout` | per-request timeout hit | pass `timeout=N`, or increase `RetryPolicy.attempts` |
194
+
195
+ ## CLI
196
+
197
+ ```bash
198
+ turbo-ocr ocr page.png --output markdown
199
+ turbo-ocr pdf doc.pdf --dpi 150 --output json
200
+ turbo-ocr searchable-pdf doc.pdf -o out.pdf --font-path /path/to/font.ttf
201
+ turbo-ocr health --ready
202
+ ```
203
+
204
+ `--output` accepts `json | blocks | text | markdown`. The CLI reads `TURBO_OCR_BASE_URL`,
205
+ `TURBO_OCR_API_KEY`, `TURBO_OCR_FONT` from the environment. Run
206
+ `turbo-ocr --help` for the full surface.
207
+
208
+ ## Logging
209
+
210
+ ```python
211
+ import logging
212
+ logging.getLogger("turboocr").setLevel(logging.DEBUG)
213
+ ```
214
+
215
+ Emits `method path -> status (Xms) [req=<short-id>]` per HTTP request. Retry
216
+ warnings go to `turboocr.retry` / `turboocr.grpc.retry`. Searchable-PDF font
217
+ resolution logs to `turboocr.searchable_pdf`. Every HTTP request sends a uuid7
218
+ `X-Request-ID` header (gRPC uses `x-request-id` metadata).
219
+
220
+ ## Learn more
221
+
222
+ - [`examples/`](examples/) — 13 runnable scripts (each runs against the bundled
223
+ ACME invoice fixture, no server config needed beyond `TURBO_OCR_BASE_URL`)
224
+ - [`docs/`](docs/) — full docs source (MkDocs + mkdocstrings, deployed at
225
+ https://aiptimizer.github.io/turboocr-python/). Preview locally with
226
+ `uv run --extra docs mkdocs serve -f docs/mkdocs.yml`
227
+ - Server compatibility: `SERVER_API_VERSION_MIN` /
228
+ `SERVER_API_VERSION_MAX_EXCLUSIVE` document the supported server range;
229
+ `extra="allow"` on response models means additive server changes don't break
230
+ parsing
231
+
232
+ ## Testing
233
+
234
+ ```bash
235
+ pytest -q # offline (respx)
236
+ TURBO_OCR_BASE_URL=http://localhost:8000 pytest tests/integration -v
237
+ python examples/03_searchable_pdf.py # smoke test
238
+ ```
239
+
240
+ ## License
241
+
242
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,191 @@
1
+ # turboocr
2
+
3
+ Typed Python client for the [TurboOCR](https://github.com/aiptimizer/TurboOCR) server.
4
+ Sync + async, HTTP + gRPC, layout-aware Markdown rendering, searchable-PDF generation.
5
+
6
+ [![PyPI](https://img.shields.io/pypi/v/turboocr.svg)](https://pypi.org/project/turboocr/)
7
+ [![Python](https://img.shields.io/pypi/pyversions/turboocr.svg)](https://pypi.org/project/turboocr/)
8
+ [![Typed](https://img.shields.io/badge/typed-PEP_561-blue.svg)](https://peps.python.org/pep-0561/)
9
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
10
+
11
+ - [Install](#install) · [Quickstart](#quickstart) · [What you get](#what-you-get)
12
+ - [Examples](examples/) · [API reference](docs/) · [CLI](#cli) · [Errors](#errors)
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install turboocr # HTTP client + CLI + searchable-PDF
18
+ pip install 'turboocr[grpc]' # add the gRPC transport
19
+ pip install 'turboocr[all]' # everything optional (currently == [grpc])
20
+ ```
21
+
22
+ Requires Python 3.12+.
23
+
24
+ ## Quickstart
25
+
26
+ Start a [TurboOCR](https://github.com/aiptimizer/TurboOCR) server (the C++/CUDA
27
+ OCR engine — this repo is just the Python client):
28
+
29
+ ```bash
30
+ docker run --gpus all -p 8000:8000 -p 50051:50051 \
31
+ -v trt-cache:/home/ocr/.cache/turbo-ocr \
32
+ -e OCR_LANG=latin \
33
+ ghcr.io/aiptimizer/turboocr:v2.2.3
34
+ ```
35
+
36
+ `OCR_LANG=latin` (default) covers English, French, German, Spanish, and more. Swap it for
37
+ `chinese`, `greek`, `eslav`, `arabic`, `korean`, or `thai` — all are baked in.
38
+ See the [TurboOCR repo](https://github.com/aiptimizer/TurboOCR) for build-from-source,
39
+ benchmarks, and the full set of server env vars.
40
+
41
+ Then recognise an image and turn a PDF into Markdown:
42
+
43
+ ```python
44
+ from turboocr import Client, render_to_markdown
45
+
46
+ with Client(base_url="http://localhost:8000") as client:
47
+ # Image OCR
48
+ img = client.recognize_image("page.png", layout=True, include_blocks=True)
49
+ print(f"{len(img.results)} text items, {len(img.blocks)} blocks")
50
+ print(img.text)
51
+
52
+ # PDF → Markdown
53
+ pdf = client.recognize_pdf("paper.pdf", dpi=150, include_blocks=True)
54
+ print(render_to_markdown(pdf).markdown)
55
+
56
+ # Searchable PDF (invisible text overlay)
57
+ overlay = client.make_searchable_pdf("scan.pdf", dpi=200)
58
+ open("scan.searchable.pdf", "wb").write(overlay)
59
+ ```
60
+
61
+ That's the 80% case. Full runnable examples for async, gRPC, batch, retries,
62
+ custom `httpx.Client`, hooks, Markdown styling, folder pipelines, and more live
63
+ in [`examples/`](examples/) — every script runs end-to-end against the bundled
64
+ ACME invoice fixture.
65
+
66
+ ## What you get
67
+
68
+ - **Sync + async, HTTP + gRPC.** Four clients (`Client`, `AsyncClient`,
69
+ `GrpcClient`, `AsyncGrpcClient`) with identical method surfaces.
70
+ - **Typed, immutable responses (pydantic v2).** IDE autocomplete, and if a newer
71
+ server adds a field your SDK doesn't know about, parsing still succeeds — the
72
+ extra lands on `.model_extra` instead of crashing.
73
+ - **Layout-aware Markdown.** `render_to_markdown(...)` walks the reading order
74
+ and maps each layout class (`doc_title`, `display_formula`, `table`, …) to a
75
+ Markdown construct. Pluggable via `MarkdownStyle`.
76
+ - **Searchable PDFs.** `make_searchable_pdf(...)` overlays an invisible text
77
+ layer aligned to the page geometry. Auto-discovers a Unicode font for
78
+ non-Latin scripts, or pass `font_path=`.
79
+ - **Production-friendly.** Configurable retry policy (HTTP status + gRPC status
80
+ + `Retry-After`), per-request timeouts, custom `httpx.Client`, `on_request` /
81
+ `on_response` event hooks, uuid7 `X-Request-ID` per call.
82
+ - **Precise exception hierarchy.** Maps the server's `error_code` to typed
83
+ exceptions — see [Errors](#errors).
84
+ - **`turbo-ocr` CLI** included in the default install.
85
+
86
+ Today's server does plain OCR + layout classification. Table-structure and
87
+ LaTeX-formula source are **not** yet emitted; the SDK exposes `page.tables` /
88
+ `page.formulas` as a forward-compatible surface that populates automatically
89
+ when those server features ship.
90
+
91
+ ## Configuration
92
+
93
+ ```python
94
+ from turboocr import Client, RetryPolicy
95
+
96
+ client = Client(
97
+ base_url="http://localhost:8000", # or TURBO_OCR_BASE_URL env
98
+ api_key="sk-...", # or TURBO_OCR_API_KEY env
99
+ auth_scheme="bearer", # "bearer" | "x-api-key"
100
+ timeout=30.0,
101
+ default_headers={"X-Tenant": "acme"},
102
+ retry=RetryPolicy(attempts=5, backoff=0.5),
103
+ )
104
+ ```
105
+
106
+ Pass `http_client=httpx.Client(...)` for custom TLS, connection limits, or
107
+ proxies — see [`examples/08_custom_httpx_client.py`](examples/08_custom_httpx_client.py).
108
+
109
+ Retry defaults: HTTP `{429, 502, 503, 504}`, gRPC
110
+ `{UNAVAILABLE, DEADLINE_EXCEEDED, RESOURCE_EXHAUSTED}`, 3 attempts, exponential
111
+ backoff + jitter, `Retry-After` honoured. Tune via `RetryPolicy(...)` — see
112
+ [`examples/07_retry_and_timeout.py`](examples/07_retry_and_timeout.py).
113
+
114
+ ## Errors
115
+
116
+ ```
117
+ TurboOcrError
118
+ ├── APIConnectionError # transport-level
119
+ │ ├── Timeout
120
+ │ ├── NetworkError
121
+ │ └── ProtocolError
122
+ ├── InvalidParameter # 4xx: bad params / headers / dims
123
+ ├── EmptyBody # 4xx: empty body / batch / PDF
124
+ ├── LayoutDisabled # asked for layout when server has it off
125
+ ├── ImageDecodeError # bad bytes / bad base64
126
+ ├── DimensionsTooLarge # image / PDF over server limits
127
+ ├── PoolExhausted # "Server at capacity"
128
+ ├── PdfRenderError # PDF rasterization failed
129
+ └── ServerError # 5xx, no specific code
130
+ ```
131
+
132
+ Server-side exceptions carry `.code`, `.status_code`, and `.payload`. Transport
133
+ exceptions inherit from `APIConnectionError`.
134
+
135
+ | Symptom | Cause | Fix |
136
+ |---|---|---|
137
+ | `NetworkError: Connection refused` | server not running | start the docker container (above) |
138
+ | `DimensionsTooLarge` | image > `MAX_IMAGE_DIM` (default 16384) | downscale, or raise the server limit |
139
+ | `LayoutDisabled` | server started with `DISABLE_LAYOUT=1` | restart without that env var |
140
+ | `UnicodeFontRequired` | non-Latin text, no Unicode font found | pass `font_path=` or set `TURBO_OCR_FONT` |
141
+ | `PoolExhausted` | server queue full | retry with backoff, or scale `PIPELINE_POOL_SIZE` |
142
+ | `Timeout` | per-request timeout hit | pass `timeout=N`, or increase `RetryPolicy.attempts` |
143
+
144
+ ## CLI
145
+
146
+ ```bash
147
+ turbo-ocr ocr page.png --output markdown
148
+ turbo-ocr pdf doc.pdf --dpi 150 --output json
149
+ turbo-ocr searchable-pdf doc.pdf -o out.pdf --font-path /path/to/font.ttf
150
+ turbo-ocr health --ready
151
+ ```
152
+
153
+ `--output` accepts `json | blocks | text | markdown`. The CLI reads `TURBO_OCR_BASE_URL`,
154
+ `TURBO_OCR_API_KEY`, `TURBO_OCR_FONT` from the environment. Run
155
+ `turbo-ocr --help` for the full surface.
156
+
157
+ ## Logging
158
+
159
+ ```python
160
+ import logging
161
+ logging.getLogger("turboocr").setLevel(logging.DEBUG)
162
+ ```
163
+
164
+ Emits `method path -> status (Xms) [req=<short-id>]` per HTTP request. Retry
165
+ warnings go to `turboocr.retry` / `turboocr.grpc.retry`. Searchable-PDF font
166
+ resolution logs to `turboocr.searchable_pdf`. Every HTTP request sends a uuid7
167
+ `X-Request-ID` header (gRPC uses `x-request-id` metadata).
168
+
169
+ ## Learn more
170
+
171
+ - [`examples/`](examples/) — 13 runnable scripts (each runs against the bundled
172
+ ACME invoice fixture, no server config needed beyond `TURBO_OCR_BASE_URL`)
173
+ - [`docs/`](docs/) — full docs source (MkDocs + mkdocstrings, deployed at
174
+ https://aiptimizer.github.io/turboocr-python/). Preview locally with
175
+ `uv run --extra docs mkdocs serve -f docs/mkdocs.yml`
176
+ - Server compatibility: `SERVER_API_VERSION_MIN` /
177
+ `SERVER_API_VERSION_MAX_EXCLUSIVE` document the supported server range;
178
+ `extra="allow"` on response models means additive server changes don't break
179
+ parsing
180
+
181
+ ## Testing
182
+
183
+ ```bash
184
+ pytest -q # offline (respx)
185
+ TURBO_OCR_BASE_URL=http://localhost:8000 pytest tests/integration -v
186
+ python examples/03_searchable_pdf.py # smoke test
187
+ ```
188
+
189
+ ## License
190
+
191
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,68 @@
1
+ # CLI — `turbo-ocr`
2
+
3
+ The `turbo-ocr` command ships with the default install — no extras needed.
4
+
5
+ ```bash
6
+ turbo-ocr ocr page.png --output markdown
7
+ turbo-ocr pdf doc.pdf --dpi 150 --output json
8
+ turbo-ocr searchable-pdf doc.pdf -o out.pdf --font-path /path/to/font.ttf
9
+ turbo-ocr blocks doc.pdf
10
+ turbo-ocr health --ready
11
+ ```
12
+
13
+ ## Commands
14
+
15
+ ### `turbo-ocr ocr <image>`
16
+
17
+ Single-image OCR.
18
+
19
+ | Option | Notes |
20
+ |---|---|
21
+ | `--output` | `json` (default) · `blocks` · `text` · `markdown` |
22
+ | `--base-url` | env: `TURBO_OCR_BASE_URL` (default `http://localhost:8000`) |
23
+ | `--api-key` | env: `TURBO_OCR_API_KEY` |
24
+ | `--layout / --no-layout` | request layout (default on) |
25
+ | `--reading-order` | request reading-order grouping |
26
+ | `--include-blocks` | request reading-order-grouped paragraphs |
27
+
28
+ ### `turbo-ocr pdf <pdf>`
29
+
30
+ PDF OCR.
31
+
32
+ | Option | Notes |
33
+ |---|---|
34
+ | `--output` | `json` · `blocks` · `text` · `markdown` |
35
+ | `--dpi` | rasterization DPI (default `150`) |
36
+ | `--mode` | `ocr` · `text` · `auto` · `auto_verified` · `geometric` |
37
+ | `--base-url`, `--api-key`, `--layout`, `--reading-order`, `--include-blocks` | as above |
38
+
39
+ ### `turbo-ocr searchable-pdf <pdf>`
40
+
41
+ Generate a searchable PDF with an invisible text overlay.
42
+
43
+ | Option | Notes |
44
+ |---|---|
45
+ | `-o`, `--out` | output PDF path (required) |
46
+ | `--dpi` | rasterization DPI (default `200`) |
47
+ | `--mode` | `ocr` · `text` · `auto` · `auto_verified` · `geometric` (default `ocr`) |
48
+ | `--font-path` | TTF for non-Latin scripts; env: `TURBO_OCR_FONT` |
49
+ | `--base-url`, `--api-key` | as above |
50
+
51
+ ### `turbo-ocr blocks <pdf>`
52
+
53
+ Dump reading-order-grouped blocks as JSON (shortcut for
54
+ `pdf --include-blocks --output blocks`).
55
+
56
+ ### `turbo-ocr health [--ready]`
57
+
58
+ Probe `/healthz`; with `--ready`, also require the pipeline to be ready.
59
+
60
+ ## Environment
61
+
62
+ | Variable | Used by |
63
+ |---|---|
64
+ | `TURBO_OCR_BASE_URL` | every command — default `http://localhost:8000` |
65
+ | `TURBO_OCR_API_KEY` | every command — sent as bearer or `X-API-Key` |
66
+ | `TURBO_OCR_FONT` | `searchable-pdf` — TTF path for non-Latin scripts |
67
+
68
+ Run `turbo-ocr <command> --help` for the live, authoritative option list.
@@ -0,0 +1,17 @@
1
+ # HTTP clients
2
+
3
+ `Client` and `AsyncClient` share an identical method surface — anything you
4
+ can do with one, you can `await` with the other.
5
+
6
+ Construct either directly with `base_url`, from env vars with
7
+ `Client.from_env()`, or pass a pre-built `httpx.Client` via `http_client=`.
8
+ The introspection properties (`.base_url`, `.auth_scheme`, `.default_headers`,
9
+ `.timeout`, `.retry`) read back the resolved config.
10
+
11
+ ## `Client`
12
+
13
+ ::: turboocr.Client
14
+
15
+ ## `AsyncClient`
16
+
17
+ ::: turboocr.AsyncClient
@@ -0,0 +1,61 @@
1
+ # Errors
2
+
3
+ ## Hierarchy
4
+
5
+ ```
6
+ TurboOcrError
7
+ ├── APIConnectionError # transport-level
8
+ │ ├── Timeout
9
+ │ ├── NetworkError
10
+ │ └── ProtocolError
11
+ ├── InvalidParameter # 4xx: bad params / headers / dims
12
+ ├── EmptyBody # 4xx: empty body / batch / PDF
13
+ ├── LayoutDisabled # asked for layout when server has it off
14
+ ├── ImageDecodeError # bad bytes / bad base64
15
+ ├── DimensionsTooLarge # image / PDF over server limits
16
+ ├── PoolExhausted # "Server at capacity"
17
+ ├── PdfRenderError # PDF rasterization failed
18
+ └── ServerError # 5xx, no specific code
19
+ ```
20
+
21
+ Server-side exceptions carry `.code`, `.status_code`, and `.payload`. Transport
22
+ exceptions inherit from `APIConnectionError`.
23
+
24
+ ## Common failures
25
+
26
+ | Symptom | Cause | Fix |
27
+ |---|---|---|
28
+ | `NetworkError: Connection refused` | server not running | start the docker container |
29
+ | `DimensionsTooLarge` | image > `MAX_IMAGE_DIM` (default 16384) | downscale or raise the server limit |
30
+ | `LayoutDisabled` | server started with `DISABLE_LAYOUT=1` | restart without that env var |
31
+ | `UnicodeFontRequired` | non-Latin text, no Unicode font found | pass `font_path=` or set `TURBO_OCR_FONT` |
32
+ | `PoolExhausted` | server queue full | retry with backoff, or scale `PIPELINE_POOL_SIZE` |
33
+ | `Timeout` | per-request timeout hit | pass `timeout=N` or increase `RetryPolicy.attempts` |
34
+
35
+ ## Reference
36
+
37
+ ::: turboocr.TurboOcrError
38
+
39
+ ::: turboocr.APIConnectionError
40
+
41
+ ::: turboocr.NetworkError
42
+
43
+ ::: turboocr.Timeout
44
+
45
+ ::: turboocr.ProtocolError
46
+
47
+ ::: turboocr.InvalidParameter
48
+
49
+ ::: turboocr.EmptyBody
50
+
51
+ ::: turboocr.LayoutDisabled
52
+
53
+ ::: turboocr.ImageDecodeError
54
+
55
+ ::: turboocr.DimensionsTooLarge
56
+
57
+ ::: turboocr.PoolExhausted
58
+
59
+ ::: turboocr.PdfRenderError
60
+
61
+ ::: turboocr.ServerError
@@ -0,0 +1,22 @@
1
+ # gRPC clients
2
+
3
+ `GrpcClient` and `AsyncGrpcClient` mirror the HTTP clients over gRPC.
4
+ Requires `pip install 'turboocr[grpc]'`.
5
+
6
+ !!! warning "Two parity caveats"
7
+
8
+ - The gRPC proto's bool fields lack field presence (proto3 without
9
+ `optional`), so `None` is sent as `False`. Today the server defaults all
10
+ bool options to `False`, so behavior matches the HTTP client — if the
11
+ server ever flips a default, gRPC users would have to opt in explicitly.
12
+ - `GrpcClient.recognize_pdf(reading_order=True)` raises
13
+ `InvalidParameter` — the proto lacks the `reading_order` field. Use the
14
+ HTTP client for PDFs that need reading order.
15
+
16
+ ## `GrpcClient`
17
+
18
+ ::: turboocr.GrpcClient
19
+
20
+ ## `AsyncGrpcClient`
21
+
22
+ ::: turboocr.AsyncGrpcClient
@@ -0,0 +1,43 @@
1
+ # Layout, blocks, geometry
2
+
3
+ ## `Block`
4
+
5
+ Reading-order-grouped paragraph with bounding box, layout class, and content.
6
+ Emitted on the response when `include_blocks=True`.
7
+
8
+ ::: turboocr.Block
9
+
10
+ ## `TextItem`
11
+
12
+ A single recognised word/line with confidence and bounding box.
13
+
14
+ ::: turboocr.TextItem
15
+
16
+ ## `BoundingBox`
17
+
18
+ ::: turboocr.BoundingBox
19
+
20
+ ## `LayoutBox`
21
+
22
+ ::: turboocr.LayoutBox
23
+
24
+ ## `LayoutLabel`
25
+
26
+ ::: turboocr.LayoutLabel
27
+
28
+ ## `Table`
29
+
30
+ ::: turboocr.Table
31
+
32
+ ## `Formula`
33
+
34
+ ::: turboocr.Formula
35
+
36
+ !!! info "Tables and formulas — partial support today"
37
+
38
+ As of server v2.2.3, the server detects table and formula **regions** (you
39
+ get a `bounding_box` and row-major OCR'd `text`) but does **not** emit
40
+ cell structure or LaTeX source. `Table.html`, `Table.cells`, and
41
+ `Formula.latex` are always `None`. The SDK is forward-compatible: when the
42
+ server ships table-structure-recognition and LaTeX OCR, those fields will
43
+ populate without any SDK code changes.