transcribe-cpp 0.0.0__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {transcribe_cpp-0.0.0 → transcribe_cpp-0.0.3}/.gitignore +16 -0
- transcribe_cpp-0.0.3/PKG-INFO +123 -0
- transcribe_cpp-0.0.3/README.md +92 -0
- transcribe_cpp-0.0.3/_generate/README.md +38 -0
- transcribe_cpp-0.0.3/_generate/check_version_sync.py +249 -0
- transcribe_cpp-0.0.3/_generate/generate.py +534 -0
- transcribe_cpp-0.0.3/examples/stream_wav.py +95 -0
- transcribe_cpp-0.0.3/examples/transcribe_wav.py +91 -0
- transcribe_cpp-0.0.3/pyproject.toml +71 -0
- transcribe_cpp-0.0.3/src/transcribe_cpp/__init__.py +1295 -0
- transcribe_cpp-0.0.3/src/transcribe_cpp/_abi.py +77 -0
- transcribe_cpp-0.0.3/src/transcribe_cpp/_generated.py +389 -0
- transcribe_cpp-0.0.3/src/transcribe_cpp/_library.py +386 -0
- transcribe_cpp-0.0.3/src/transcribe_cpp/errors.py +151 -0
- transcribe_cpp-0.0.3/src/transcribe_cpp/py.typed +0 -0
- transcribe_cpp-0.0.3/tests/conftest.py +159 -0
- transcribe_cpp-0.0.3/tests/test_abi.py +85 -0
- transcribe_cpp-0.0.3/tests/test_backends.py +77 -0
- transcribe_cpp-0.0.3/tests/test_errors.py +104 -0
- transcribe_cpp-0.0.3/tests/test_example.py +130 -0
- transcribe_cpp-0.0.3/tests/test_family_ext.py +206 -0
- transcribe_cpp-0.0.3/tests/test_lifetime.py +204 -0
- transcribe_cpp-0.0.3/tests/test_pcm.py +150 -0
- transcribe_cpp-0.0.3/tests/test_provider_discovery.py +341 -0
- transcribe_cpp-0.0.3/tests/test_streaming.py +152 -0
- transcribe_cpp-0.0.3/tests/test_transcribe.py +247 -0
- transcribe_cpp-0.0.3/uv.lock +416 -0
- transcribe_cpp-0.0.0/PKG-INFO +0 -39
- transcribe_cpp-0.0.0/README.md +0 -19
- transcribe_cpp-0.0.0/pyproject.toml +0 -30
- transcribe_cpp-0.0.0/src/transcribe_cpp/__init__.py +0 -12
- {transcribe_cpp-0.0.0 → transcribe_cpp-0.0.3}/LICENSE +0 -0
|
@@ -3,6 +3,19 @@
|
|
|
3
3
|
/build-*/
|
|
4
4
|
/cmake-build-*/
|
|
5
5
|
|
|
6
|
+
# Rust workspace build output (Cargo.lock IS committed — workspace has a binary)
|
|
7
|
+
/target/
|
|
8
|
+
|
|
9
|
+
# Python distribution output (provider wheels/sdists at the repo root;
|
|
10
|
+
# bindings/python/dist for the pure API package; wheelhouse* from local
|
|
11
|
+
# cibuildwheel / wheel-repair runs)
|
|
12
|
+
/dist/
|
|
13
|
+
/bindings/python/dist/
|
|
14
|
+
/wheelhouse*/
|
|
15
|
+
|
|
16
|
+
# Canary GGUFs fetched by CI / local smoke runs
|
|
17
|
+
/canary/
|
|
18
|
+
|
|
6
19
|
# Scratch space for benchmarks, staging, etc.
|
|
7
20
|
/tmp/
|
|
8
21
|
|
|
@@ -68,3 +81,6 @@ WER_TESTING.md
|
|
|
68
81
|
|
|
69
82
|
# IDE / language-server artifacts
|
|
70
83
|
.cache/
|
|
84
|
+
|
|
85
|
+
# Local working notes (plans, drafts) — never committed
|
|
86
|
+
/notes/
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: transcribe-cpp
|
|
3
|
+
Version: 0.0.3
|
|
4
|
+
Summary: Python bindings for transcribe.cpp
|
|
5
|
+
Project-URL: Homepage, https://github.com/handy-computer/transcribe.cpp
|
|
6
|
+
Project-URL: Repository, https://github.com/handy-computer/transcribe.cpp
|
|
7
|
+
Project-URL: Issues, https://github.com/handy-computer/transcribe.cpp/issues
|
|
8
|
+
Author: The transcribe.cpp authors
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: asr,ggml,parakeet,speech-to-text,transcription,whisper
|
|
12
|
+
Classifier: Development Status :: 1 - Planning
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: transcribe-cpp-native==0.0.3.*
|
|
25
|
+
Provides-Extra: cu12
|
|
26
|
+
Requires-Dist: transcribe-cpp-native-cu12==0.0.3.*; extra == 'cu12'
|
|
27
|
+
Provides-Extra: test
|
|
28
|
+
Requires-Dist: numpy; extra == 'test'
|
|
29
|
+
Requires-Dist: pytest>=7; extra == 'test'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# transcribe-cpp
|
|
33
|
+
|
|
34
|
+
Python bindings for [transcribe.cpp](https://github.com/handy-computer/transcribe.cpp),
|
|
35
|
+
a C/C++ speech-to-text library built on ggml.
|
|
36
|
+
|
|
37
|
+
> **Status: in development.** Until wheels are published, use a locally built
|
|
38
|
+
> `libtranscribe` through repo auto-discovery or `TRANSCRIBE_LIBRARY`.
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import transcribe_cpp
|
|
42
|
+
|
|
43
|
+
with transcribe_cpp.Model("model.gguf") as model:
|
|
44
|
+
with model.session() as session:
|
|
45
|
+
result = session.run(pcm_float32_16k_mono)
|
|
46
|
+
print(result.text)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
`run()` takes mono 16 kHz float32 PCM (buffer-protocol object or sequence). It
|
|
50
|
+
does not decode containers or resample; convert audio before calling it.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import numpy as np
|
|
54
|
+
|
|
55
|
+
pcm = np.asarray(audio, dtype=np.float32) # 1-D, 16 kHz mono
|
|
56
|
+
# Downmix stereo first; 2-D input is rejected:
|
|
57
|
+
# pcm = audio.mean(axis=1).astype(np.float32)
|
|
58
|
+
result = session.run(pcm)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Streaming models expose incremental transcription with committed/tentative
|
|
62
|
+
text views — see `examples/stream_wav.py`:
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
with model.session() as session, session.stream() as stream:
|
|
66
|
+
for chunk in pcm_chunks:
|
|
67
|
+
stream.feed(chunk)
|
|
68
|
+
text = stream.text() # .committed (stable) + .tentative
|
|
69
|
+
stream.finalize()
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Long transcriptions can be cancelled from another thread with
|
|
73
|
+
`session.cancel()` — the run raises `Aborted` with the partial transcript on
|
|
74
|
+
`exc.partial_result` (same for `OutputTruncated`).
|
|
75
|
+
|
|
76
|
+
## Backends
|
|
77
|
+
|
|
78
|
+
`Model(backend=...)` picks the compute device (`"auto"` uses the best
|
|
79
|
+
available). `transcribe_cpp.backends()` lists registered backends and
|
|
80
|
+
`backend_available(kind)` checks one kind.
|
|
81
|
+
|
|
82
|
+
| Variable | Effect |
|
|
83
|
+
|---|---|
|
|
84
|
+
| `TRANSCRIBE_BACKEND` | overrides the `"auto"` default; explicit `backend=` still wins |
|
|
85
|
+
| `TRANSCRIBE_NATIVE_PROVIDER` | forces an installed native provider package, for example `cu12` |
|
|
86
|
+
| `TRANSCRIBE_LIBRARY` | loads exactly this shared library |
|
|
87
|
+
|
|
88
|
+
Planned wheels will bundle CPU plus platform accelerators;
|
|
89
|
+
`transcribe-cpp[cu12]` will add the CUDA 12 provider.
|
|
90
|
+
|
|
91
|
+
## Running from a working tree
|
|
92
|
+
|
|
93
|
+
The binding loads the native library at import and verifies its ABI layout and
|
|
94
|
+
version before use. Build a shared library, then run from the repo or point
|
|
95
|
+
`TRANSCRIBE_LIBRARY` at it:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
cmake -B build-shared -DTRANSCRIBE_BUILD_SHARED=ON
|
|
99
|
+
cmake --build build-shared --target transcribe
|
|
100
|
+
|
|
101
|
+
cd bindings/python
|
|
102
|
+
PYTHONPATH=src uv run --no-project python examples/transcribe_wav.py \
|
|
103
|
+
../../models/whisper-tiny.en/whisper-tiny.en-Q5_K_M.gguf ../../samples/jfk.wav
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
No-model tests always run; model tests skip unless smoke assets are present.
|
|
107
|
+
Override paths with `TRANSCRIBE_SMOKE_MODEL`, `TRANSCRIBE_SMOKE_AUDIO`, and
|
|
108
|
+
`TRANSCRIBE_SMOKE_STREAMING_MODEL`.
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
cd bindings/python
|
|
112
|
+
TRANSCRIBE_LIBRARY=../../build-shared/src/libtranscribe.dylib \
|
|
113
|
+
uv run --extra test pytest
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Notes
|
|
117
|
+
|
|
118
|
+
- One run/stream at a time per `Model` in 0.x: sessions share the model's
|
|
119
|
+
compute backend, so serialize runs across sessions (or load one model per
|
|
120
|
+
worker). See the `Model` docstring.
|
|
121
|
+
- Import package: `transcribe_cpp`
|
|
122
|
+
- Distribution: `transcribe-cpp`
|
|
123
|
+
- License: MIT
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# transcribe-cpp
|
|
2
|
+
|
|
3
|
+
Python bindings for [transcribe.cpp](https://github.com/handy-computer/transcribe.cpp),
|
|
4
|
+
a C/C++ speech-to-text library built on ggml.
|
|
5
|
+
|
|
6
|
+
> **Status: in development.** Until wheels are published, use a locally built
|
|
7
|
+
> `libtranscribe` through repo auto-discovery or `TRANSCRIBE_LIBRARY`.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
import transcribe_cpp
|
|
11
|
+
|
|
12
|
+
with transcribe_cpp.Model("model.gguf") as model:
|
|
13
|
+
with model.session() as session:
|
|
14
|
+
result = session.run(pcm_float32_16k_mono)
|
|
15
|
+
print(result.text)
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
`run()` takes mono 16 kHz float32 PCM (buffer-protocol object or sequence). It
|
|
19
|
+
does not decode containers or resample; convert audio before calling it.
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
pcm = np.asarray(audio, dtype=np.float32) # 1-D, 16 kHz mono
|
|
25
|
+
# Downmix stereo first; 2-D input is rejected:
|
|
26
|
+
# pcm = audio.mean(axis=1).astype(np.float32)
|
|
27
|
+
result = session.run(pcm)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Streaming models expose incremental transcription with committed/tentative
|
|
31
|
+
text views — see `examples/stream_wav.py`:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
with model.session() as session, session.stream() as stream:
|
|
35
|
+
for chunk in pcm_chunks:
|
|
36
|
+
stream.feed(chunk)
|
|
37
|
+
text = stream.text() # .committed (stable) + .tentative
|
|
38
|
+
stream.finalize()
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Long transcriptions can be cancelled from another thread with
|
|
42
|
+
`session.cancel()` — the run raises `Aborted` with the partial transcript on
|
|
43
|
+
`exc.partial_result` (same for `OutputTruncated`).
|
|
44
|
+
|
|
45
|
+
## Backends
|
|
46
|
+
|
|
47
|
+
`Model(backend=...)` picks the compute device (`"auto"` uses the best
|
|
48
|
+
available). `transcribe_cpp.backends()` lists registered backends and
|
|
49
|
+
`backend_available(kind)` checks one kind.
|
|
50
|
+
|
|
51
|
+
| Variable | Effect |
|
|
52
|
+
|---|---|
|
|
53
|
+
| `TRANSCRIBE_BACKEND` | overrides the `"auto"` default; explicit `backend=` still wins |
|
|
54
|
+
| `TRANSCRIBE_NATIVE_PROVIDER` | forces an installed native provider package, for example `cu12` |
|
|
55
|
+
| `TRANSCRIBE_LIBRARY` | loads exactly this shared library |
|
|
56
|
+
|
|
57
|
+
Planned wheels will bundle CPU plus platform accelerators;
|
|
58
|
+
`transcribe-cpp[cu12]` will add the CUDA 12 provider.
|
|
59
|
+
|
|
60
|
+
## Running from a working tree
|
|
61
|
+
|
|
62
|
+
The binding loads the native library at import and verifies its ABI layout and
|
|
63
|
+
version before use. Build a shared library, then run from the repo or point
|
|
64
|
+
`TRANSCRIBE_LIBRARY` at it:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
cmake -B build-shared -DTRANSCRIBE_BUILD_SHARED=ON
|
|
68
|
+
cmake --build build-shared --target transcribe
|
|
69
|
+
|
|
70
|
+
cd bindings/python
|
|
71
|
+
PYTHONPATH=src uv run --no-project python examples/transcribe_wav.py \
|
|
72
|
+
../../models/whisper-tiny.en/whisper-tiny.en-Q5_K_M.gguf ../../samples/jfk.wav
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
No-model tests always run; model tests skip unless smoke assets are present.
|
|
76
|
+
Override paths with `TRANSCRIBE_SMOKE_MODEL`, `TRANSCRIBE_SMOKE_AUDIO`, and
|
|
77
|
+
`TRANSCRIBE_SMOKE_STREAMING_MODEL`.
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
cd bindings/python
|
|
81
|
+
TRANSCRIBE_LIBRARY=../../build-shared/src/libtranscribe.dylib \
|
|
82
|
+
uv run --extra test pytest
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Notes
|
|
86
|
+
|
|
87
|
+
- One run/stream at a time per `Model` in 0.x: sessions share the model's
|
|
88
|
+
compute backend, so serialize runs across sessions (or load one model per
|
|
89
|
+
worker). See the `Model` docstring.
|
|
90
|
+
- Import package: `transcribe_cpp`
|
|
91
|
+
- Distribution: `transcribe-cpp`
|
|
92
|
+
- License: MIT
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# FFI generator
|
|
2
|
+
|
|
3
|
+
Generates `src/transcribe_cpp/_generated.py` — the low-level ctypes layer — from
|
|
4
|
+
`include/transcribe/extensions.h` using libclang. The generated module is
|
|
5
|
+
**committed**; it is never hand-edited.
|
|
6
|
+
|
|
7
|
+
## Regenerate
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
cd bindings/python
|
|
11
|
+
uv run --no-project --with 'libclang==18.1.1' _generate/generate.py
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Run this whenever the public C headers change. libclang is pinned so the output
|
|
15
|
+
is deterministic across machines; the freestanding headers (`stdbool.h`, …) come
|
|
16
|
+
from the host compiler's resource dir, discovered via `clang -print-resource-dir`
|
|
17
|
+
(macOS: `xcrun`).
|
|
18
|
+
|
|
19
|
+
## CI gate
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uv run --no-project --with 'libclang==18.1.1' _generate/generate.py --check
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Exits non-zero if the committed `_generated.py` is out of date. Because the
|
|
26
|
+
generator works from the parsed AST, the check is **semantic**: a comment- or
|
|
27
|
+
whitespace-only header edit produces no diff, while any real ABI change (a field,
|
|
28
|
+
type, enum value, or function signature) does — and then fails CI until the
|
|
29
|
+
binding is regenerated.
|
|
30
|
+
|
|
31
|
+
## What it emits
|
|
32
|
+
|
|
33
|
+
- ctypes `Structure` for every public struct, field-for-field.
|
|
34
|
+
- Enum values as module constants.
|
|
35
|
+
- `configure(lib)` — `restype`/`argtypes` for every public function.
|
|
36
|
+
- `ABI_STRUCT_IDS` and `STRUCT_LAYOUT` (sizes/aligns/offsets), used by
|
|
37
|
+
`_abi.verify_layouts()` to check the layer against itself and the loaded
|
|
38
|
+
native library at import.
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Fail if the library version drifts across every place it is duplicated.
|
|
3
|
+
|
|
4
|
+
The native library version is defined once, in
|
|
5
|
+
``include/transcribe.h`` (``TRANSCRIBE_VERSION_{MAJOR,MINOR,PATCH}``); CMake
|
|
6
|
+
parses it from there. Every binding repeats it — in package manifests, lockfiles,
|
|
7
|
+
the Python ``__version__``, the cross-package dependency pins, and the Swift
|
|
8
|
+
``compiledVersion`` literal. The import-time gate enforces base-version match
|
|
9
|
+
against the *loaded* library at runtime; this script is the static, build-time
|
|
10
|
+
counterpart so a forgotten bump fails CI before anything is published.
|
|
11
|
+
|
|
12
|
+
This covers every §1b spot in ``notes/releasing.md`` — including the ones that
|
|
13
|
+
used to be §1c blind spots: the ``transcribe-cpp-sys`` dependency *pin*, both
|
|
14
|
+
``Cargo.lock`` entries, both ``package-lock.json`` spots, and Swift
|
|
15
|
+
``compiledVersion``. (Lockfile *internal* consistency — a stale lock silently
|
|
16
|
+
rewritten by an unlocked command — is still the job of the locked-command
|
|
17
|
+
checks, ``cargo metadata --locked`` / ``npm ci``, run in release-preflight.)
|
|
18
|
+
|
|
19
|
+
Comparison is on the PEP 440 *release segment* (``MAJOR.MINOR.PATCH``): the
|
|
20
|
+
header is always a clean triple, while a package side may legitimately carry a
|
|
21
|
+
``.postN`` packaging suffix that must still be accepted.
|
|
22
|
+
|
|
23
|
+
uv run --no-project bindings/python/_generate/check_version_sync.py
|
|
24
|
+
|
|
25
|
+
Exit 0 when all agree on the base version; 1 on drift; 2 if a version could not
|
|
26
|
+
be located (treated as a hard error, not a pass).
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import json
|
|
32
|
+
import re
|
|
33
|
+
import sys
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
|
|
36
|
+
# Repository root: three directory levels above the directory holding this
# script (the script is invoked as bindings/python/_generate/check_version_sync.py).
REPO = Path(__file__).resolve().parents[3]
# Header whose TRANSCRIBE_VERSION_* macros are read by header_version().
HEADER = REPO / "include" / "transcribe.h"
# Python package manifest: supplies the project version and the native pins.
PYPROJECT = REPO / "bindings" / "python" / "pyproject.toml"
# Python package module carrying the __version__ literal.
INIT = REPO / "bindings" / "python" / "src" / "transcribe_cpp" / "__init__.py"
# TypeScript API package manifest (scanned for optionalDependencies pins).
TS_PACKAGE_JSON = REPO / "bindings" / "typescript" / "package.json"
# Safe Rust wrapper manifest (scanned for its transcribe-cpp-sys dependency pin).
RUST_SAFE_CARGO = REPO / "bindings" / "rust" / "transcribe-cpp" / "Cargo.toml"
# Lockfiles whose recorded package versions join the comparison.
CARGO_LOCK = REPO / "Cargo.lock"
PACKAGE_LOCK = REPO / "bindings" / "typescript" / "package-lock.json"
# Swift source containing the compiledVersion literal.
SWIFT_SOURCE = REPO / "bindings" / "swift" / "Sources" / "TranscribeCpp" / "TranscribeCpp.swift"
|
|
45
|
+
|
|
46
|
+
# Binding package manifests (requirements doc §2: every manifest is derived
|
|
47
|
+
# from or gated against the header). Gated by the `active` flag: a 0.0.0
|
|
48
|
+
# name-reservation placeholder is NOT version-locked — flip its entry to True
|
|
49
|
+
# in the PR that lands the real binding. Inactive manifests are still parsed
|
|
50
|
+
# (file must exist and carry a readable version) so the mechanism itself
|
|
51
|
+
# stays exercised. Package.swift has no entry: SwiftPM versions via git tags,
|
|
52
|
+
# so its gate is the tag itself (release-workflow concern, not this script).
|
|
53
|
+
BINDING_MANIFESTS = [
    # (relative path, extractor name, active)
    # The extractor name is looked up in _BINDING_EXTRACTORS by main(); the
    # relative path is resolved against REPO.
    # The Rust crates are real (0.0.1), so they're version-locked. The sys
    # crate's manifest is the repo-root Cargo.toml (it carries the whole C++
    # tree); the safe wrapper is the sibling member at
    # bindings/rust/transcribe-cpp/.
    ("Cargo.toml", "cargo", True),
    ("bindings/rust/transcribe-cpp/Cargo.toml", "cargo", True),
    ("bindings/typescript/package.json", "npm", True),
]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def base_version(version: str) -> str:
    """Return the leading dotted-numeric release segment of *version*.

    Surrounding whitespace is ignored and anything after the numeric
    segment (for example ``.post1``) is dropped. If the stripped input does
    not begin with a digit, it is returned unchanged.
    """
    cleaned = version.strip()
    release = re.match(r"\d+(?:\.\d+)*", cleaned)
    if release is None:
        return cleaned
    return release.group(0)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def header_version(text: str) -> str | None:
|
|
72
|
+
parts = []
|
|
73
|
+
for component in ("MAJOR", "MINOR", "PATCH"):
|
|
74
|
+
m = re.search(rf"define\s+TRANSCRIBE_VERSION_{component}\s+(\d+)", text)
|
|
75
|
+
if not m:
|
|
76
|
+
return None
|
|
77
|
+
parts.append(m.group(1))
|
|
78
|
+
return ".".join(parts)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def pyproject_version(text: str) -> str | None:
|
|
82
|
+
# project.version is a top-level string in [project]; match it directly
|
|
83
|
+
# rather than pulling in a TOML parser (tomllib is 3.11+).
|
|
84
|
+
m = re.search(r'(?m)^\s*version\s*=\s*"([^"]+)"', text)
|
|
85
|
+
return m.group(1) if m else None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def init_version(text: str) -> str | None:
|
|
89
|
+
m = re.search(r'(?m)^__version__\s*=\s*"([^"]+)"', text)
|
|
90
|
+
return m.group(1) if m else None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def cargo_version(text: str) -> str | None:
|
|
94
|
+
# First `version = "..."` in the file: [package] leads a Cargo.toml by
|
|
95
|
+
# convention, and dependency tables spell it `name = { version = ... }`.
|
|
96
|
+
m = re.search(r'(?m)^version\s*=\s*"([^"]+)"', text)
|
|
97
|
+
return m.group(1) if m else None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def npm_version(text: str) -> str | None:
|
|
101
|
+
m = re.search(r'"version"\s*:\s*"([^"]+)"', text)
|
|
102
|
+
return m.group(1) if m else None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Maps the extractor name used in BINDING_MANIFESTS entries to the function
# that pulls a version string out of that manifest's text.
_BINDING_EXTRACTORS = {"cargo": cargo_version, "npm": npm_version}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def native_pin_versions(text: str) -> "dict[str, str | None]":
    """Collect the pinned base version of every native-provider requirement.

    Matches double-quoted requirements of the form
    ``transcribe-cpp-native[-suffix]==X.Y.Z.*`` and returns a mapping from a
    ``pyproject.toml (<package> pin)`` label to ``X.Y.Z``. When no such pin is
    present, a single ``pyproject.toml (native pin)`` entry with value None is
    returned so the caller can report it as missing.
    """
    pin_pattern = r'"(transcribe-cpp-native(?:-[a-z0-9]+)*)\s*==\s*([0-9.]+?)\.\*"'
    found = re.findall(pin_pattern, text)
    if found:
        labelled = {}
        for name, version in found:
            labelled[f"pyproject.toml ({name} pin)"] = version
        return labelled
    return {"pyproject.toml (native pin)": None}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def npm_optional_pins(text: str) -> "dict[str, str | None]":
    """Collect the ``@transcribe-cpp/*`` pins from ``optionalDependencies``.

    The ``optionalDependencies`` object is located textually and each
    ``"@transcribe-cpp/<name>": "<version>"`` pair inside it is returned under
    a ``package.json (<name> pin)`` label. If the object is absent or holds
    no such pair, a single ``package.json (optionalDependencies)`` entry with
    value None is returned so the caller can report it as missing.
    """
    section = re.search(r'"optionalDependencies"\s*:\s*\{([^}]*)\}', text, re.S)
    if section is None:
        entries = []
    else:
        entries = re.findall(
            r'"(@transcribe-cpp/[^"]+)"\s*:\s*"([^"]+)"', section.group(1)
        )
    if entries:
        return {f"package.json ({name} pin)": version for name, version in entries}
    return {"package.json (optionalDependencies)": None}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def cargo_sys_pin(text: str) -> str | None:
|
|
137
|
+
# The safe crate's dependency *pin* on the sys crate (a different field from
|
|
138
|
+
# its own [package].version, which cargo_version() returns):
|
|
139
|
+
# transcribe-cpp-sys = { version = "X.Y.Z", path = "../../..", ... }
|
|
140
|
+
m = re.search(
|
|
141
|
+
r'transcribe-cpp-sys\s*=\s*\{[^}]*?\bversion\s*=\s*"([^"]+)"', text
|
|
142
|
+
)
|
|
143
|
+
return m.group(1) if m else None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def cargo_lock_versions(text: str) -> "dict[str, str | None]":
    """Return the locked versions of the two workspace crates.

    Each crate is found by a ``name = "<crate>"`` line immediately followed
    by a ``version = "..."`` line. The closing quote in the name match keeps
    "transcribe-cpp" from also matching "transcribe-cpp-sys". A crate that
    is not found maps to None.
    """

    def locked(name: str) -> "str | None":
        hit = re.search(rf'name = "{re.escape(name)}"\nversion = "([^"]+)"', text)
        return None if hit is None else hit.group(1)

    return {
        f"Cargo.lock ({name})": locked(name)
        for name in ("transcribe-cpp", "transcribe-cpp-sys")
    }
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def package_lock_versions(text: str) -> "dict[str, str | None]":
    """Return the two root-version spots recorded in a package-lock.json.

    The spots are the top-level ``version`` and ``packages[""].version`` (the
    root package's own node). The result always has exactly the keys
    ``package-lock.json (root)`` and ``package-lock.json (packages[""])``.

    A spot maps to None when it cannot be read as a string: the text is not
    valid JSON, the document or an intermediate node is not an object, the
    key is absent, or the value is not a string. Returning None (rather than
    raising or passing a non-string through) lets the caller report the spot
    as unlocatable.
    """
    root_label = "package-lock.json (root)"
    node_label = 'package-lock.json (packages[""])'
    try:
        data = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        data = None
    if not isinstance(data, dict):
        return {root_label: None, node_label: None}

    packages = data.get("packages")
    root_node = packages.get("") if isinstance(packages, dict) else None
    node_version = root_node.get("version") if isinstance(root_node, dict) else None
    root_version = data.get("version")
    return {
        root_label: root_version if isinstance(root_version, str) else None,
        node_label: node_version if isinstance(node_version, str) else None,
    }
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def swift_compiled_version(text: str) -> str | None:
|
|
172
|
+
# The hand-maintained Swift literal `compiledVersion = "X.Y.Z"` that the
|
|
173
|
+
# SwiftPM load gate (Transcribe.ensureCompatible) compares against the
|
|
174
|
+
# linked library. (The Swift ABI pin is checked separately by
|
|
175
|
+
# swift_abihash_check.py against include/transcribe.abihash.)
|
|
176
|
+
m = re.search(r'compiledVersion\s*=\s*"([^"]+)"', text)
|
|
177
|
+
return m.group(1) if m else None
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _read_utf8(path: Path) -> "str | None":
    """Return the UTF-8 text of *path*, or None when the file does not exist."""
    try:
        return path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return None


def main() -> int:
    """Compare every duplicated version against the others and report.

    Returns the process exit status: 0 when all sources agree on the base
    version, 1 when they drift, and 2 when any version could not be located
    (including when the file that should carry it is missing).

    Every file is read as UTF-8 so the result does not depend on the locale's
    default encoding. A missing file is recorded as an unlocatable version
    instead of raising, so it is reported with exit status 2 rather than
    ending in a traceback.
    """
    header_text = _read_utf8(HEADER)
    pyproject_text = _read_utf8(PYPROJECT)
    init_text = _read_utf8(INIT)
    sources = {
        "include/transcribe.h": (
            header_version(header_text) if header_text is not None else None
        ),
        "pyproject.toml": (
            pyproject_version(pyproject_text) if pyproject_text is not None else None
        ),
        "__init__.__version__": (
            init_version(init_text) if init_text is not None else None
        ),
    }
    # With no pyproject text, this yields the single "native pin" -> None entry.
    sources.update(native_pin_versions(pyproject_text or ""))
    ts_text = _read_utf8(TS_PACKAGE_JSON)
    if ts_text is not None:
        sources.update(npm_optional_pins(ts_text))

    # Formerly §1c blind spots — now part of the equality set (releasing.md §8
    # P0 #2 slice B). Each file must exist; a missing one is a hard error below.
    rust_safe_text = _read_utf8(RUST_SAFE_CARGO)
    sources["Cargo.toml (sys dep pin)"] = (
        cargo_sys_pin(rust_safe_text) if rust_safe_text is not None else None
    )
    cargo_lock_text = _read_utf8(CARGO_LOCK)
    if cargo_lock_text is not None:
        sources.update(cargo_lock_versions(cargo_lock_text))
    else:
        sources["Cargo.lock"] = None
    package_lock_text = _read_utf8(PACKAGE_LOCK)
    if package_lock_text is not None:
        sources.update(package_lock_versions(package_lock_text))
    else:
        sources["package-lock.json"] = None
    swift_text = _read_utf8(SWIFT_SOURCE)
    sources["TranscribeCpp.swift (compiledVersion)"] = (
        swift_compiled_version(swift_text) if swift_text is not None else None
    )

    # Binding manifests: active ones join the equality set; inactive ones
    # must merely exist and parse (placeholder versions are reported, not
    # compared).
    inactive: dict[str, str] = {}
    for rel, kind, active in BINDING_MANIFESTS:
        manifest_text = _read_utf8(REPO / rel)
        version = (
            _BINDING_EXTRACTORS[kind](manifest_text)
            if manifest_text is not None
            else None
        )
        if active:
            sources[rel] = version
        elif version is None:
            sources[rel] = None  # missing/unparseable is an error either way
        else:
            inactive[rel] = version
    if inactive:
        detail = ", ".join(f"{name}={v}" for name, v in inactive.items())
        print(f"inactive binding manifests (parsed, not compared): {detail}")

    missing = [name for name, v in sources.items() if v is None]
    if missing:
        for name in missing:
            print(f"error: could not locate the version in {name}", file=sys.stderr)
        return 2

    # Compare release segments only, so a packaging suffix does not count as drift.
    bases = {name: base_version(v) for name, v in sources.items()}  # type: ignore[arg-type]
    distinct = set(bases.values())
    if len(distinct) != 1:
        print("version drift across sources (base MAJOR.MINOR.PATCH must agree):",
              file=sys.stderr)
        for name, v in sources.items():
            print(f"  {name}: {v} (base {bases[name]})", file=sys.stderr)
        return 1

    base = distinct.pop()
    detail = ", ".join(f"{name}={v}" for name, v in sources.items())
    print(f"version sync ok: base {base} ({detail})")
    return 0
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|