transcribe-cpp 0.0.0__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {transcribe_cpp-0.0.0 → transcribe_cpp-0.0.4}/.gitignore +16 -0
  2. transcribe_cpp-0.0.4/PKG-INFO +123 -0
  3. transcribe_cpp-0.0.4/README.md +92 -0
  4. transcribe_cpp-0.0.4/_generate/README.md +38 -0
  5. transcribe_cpp-0.0.4/_generate/check_version_sync.py +249 -0
  6. transcribe_cpp-0.0.4/_generate/generate.py +534 -0
  7. transcribe_cpp-0.0.4/examples/stream_wav.py +95 -0
  8. transcribe_cpp-0.0.4/examples/transcribe_wav.py +91 -0
  9. transcribe_cpp-0.0.4/pyproject.toml +71 -0
  10. transcribe_cpp-0.0.4/src/transcribe_cpp/__init__.py +1367 -0
  11. transcribe_cpp-0.0.4/src/transcribe_cpp/_abi.py +77 -0
  12. transcribe_cpp-0.0.4/src/transcribe_cpp/_generated.py +395 -0
  13. transcribe_cpp-0.0.4/src/transcribe_cpp/_library.py +386 -0
  14. transcribe_cpp-0.0.4/src/transcribe_cpp/errors.py +151 -0
  15. transcribe_cpp-0.0.4/src/transcribe_cpp/py.typed +0 -0
  16. transcribe_cpp-0.0.4/tests/conftest.py +159 -0
  17. transcribe_cpp-0.0.4/tests/test_abi.py +85 -0
  18. transcribe_cpp-0.0.4/tests/test_backends.py +98 -0
  19. transcribe_cpp-0.0.4/tests/test_device_select.py +55 -0
  20. transcribe_cpp-0.0.4/tests/test_errors.py +104 -0
  21. transcribe_cpp-0.0.4/tests/test_example.py +130 -0
  22. transcribe_cpp-0.0.4/tests/test_family_ext.py +206 -0
  23. transcribe_cpp-0.0.4/tests/test_lifetime.py +204 -0
  24. transcribe_cpp-0.0.4/tests/test_pcm.py +150 -0
  25. transcribe_cpp-0.0.4/tests/test_provider_discovery.py +341 -0
  26. transcribe_cpp-0.0.4/tests/test_streaming.py +152 -0
  27. transcribe_cpp-0.0.4/tests/test_transcribe.py +247 -0
  28. transcribe_cpp-0.0.4/uv.lock +416 -0
  29. transcribe_cpp-0.0.0/PKG-INFO +0 -39
  30. transcribe_cpp-0.0.0/README.md +0 -19
  31. transcribe_cpp-0.0.0/pyproject.toml +0 -30
  32. transcribe_cpp-0.0.0/src/transcribe_cpp/__init__.py +0 -12
  33. {transcribe_cpp-0.0.0 → transcribe_cpp-0.0.4}/LICENSE +0 -0
@@ -3,6 +3,19 @@
3
3
  /build-*/
4
4
  /cmake-build-*/
5
5
 
6
+ # Rust workspace build output (Cargo.lock IS committed — workspace has a binary)
7
+ /target/
8
+
9
+ # Python distribution output (provider wheels/sdists at the repo root;
10
+ # bindings/python/dist for the pure API package; wheelhouse* from local
11
+ # cibuildwheel / wheel-repair runs)
12
+ /dist/
13
+ /bindings/python/dist/
14
+ /wheelhouse*/
15
+
16
+ # Canary GGUFs fetched by CI / local smoke runs
17
+ /canary/
18
+
6
19
  # Scratch space for benchmarks, staging, etc.
7
20
  /tmp/
8
21
 
@@ -68,3 +81,6 @@ WER_TESTING.md
68
81
 
69
82
  # IDE / language-server artifacts
70
83
  .cache/
84
+
85
+ # Local working notes (plans, drafts) — never committed
86
+ /notes/
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: transcribe-cpp
3
+ Version: 0.0.4
4
+ Summary: Python bindings for transcribe.cpp
5
+ Project-URL: Homepage, https://github.com/handy-computer/transcribe.cpp
6
+ Project-URL: Repository, https://github.com/handy-computer/transcribe.cpp
7
+ Project-URL: Issues, https://github.com/handy-computer/transcribe.cpp/issues
8
+ Author: The transcribe.cpp authors
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: asr,ggml,parakeet,speech-to-text,transcription,whisper
12
+ Classifier: Development Status :: 1 - Planning
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: transcribe-cpp-native==0.0.4.*
25
+ Provides-Extra: cu12
26
+ Requires-Dist: transcribe-cpp-native-cu12==0.0.4.*; extra == 'cu12'
27
+ Provides-Extra: test
28
+ Requires-Dist: numpy; extra == 'test'
29
+ Requires-Dist: pytest>=7; extra == 'test'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # transcribe-cpp
33
+
34
+ Python bindings for [transcribe.cpp](https://github.com/handy-computer/transcribe.cpp),
35
+ a C/C++ speech-to-text library built on ggml.
36
+
37
+ > **Status: in development.** Until wheels are published, use a locally built
38
+ > `libtranscribe` through repo auto-discovery or `TRANSCRIBE_LIBRARY`.
39
+
40
+ ```python
41
+ import transcribe_cpp
42
+
43
+ with transcribe_cpp.Model("model.gguf") as model:
44
+ with model.session() as session:
45
+ result = session.run(pcm_float32_16k_mono)
46
+ print(result.text)
47
+ ```
48
+
49
+ `run()` takes mono 16 kHz float32 PCM (buffer-protocol object or sequence). It
50
+ does not decode containers or resample; convert audio before calling it.
51
+
52
+ ```python
53
+ import numpy as np
54
+
55
+ pcm = np.asarray(audio, dtype=np.float32) # 1-D, 16 kHz mono
56
+ # Downmix stereo first; 2-D input is rejected:
57
+ # pcm = audio.mean(axis=1).astype(np.float32)
58
+ result = session.run(pcm)
59
+ ```
60
+
61
+ Streaming models expose incremental transcription with committed/tentative
62
+ text views — see `examples/stream_wav.py`:
63
+
64
+ ```python
65
+ with model.session() as session, session.stream() as stream:
66
+ for chunk in pcm_chunks:
67
+ stream.feed(chunk)
68
+ text = stream.text() # .committed (stable) + .tentative
69
+ stream.finalize()
70
+ ```
71
+
72
+ Long transcriptions can be cancelled from another thread with
73
+ `session.cancel()` — the run raises `Aborted` with the partial transcript on
74
+ `exc.partial_result` (same for `OutputTruncated`).
75
+
76
+ ## Backends
77
+
78
+ `Model(backend=...)` picks the compute device (`"auto"` uses the best
79
+ available). `transcribe_cpp.backends()` lists registered backends and
80
+ `backend_available(kind)` checks one kind.
81
+
82
+ | Variable | Effect |
83
+ |---|---|
84
+ | `TRANSCRIBE_BACKEND` | overrides the `"auto"` default; explicit `backend=` still wins |
85
+ | `TRANSCRIBE_NATIVE_PROVIDER` | forces an installed native provider package, for example `cu12` |
86
+ | `TRANSCRIBE_LIBRARY` | loads exactly this shared library |
87
+
88
+ Planned wheels will bundle CPU plus platform accelerators;
89
+ `transcribe-cpp[cu12]` will add the CUDA 12 provider.
90
+
91
+ ## Running from a working tree
92
+
93
+ The binding loads the native library at import and verifies its ABI layout and
94
+ version before use. Build a shared library, then run from the repo or point
95
+ `TRANSCRIBE_LIBRARY` at it:
96
+
97
+ ```bash
98
+ cmake -B build-shared -DTRANSCRIBE_BUILD_SHARED=ON
99
+ cmake --build build-shared --target transcribe
100
+
101
+ cd bindings/python
102
+ PYTHONPATH=src uv run --no-project python examples/transcribe_wav.py \
103
+ ../../models/whisper-tiny.en/whisper-tiny.en-Q5_K_M.gguf ../../samples/jfk.wav
104
+ ```
105
+
106
+ No-model tests always run; model tests skip unless smoke assets are present.
107
+ Override paths with `TRANSCRIBE_SMOKE_MODEL`, `TRANSCRIBE_SMOKE_AUDIO`, and
108
+ `TRANSCRIBE_SMOKE_STREAMING_MODEL`.
109
+
110
+ ```bash
111
+ cd bindings/python
112
+ TRANSCRIBE_LIBRARY=../../build-shared/src/libtranscribe.dylib \
113
+ uv run --extra test pytest
114
+ ```
115
+
116
+ ## Notes
117
+
118
+ - One run/stream at a time per `Model` in 0.x: sessions share the model's
119
+ compute backend, so serialize runs across sessions (or load one model per
120
+ worker). See the `Model` docstring.
121
+ - Import package: `transcribe_cpp`
122
+ - Distribution: `transcribe-cpp`
123
+ - License: MIT
@@ -0,0 +1,92 @@
1
+ # transcribe-cpp
2
+
3
+ Python bindings for [transcribe.cpp](https://github.com/handy-computer/transcribe.cpp),
4
+ a C/C++ speech-to-text library built on ggml.
5
+
6
+ > **Status: in development.** Until wheels are published, use a locally built
7
+ > `libtranscribe` through repo auto-discovery or `TRANSCRIBE_LIBRARY`.
8
+
9
+ ```python
10
+ import transcribe_cpp
11
+
12
+ with transcribe_cpp.Model("model.gguf") as model:
13
+ with model.session() as session:
14
+ result = session.run(pcm_float32_16k_mono)
15
+ print(result.text)
16
+ ```
17
+
18
+ `run()` takes mono 16 kHz float32 PCM (buffer-protocol object or sequence). It
19
+ does not decode containers or resample; convert audio before calling it.
20
+
21
+ ```python
22
+ import numpy as np
23
+
24
+ pcm = np.asarray(audio, dtype=np.float32) # 1-D, 16 kHz mono
25
+ # Downmix stereo first; 2-D input is rejected:
26
+ # pcm = audio.mean(axis=1).astype(np.float32)
27
+ result = session.run(pcm)
28
+ ```
29
+
30
+ Streaming models expose incremental transcription with committed/tentative
31
+ text views — see `examples/stream_wav.py`:
32
+
33
+ ```python
34
+ with model.session() as session, session.stream() as stream:
35
+ for chunk in pcm_chunks:
36
+ stream.feed(chunk)
37
+ text = stream.text() # .committed (stable) + .tentative
38
+ stream.finalize()
39
+ ```
40
+
41
+ Long transcriptions can be cancelled from another thread with
42
+ `session.cancel()` — the run raises `Aborted` with the partial transcript on
43
+ `exc.partial_result` (same for `OutputTruncated`).
44
+
45
+ ## Backends
46
+
47
+ `Model(backend=...)` picks the compute device (`"auto"` uses the best
48
+ available). `transcribe_cpp.backends()` lists registered backends and
49
+ `backend_available(kind)` checks one kind.
50
+
51
+ | Variable | Effect |
52
+ |---|---|
53
+ | `TRANSCRIBE_BACKEND` | overrides the `"auto"` default; explicit `backend=` still wins |
54
+ | `TRANSCRIBE_NATIVE_PROVIDER` | forces an installed native provider package, for example `cu12` |
55
+ | `TRANSCRIBE_LIBRARY` | loads exactly this shared library |
56
+
57
+ Planned wheels will bundle CPU plus platform accelerators;
58
+ `transcribe-cpp[cu12]` will add the CUDA 12 provider.
59
+
60
+ ## Running from a working tree
61
+
62
+ The binding loads the native library at import and verifies its ABI layout and
63
+ version before use. Build a shared library, then run from the repo or point
64
+ `TRANSCRIBE_LIBRARY` at it:
65
+
66
+ ```bash
67
+ cmake -B build-shared -DTRANSCRIBE_BUILD_SHARED=ON
68
+ cmake --build build-shared --target transcribe
69
+
70
+ cd bindings/python
71
+ PYTHONPATH=src uv run --no-project python examples/transcribe_wav.py \
72
+ ../../models/whisper-tiny.en/whisper-tiny.en-Q5_K_M.gguf ../../samples/jfk.wav
73
+ ```
74
+
75
+ No-model tests always run; model tests skip unless smoke assets are present.
76
+ Override paths with `TRANSCRIBE_SMOKE_MODEL`, `TRANSCRIBE_SMOKE_AUDIO`, and
77
+ `TRANSCRIBE_SMOKE_STREAMING_MODEL`.
78
+
79
+ ```bash
80
+ cd bindings/python
81
+ TRANSCRIBE_LIBRARY=../../build-shared/src/libtranscribe.dylib \
82
+ uv run --extra test pytest
83
+ ```
84
+
85
+ ## Notes
86
+
87
+ - One run/stream at a time per `Model` in 0.x: sessions share the model's
88
+ compute backend, so serialize runs across sessions (or load one model per
89
+ worker). See the `Model` docstring.
90
+ - Import package: `transcribe_cpp`
91
+ - Distribution: `transcribe-cpp`
92
+ - License: MIT
@@ -0,0 +1,38 @@
1
+ # FFI generator
2
+
3
+ Generates `src/transcribe_cpp/_generated.py` — the low-level ctypes layer — from
4
+ `include/transcribe/extensions.h` using libclang. The generated module is
5
+ **committed**; it is never hand-edited.
6
+
7
+ ## Regenerate
8
+
9
+ ```bash
10
+ cd bindings/python
11
+ uv run --no-project --with 'libclang==18.1.1' _generate/generate.py
12
+ ```
13
+
14
+ Run this whenever the public C headers change. libclang is pinned so the output
15
+ is deterministic across machines; the freestanding headers (`stdbool.h`, …) come
16
+ from the host compiler's resource dir, discovered via `clang -print-resource-dir`
17
+ (macOS: `xcrun`).
18
+
19
+ ## CI gate
20
+
21
+ ```bash
22
+ uv run --no-project --with 'libclang==18.1.1' _generate/generate.py --check
23
+ ```
24
+
25
+ Exits non-zero if the committed `_generated.py` is out of date. Because the
26
+ generator works from the parsed AST, the check is **semantic**: a comment- or
27
+ whitespace-only header edit produces no diff, while any real ABI change (a field,
28
+ type, enum value, or function signature) does — and then fails CI until the
29
+ binding is regenerated.
30
+
31
+ ## What it emits
32
+
33
+ - ctypes `Structure` for every public struct, field-for-field.
34
+ - Enum values as module constants.
35
+ - `configure(lib)` — `restype`/`argtypes` for every public function.
36
+ - `ABI_STRUCT_IDS` and `STRUCT_LAYOUT` (sizes/aligns/offsets), used by
37
+ `_abi.verify_layouts()` to check the layer against itself and the loaded
38
+ native library at import.
@@ -0,0 +1,249 @@
1
+ #!/usr/bin/env python3
2
+ """Fail if the library version differs between any of the places it is duplicated.
3
+
4
+ The native library version is defined once, in
5
+ ``include/transcribe.h`` (``TRANSCRIBE_VERSION_{MAJOR,MINOR,PATCH}``); CMake
6
+ parses it from there. Every binding repeats it — in package manifests, lockfiles,
7
+ the Python ``__version__``, the cross-package dependency pins, and the Swift
8
+ ``compiledVersion`` literal. The import-time gate enforces base-version match
9
+ against the *loaded* library at runtime; this script is the static, build-time
10
+ counterpart so a forgotten bump fails CI before anything is published.
11
+
12
+ This covers every §1b spot in ``notes/releasing.md`` — including the ones that
13
+ used to be §1c blind spots: the ``transcribe-cpp-sys`` dependency *pin*, both
14
+ ``Cargo.lock`` entries, both ``package-lock.json`` spots, and Swift
15
+ ``compiledVersion``. (Lockfile *internal* consistency — a stale lock silently
16
+ rewritten by an unlocked command — is still the job of the locked-command
17
+ checks, ``cargo metadata --locked`` / ``npm ci``, run in release-preflight.)
18
+
19
+ Comparison is on the PEP 440 *release segment* (``MAJOR.MINOR.PATCH``): the
20
+ header is always a clean triple, while a package side may legitimately carry a
21
+ ``.postN`` packaging suffix that must still be accepted.
22
+
23
+ uv run --no-project bindings/python/_generate/check_version_sync.py
24
+
25
+ Exit 0 when all agree on the base version; 1 on drift; 2 if a version could not
26
+ be located (treated as a hard error, not a pass).
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import json
32
+ import re
33
+ import sys
34
+ from pathlib import Path
35
+
36
# Repository root. The script is invoked as
# bindings/python/_generate/check_version_sync.py, so the root is three levels
# above the directory that contains this file.
REPO = Path(__file__).resolve().parents[3]
# Single source of truth: the TRANSCRIBE_VERSION_{MAJOR,MINOR,PATCH} defines.
HEADER = REPO / "include" / "transcribe.h"
# Python binding: package version, native-provider pins, and __version__.
PYPROJECT = REPO / "bindings" / "python" / "pyproject.toml"
INIT = REPO / "bindings" / "python" / "src" / "transcribe_cpp" / "__init__.py"
# TypeScript binding manifest (also scanned for optionalDependencies pins).
TS_PACKAGE_JSON = REPO / "bindings" / "typescript" / "package.json"
# Safe Rust wrapper manifest; scanned for its dependency pin on the sys crate.
RUST_SAFE_CARGO = REPO / "bindings" / "rust" / "transcribe-cpp" / "Cargo.toml"
# Lockfiles that repeat the workspace / root package versions.
CARGO_LOCK = REPO / "Cargo.lock"
PACKAGE_LOCK = REPO / "bindings" / "typescript" / "package-lock.json"
# Swift source holding the hand-maintained `compiledVersion` literal.
SWIFT_SOURCE = REPO / "bindings" / "swift" / "Sources" / "TranscribeCpp" / "TranscribeCpp.swift"
45
+
46
# Binding package manifests (requirements doc §2: every manifest is derived
# from or gated against the header). Gated by the `active` flag: a 0.0.0
# name-reservation placeholder is NOT version-locked — flip its entry to True
# in the PR that lands the real binding. Inactive manifests are still parsed
# (file must exist and carry a readable version) so the mechanism itself
# stays exercised. Package.swift has no entry: SwiftPM versions via git tags,
# so its gate is the tag itself (release-workflow concern, not this script).
#
# Each entry is a 3-tuple:
#   - path relative to REPO (also used as the label in reports),
#   - key into _BINDING_EXTRACTORS selecting the version parser,
#   - whether the manifest joins the equality check in main().
BINDING_MANIFESTS = [
    # (relative path, extractor name, active)
    # The Rust crates are real (0.0.1), so they're version-locked. The sys
    # crate's manifest is the repo-root Cargo.toml (it carries the whole C++
    # tree); the safe wrapper is the sibling member at
    # bindings/rust/transcribe-cpp/.
    ("Cargo.toml", "cargo", True),
    ("bindings/rust/transcribe-cpp/Cargo.toml", "cargo", True),
    ("bindings/typescript/package.json", "npm", True),
]
63
+
64
+
65
def base_version(version: str) -> str:
    """Return the leading dotted-numeric run of *version*.

    Whatever follows that run (``.postN``, ``-rc1``, ``.*`` ...) is dropped.
    Input that does not start with a digit is returned whitespace-stripped
    but otherwise unchanged.
    """
    cleaned = version.strip()
    leading = re.match(r"\d+(?:\.\d+)*", cleaned)
    if leading is None:
        return cleaned
    return leading.group(0)
69
+
70
+
71
def header_version(text: str) -> str | None:
    """Assemble ``MAJOR.MINOR.PATCH`` from the header's version defines.

    Looks for ``define TRANSCRIBE_VERSION_<COMPONENT> <digits>`` for each of
    the three components in *text*. Returns the dotted triple, or ``None``
    when any one of the defines is absent.
    """
    found = [
        re.search(rf"define\s+TRANSCRIBE_VERSION_{component}\s+(\d+)", text)
        for component in ("MAJOR", "MINOR", "PATCH")
    ]
    if any(hit is None for hit in found):
        return None
    return ".".join(hit.group(1) for hit in found)
79
+
80
+
81
def pyproject_version(text: str) -> str | None:
    """Return the first ``version = "..."`` assignment found in *text*.

    The line may be indented. A regex is used instead of a TOML parser
    because ``tomllib`` only exists on Python 3.11+. Returns ``None`` when
    no such line is present.
    """
    version_line = r'(?m)^\s*version\s*=\s*"([^"]+)"'
    hit = re.search(version_line, text)
    if hit is None:
        return None
    return hit.group(1)
86
+
87
+
88
def init_version(text: str) -> str | None:
    """Return the value of a column-0 ``__version__ = "..."`` assignment.

    Returns ``None`` when *text* has no such assignment at the start of a
    line.
    """
    hit = re.search(r'(?m)^__version__\s*=\s*"([^"]+)"', text)
    if hit is None:
        return None
    return hit.group(1)
91
+
92
+
93
def cargo_version(text: str) -> str | None:
    """Return the first column-0 ``version = "..."`` in a Cargo.toml.

    By convention ``[package]`` comes first, and dependency tables write the
    version inline (``name = { version = ... }``), so those never start a
    line with ``version``. Returns ``None`` when nothing matches.
    """
    hit = re.search(r'(?m)^version\s*=\s*"([^"]+)"', text)
    if hit is None:
        return None
    return hit.group(1)
98
+
99
+
100
def npm_version(text: str) -> str | None:
    """Return the first ``"version": "..."`` value in package.json text.

    The text is scanned with a regex rather than parsed as JSON. Returns
    ``None`` when no such key/value pair occurs.
    """
    hit = re.search(r'"version"\s*:\s*"([^"]+)"', text)
    if hit is None:
        return None
    return hit.group(1)
103
+
104
+
105
# Maps the extractor name used in BINDING_MANIFESTS entries to the function
# that pulls the version string out of that manifest's text.
_BINDING_EXTRACTORS = {"cargo": cargo_version, "npm": npm_version}
106
+
107
+
108
def native_pin_versions(text: str) -> "dict[str, str | None]":
    """Collect every ``transcribe-cpp-native[-suffix]==X.Y.Z.*`` pin.

    Both the hard dependency and the accelerator extras express the pre-1.0
    base-version contract at resolver level, so each ``X.Y.Z`` must equal the
    base version used everywhere else. (The provider packages themselves
    cannot drift: their versions are parsed from the header at build time.)

    Returns one ``"pyproject.toml (<name> pin)"`` entry per pin found, or a
    single ``"pyproject.toml (native pin)": None`` entry when there are none.
    """
    pin_pattern = r'"(transcribe-cpp-native(?:-[a-z0-9]+)*)\s*==\s*([0-9.]+?)\.\*"'
    result: "dict[str, str | None]" = {}
    for hit in re.finditer(pin_pattern, text):
        package, pinned = hit.groups()
        result[f"pyproject.toml ({package} pin)"] = pinned
    if not result:
        # No pin at all is reported as a missing version, not as a pass.
        result["pyproject.toml (native pin)"] = None
    return result
120
+
121
+
122
def npm_optional_pins(text: str) -> "dict[str, str | None]":
    """Collect the ``@transcribe-cpp/<platform>`` pins in optionalDependencies.

    This is the npm analog of ``native_pin_versions``: the API package
    (bindings/typescript/package.json) pins each platform provider at an
    exact version, and pre-1.0 those must share the base version with
    everything else. The release job (ts-release) re-syncs them to the
    published version; this static check makes a forgotten bump fail CI.

    Returns one ``"package.json (<name> pin)"`` entry per pin, or a single
    ``"package.json (optionalDependencies)": None`` entry when the block is
    absent or holds no matching pins.
    """
    section = re.search(r'"optionalDependencies"\s*:\s*\{([^}]*)\}', text, re.S)
    if section is not None:
        entries = re.findall(
            r'"(@transcribe-cpp/[^"]+)"\s*:\s*"([^"]+)"', section.group(1)
        )
    else:
        entries = []
    if entries:
        return {f"package.json ({name} pin)": version for name, version in entries}
    return {"package.json (optionalDependencies)": None}
134
+
135
+
136
def cargo_sys_pin(text: str) -> str | None:
    """Return the safe crate's dependency pin on ``transcribe-cpp-sys``.

    This is a different field from the crate's own ``[package].version``
    (which ``cargo_version()`` returns). The expected shape is::

        transcribe-cpp-sys = { version = "X.Y.Z", path = "../../..", ... }

    The ``version`` key may sit anywhere inside the inline table. Returns
    ``None`` when no such entry exists.
    """
    pin_pattern = r'transcribe-cpp-sys\s*=\s*\{[^}]*?\bversion\s*=\s*"([^"]+)"'
    hit = re.search(pin_pattern, text)
    if hit is None:
        return None
    return hit.group(1)
144
+
145
+
146
def cargo_lock_versions(text: str) -> "dict[str, str | None]":
    """Return the locked versions of the two workspace crates in Cargo.lock.

    cargo writes ``name`` and then ``version`` on consecutive lines inside
    each ``[[package]]`` block. The closing quote in the name pattern stops
    ``transcribe-cpp`` from also matching ``transcribe-cpp-sys``.

    The result always has both ``"Cargo.lock (<crate>)"`` keys; a crate that
    is not found maps to ``None``.
    """

    def locked(crate: str) -> "str | None":
        hit = re.search(rf'name = "{re.escape(crate)}"\nversion = "([^"]+)"', text)
        return hit.group(1) if hit else None

    return {
        f"Cargo.lock ({crate})": locked(crate)
        for crate in ("transcribe-cpp", "transcribe-cpp-sys")
    }
156
+
157
+
158
def package_lock_versions(text: str) -> "dict[str, str | None]":
    """Return the two root-version spots npm keeps in package-lock.json.

    npm records the root package version twice: as top-level ``.version``
    and as ``.packages[""].version`` (the root package's own node).

    The result always has both keys. A spot maps to ``None`` when the text
    is not valid JSON, when the document (or ``packages`` / ``packages[""]``)
    is not a JSON object, or when the value is not a string. Reporting these
    as ``None`` lets the caller treat a malformed lockfile as "version not
    located" instead of crashing on an ``AttributeError``.
    """
    root_key = "package-lock.json (root)"
    node_key = 'package-lock.json (packages[""])'

    def as_version(value: object) -> "str | None":
        # Only a string can be a version; numbers/null/objects are "missing".
        return value if isinstance(value, str) else None

    try:
        data = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        data = None
    if not isinstance(data, dict):
        return {root_key: None, node_key: None}

    packages = data.get("packages")
    root_node = packages.get("") if isinstance(packages, dict) else None
    return {
        root_key: as_version(data.get("version")),
        node_key: (
            as_version(root_node.get("version"))
            if isinstance(root_node, dict)
            else None
        ),
    }
169
+
170
+
171
def swift_compiled_version(text: str) -> str | None:
    """Return the hand-maintained Swift ``compiledVersion = "X.Y.Z"`` literal.

    That literal is what the SwiftPM load gate (Transcribe.ensureCompatible)
    compares against the linked library. (The Swift ABI pin is checked
    separately by swift_abihash_check.py against include/transcribe.abihash.)
    Returns ``None`` when the literal is not found.
    """
    hit = re.search(r'compiledVersion\s*=\s*"([^"]+)"', text)
    if hit is None:
        return None
    return hit.group(1)
178
+
179
+
180
def main() -> int:
    """Compare every duplicated version against the header and report.

    Returns the process exit status: 0 when all sources share one base
    version, 1 on drift, 2 when any version could not be located (missing
    file or unparseable content).

    Every input file is decoded as UTF-8 explicitly rather than with the
    locale's default encoding, and a missing file of any kind — including
    the header, pyproject.toml and ``__init__.py`` — is reported through the
    "could not locate" path (status 2) instead of an uncaught exception.
    """

    def read(path: Path) -> "str | None":
        # None signals "file absent"; callers map that to a None version.
        return path.read_text(encoding="utf-8") if path.exists() else None

    def located(path: Path, extractor):
        # Run a single-version extractor, or yield None for a missing file.
        text = read(path)
        return extractor(text) if text is not None else None

    pyproject_text = read(PYPROJECT)
    sources: dict[str, str | None] = {
        "include/transcribe.h": located(HEADER, header_version),
        "pyproject.toml": (
            pyproject_version(pyproject_text) if pyproject_text is not None else None
        ),
        "__init__.__version__": located(INIT, init_version),
    }
    # With no pyproject text this yields the "native pin: None" entry, which
    # is exactly the missing-version report wanted for an absent file.
    sources.update(native_pin_versions(pyproject_text or ""))
    ts_package_text = read(TS_PACKAGE_JSON)
    if ts_package_text is not None:
        sources.update(npm_optional_pins(ts_package_text))

    # Formerly §1c blind spots — now part of the equality set (releasing.md §8
    # P0 #2 slice B). Each file must exist; a missing one is a hard error below.
    sources["Cargo.toml (sys dep pin)"] = located(RUST_SAFE_CARGO, cargo_sys_pin)
    cargo_lock_text = read(CARGO_LOCK)
    if cargo_lock_text is not None:
        sources.update(cargo_lock_versions(cargo_lock_text))
    else:
        sources["Cargo.lock"] = None
    package_lock_text = read(PACKAGE_LOCK)
    if package_lock_text is not None:
        sources.update(package_lock_versions(package_lock_text))
    else:
        sources["package-lock.json"] = None
    sources["TranscribeCpp.swift (compiledVersion)"] = located(
        SWIFT_SOURCE, swift_compiled_version
    )

    # Binding manifests: active ones join the equality set; inactive ones
    # must merely exist and parse (placeholder versions are reported, not
    # compared).
    inactive: dict[str, str] = {}
    for rel, kind, active in BINDING_MANIFESTS:
        version = located(REPO / rel, _BINDING_EXTRACTORS[kind])
        if active:
            sources[rel] = version
        elif version is None:
            sources[rel] = None  # missing/unparseable is an error either way
        else:
            inactive[rel] = version
    if inactive:
        detail = ", ".join(f"{name}={v}" for name, v in inactive.items())
        print(f"inactive binding manifests (parsed, not compared): {detail}")

    # Any unlocated version is a hard error, reported before comparing.
    missing = [name for name, v in sources.items() if v is None]
    if missing:
        for name in missing:
            print(f"error: could not locate the version in {name}", file=sys.stderr)
        return 2

    # Compare on the release segment only so packaging suffixes are accepted.
    bases = {name: base_version(v) for name, v in sources.items()}  # type: ignore[arg-type]
    distinct = set(bases.values())
    if len(distinct) != 1:
        print("version drift across sources (base MAJOR.MINOR.PATCH must agree):",
              file=sys.stderr)
        for name, v in sources.items():
            print(f" {name}: {v} (base {bases[name]})", file=sys.stderr)
        return 1

    base = distinct.pop()
    detail = ", ".join(f"{name}={v}" for name, v in sources.items())
    print(f"version sync ok: base {base} ({detail})")
    return 0
246
+
247
+
248
+ if __name__ == "__main__":
249
+ raise SystemExit(main())