wispr-lrc 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. wispr_lrc-0.1.2/.github/workflows/ci.yml +54 -0
  2. wispr_lrc-0.1.2/.github/workflows/ml-benchmark.yml +66 -0
  3. wispr_lrc-0.1.2/.gitignore +10 -0
  4. wispr_lrc-0.1.2/.redsun/.gitignore +1 -0
  5. wispr_lrc-0.1.2/.redsun/AGENTS.md +203 -0
  6. wispr_lrc-0.1.2/.redsun/ARCHITECTURE.md +208 -0
  7. wispr_lrc-0.1.2/AGENTS.md +1 -0
  8. wispr_lrc-0.1.2/PKG-INFO +181 -0
  9. wispr_lrc-0.1.2/README.md +167 -0
  10. wispr_lrc-0.1.2/pyproject.toml +49 -0
  11. wispr_lrc-0.1.2/src/wispr/__init__.py +5 -0
  12. wispr_lrc-0.1.2/src/wispr/align.py +3 -0
  13. wispr_lrc-0.1.2/src/wispr/audio.py +31 -0
  14. wispr_lrc-0.1.2/src/wispr/backend_factory.py +81 -0
  15. wispr_lrc-0.1.2/src/wispr/backends.py +70 -0
  16. wispr_lrc-0.1.2/src/wispr/batch.py +201 -0
  17. wispr_lrc-0.1.2/src/wispr/benchmark.py +214 -0
  18. wispr_lrc-0.1.2/src/wispr/cli.py +227 -0
  19. wispr_lrc-0.1.2/src/wispr/demucs_backend.py +64 -0
  20. wispr_lrc-0.1.2/src/wispr/lrc.py +29 -0
  21. wispr_lrc-0.1.2/src/wispr/lyrics.py +19 -0
  22. wispr_lrc-0.1.2/src/wispr/metadata.py +43 -0
  23. wispr_lrc-0.1.2/src/wispr/models.py +196 -0
  24. wispr_lrc-0.1.2/src/wispr/pipeline.py +295 -0
  25. wispr_lrc-0.1.2/src/wispr/runtime.py +58 -0
  26. wispr_lrc-0.1.2/src/wispr/segment.py +378 -0
  27. wispr_lrc-0.1.2/src/wispr/transcribe.py +12 -0
  28. wispr_lrc-0.1.2/src/wispr/warnings.py +3 -0
  29. wispr_lrc-0.1.2/src/wispr/whisperx_backend.py +366 -0
  30. wispr_lrc-0.1.2/tests/fixtures/README.md +9 -0
  31. wispr_lrc-0.1.2/tests/fixtures/jingle_bells.m4a +0 -0
  32. wispr_lrc-0.1.2/tests/fixtures/jingle_bells.txt +71 -0
  33. wispr_lrc-0.1.2/tests/test_audio.py +56 -0
  34. wispr_lrc-0.1.2/tests/test_backend_factory.py +92 -0
  35. wispr_lrc-0.1.2/tests/test_batch.py +136 -0
  36. wispr_lrc-0.1.2/tests/test_benchmark.py +93 -0
  37. wispr_lrc-0.1.2/tests/test_cli.py +421 -0
  38. wispr_lrc-0.1.2/tests/test_demucs.py +77 -0
  39. wispr_lrc-0.1.2/tests/test_lrc.py +36 -0
  40. wispr_lrc-0.1.2/tests/test_metadata.py +46 -0
  41. wispr_lrc-0.1.2/tests/test_ml_smoke.py +47 -0
  42. wispr_lrc-0.1.2/tests/test_pipeline.py +203 -0
  43. wispr_lrc-0.1.2/tests/test_runtime.py +45 -0
  44. wispr_lrc-0.1.2/tests/test_segment.py +171 -0
  45. wispr_lrc-0.1.2/tests/test_transcribe.py +382 -0
  46. wispr_lrc-0.1.2/uv.lock +3488 -0
@@ -0,0 +1,54 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ test:
9
+ name: Python ${{ matrix.python-version }}
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]
15
+
16
+ steps:
17
+ - name: Check out repository
18
+ uses: actions/checkout@v4
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v5
22
+ with:
23
+ enable-cache: true
24
+ cache-dependency-glob: uv.lock
25
+
26
+ - name: Set up Python
27
+ uses: actions/setup-python@v5
28
+ with:
29
+ python-version: ${{ matrix.python-version }}
30
+
31
+ - name: Install dependencies
32
+ run: uv sync --dev --locked
33
+
34
+ - name: Lint
35
+ run: uv run ruff check .
36
+
37
+ - name: Test
38
+ run: uv run pytest
39
+
40
+ - name: Benchmark smoke
41
+ run: |
42
+ mkdir -p "$RUNNER_TEMP/wispr-ci"
43
+ uv run wispr benchmark tests/fixtures/jingle_bells.m4a tests/fixtures/jingle_bells.txt \
44
+ --backend mock \
45
+ --debug \
46
+ --force \
47
+ -o "$RUNNER_TEMP/wispr-ci/jingle_bells.lrc" \
48
+ --report "$RUNNER_TEMP/wispr-ci/jingle_bells.benchmark.json"
49
+ test -f "$RUNNER_TEMP/wispr-ci/jingle_bells.lrc"
50
+ test -f "$RUNNER_TEMP/wispr-ci/jingle_bells.benchmark.json"
51
+ test -f "$RUNNER_TEMP/wispr-ci/jingle_bells.debug/timings.json"
52
+
53
+ - name: Build package
54
+ run: uv build
@@ -0,0 +1,66 @@
1
+ name: Manual ML Benchmark
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ model:
7
+ description: WhisperX model name
8
+ required: true
9
+ default: base
10
+ device:
11
+ description: Runtime device
12
+ required: true
13
+ default: cpu
14
+ compute-type:
15
+ description: WhisperX compute type
16
+ required: true
17
+ default: int8
18
+
19
+ jobs:
20
+ whisperx-benchmark:
21
+ runs-on: ubuntu-latest
22
+ timeout-minutes: 60
23
+
24
+ steps:
25
+ - name: Check out repository
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Install uv
29
+ uses: astral-sh/setup-uv@v5
30
+ with:
31
+ enable-cache: true
32
+ cache-dependency-glob: uv.lock
33
+
34
+ - name: Set up Python
35
+ uses: actions/setup-python@v5
36
+ with:
37
+ python-version: "3.12"
38
+
39
+ - name: Install ffmpeg
40
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg
41
+
42
+ - name: Install ML dependencies
43
+ run: uv sync --dev --extra ml --locked
44
+
45
+ - name: Run WhisperX benchmark
46
+ run: |
47
+ mkdir -p "$RUNNER_TEMP/wispr-ml"
48
+ uv run wispr benchmark tests/fixtures/jingle_bells.m4a tests/fixtures/jingle_bells.txt \
49
+ --backend whisperx \
50
+ --model "${{ inputs.model }}" \
51
+ --device "${{ inputs.device }}" \
52
+ --compute-type "${{ inputs['compute-type'] }}" \
53
+ --debug \
54
+ --force \
55
+ -o "$RUNNER_TEMP/wispr-ml/jingle_bells.lrc" \
56
+ --report "$RUNNER_TEMP/wispr-ml/jingle_bells.benchmark.json"
57
+
58
+ - name: Upload benchmark artifacts
59
+ uses: actions/upload-artifact@v4
60
+ if: always()
61
+ with:
62
+ name: wispr-ml-benchmark
63
+ path: |
64
+ ${{ runner.temp }}/wispr-ml/*.lrc
65
+ ${{ runner.temp }}/wispr-ml/*.benchmark.json
66
+ ${{ runner.temp }}/wispr-ml/*.debug/**
@@ -0,0 +1,10 @@
1
+ .venv/
2
+ .pytest_cache/
3
+ .ruff_cache/
4
+ __pycache__/
5
+ *.py[cod]
6
+ inputs/
7
+ *.lrc
8
+ *.benchmark.json
9
+ *.summary.json
10
+ *.debug/
@@ -0,0 +1 @@
1
+ *
@@ -0,0 +1,203 @@
1
+ # AGENTS.md
2
+
3
+ ## Purpose
4
+
5
+ This repository implements **wispr**, a Python library and CLI for generating synchronized standard line-level `.lrc` files from full-song audio plus a canonical line-by-line lyrics file.
6
+
7
+ The core product goal is not lyric generation. The system should treat the provided lyrics file as the source of truth and use transcription and forced alignment tooling primarily to recover timing information.
8
+
9
+ Agents working in this repository should optimize for **software engineering quality first**: clean module boundaries, deterministic behavior, strong testability, clear failure modes, and a polished command-line user experience.
10
+
11
+ ## Product Scope
12
+
13
+ ### In scope for v1
14
+
15
+ - English-only alignment.
16
+ - Full-song processing.
17
+ - Standard line-level `.lrc` output.
18
+ - Audio inputs: `.wav`, `.mp3`, `.flac`, `.m4a`.
19
+ - Optional metadata extraction from source audio.
20
+ - Typer-based CLI plus reusable Python library.
21
+ - Debug artifacts for transcript, alignment, and segments.
22
+ - Warnings on weak alignment while continuing output generation.
23
+
24
+ ### Out of scope for v1
25
+
26
+ - Word-level karaoke timing.
27
+ - Multilingual support.
28
+ - Docker packaging.
29
+ - Extra subcommands like `wispr doctor`.
30
+ - Replacing the provided lyrics with model-generated text.
31
+
32
+ ## Core Principles
33
+
34
+ ### 1. Canonical lyrics are the source of truth
35
+
36
+ The contents of `lyrics.txt` must define the emitted lyric text. Do not silently rewrite lyrics to match model output.
37
+
38
+ ### 2. Timing over transcription
39
+
40
+ Transcript and alignment stages exist to obtain usable timestamps. They do not define the final textual output.
41
+
42
+ ### 3. Library first, CLI second
43
+
44
+ The CLI should be a thin wrapper over reusable internal modules. Business logic should not live directly inside CLI command functions.
45
+
46
+ ### 4. Deterministic output where possible
47
+
48
+ Formatting, metadata ordering, path resolution, warning behavior, and line segmentation should be predictable and testable.
49
+
50
+ ### 5. Safe defaults
51
+
52
+ The default experience should be optimized for likely real-world success:
53
+ - Vocal separation on by default.
54
+ - Output path derived from input audio filename.
55
+ - Existing output files should not be overwritten unless `--force` is passed.
56
+ - Missing metadata should be skipped, not treated as fatal.
57
+ - Weak alignment should warn and continue.
58
+
59
+ ## User Experience Contract
60
+
61
+ The primary demo path should remain:
62
+
63
+ ```bash
64
+ wispr song.wav lyrics.txt
65
+ ```
66
+
67
+ Expected behavior:
68
+ - Produces `song.lrc` next to the audio file unless `-o` is provided.
69
+ - Uses the timestamp of the first aligned word in each lyric line as that line's timestamp.
70
+ - Emits metadata tags before lyric lines when metadata is available.
71
+ - Fails on output collisions unless `--force` is provided.
72
+ - Writes debug artifacts only when `--debug` is enabled.
73
+
74
+ ## Expected Output Format
75
+
76
+ wispr should emit standard line-level LRC.
77
+
78
+ If metadata is available, write tags in this fixed order:
79
+ 1. `[ar:]`
80
+ 2. `[al:]`
81
+ 3. `[ti:]`
82
+ 4. Timestamped lyric lines
83
+
84
+ Do not emit empty placeholder metadata tags.
85
+
86
+ ## Recommended Project Structure
87
+
88
+ ```text
89
+ wispr/
90
+ __init__.py
91
+ cli.py
92
+ pipeline.py
93
+ audio.py
94
+ metadata.py
95
+ transcribe.py
96
+ align.py
97
+ segment.py
98
+ lrc.py
99
+ models.py
100
+ warnings.py
101
+ tests/
102
+ ```
103
+
104
+ ### Module responsibilities
105
+
106
+ - `cli.py`: Typer commands, user-facing flags, terminal messaging.
107
+ - `pipeline.py`: stage orchestration.
108
+ - `audio.py`: validation, preprocessing, vocal-separation integration.
109
+ - `metadata.py`: source metadata extraction and normalization.
110
+ - `transcribe.py`: transcript and raw timestamp acquisition.
111
+ - `align.py`: forced-alignment backend integration.
112
+ - `segment.py`: map aligned words onto canonical lyric lines.
113
+ - `lrc.py`: timestamp formatting and final file serialization.
114
+ - `models.py`: shared data structures across stages.
115
+ - `warnings.py`: structured warning types and formatting.
116
+
117
+ ## Implementation Guidance
118
+
119
+ ### Prefer explicit data models
120
+
121
+ Use typed models or dataclasses for stage boundaries instead of large unstructured dictionaries.
122
+
123
+ Suggested entities include:
124
+ - `TrackMetadata`
125
+ - `TranscriptWord`
126
+ - `AlignedWord`
127
+ - `LyricLine`
128
+ - `LrcDocument`
129
+
130
+ ### Keep backend boundaries clean
131
+
132
+ The alignment backend should be abstracted behind a narrow interface so the project can begin with WhisperX-centered alignment and later swap or add custom aligners without rewriting the whole pipeline.
133
+
134
+ ### Preserve inspectability
135
+
136
+ When adding logic, prefer designs that make it easier to understand how a timestamp was produced. Debuggability matters more than cleverness.
137
+
138
+ ### Avoid premature product sprawl
139
+
140
+ Do not add extra commands, frontend layers, web services, or database infrastructure unless explicitly requested. The value of this project comes from doing one CLI workflow well.
141
+
142
+ ## Debug Artifacts
143
+
144
+ When `--debug` is enabled, write a folder of intermediate artifacts containing:
145
+ - `transcript.json`
146
+ - `alignment.json`
147
+ - `segments.json`
148
+
149
+ These artifacts should reflect the real internal pipeline state, not post-hoc summaries.
150
+
151
+ ## Warning Policy
152
+
153
+ Weak alignment should not abort the entire run.
154
+
155
+ Warnings should include:
156
+ - Line number.
157
+ - Confidence score.
158
+ - Estimated timestamp source.
159
+
160
+ Warnings should be informative enough that a user can inspect the affected line and understand why the output may be imperfect.
161
+
162
+ ## Testing Priorities
163
+
164
+ Focus tests on deterministic logic and contract behavior.
165
+
166
+ High-priority tests:
167
+ - LRC timestamp formatting.
168
+ - Metadata ordering and omission.
169
+ - Line segmentation behavior.
170
+ - Output collision handling with and without `--force`.
171
+ - Warning generation for weak alignment.
172
+ - Output filename derivation from input audio.
173
+
174
+ Where possible, use small synthetic fixtures and mocked backend outputs instead of heavyweight full-model runs.
175
+
176
+ ## Documentation Expectations
177
+
178
+ When updating the repo:
179
+ - Keep architecture and behavior consistent with `ARCHITECTURE.md`.
180
+ - Document new flags, outputs, or format changes in the README.
181
+ - Call out any deviation from the v1 contract explicitly.
182
+
183
+ ## Style Expectations
184
+
185
+ - Prefer small, composable functions over monolithic pipeline code.
186
+ - Keep side effects localized.
187
+ - Make error messages concrete and actionable.
188
+ - Favor clarity over clever abstractions.
189
+ - Add comments where intent is not obvious, but do not narrate trivial code.
190
+ - "wispr" should always be typed in all lowercase. Do not use "Wispr", "WISPR", or any other capitalization.
191
+
192
+ ## Agent Behavior
193
+
194
+ Agents assisting with this project should:
195
+ - Preserve the library-plus-CLI structure.
196
+ - Respect the canonical-lyrics-first design.
197
+ - Avoid introducing hidden behavior that changes lyric text.
198
+ - Prefer incremental, reviewable changes.
199
+ - Keep recommendations aligned with recruiter-facing SWE value and real usability.
200
+ - Structure code in a clean, organized, and human-readable way.
201
+ - Be concise with code generation; focus on writing as few lines of code as possible.
202
+
203
+ When responding, guide and explain clearly when useful, but keep implementations aligned with the repository's architecture and scope.
@@ -0,0 +1,208 @@
1
+ # wispr Architecture
2
+
3
+ ## Overview
4
+
5
+ wispr is a Python library and CLI that converts full-song audio plus a canonical line-by-line lyrics file into a synchronized, standard line-level `.lrc` output. The CLI is the simplest interface over a reusable library, not a one-off script.[cite:58][cite:60]
6
+
7
+ The project is intentionally scoped around timing alignment rather than lyric generation. Standard LRC supports line-level timestamps and optional metadata tags, which makes it a good target for a deterministic v1 that is easy to inspect, test, and demo.[cite:34][cite:65][cite:31]
8
+
9
+ ## Product Goals
10
+
11
+ ### Primary goals
12
+
13
+ - Accept `.wav`, `.mp3`, `.flac`, and `.m4a` audio inputs.[cite:74][cite:78]
14
+ - Accept a `lyrics.txt` file where each line represents one intended lyric line.
15
+ - Produce a standard line-level `.lrc` file with one timestamp per lyric line.[cite:34][cite:65]
16
+ - Prefer forced alignment for timing accuracy while keeping the provided lyric text as the source of truth.[cite:32][cite:42][cite:8]
17
+ - Ship as both a reusable Python package and an installed `wispr` command via Python entry points.[cite:60]
18
+
19
+ ### Non-goals for v1
20
+
21
+ - Word-level karaoke timing.
22
+ - Multilingual alignment.
23
+ - Docker packaging.
24
+ - Additional diagnostic subcommands such as `wispr doctor`.
25
+
26
+ ## User Experience
27
+
28
+ The default happy path should be:
29
+
30
+ ```bash
31
+ wispr song.wav lyrics.txt
32
+ ```
33
+
34
+ When `-o` is omitted, wispr should derive the output path from the audio filename, so `random_song232.wav` becomes `random_song232.lrc`.
35
+
36
+ If the destination file already exists, the command should fail unless `--force` is passed. Alignment problems should produce warnings and continue rather than aborting the entire run.
37
+
38
+ ## CLI Contract
39
+
40
+ wispr should use Typer for the command-line interface so the public command surface can stay small now and grow cleanly later through typed arguments and options.[cite:58]
41
+
42
+ ### Initial command surface
43
+
44
+ ```bash
45
+ wispr <audio> <lyrics.txt>
46
+ wispr <audio> <lyrics.txt> -o output.lrc
47
+ wispr <audio> <lyrics.txt> --debug
48
+ wispr <audio> <lyrics.txt> --force
49
+ wispr <audio> <lyrics.txt> --no-separate-vocals
50
+ ```
51
+
52
+ ### Packaging model
53
+
54
+ The package name and installed command should both be `wispr`. Installation should expose the CLI through a `console_scripts` entry point so users can run `wispr` directly from their shell after installation.[cite:60][cite:79]
55
+
56
+ ## Pipeline
57
+
58
+ The default processing path is:
59
+
60
+ 1. Audio ingest and validation.
61
+ 2. Optional metadata extraction.
62
+ 3. Vocal separation, enabled by default.
63
+ 4. Transcript and word-timestamp extraction.
64
+ 5. Forced alignment against the canonical lyric text.
65
+ 6. Line segmentation.
66
+ 7. LRC emission.
67
+
68
+ WhisperX is the intended alignment anchor for v1 because it is built around refining Whisper timestamps with forced alignment to improve timestamp accuracy.[cite:32][cite:42][cite:8]
69
+
70
+ ### Stage details
71
+
72
+ #### 1. Audio ingest
73
+
74
+ The input layer should validate supported extensions, normalize paths, and prepare audio for downstream tools.
75
+
76
+ #### 2. Metadata extraction
77
+
78
+ wispr should opportunistically extract title, artist, and album metadata from the source file. Mutagen is a good fit because it supports common audio metadata handling across formats including MP3, FLAC, and MP4-family files.[cite:74][cite:78]
79
+
80
+ If metadata is missing or unreadable, wispr should skip those fields without failing the run.
81
+
82
+ #### 3. Vocal separation
83
+
84
+ Vocal separation should be on by default, with a user escape hatch through `--no-separate-vocals`. This keeps the default UX optimized for noisy real songs while preserving a fast path for cleaner inputs.
85
+
86
+ #### 4. Transcript and alignment
87
+
88
+ The transcript stage exists to recover timing signals, not to produce final lyric text. The canonical lyrics file remains the source of truth for emitted lines, while the alignment backend maps timing evidence onto those lines.[cite:32][cite:42]
89
+
90
+ #### 5. Line segmentation
91
+
92
+ Because the lyrics input is already line-structured, segmentation should map aligned word spans onto each original lyric line rather than trying to infer poetic phrasing from paragraph text.
93
+
94
+ #### 6. LRC emission
95
+
96
+ The emitted file should be standard line-level LRC. Each lyric line should use the timestamp of the first aligned word in that line.[cite:34][cite:65]
97
+
98
+ If metadata exists, it should be written before the lyric body in fixed order:
99
+
100
+ 1. `[ar:]`
101
+ 2. `[al:]`
102
+ 3. `[ti:]`
103
+ 4. Timestamped lyric lines
104
+
105
+ Metadata tags should be omitted individually when unavailable rather than written as empty placeholders.[cite:34][cite:65]
106
+
107
+ ## Internal Architecture
108
+
109
+ wispr should be library-first with a thin CLI wrapper. The CLI exists to parse inputs and display results, while the library owns orchestration, transformation, and file generation.
110
+
111
+ ### Proposed package layout
112
+
113
+ ```text
114
+ wispr/
115
+ __init__.py
116
+ cli.py
117
+ pipeline.py
118
+ audio.py
119
+ metadata.py
120
+ transcribe.py
121
+ align.py
122
+ segment.py
123
+ lrc.py
124
+ models.py
125
+ warnings.py
126
+ tests/
127
+ ```
128
+
129
+ ### Module responsibilities
130
+
131
+ | Module | Responsibility |
132
+ |---|---|
133
+ | `cli.py` | Typer commands, argument parsing, user-facing output.[cite:58] |
134
+ | `pipeline.py` | End-to-end orchestration and stage ordering. |
135
+ | `audio.py` | Input validation, preprocessing, and vocal-separation integration. |
136
+ | `metadata.py` | Audio metadata extraction and normalization via Mutagen-compatible readers.[cite:74][cite:78] |
137
+ | `transcribe.py` | Transcript and raw word-timestamp acquisition. |
138
+ | `align.py` | Forced-alignment adapter and backend boundary.[cite:42][cite:32] |
139
+ | `segment.py` | Map aligned tokens/spans onto lyric lines. |
140
+ | `lrc.py` | Metadata ordering, timestamp formatting, and final `.lrc` serialization.[cite:34][cite:65] |
141
+ | `models.py` | Shared typed data structures between stages. |
142
+ | `warnings.py` | Structured warning types and formatting. |
143
+
144
+ ## Data Model
145
+
146
+ The internal pipeline should pass structured models instead of ad hoc dictionaries where possible.
147
+
148
+ ### Core objects
149
+
150
+ - `TrackMetadata`: title, artist, album, source path.
151
+ - `TranscriptWord`: text, start time, end time, confidence, source.
152
+ - `AlignedWord`: canonical token, start time, end time, confidence, timestamp source.
153
+ - `LyricLine`: line number, raw text, aligned words, line start time, confidence.
154
+ - `LrcDocument`: metadata tags plus ordered timestamped lyric lines.
155
+
156
+ This keeps the CLI thin, improves unit-test boundaries, and makes it easier to swap alignment backends later.
157
+
158
+ ## Debug Artifacts
159
+
160
+ Debug mode should write a folder of intermediate artifacts rather than a single file.
161
+
162
+ ### Required debug outputs
163
+
164
+ - `transcript.json`
165
+ - `alignment.json`
166
+ - `segments.json`
167
+
168
+ These files should make it possible to inspect where timing entered the pipeline, how canonical lyrics were aligned, and how final line timestamps were chosen.
169
+
170
+ ## Warning Policy
171
+
172
+ wispr should warn and continue when a line is weakly aligned instead of failing the whole song.
173
+
174
+ Each warning should include:
175
+
176
+ - Line number.
177
+ - Confidence score.
178
+ - Estimated timestamp source.
179
+
180
+ This policy preserves a usable output file while still surfacing uncertainty to the user.
181
+
182
+ ## Testing Priorities
183
+
184
+ The highest-value unit tests should target deterministic logic rather than heavyweight model execution.
185
+
186
+ ### Must-test areas
187
+
188
+ - LRC timestamp formatting and serialization.[cite:34][cite:65]
189
+ - Metadata tag ordering and omission behavior.[cite:34][cite:65]
190
+ - Line-to-span segmentation.
191
+ - Existing-file collision behavior with and without `--force`.
192
+ - Warning generation when confidence falls below threshold.
193
+
194
+ The alignment backend itself should be wrapped behind interfaces so pure logic can be tested with synthetic fixtures instead of depending on full external model runs.
195
+
196
+ ## Future Extensions
197
+
198
+ The current architecture should leave room for later additions without forcing a redesign.
199
+
200
+ ### Likely future work
201
+
202
+ - Word-level karaoke timing.
203
+ - Multilingual support.
204
+ - Alternative alignment backends.
205
+ - Optional Docker packaging.
206
+ - Richer metadata support.
207
+
208
+ The key design rule is that the CLI should remain the simplest interface to the library: simple for end users, while the library stays modular enough for testing, reuse, and future backends.
@@ -0,0 +1 @@
1
+ @.redsun/AGENTS.md