subtitle-engine 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {subtitle_engine-0.1.0/src/subtitle_engine.egg-info → subtitle_engine-0.1.2}/PKG-INFO +5 -5
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/README.md +2 -2
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/pyproject.toml +4 -4
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine/__init__.py +1 -1
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine/cli.py +93 -11
- subtitle_engine-0.1.2/src/subtitle_engine/transcriber.py +187 -0
- subtitle_engine-0.1.2/src/subtitle_engine/updater.py +158 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2/src/subtitle_engine.egg-info}/PKG-INFO +5 -5
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine.egg-info/SOURCES.txt +4 -1
- subtitle_engine-0.1.2/src/subtitle_engine.egg-info/entry_points.txt +2 -0
- subtitle_engine-0.1.2/tests/test_cli.py +147 -0
- subtitle_engine-0.1.2/tests/test_transcriber.py +26 -0
- subtitle_engine-0.1.2/tests/test_updater.py +172 -0
- subtitle_engine-0.1.0/src/subtitle_engine/transcriber.py +0 -129
- subtitle_engine-0.1.0/src/subtitle_engine.egg-info/entry_points.txt +0 -2
- subtitle_engine-0.1.0/tests/test_cli.py +0 -51
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/LICENSE +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/setup.cfg +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine/captioner.py +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine/srt_writer.py +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine/utils.py +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine.egg-info/dependency_links.txt +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine.egg-info/requires.txt +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine.egg-info/top_level.txt +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/tests/test_captioner.py +0 -0
- {subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/tests/test_srt_writer.py +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: subtitle-engine
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Generate SRT subtitles from audio/video files using WhisperX
|
|
5
5
|
Author: Leevi Puntanen
|
|
6
6
|
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/
|
|
8
|
-
Project-URL: Issues, https://github.com/
|
|
7
|
+
Project-URL: Homepage, https://github.com/leevipun/subtitle-engine
|
|
8
|
+
Project-URL: Issues, https://github.com/leevipun/subtitle-engine/issues
|
|
9
9
|
Keywords: subtitles,srt,whisperx,transcription,asr
|
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
|
11
11
|
Classifier: Intended Audience :: End Users/Desktop
|
|
@@ -58,7 +58,7 @@ subeng video.mp4 --output subtitles.srt
|
|
|
58
58
|
# Use a different model or language
|
|
59
59
|
subeng video.mp4 --model medium --language fi
|
|
60
60
|
|
|
61
|
-
# Force CPU / CUDA
|
|
61
|
+
# Force CPU / CUDA
|
|
62
62
|
subeng video.mp4 --device cpu
|
|
63
63
|
|
|
64
64
|
# Speaker diarization (requires a Hugging Face token)
|
|
@@ -75,7 +75,7 @@ subeng video.mp4 --caption --ollama-model qwen3.5:0.8b
|
|
|
75
75
|
| `--output`, `-o` | Output SRT file path |
|
|
76
76
|
| `--model`, `-m` | WhisperX model: `tiny`, `base`, `small` (default), `medium`, `large-v2`, `large-v3` |
|
|
77
77
|
| `--language`, `-l` | ISO language code, e.g. `en`, `fi`. Auto-detected if omitted. |
|
|
78
|
-
| `--device`, `-d` | `cpu
|
|
78
|
+
| `--device`, `-d` | `cpu` or `cuda`. Auto-detected if omitted. |
|
|
79
79
|
| `--batch-size`, `-b` | Inference batch size (default: 16) |
|
|
80
80
|
| `--compute-type`, `-c` | `int8` or `float16`. Auto-selected if omitted. |
|
|
81
81
|
| `--diarize` | Enable speaker diarization |
|
|
@@ -32,7 +32,7 @@ subeng video.mp4 --output subtitles.srt
|
|
|
32
32
|
# Use a different model or language
|
|
33
33
|
subeng video.mp4 --model medium --language fi
|
|
34
34
|
|
|
35
|
-
# Force CPU / CUDA
|
|
35
|
+
# Force CPU / CUDA
|
|
36
36
|
subeng video.mp4 --device cpu
|
|
37
37
|
|
|
38
38
|
# Speaker diarization (requires a Hugging Face token)
|
|
@@ -49,7 +49,7 @@ subeng video.mp4 --caption --ollama-model qwen3.5:0.8b
|
|
|
49
49
|
| `--output`, `-o` | Output SRT file path |
|
|
50
50
|
| `--model`, `-m` | WhisperX model: `tiny`, `base`, `small` (default), `medium`, `large-v2`, `large-v3` |
|
|
51
51
|
| `--language`, `-l` | ISO language code, e.g. `en`, `fi`. Auto-detected if omitted. |
|
|
52
|
-
| `--device`, `-d` | `cpu
|
|
52
|
+
| `--device`, `-d` | `cpu` or `cuda`. Auto-detected if omitted. |
|
|
53
53
|
| `--batch-size`, `-b` | Inference batch size (default: 16) |
|
|
54
54
|
| `--compute-type`, `-c` | `int8` or `float16`. Auto-selected if omitted. |
|
|
55
55
|
| `--diarize` | Enable speaker diarization |
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "subtitle-engine"
|
|
7
|
-
version = "0.1.
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
description = "Generate SRT subtitles from audio/video files using WhisperX"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -35,11 +35,11 @@ dev = [
|
|
|
35
35
|
]
|
|
36
36
|
|
|
37
37
|
[project.scripts]
|
|
38
|
-
subeng = "subtitle_engine.cli:
|
|
38
|
+
subeng = "subtitle_engine.cli:main_entry"
|
|
39
39
|
|
|
40
40
|
[project.urls]
|
|
41
|
-
Homepage = "https://github.com/
|
|
42
|
-
Issues = "https://github.com/
|
|
41
|
+
Homepage = "https://github.com/leevipun/subtitle-engine"
|
|
42
|
+
Issues = "https://github.com/leevipun/subtitle-engine/issues"
|
|
43
43
|
|
|
44
44
|
[tool.setuptools.packages.find]
|
|
45
45
|
where = ["src"]
|
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
"""Command-line interface for subtitle-engine."""
|
|
2
2
|
|
|
3
|
+
import sys
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Annotated, Optional
|
|
5
6
|
|
|
6
7
|
import typer
|
|
7
8
|
from rich.console import Console
|
|
8
9
|
|
|
10
|
+
from subtitle_engine import __version__
|
|
9
11
|
from subtitle_engine.captioner import generate_caption
|
|
10
12
|
from subtitle_engine.srt_writer import write_srt
|
|
11
13
|
from subtitle_engine.transcriber import transcribe
|
|
14
|
+
from subtitle_engine.updater import UpdateCheckError, check_for_update, update_package
|
|
12
15
|
from subtitle_engine.utils import resolve_output_path, validate_media_file
|
|
13
16
|
|
|
14
17
|
app = typer.Typer(
|
|
@@ -18,7 +21,48 @@ app = typer.Typer(
|
|
|
18
21
|
console = Console()
|
|
19
22
|
|
|
20
23
|
|
|
21
|
-
|
|
24
|
+
def _version_callback(value: bool) -> None:
    """Handle the eager ``--version`` option.

    When the flag is absent (``value`` is falsy) this is a no-op. Otherwise it
    prints ``subeng <version>`` and raises ``typer.Exit`` so that no further
    option processing takes place.
    """
    if not value:
        return
    console.print(f"subeng {__version__}")
    raise typer.Exit()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def update() -> None:
    """Update subeng to the latest version from PyPI.

    Forces a fresh update check (``force=True``), reports when the installed
    version is already current, and otherwise runs the package upgrade.

    Raises:
        typer.Exit: with exit code 1 when either the check or the upgrade
            raises ``UpdateCheckError``; the error text is printed first.
    """

    def _abort(error: UpdateCheckError) -> typer.Exit:
        # Shared failure path: print the error, hand back the exit to raise.
        console.print(f"[red]Error:[/red] {error}")
        return typer.Exit(code=1)

    try:
        pending = check_for_update(force=True)
    except UpdateCheckError as exc:
        raise _abort(exc) from exc

    if pending is None:
        console.print(f"[green]subeng is up to date ({__version__}).[/green]")
        return

    console.print(
        "[bold]A new version is available:[/bold] "
        f"{pending.current} → {pending.latest}"
    )
    console.print("[bold]Updating...[/bold]")

    try:
        update_package()
    except UpdateCheckError as exc:
        raise _abort(exc) from exc

    console.print("[green]Update complete.[/green]")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main_entry() -> None:
    """Route ``subeng update`` to the updater; otherwise run the Typer app.

    ``update`` is recognised only as the first command-line argument and is
    dispatched here directly instead of through the Typer app.
    """
    # Slicing avoids an explicit length check: an empty argv tail yields [].
    wants_update = sys.argv[1:2] == ["update"]
    if wants_update:
        update()
        return
    app()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@app.command(
|
|
64
|
+
epilog="Run 'subeng update' to update to the latest version.",
|
|
65
|
+
)
|
|
22
66
|
def main(
|
|
23
67
|
input_file: Annotated[
|
|
24
68
|
Path,
|
|
@@ -61,7 +105,7 @@ def main(
|
|
|
61
105
|
typer.Option(
|
|
62
106
|
"--device",
|
|
63
107
|
"-d",
|
|
64
|
-
help="Device: cpu
|
|
108
|
+
help="Device: cpu or cuda. Auto-detected if omitted.",
|
|
65
109
|
),
|
|
66
110
|
] = None,
|
|
67
111
|
batch_size: Annotated[
|
|
@@ -118,6 +162,31 @@ def main(
|
|
|
118
162
|
envvar="OLLAMA_HOST",
|
|
119
163
|
),
|
|
120
164
|
] = "http://localhost:11434",
|
|
165
|
+
quiet: Annotated[
|
|
166
|
+
bool,
|
|
167
|
+
typer.Option(
|
|
168
|
+
"--quiet",
|
|
169
|
+
"-q",
|
|
170
|
+
help="Only print errors.",
|
|
171
|
+
),
|
|
172
|
+
] = False,
|
|
173
|
+
verbose: Annotated[
|
|
174
|
+
bool,
|
|
175
|
+
typer.Option(
|
|
176
|
+
"--verbose",
|
|
177
|
+
help="Show WhisperX progress bars and warnings.",
|
|
178
|
+
),
|
|
179
|
+
] = False,
|
|
180
|
+
version: Annotated[
|
|
181
|
+
bool,
|
|
182
|
+
typer.Option(
|
|
183
|
+
"--version",
|
|
184
|
+
"-v",
|
|
185
|
+
help="Show the version and exit.",
|
|
186
|
+
callback=_version_callback,
|
|
187
|
+
is_eager=True,
|
|
188
|
+
),
|
|
189
|
+
] = False,
|
|
121
190
|
) -> None:
|
|
122
191
|
"""Generate SRT subtitles from a media file."""
|
|
123
192
|
try:
|
|
@@ -127,12 +196,22 @@ def main(
|
|
|
127
196
|
if caption and not ollama_model:
|
|
128
197
|
raise ValueError("--ollama-model is required when using --caption")
|
|
129
198
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
199
|
+
if not quiet:
|
|
200
|
+
update_info = check_for_update()
|
|
201
|
+
if update_info:
|
|
202
|
+
console.print(
|
|
203
|
+
f"[yellow]A new version of subeng is available:[/yellow] "
|
|
204
|
+
f"{update_info.current} → {update_info.latest}. "
|
|
205
|
+
f"Run [bold]subeng update[/bold] to install it."
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
if not quiet:
|
|
209
|
+
console.print(f"[bold]Transcribing:[/bold] {input_file}")
|
|
210
|
+
console.print(f"[bold]Model:[/bold] {model}")
|
|
211
|
+
if language:
|
|
212
|
+
console.print(f"[bold]Language:[/bold] {language}")
|
|
213
|
+
if device:
|
|
214
|
+
console.print(f"[bold]Device:[/bold] {device}")
|
|
136
215
|
|
|
137
216
|
segments = transcribe(
|
|
138
217
|
input_file,
|
|
@@ -143,10 +222,12 @@ def main(
|
|
|
143
222
|
compute_type=compute_type,
|
|
144
223
|
diarize=diarize,
|
|
145
224
|
hf_token=hf_token,
|
|
225
|
+
verbose=verbose,
|
|
146
226
|
)
|
|
147
227
|
|
|
148
228
|
write_srt(segments, output_path)
|
|
149
|
-
|
|
229
|
+
if not quiet:
|
|
230
|
+
console.print(f"[green]Wrote subtitles to:[/green] {output_path}")
|
|
150
231
|
|
|
151
232
|
if caption:
|
|
152
233
|
transcript = " ".join(str(segment.get("text", "")).strip() for segment in segments)
|
|
@@ -157,7 +238,8 @@ def main(
|
|
|
157
238
|
)
|
|
158
239
|
caption_path = output_path.with_suffix(".caption.txt")
|
|
159
240
|
caption_path.write_text(caption_text, encoding="utf-8")
|
|
160
|
-
|
|
241
|
+
if not quiet:
|
|
242
|
+
console.print(f"[green]Wrote caption to:[/green] {caption_path}")
|
|
161
243
|
except (ValueError, FileNotFoundError, ConnectionError) as exc:
|
|
162
244
|
console.print(f"[red]Error:[/red] {exc}")
|
|
163
245
|
raise typer.Exit(code=1) from exc
|
|
@@ -167,4 +249,4 @@ def main(
|
|
|
167
249
|
|
|
168
250
|
|
|
169
251
|
if __name__ == "__main__":
|
|
170
|
-
|
|
252
|
+
main_entry()
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""WhisperX transcription wrapper."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import io
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import warnings
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Iterator, Optional
|
|
10
|
+
|
|
11
|
+
import torch
|
|
12
|
+
import whisperx
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
VALID_MODELS = {"tiny", "base", "small", "medium", "large-v2", "large-v3"}
|
|
16
|
+
VALID_DEVICES = {"cpu", "cuda"}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _detect_device(device: Optional[str]) -> str:
|
|
20
|
+
"""Pick a device if none was specified.
|
|
21
|
+
|
|
22
|
+
Note: MPS is intentionally excluded because the WhisperX backend
|
|
23
|
+
(faster-whisper / CTranslate2) only supports CPU and CUDA.
|
|
24
|
+
"""
|
|
25
|
+
if device:
|
|
26
|
+
return device
|
|
27
|
+
if torch.cuda.is_available():
|
|
28
|
+
return "cuda"
|
|
29
|
+
return "cpu"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _default_compute_type(device: str) -> str:
|
|
33
|
+
"""Pick a safe compute type for the device."""
|
|
34
|
+
if device == "cpu":
|
|
35
|
+
return "int8"
|
|
36
|
+
return "float16"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _validate_model(model_name: str) -> None:
    """Check ``model_name`` against ``VALID_MODELS``.

    Raises:
        ValueError: if the name is not a known model; the message lists the
            accepted names in sorted order.
    """
    if model_name in VALID_MODELS:
        return
    choices = ", ".join(sorted(VALID_MODELS))
    raise ValueError(f"Unknown model '{model_name}'. Choose from: {choices}")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _validate_device(device: str) -> None:
    """Check ``device`` against ``VALID_DEVICES``.

    Raises:
        ValueError: if the device is not supported; the message lists the
            accepted names in sorted order.
    """
    if device in VALID_DEVICES:
        return
    choices = ", ".join(sorted(VALID_DEVICES))
    raise ValueError(f"Unknown device '{device}'. Choose from: {choices}")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@contextlib.contextmanager
|
|
54
|
+
def _suppress_external_output(verbose: bool) -> Iterator[None]:
|
|
55
|
+
"""Silence tqdm, warnings, and noisy loggers unless verbose is True."""
|
|
56
|
+
if verbose:
|
|
57
|
+
yield
|
|
58
|
+
return
|
|
59
|
+
|
|
60
|
+
original_tqdm_disable: Optional[bool] = None
|
|
61
|
+
original_tqdm_env = os.environ.get("TQDM_DISABLE")
|
|
62
|
+
try:
|
|
63
|
+
import tqdm as _tqdm
|
|
64
|
+
|
|
65
|
+
original_tqdm_disable = _tqdm.tqdm.disable
|
|
66
|
+
_tqdm.tqdm.disable = True
|
|
67
|
+
except Exception: # noqa: BLE001
|
|
68
|
+
_tqdm = None
|
|
69
|
+
|
|
70
|
+
os.environ["TQDM_DISABLE"] = "1"
|
|
71
|
+
|
|
72
|
+
loggers = [
|
|
73
|
+
"whisperx",
|
|
74
|
+
"faster_whisper",
|
|
75
|
+
"pyannote.audio",
|
|
76
|
+
"transformers",
|
|
77
|
+
"torch",
|
|
78
|
+
]
|
|
79
|
+
original_levels: dict[str, int] = {}
|
|
80
|
+
for name in loggers:
|
|
81
|
+
logger = logging.getLogger(name)
|
|
82
|
+
original_levels[name] = logger.level
|
|
83
|
+
logger.setLevel(logging.WARNING)
|
|
84
|
+
|
|
85
|
+
buf = io.StringIO()
|
|
86
|
+
try:
|
|
87
|
+
with warnings.catch_warnings():
|
|
88
|
+
warnings.simplefilter("ignore")
|
|
89
|
+
with contextlib.redirect_stdout(buf), contextlib.redirect_stderr(buf):
|
|
90
|
+
yield
|
|
91
|
+
finally:
|
|
92
|
+
if original_tqdm_env is None:
|
|
93
|
+
os.environ.pop("TQDM_DISABLE", None)
|
|
94
|
+
else:
|
|
95
|
+
os.environ["TQDM_DISABLE"] = original_tqdm_env
|
|
96
|
+
if original_tqdm_disable is not None and _tqdm is not None:
|
|
97
|
+
_tqdm.tqdm.disable = original_tqdm_disable
|
|
98
|
+
for name, level in original_levels.items():
|
|
99
|
+
logging.getLogger(name).setLevel(level)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def transcribe(
    audio_path: Path,
    *,
    model_name: str = "small",
    language: Optional[str] = None,
    device: Optional[str] = None,
    batch_size: int = 16,
    compute_type: Optional[str] = None,
    diarize: bool = False,
    hf_token: Optional[str] = None,
    verbose: bool = False,
) -> list[dict]:
    """Transcribe an audio/video file and return SRT-ready segments.

    Parameters
    ----------
    audio_path:
        Path to the media file to transcribe.
    model_name:
        WhisperX model size. One of: tiny, base, small, medium, large-v2, large-v3.
    language:
        ISO language code, e.g. ``en`` or ``fi``. If ``None``, WhisperX auto-detects.
    device:
        ``cpu`` or ``cuda``. Auto-detected if omitted. Any other value
        (including ``mps``) is rejected with ``ValueError``.
    batch_size:
        WhisperX batch size for transcription.
    compute_type:
        ``int8`` or ``float16``. Auto-selected per device if omitted.
    diarize:
        Whether to run speaker diarization.
    hf_token:
        Hugging Face token required for diarization.
    verbose:
        When ``False`` (the default), progress bars, warnings and console
        output produced while WhisperX runs are suppressed.

    Returns
    -------
    A list of segment dicts with ``start``, ``end`` and ``text`` keys.

    Raises
    ------
    ValueError
        If the model or device name is unknown, or if ``diarize`` is set
        without ``hf_token``.
    FileNotFoundError
        If ``audio_path`` does not exist.
    """
    _validate_model(model_name)

    audio_path = Path(audio_path)
    if not audio_path.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    device = _detect_device(device)
    _validate_device(device)

    if compute_type is None:
        compute_type = _default_compute_type(device)

    if diarize and not hf_token:
        raise ValueError("--hf-token is required when using --diarize")

    with _suppress_external_output(verbose):
        audio = whisperx.load_audio(str(audio_path))

        model = whisperx.load_model(model_name, device, compute_type=compute_type)
        result = model.transcribe(audio, batch_size=batch_size, language=language)

        # Free transcription model memory before alignment
        del model

        # Alignment is skipped when the result carries no language.
        detected_language = result.get("language")
        if detected_language:
            align_model, align_metadata = whisperx.load_align_model(
                language_code=detected_language, device=device
            )
            result = whisperx.align(
                result["segments"],
                align_model,
                align_metadata,
                audio,
                device,
                return_char_alignments=False,
            )
            del align_model

        if diarize:
            diarize_model = whisperx.DiarizationPipeline(
                model_name="pyannote/speaker-diarization-3.1",
                use_auth_token=hf_token,
                device=device,
            )
            diarize_segments = diarize_model(audio)
            result = whisperx.assign_word_speakers(diarize_segments, result)

    return result["segments"]
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Update checker and in-place updater for subtitle-engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import datetime, timedelta, timezone
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional
|
|
12
|
+
from urllib.request import Request, urlopen
|
|
13
|
+
|
|
14
|
+
from subtitle_engine import __version__
|
|
15
|
+
|
|
16
|
+
PYPI_PACKAGE_NAME = "subtitle-engine"
|
|
17
|
+
PYPI_JSON_URL = f"https://pypi.org/pypi/{PYPI_PACKAGE_NAME}/json"
|
|
18
|
+
CACHE_DIR = Path.home() / ".cache" / "subeng"
|
|
19
|
+
CACHE_FILE = CACHE_DIR / "update_check.json"
|
|
20
|
+
CACHE_TTL = timedelta(days=1)
|
|
21
|
+
REQUEST_TIMEOUT = 5 # seconds
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class UpdateInfo:
    """Immutable pair of versions describing an available update."""

    # Version the running package reports for itself.
    current: str
    # Newer version that is available.
    latest: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class UpdateCheckError(Exception):
    """Error type raised when checking for or installing an update fails."""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _version_key(version: str) -> tuple[int, ...]:
    """Convert a version string to a comparable tuple of integers.

    Each dot-separated component contributes its leading run of digits (or
    ``0`` when it has none), so ``1.2.3a1`` is treated like ``1.2.3``.
    """
    parts: list[int] = []
    for part in version.split("."):
        numeric = ""
        for ch in part:
            if not ch.isdigit():
                break
            numeric += ch
        parts.append(int(numeric) if numeric else 0)
    return tuple(parts)


def is_update_available(current: str, latest: str) -> bool:
    """Return ``True`` if ``latest`` is newer than ``current``.

    Both keys are zero-padded to the same length before comparing, so that
    ``1.0`` and ``1.0.0`` count as the same release. Without the padding,
    tuple ordering would rank the longer one as newer.
    """
    current_key = _version_key(current)
    latest_key = _version_key(latest)
    width = max(len(current_key), len(latest_key))
    current_key += (0,) * (width - len(current_key))
    latest_key += (0,) * (width - len(latest_key))
    return latest_key > current_key
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _cache_path() -> Path:
    """Ensure the cache directory exists and return the cache file path.

    Side effect: creates ``CACHE_DIR`` (including missing parents) on every
    call; an already existing directory is not an error.
    """
    CACHE_DIR.mkdir(exist_ok=True, parents=True)
    return CACHE_FILE
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _read_cached_check() -> Optional[UpdateInfo]:
    """Read the cached update check if it exists and is still fresh.

    Returns:
        ``UpdateInfo`` when a fresh cache entry names a version newer than the
        running one; ``None`` when the cache is missing, stale, unreadable,
        malformed, or names a version that is not newer.

    Reading the cache is best-effort: any filesystem or parsing problem is
    treated as "no cached result" and never raised.
    """
    try:
        # Inside the try: creating the cache directory can itself fail
        # (read-only home, permissions) and must not crash the CLI.
        cache = _cache_path()
        if not cache.exists():
            return None
        data = json.loads(cache.read_text(encoding="utf-8"))
        checked_at = datetime.fromisoformat(data["checked_at"])
        if datetime.now(timezone.utc) - checked_at > CACHE_TTL:
            return None
        latest = data["latest"]
        if is_update_available(__version__, latest):
            return UpdateInfo(current=__version__, latest=latest)
    except (KeyError, TypeError, ValueError, AttributeError, OSError):
        # TypeError: non-dict JSON payload, non-string timestamp, or a naive
        # timestamp subtracted from an aware "now".
        # AttributeError: a non-string "latest" reaching the version parser.
        return None
    return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _write_cached_check(latest: str) -> None:
    """Write the result of an update check to the cache.

    Stores the current UTC time (ISO 8601) under ``checked_at`` and the given
    version under ``latest``. Failures to create the cache directory or write
    the file are ignored.
    """
    payload = {
        "checked_at": datetime.now(timezone.utc).isoformat(),
        "latest": latest,
    }
    try:
        # Directory creation is inside the try so that an unwritable cache
        # location is handled like any other write failure.
        _cache_path().write_text(json.dumps(payload), encoding="utf-8")
    except OSError:
        # Caching is best-effort; never fail the CLI because of it.
        pass
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def fetch_latest_version(timeout: int = REQUEST_TIMEOUT) -> str:
    """Fetch the latest released version from PyPI.

    Args:
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        The value of ``info.version`` from the PyPI JSON document, as a string.

    Raises:
        UpdateCheckError: if the request fails or the response is unusable.
    """
    # Local import: the HTTP layer can raise errors that are not OSError
    # subclasses (for example an incomplete read of the response body).
    from http.client import HTTPException

    request = Request(
        PYPI_JSON_URL,
        headers={"Accept": "application/json", "User-Agent": f"subeng/{__version__}"},
    )
    try:
        with urlopen(request, timeout=timeout) as response:  # noqa: S310
            data = json.loads(response.read().decode("utf-8"))
    except (OSError, HTTPException, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        raise UpdateCheckError(f"Could not check for updates: {exc}") from exc

    try:
        return str(data["info"]["version"])
    except (KeyError, TypeError) as exc:
        raise UpdateCheckError(f"Unexpected PyPI response: {exc}") from exc
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _has_fresh_cache() -> bool:
    """Return ``True`` if a readable cache entry no older than ``CACHE_TTL`` exists."""
    try:
        data = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
        checked_at = datetime.fromisoformat(data["checked_at"])
        age = datetime.now(timezone.utc) - checked_at
        # A timestamp in the future is treated as invalid rather than fresh.
        return isinstance(data["latest"], str) and timedelta(0) <= age <= CACHE_TTL
    except (KeyError, TypeError, ValueError, OSError):
        return False


def check_for_update(force: bool = False) -> Optional[UpdateInfo]:
    """Check whether a newer version is available on PyPI.

    The result is cached for ``CACHE_TTL`` (one day) unless ``force`` is ``True``.
    Network failures are swallowed unless ``force`` is ``True``.

    Args:
        force: Skip the cache and propagate ``UpdateCheckError`` on failure.

    Returns:
        ``UpdateInfo`` if a newer version exists, otherwise ``None``.

    Raises:
        UpdateCheckError: only when ``force`` is ``True`` and the lookup fails.
    """
    if not force:
        cached = _read_cached_check()
        if cached is not None:
            return cached
        # A fresh cache with no newer version means "already up to date".
        # Honour it too, otherwise every run would query PyPI again.
        if _has_fresh_cache():
            return None

    try:
        latest = fetch_latest_version()
    except UpdateCheckError:
        if force:
            raise
        return None

    _write_cached_check(latest)

    if is_update_available(__version__, latest):
        return UpdateInfo(current=__version__, latest=latest)
    return None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def update_package() -> None:
    """Upgrade subtitle-engine in the current Python environment using pip.

    Runs ``pip install --upgrade`` through the interpreter that is executing
    this code (``sys.executable -m pip``).

    Raises:
        UpdateCheckError: if pip exits with a non-zero status, or if the
            command cannot be started because the executable is not found.
    """
    pip_upgrade = [sys.executable, "-m", "pip", "install", "--upgrade", PYPI_PACKAGE_NAME]
    try:
        subprocess.run(pip_upgrade, check=True)  # noqa: S603
    except FileNotFoundError as exc:
        raise UpdateCheckError(f"Could not run pip: {exc}") from exc
    except subprocess.CalledProcessError as exc:
        raise UpdateCheckError(f"Update failed (exit code {exc.returncode}).") from exc
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: subtitle-engine
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Generate SRT subtitles from audio/video files using WhisperX
|
|
5
5
|
Author: Leevi Puntanen
|
|
6
6
|
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/
|
|
8
|
-
Project-URL: Issues, https://github.com/
|
|
7
|
+
Project-URL: Homepage, https://github.com/leevipun/subtitle-engine
|
|
8
|
+
Project-URL: Issues, https://github.com/leevipun/subtitle-engine/issues
|
|
9
9
|
Keywords: subtitles,srt,whisperx,transcription,asr
|
|
10
10
|
Classifier: Development Status :: 4 - Beta
|
|
11
11
|
Classifier: Intended Audience :: End Users/Desktop
|
|
@@ -58,7 +58,7 @@ subeng video.mp4 --output subtitles.srt
|
|
|
58
58
|
# Use a different model or language
|
|
59
59
|
subeng video.mp4 --model medium --language fi
|
|
60
60
|
|
|
61
|
-
# Force CPU / CUDA
|
|
61
|
+
# Force CPU / CUDA
|
|
62
62
|
subeng video.mp4 --device cpu
|
|
63
63
|
|
|
64
64
|
# Speaker diarization (requires a Hugging Face token)
|
|
@@ -75,7 +75,7 @@ subeng video.mp4 --caption --ollama-model qwen3.5:0.8b
|
|
|
75
75
|
| `--output`, `-o` | Output SRT file path |
|
|
76
76
|
| `--model`, `-m` | WhisperX model: `tiny`, `base`, `small` (default), `medium`, `large-v2`, `large-v3` |
|
|
77
77
|
| `--language`, `-l` | ISO language code, e.g. `en`, `fi`. Auto-detected if omitted. |
|
|
78
|
-
| `--device`, `-d` | `cpu
|
|
78
|
+
| `--device`, `-d` | `cpu` or `cuda`. Auto-detected if omitted. |
|
|
79
79
|
| `--batch-size`, `-b` | Inference batch size (default: 16) |
|
|
80
80
|
| `--compute-type`, `-c` | `int8` or `float16`. Auto-selected if omitted. |
|
|
81
81
|
| `--diarize` | Enable speaker diarization |
|
|
@@ -6,6 +6,7 @@ src/subtitle_engine/captioner.py
|
|
|
6
6
|
src/subtitle_engine/cli.py
|
|
7
7
|
src/subtitle_engine/srt_writer.py
|
|
8
8
|
src/subtitle_engine/transcriber.py
|
|
9
|
+
src/subtitle_engine/updater.py
|
|
9
10
|
src/subtitle_engine/utils.py
|
|
10
11
|
src/subtitle_engine.egg-info/PKG-INFO
|
|
11
12
|
src/subtitle_engine.egg-info/SOURCES.txt
|
|
@@ -15,4 +16,6 @@ src/subtitle_engine.egg-info/requires.txt
|
|
|
15
16
|
src/subtitle_engine.egg-info/top_level.txt
|
|
16
17
|
tests/test_captioner.py
|
|
17
18
|
tests/test_cli.py
|
|
18
|
-
tests/test_srt_writer.py
|
|
19
|
+
tests/test_srt_writer.py
|
|
20
|
+
tests/test_transcriber.py
|
|
21
|
+
tests/test_updater.py
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Tests for CLI helpers and argument parsing."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from unittest.mock import patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
import typer
|
|
9
|
+
from typer.testing import CliRunner
|
|
10
|
+
|
|
11
|
+
from subtitle_engine import __version__
|
|
12
|
+
from subtitle_engine.cli import app, main_entry, update
|
|
13
|
+
from subtitle_engine.updater import UpdateCheckError, UpdateInfo
|
|
14
|
+
from subtitle_engine.utils import resolve_output_path, validate_media_file
|
|
15
|
+
|
|
16
|
+
runner = CliRunner()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture(autouse=True)
def disable_update_check():
    """Stub out the CLI's update check for every test in this module.

    ``subtitle_engine.cli.check_for_update`` is patched to return ``None`` so
    that invoking the CLI never touches the network.
    """
    stub = patch("subtitle_engine.cli.check_for_update", return_value=None)
    with stub:
        yield
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_resolve_output_path_default():
    """Without an explicit output, ``movie.mp4`` resolves to ``movie.srt``."""
    resolved = resolve_output_path(Path("movie.mp4"))
    assert resolved == Path("movie.srt")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_resolve_output_path_explicit():
    """An explicitly supplied output path is returned as given."""
    requested = Path("custom.srt")
    assert resolve_output_path(Path("movie.mp4"), requested) == requested
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_validate_media_file_supported():
    """A ``.mp4`` path passes validation without raising."""
    supported = Path("video.mp4")
    validate_media_file(supported)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_validate_media_file_unsupported():
    """A ``.txt`` path is rejected with an "Unsupported file type" error."""
    unsupported = Path("file.txt")
    with pytest.raises(ValueError, match="Unsupported file type"):
        validate_media_file(unsupported)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_cli_help():
    """``--help`` exits with status 0 and shows the command description."""
    outcome = runner.invoke(app, ["--help"])
    assert outcome.exit_code == 0
    assert "Generate SRT subtitles" in outcome.output
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_cli_no_args():
    """Invoking the app without arguments fails and prints usage text."""
    outcome = runner.invoke(app)
    assert outcome.exit_code != 0
    assert "Usage:" in outcome.output
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_caption_requires_ollama_model(tmp_path: Path):
    """``--caption`` without ``--ollama-model`` is reported as an error."""
    fake_video = tmp_path / "video.mp4"
    fake_video.write_bytes(b"fake")
    outcome = runner.invoke(app, [str(fake_video), "--caption"])
    assert outcome.exit_code != 0
    assert "--ollama-model is required" in outcome.output
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_cli_version_long():
    """``--version`` exits with status 0 and prints the program name and version."""
    result = runner.invoke(app, ["--version"])
    assert result.exit_code == 0
    assert "subeng" in result.output
    # Compare against the package's own version, not a literal that has to be
    # edited on every release.
    assert __version__ in result.output
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_cli_version_short():
    """The ``-v`` short flag exits with status 0 and prints the program name."""
    outcome = runner.invoke(app, ["-v"])
    assert outcome.exit_code == 0
    assert "subeng" in outcome.output
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_cli_version_no_extra_output():
    """``--version`` prints exactly ``subeng <version>`` and nothing else."""
    result = runner.invoke(app, ["--version"])
    assert result.exit_code == 0
    # Built from ``__version__`` so the test does not go stale on a release.
    assert result.output.strip() == f"subeng {__version__}"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_cli_quiet_hides_status_but_keeps_errors(tmp_path: Path):
    """With ``-q`` an error is still printed but no status line is shown."""
    fake_video = tmp_path / "video.mp4"
    fake_video.write_bytes(b"fake")
    # --caption without --ollama-model forces the error path.
    outcome = runner.invoke(app, [str(fake_video), "--caption", "-q"])
    assert outcome.exit_code != 0
    assert "Error:" in outcome.output
    assert "Transcribing:" not in outcome.output
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_cli_verbose_accepted(tmp_path: Path):
    """``--verbose`` is accepted; the missing-model error is still reported."""
    fake_video = tmp_path / "video.mp4"
    fake_video.write_bytes(b"fake")
    outcome = runner.invoke(app, [str(fake_video), "--caption", "--verbose"])
    assert outcome.exit_code != 0
    assert "--ollama-model is required" in outcome.output
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_update_command_shows_up_to_date():
    """Passing ``update`` straight to the Typer app fails without an update check.

    ``update`` is routed by ``main_entry`` and is not registered on the Typer
    app itself, so invoking the app directly with it must exit non-zero and
    must not reach ``check_for_update``.
    """
    checker = patch("subtitle_engine.cli.check_for_update", return_value=None)
    with checker as mock_check:
        outcome = runner.invoke(app, ["update"])
    assert outcome.exit_code != 0
    mock_check.assert_not_called()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_update_function_runs_upgrade_when_available():
    """``update()`` forces a check and runs the upgrade when one is available."""
    newer = UpdateInfo(current=__version__, latest="9.9.9")
    with patch(
        "subtitle_engine.cli.check_for_update", return_value=newer
    ) as mock_check, patch("subtitle_engine.cli.update_package") as mock_upgrade:
        update()
    mock_check.assert_called_once_with(force=True)
    mock_upgrade.assert_called_once()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_update_function_reports_up_to_date():
|
|
122
|
+
with patch("subtitle_engine.cli.check_for_update", return_value=None) as mock_check:
|
|
123
|
+
with patch("subtitle_engine.cli.update_package") as mock_upgrade:
|
|
124
|
+
update()
|
|
125
|
+
mock_check.assert_called_once_with(force=True)
|
|
126
|
+
mock_upgrade.assert_not_called()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_update_function_handles_check_error():
|
|
130
|
+
with patch("subtitle_engine.cli.check_for_update", side_effect=UpdateCheckError("no network")):
|
|
131
|
+
with pytest.raises(typer.Exit) as exc_info:
|
|
132
|
+
update()
|
|
133
|
+
assert exc_info.value.exit_code == 1
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def test_main_entry_routes_update_command():
|
|
137
|
+
with patch("subtitle_engine.cli.update") as mock_update:
|
|
138
|
+
with patch.object(sys, "argv", ["subeng", "update"]):
|
|
139
|
+
main_entry()
|
|
140
|
+
mock_update.assert_called_once()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_main_entry_runs_typer_app_for_transcription():
|
|
144
|
+
with patch("subtitle_engine.cli.app") as mock_app:
|
|
145
|
+
with patch.object(sys, "argv", ["subeng", "video.mp4"]):
|
|
146
|
+
main_entry()
|
|
147
|
+
mock_app.assert_called_once()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Tests for the WhisperX transcription wrapper."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import warnings
|
|
5
|
+
|
|
6
|
+
from subtitle_engine.transcriber import _suppress_external_output
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_suppress_external_output_hides_noise(capsys):
|
|
10
|
+
with _suppress_external_output(verbose=False):
|
|
11
|
+
print("stdout noise")
|
|
12
|
+
print("stderr noise", flush=True)
|
|
13
|
+
warnings.warn("warning noise")
|
|
14
|
+
logging.getLogger("whisperx").warning("logger noise")
|
|
15
|
+
|
|
16
|
+
captured = capsys.readouterr()
|
|
17
|
+
assert captured.out == ""
|
|
18
|
+
assert captured.err == ""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_suppress_external_output_verbose_keeps_noise(capsys):
|
|
22
|
+
with _suppress_external_output(verbose=True):
|
|
23
|
+
print("visible output")
|
|
24
|
+
|
|
25
|
+
captured = capsys.readouterr()
|
|
26
|
+
assert "visible output" in captured.out
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""Tests for the update checker and updater."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from unittest.mock import MagicMock, patch
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from subtitle_engine import __version__
|
|
13
|
+
from subtitle_engine import updater
|
|
14
|
+
from subtitle_engine.updater import (
|
|
15
|
+
UpdateCheckError,
|
|
16
|
+
UpdateInfo,
|
|
17
|
+
check_for_update,
|
|
18
|
+
fetch_latest_version,
|
|
19
|
+
is_update_available,
|
|
20
|
+
update_package,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
|
|
25
|
+
def fresh_cache(tmp_path: Path, monkeypatch):
|
|
26
|
+
"""Redirect the update-check cache to a temporary directory."""
|
|
27
|
+
cache_dir = tmp_path / "cache"
|
|
28
|
+
monkeypatch.setattr(updater, "CACHE_DIR", cache_dir)
|
|
29
|
+
monkeypatch.setattr(updater, "CACHE_FILE", cache_dir / "update_check.json")
|
|
30
|
+
return cache_dir
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_is_update_available_when_latest_is_newer():
|
|
34
|
+
assert is_update_available("0.1.0", "0.2.0") is True
|
|
35
|
+
assert is_update_available("0.1.1", "0.1.2") is True
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_is_update_available_when_latest_is_older_or_equal():
|
|
39
|
+
assert is_update_available("0.2.0", "0.1.0") is False
|
|
40
|
+
assert is_update_available("0.1.1", "0.1.1") is False
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_is_update_available_ignores_non_numeric_suffixes():
|
|
44
|
+
assert is_update_available("0.1.1", "0.1.2a1") is True
|
|
45
|
+
assert is_update_available("0.1.2", "0.1.2a1") is False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_fetch_latest_version_parses_pypi_response():
|
|
49
|
+
mock_response = MagicMock()
|
|
50
|
+
mock_response.read.return_value = json.dumps({"info": {"version": "9.9.9"}}).encode("utf-8")
|
|
51
|
+
mock_context = MagicMock()
|
|
52
|
+
mock_context.__enter__.return_value = mock_response
|
|
53
|
+
|
|
54
|
+
with patch("subtitle_engine.updater.urlopen", return_value=mock_context):
|
|
55
|
+
assert fetch_latest_version() == "9.9.9"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_fetch_latest_version_raises_on_bad_response():
|
|
59
|
+
mock_response = MagicMock()
|
|
60
|
+
mock_response.read.return_value = json.dumps({"info": {}}).encode("utf-8")
|
|
61
|
+
mock_context = MagicMock()
|
|
62
|
+
mock_context.__enter__.return_value = mock_response
|
|
63
|
+
|
|
64
|
+
with patch("subtitle_engine.updater.urlopen", return_value=mock_context):
|
|
65
|
+
with pytest.raises(UpdateCheckError):
|
|
66
|
+
fetch_latest_version()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_fetch_latest_version_raises_on_network_error():
|
|
70
|
+
with patch("subtitle_engine.updater.urlopen", side_effect=OSError("no network")):
|
|
71
|
+
with pytest.raises(UpdateCheckError):
|
|
72
|
+
fetch_latest_version()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_check_for_update_returns_info_when_update_available(fresh_cache: Path):
|
|
76
|
+
mock_response = MagicMock()
|
|
77
|
+
mock_response.read.return_value = json.dumps({"info": {"version": "9.9.9"}}).encode("utf-8")
|
|
78
|
+
mock_context = MagicMock()
|
|
79
|
+
mock_context.__enter__.return_value = mock_response
|
|
80
|
+
|
|
81
|
+
with patch("subtitle_engine.updater.urlopen", return_value=mock_context):
|
|
82
|
+
result = check_for_update()
|
|
83
|
+
|
|
84
|
+
assert result == UpdateInfo(current=__version__, latest="9.9.9")
|
|
85
|
+
assert (fresh_cache / "update_check.json").exists()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_check_for_update_uses_cache_instead_of_fetching(fresh_cache: Path):
|
|
89
|
+
fresh_cache.mkdir(parents=True, exist_ok=True)
|
|
90
|
+
cache = fresh_cache / "update_check.json"
|
|
91
|
+
cache.write_text(
|
|
92
|
+
json.dumps(
|
|
93
|
+
{
|
|
94
|
+
"checked_at": datetime.now(timezone.utc).isoformat(),
|
|
95
|
+
"latest": "9.9.9",
|
|
96
|
+
}
|
|
97
|
+
),
|
|
98
|
+
encoding="utf-8",
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
with patch("subtitle_engine.updater.urlopen") as mock_urlopen:
|
|
102
|
+
result = check_for_update()
|
|
103
|
+
mock_urlopen.assert_not_called()
|
|
104
|
+
|
|
105
|
+
assert result == UpdateInfo(current=__version__, latest="9.9.9")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_check_for_update_cache_ignores_stale_entries(fresh_cache: Path):
|
|
109
|
+
fresh_cache.mkdir(parents=True, exist_ok=True)
|
|
110
|
+
cache = fresh_cache / "update_check.json"
|
|
111
|
+
stale_time = datetime.now(timezone.utc) - timedelta(days=2)
|
|
112
|
+
cache.write_text(
|
|
113
|
+
json.dumps(
|
|
114
|
+
{
|
|
115
|
+
"checked_at": stale_time.isoformat(),
|
|
116
|
+
"latest": "9.9.9",
|
|
117
|
+
}
|
|
118
|
+
),
|
|
119
|
+
encoding="utf-8",
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
mock_response = MagicMock()
|
|
123
|
+
mock_response.read.return_value = json.dumps({"info": {"version": "9.9.10"}}).encode("utf-8")
|
|
124
|
+
mock_context = MagicMock()
|
|
125
|
+
mock_context.__enter__.return_value = mock_response
|
|
126
|
+
|
|
127
|
+
with patch("subtitle_engine.updater.urlopen", return_value=mock_context):
|
|
128
|
+
result = check_for_update()
|
|
129
|
+
|
|
130
|
+
assert result == UpdateInfo(current=__version__, latest="9.9.10")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_check_for_update_returns_none_when_up_to_date(fresh_cache: Path):
|
|
134
|
+
mock_response = MagicMock()
|
|
135
|
+
mock_response.read.return_value = json.dumps({"info": {"version": __version__}}).encode("utf-8")
|
|
136
|
+
mock_context = MagicMock()
|
|
137
|
+
mock_context.__enter__.return_value = mock_response
|
|
138
|
+
|
|
139
|
+
with patch("subtitle_engine.updater.urlopen", return_value=mock_context):
|
|
140
|
+
result = check_for_update()
|
|
141
|
+
|
|
142
|
+
assert result is None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_check_for_update_swallows_network_errors_by_default(fresh_cache: Path):
|
|
146
|
+
with patch("subtitle_engine.updater.urlopen", side_effect=OSError("no network")):
|
|
147
|
+
result = check_for_update()
|
|
148
|
+
assert result is None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def test_check_for_update_force_re_raises_network_errors(fresh_cache: Path):
|
|
152
|
+
with patch("subtitle_engine.updater.urlopen", side_effect=OSError("no network")):
|
|
153
|
+
with pytest.raises(UpdateCheckError):
|
|
154
|
+
check_for_update(force=True)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def test_update_package_runs_pip_upgrade():
|
|
158
|
+
with patch("subtitle_engine.updater.subprocess.run") as mock_run:
|
|
159
|
+
update_package()
|
|
160
|
+
mock_run.assert_called_once()
|
|
161
|
+
command = mock_run.call_args[0][0]
|
|
162
|
+
assert command[0] == sys.executable
|
|
163
|
+
assert command[1:] == ["-m", "pip", "install", "--upgrade", "subtitle-engine"]
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def test_update_package_raises_on_pip_failure():
|
|
167
|
+
with patch(
|
|
168
|
+
"subtitle_engine.updater.subprocess.run",
|
|
169
|
+
side_effect=subprocess.CalledProcessError(1, ["pip"]),
|
|
170
|
+
):
|
|
171
|
+
with pytest.raises(UpdateCheckError):
|
|
172
|
+
update_package()
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
"""WhisperX transcription wrapper."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
import torch
|
|
7
|
-
import whisperx
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
VALID_MODELS = {"tiny", "base", "small", "medium", "large-v2", "large-v3"}
|
|
11
|
-
VALID_DEVICES = {"cpu", "cuda", "mps"}
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def _detect_device(device: Optional[str]) -> str:
|
|
15
|
-
"""Pick a device if none was specified."""
|
|
16
|
-
if device:
|
|
17
|
-
return device
|
|
18
|
-
if torch.cuda.is_available():
|
|
19
|
-
return "cuda"
|
|
20
|
-
if torch.backends.mps.is_available():
|
|
21
|
-
return "mps"
|
|
22
|
-
return "cpu"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _default_compute_type(device: str) -> str:
|
|
26
|
-
"""Pick a safe compute type for the device."""
|
|
27
|
-
if device == "cpu":
|
|
28
|
-
return "int8"
|
|
29
|
-
return "float16"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def _validate_model(model_name: str) -> None:
|
|
33
|
-
"""Raise a ValueError if the model name is unknown."""
|
|
34
|
-
if model_name not in VALID_MODELS:
|
|
35
|
-
joined = ", ".join(sorted(VALID_MODELS))
|
|
36
|
-
raise ValueError(f"Unknown model '{model_name}'. Choose from: {joined}")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def _validate_device(device: str) -> None:
|
|
40
|
-
"""Raise a ValueError if the device name is unknown."""
|
|
41
|
-
if device not in VALID_DEVICES:
|
|
42
|
-
joined = ", ".join(sorted(VALID_DEVICES))
|
|
43
|
-
raise ValueError(f"Unknown device '{device}'. Choose from: {joined}")
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def transcribe(
|
|
47
|
-
audio_path: Path,
|
|
48
|
-
*,
|
|
49
|
-
model_name: str = "small",
|
|
50
|
-
language: Optional[str] = None,
|
|
51
|
-
device: Optional[str] = None,
|
|
52
|
-
batch_size: int = 16,
|
|
53
|
-
compute_type: Optional[str] = None,
|
|
54
|
-
diarize: bool = False,
|
|
55
|
-
hf_token: Optional[str] = None,
|
|
56
|
-
) -> list[dict]:
|
|
57
|
-
"""Transcribe an audio/video file and return SRT-ready segments.
|
|
58
|
-
|
|
59
|
-
Parameters
|
|
60
|
-
----------
|
|
61
|
-
audio_path:
|
|
62
|
-
Path to the media file to transcribe.
|
|
63
|
-
model_name:
|
|
64
|
-
WhisperX model size. One of: tiny, base, small, medium, large-v2, large-v3.
|
|
65
|
-
language:
|
|
66
|
-
ISO language code, e.g. ``en`` or ``fi``. If ``None``, WhisperX auto-detects.
|
|
67
|
-
device:
|
|
68
|
-
``cpu``, ``cuda`` or ``mps``. Auto-detected if omitted.
|
|
69
|
-
batch_size:
|
|
70
|
-
WhisperX batch size for transcription.
|
|
71
|
-
compute_type:
|
|
72
|
-
``int8`` or ``float16``. Auto-selected per device if omitted.
|
|
73
|
-
diarize:
|
|
74
|
-
Whether to run speaker diarization.
|
|
75
|
-
hf_token:
|
|
76
|
-
Hugging Face token required for diarization.
|
|
77
|
-
|
|
78
|
-
Returns
|
|
79
|
-
-------
|
|
80
|
-
A list of segment dicts with ``start``, ``end`` and ``text`` keys.
|
|
81
|
-
"""
|
|
82
|
-
_validate_model(model_name)
|
|
83
|
-
|
|
84
|
-
audio_path = Path(audio_path)
|
|
85
|
-
if not audio_path.exists():
|
|
86
|
-
raise FileNotFoundError(f"Audio file not found: {audio_path}")
|
|
87
|
-
|
|
88
|
-
device = _detect_device(device)
|
|
89
|
-
_validate_device(device)
|
|
90
|
-
|
|
91
|
-
if compute_type is None:
|
|
92
|
-
compute_type = _default_compute_type(device)
|
|
93
|
-
|
|
94
|
-
if diarize and not hf_token:
|
|
95
|
-
raise ValueError("--hf-token is required when using --diarize")
|
|
96
|
-
|
|
97
|
-
audio = whisperx.load_audio(str(audio_path))
|
|
98
|
-
|
|
99
|
-
model = whisperx.load_model(model_name, device, compute_type=compute_type)
|
|
100
|
-
result = model.transcribe(audio, batch_size=batch_size, language=language)
|
|
101
|
-
|
|
102
|
-
# Free transcription model memory before alignment
|
|
103
|
-
del model
|
|
104
|
-
|
|
105
|
-
detected_language = result.get("language")
|
|
106
|
-
if detected_language:
|
|
107
|
-
align_model, align_metadata = whisperx.load_align_model(
|
|
108
|
-
language_code=detected_language, device=device
|
|
109
|
-
)
|
|
110
|
-
result = whisperx.align(
|
|
111
|
-
result["segments"],
|
|
112
|
-
align_model,
|
|
113
|
-
align_metadata,
|
|
114
|
-
audio,
|
|
115
|
-
device,
|
|
116
|
-
return_char_alignments=False,
|
|
117
|
-
)
|
|
118
|
-
del align_model
|
|
119
|
-
|
|
120
|
-
if diarize:
|
|
121
|
-
diarize_model = whisperx.DiarizationPipeline(
|
|
122
|
-
model_name="pyannote/speaker-diarization-3.1",
|
|
123
|
-
use_auth_token=hf_token,
|
|
124
|
-
device=device,
|
|
125
|
-
)
|
|
126
|
-
diarize_segments = diarize_model(audio)
|
|
127
|
-
result = whisperx.assign_word_speakers(diarize_segments, result)
|
|
128
|
-
|
|
129
|
-
return result["segments"]
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
"""Tests for CLI helpers and argument parsing."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
from typer.testing import CliRunner
|
|
7
|
-
|
|
8
|
-
from subtitle_engine.cli import app
|
|
9
|
-
from subtitle_engine.utils import resolve_output_path, validate_media_file
|
|
10
|
-
|
|
11
|
-
runner = CliRunner()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def test_resolve_output_path_default():
|
|
15
|
-
input_path = Path("movie.mp4")
|
|
16
|
-
assert resolve_output_path(input_path) == Path("movie.srt")
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def test_resolve_output_path_explicit():
|
|
20
|
-
input_path = Path("movie.mp4")
|
|
21
|
-
output = Path("custom.srt")
|
|
22
|
-
assert resolve_output_path(input_path, output) == output
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def test_validate_media_file_supported():
|
|
26
|
-
validate_media_file(Path("video.mp4"))
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def test_validate_media_file_unsupported():
|
|
30
|
-
with pytest.raises(ValueError, match="Unsupported file type"):
|
|
31
|
-
validate_media_file(Path("file.txt"))
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def test_cli_help():
|
|
35
|
-
result = runner.invoke(app, ["--help"])
|
|
36
|
-
assert result.exit_code == 0
|
|
37
|
-
assert "Generate SRT subtitles" in result.output
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def test_cli_no_args():
|
|
41
|
-
result = runner.invoke(app)
|
|
42
|
-
assert result.exit_code != 0
|
|
43
|
-
assert "Usage:" in result.output
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def test_caption_requires_ollama_model(tmp_path: Path):
|
|
47
|
-
media = tmp_path / "video.mp4"
|
|
48
|
-
media.write_bytes(b"fake")
|
|
49
|
-
result = runner.invoke(app, [str(media), "--caption"])
|
|
50
|
-
assert result.exit_code != 0
|
|
51
|
-
assert "--ollama-model is required" in result.output
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{subtitle_engine-0.1.0 → subtitle_engine-0.1.2}/src/subtitle_engine.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|