subtitle-engine 0.1.3.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {subtitle_engine-0.1.3.2/src/subtitle_engine.egg-info → subtitle_engine-0.1.4}/PKG-INFO +7 -2
  2. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/README.md +5 -1
  3. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/pyproject.toml +2 -1
  4. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/__init__.py +1 -1
  5. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/cli.py +117 -7
  6. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/srt_writer.py +43 -0
  7. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4/src/subtitle_engine.egg-info}/PKG-INFO +7 -2
  8. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine.egg-info/requires.txt +1 -0
  9. subtitle_engine-0.1.4/tests/test_cli.py +274 -0
  10. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/tests/test_srt_writer.py +48 -0
  11. subtitle_engine-0.1.3.2/tests/test_cli.py +0 -172
  12. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/LICENSE +0 -0
  13. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/setup.cfg +0 -0
  14. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/captioner.py +0 -0
  15. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/segmenter.py +0 -0
  16. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/transcriber.py +0 -0
  17. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/updater.py +0 -0
  18. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine/utils.py +0 -0
  19. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine.egg-info/SOURCES.txt +0 -0
  20. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine.egg-info/dependency_links.txt +0 -0
  21. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine.egg-info/entry_points.txt +0 -0
  22. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/src/subtitle_engine.egg-info/top_level.txt +0 -0
  23. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/tests/test_captioner.py +0 -0
  24. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/tests/test_segmenter.py +0 -0
  25. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/tests/test_transcriber.py +0 -0
  26. {subtitle_engine-0.1.3.2 → subtitle_engine-0.1.4}/tests/test_updater.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: subtitle-engine
3
- Version: 0.1.3.2
3
+ Version: 0.1.4
4
4
  Summary: Generate SRT subtitles from audio/video files using WhisperX
5
5
  Author: Leevi Puntanen
6
6
  License-Expression: MIT
@@ -20,6 +20,7 @@ License-File: LICENSE
20
20
  Requires-Dist: typer>=0.12.0
21
21
  Requires-Dist: whisperx>=3.8.0
22
22
  Requires-Dist: requests>=2.32.0
23
+ Requires-Dist: questionary>=2.0.0
23
24
  Provides-Extra: dev
24
25
  Requires-Dist: pytest>=8.0.0; extra == "dev"
25
26
  Dynamic: license-file
@@ -67,6 +68,9 @@ subeng video.mp4 --diarize --hf-token $HF_TOKEN
67
68
  # Generate a caption from the transcript using Ollama
68
69
  subeng video.mp4 --caption --ollama-model qwen3.5:0.6b
69
70
 
71
+ # Generate a caption from an existing SRT file
72
+ subeng caption subtitles.srt
73
+
70
74
  # Short-form subtitles (2-5 words per line, default)
71
75
  subeng video.mp4 --preset shortform
72
76
 
@@ -87,8 +91,9 @@ subeng video.mp4 --preset longform
87
91
  | `--diarize` | Enable speaker diarization |
88
92
  | `--hf-token` | Hugging Face token for diarization (or set `HF_TOKEN` env var) |
89
93
  | `--caption` | Generate a caption from the transcript via Ollama |
90
- | `--ollama-model` | Ollama model name (required with `--caption`) |
94
+ | `--ollama-model` | Ollama model name. If omitted, installed models are listed and you can pick one. |
91
95
  | `--ollama-host` | Ollama API host (default: `http://localhost:11434`) |
96
+ | `caption` | Generate a caption from an existing SRT file (e.g. `subeng caption file.srt`) |
92
97
  | `--preset`, `-p` | Subtitle style: `shortform` (2-5 words, default) or `longform` (10-14 words) |
93
98
 
94
99
  ## Development
@@ -41,6 +41,9 @@ subeng video.mp4 --diarize --hf-token $HF_TOKEN
41
41
  # Generate a caption from the transcript using Ollama
42
42
  subeng video.mp4 --caption --ollama-model qwen3.5:0.6b
43
43
 
44
+ # Generate a caption from an existing SRT file
45
+ subeng caption subtitles.srt
46
+
44
47
  # Short-form subtitles (2-5 words per line, default)
45
48
  subeng video.mp4 --preset shortform
46
49
 
@@ -61,8 +64,9 @@ subeng video.mp4 --preset longform
61
64
  | `--diarize` | Enable speaker diarization |
62
65
  | `--hf-token` | Hugging Face token for diarization (or set `HF_TOKEN` env var) |
63
66
  | `--caption` | Generate a caption from the transcript via Ollama |
64
- | `--ollama-model` | Ollama model name (required with `--caption`) |
67
+ | `--ollama-model` | Ollama model name. If omitted, installed models are listed and you can pick one. |
65
68
  | `--ollama-host` | Ollama API host (default: `http://localhost:11434`) |
69
+ | `caption` | Generate a caption from an existing SRT file (e.g. `subeng caption file.srt`) |
66
70
  | `--preset`, `-p` | Subtitle style: `shortform` (2-5 words, default) or `longform` (10-14 words) |
67
71
 
68
72
  ## Development
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "subtitle-engine"
7
- version = "0.1.3.2"
7
+ version = "0.1.4"
8
8
  description = "Generate SRT subtitles from audio/video files using WhisperX"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -27,6 +27,7 @@ dependencies = [
27
27
  "typer>=0.12.0",
28
28
  "whisperx>=3.8.0",
29
29
  "requests>=2.32.0",
30
+ "questionary>=2.0.0",
30
31
  ]
31
32
 
32
33
  [project.optional-dependencies]
@@ -1,3 +1,3 @@
1
1
  """Subtitle Engine — generate SRT files with WhisperX."""
2
2
 
3
- __version__ = "0.1.3.2"
3
+ __version__ = "0.1.4"
@@ -4,13 +4,15 @@ import sys
4
4
  from pathlib import Path
5
5
  from typing import Annotated, Optional
6
6
 
7
+ import questionary
8
+ import requests
7
9
  import typer
8
10
  from rich.console import Console
9
11
 
10
12
  from subtitle_engine import __version__
11
- from subtitle_engine.captioner import generate_caption
13
+ from subtitle_engine.captioner import generate_caption, list_models
12
14
  from subtitle_engine.segmenter import VALID_PRESETS, split_segments
13
- from subtitle_engine.srt_writer import write_srt
15
+ from subtitle_engine.srt_writer import extract_text_from_srt, write_srt
14
16
  from subtitle_engine.transcriber import transcribe
15
17
  from subtitle_engine.updater import UpdateCheckError, check_for_update, update_package
16
18
  from subtitle_engine.utils import resolve_output_path, validate_media_file
@@ -22,6 +24,30 @@ app = typer.Typer(
22
24
  console = Console()
23
25
 
24
26
 
27
def _select_ollama_model(host: str) -> str:
    """Ask the user which Ollama model available at *host* should be used.

    Parameters
    ----------
    host:
        Base URL of the Ollama API, passed straight to ``list_models``.

    Returns
    -------
    The model name picked in the interactive prompt.

    Raises
    ------
    ConnectionError:
        If listing the models fails with a ``requests`` error, or if the
        server reports no models at all.
    ValueError:
        If the prompt returns ``None`` instead of a model name.
    """
    try:
        available = list_models(host)
    except requests.RequestException as err:
        message = f"Could not connect to Ollama at {host}. Is Ollama running?"
        raise ConnectionError(message) from err

    # An empty list leaves nothing to choose from, so fail before prompting.
    if not available:
        raise ConnectionError(f"No Ollama models found at {host}.")

    prompt = questionary.select(
        "Select an Ollama model:",
        choices=available,
        default=available[0],
    )
    selected = prompt.ask()

    # NOTE(review): ``ask()`` presumably yields None when the prompt is
    # cancelled (e.g. Ctrl+C) -- confirm against the questionary docs.
    if selected is None:
        raise ValueError("No model selected")
    return selected
49
+
50
+
25
51
  def _version_callback(value: bool) -> None:
26
52
  if value:
27
53
  console.print(f"subeng {__version__}")
@@ -54,14 +80,27 @@ def update() -> None:
54
80
 
55
81
 
56
def main_entry() -> None:
    """Console entry point: route subcommands, defaulting to ``main``.

    Routing rules, evaluated on the first command-line argument only:

    * ``-v`` / ``--version`` prints the version and returns.
    * ``update`` calls :func:`update` directly (it is not a Typer command).
    * ``main`` and ``caption`` are passed to the Typer app unchanged.
    * Options that belong to the command group itself are passed unchanged.
    * Anything else -- a media path *or* an option of the transcription
      command such as ``-q`` -- is treated as an implicit ``main`` invocation.

    The last rule keeps option-first invocations such as
    ``subeng -q video.mp4`` working now that the app has more than one
    command; without it the group would reject ``-q`` as an unknown option.
    """
    argv = sys.argv[1:]  # slicing copies, so sys.argv itself is never mutated
    first = argv[0] if argv else None

    if first in ("-v", "--version"):
        console.print(f"subeng {__version__}")
        return
    if first == "update":
        update()
        return

    subcommands = ("main", "caption")
    # NOTE(review): assumed to be the only options Typer handles at group
    # level (help plus shell-completion helpers) -- confirm against the
    # ``typer.Typer(...)`` configuration of ``app``.
    group_options = ("--help", "--install-completion", "--show-completion")

    if first is not None and first not in subcommands and first not in group_options:
        argv.insert(0, "main")
    app(argv)
62
100
 
63
101
 
64
102
  @app.command(
103
+ name="main",
65
104
  epilog="Run 'subeng update' to update to the latest version.",
66
105
  )
67
106
  def main(
@@ -152,7 +191,7 @@ def main(
152
191
  Optional[str],
153
192
  typer.Option(
154
193
  "--ollama-model",
155
- help="Ollama model for caption generation. Required if --caption is set.",
194
+ help="Ollama model for caption generation. If omitted, installed models are listed.",
156
195
  ),
157
196
  ] = None,
158
197
  ollama_host: Annotated[
@@ -207,7 +246,7 @@ def main(
207
246
  output_path = resolve_output_path(input_file, output)
208
247
 
209
248
  if caption and not ollama_model:
210
- raise ValueError("--ollama-model is required when using --caption")
249
+ ollama_model = _select_ollama_model(ollama_host)
211
250
 
212
251
  if not quiet:
213
252
  update_info = check_for_update()
@@ -263,5 +302,76 @@ def main(
263
302
  raise typer.Exit(code=1) from exc
264
303
 
265
304
 
305
@app.command(name="caption")
def caption_command(
    input_file: Annotated[
        Path,
        typer.Argument(
            help="SRT file to generate a caption from",
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
        ),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option(
            "--output",
            "-o",
            help="Output caption file (default: <input>.caption.txt)",
            file_okay=True,
            dir_okay=False,
        ),
    ] = None,
    ollama_model: Annotated[
        Optional[str],
        typer.Option(
            "--ollama-model",
            "-m",
            help="Ollama model for caption generation",
        ),
    ] = None,
    ollama_host: Annotated[
        str,
        typer.Option(
            "--ollama-host",
            help="Ollama API host",
            envvar="OLLAMA_HOST",
        ),
    ] = "http://localhost:11434",
    quiet: Annotated[
        bool,
        typer.Option(
            "--quiet",
            "-q",
            help="Only print errors.",
        ),
    ] = False,
) -> None:
    """Generate a caption from an existing SRT file."""
    # The docstring above doubles as the command's help text, so it is kept
    # short; the flow is: pick a model, read the SRT text, ask Ollama for a
    # caption, write it next to the input (or to --output).
    try:
        # Fall back to the interactive picker only when no model was given.
        model_name = ollama_model or _select_ollama_model(ollama_host)

        srt_text = extract_text_from_srt(input_file)
        generated = generate_caption(
            srt_text,
            model=model_name,
            host=ollama_host,
        )

        if output:
            destination = output
        else:
            # Replaces the input's final suffix, e.g. subs.srt -> subs.caption.txt
            destination = input_file.with_suffix(".caption.txt")
        destination.write_text(generated, encoding="utf-8")

        if not quiet:
            console.print(f"[green]Wrote caption to:[/green] {destination}")
    except (ValueError, FileNotFoundError, ConnectionError) as exc:
        # Expected, user-facing failures: bad input, missing file, no Ollama.
        console.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except Exception as exc:  # noqa: BLE001
        # Anything else is reported as a generation failure; exit code is the same.
        console.print(f"[red]Caption generation failed:[/red] {exc}")
        raise typer.Exit(code=1) from exc
374
+
375
+
266
376
  if __name__ == "__main__":
267
377
  main_entry()
@@ -1,5 +1,6 @@
1
1
  """Convert transcription segments to SRT format."""
2
2
 
3
+ import re
3
4
  from pathlib import Path
4
5
  from typing import Iterable
5
6
 
@@ -38,6 +39,48 @@ def segments_to_srt(segments: Iterable[dict]) -> str:
38
39
  return "\n".join(blocks)
39
40
 
40
41
 
42
def extract_text_from_srt(path: Path) -> str:
    """Read an SRT file and return the spoken text as a single string.

    Each cue is located by its timecode line (the one containing ``-->``)
    and every non-blank line after it is treated as text. This keeps cues
    that lack the leading numeric index from being dropped or truncated.
    Blocks without any timecode line fall back to the positional layout
    (index, timecode, text).

    Parameters
    ----------
    path:
        Path to the SRT file to read.

    Returns
    -------
    The transcript text with subtitle lines joined by spaces.

    Raises
    ------
    FileNotFoundError:
        If the SRT file does not exist.
    ValueError:
        If no subtitle text can be extracted from the file.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"SRT file not found: {path}")

    content = path.read_text(encoding="utf-8")

    texts: list[str] = []
    # Cues are separated by one or more blank (or whitespace-only) lines.
    for block in re.split(r"\n\s*\n", content.strip()):
        lines = [line.strip() for line in block.splitlines()]

        # Text starts right after the first timecode line; if there is none,
        # assume the classic layout where text starts on the third line.
        text_start = next(
            (pos + 1 for pos, line in enumerate(lines) if "-->" in line),
            2,
        )
        text = " ".join(line for line in lines[text_start:] if line)
        if text:
            texts.append(text)

    if not texts:
        raise ValueError(f"No subtitle text found in {path}")

    return " ".join(texts)
82
+
83
+
41
84
  def write_srt(segments: Iterable[dict], output_path: Path) -> None:
42
85
  """Write segments to an SRT file."""
43
86
  output_path = Path(output_path)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: subtitle-engine
3
- Version: 0.1.3.2
3
+ Version: 0.1.4
4
4
  Summary: Generate SRT subtitles from audio/video files using WhisperX
5
5
  Author: Leevi Puntanen
6
6
  License-Expression: MIT
@@ -20,6 +20,7 @@ License-File: LICENSE
20
20
  Requires-Dist: typer>=0.12.0
21
21
  Requires-Dist: whisperx>=3.8.0
22
22
  Requires-Dist: requests>=2.32.0
23
+ Requires-Dist: questionary>=2.0.0
23
24
  Provides-Extra: dev
24
25
  Requires-Dist: pytest>=8.0.0; extra == "dev"
25
26
  Dynamic: license-file
@@ -67,6 +68,9 @@ subeng video.mp4 --diarize --hf-token $HF_TOKEN
67
68
  # Generate a caption from the transcript using Ollama
68
69
  subeng video.mp4 --caption --ollama-model qwen3.5:0.6b
69
70
 
71
+ # Generate a caption from an existing SRT file
72
+ subeng caption subtitles.srt
73
+
70
74
  # Short-form subtitles (2-5 words per line, default)
71
75
  subeng video.mp4 --preset shortform
72
76
 
@@ -87,8 +91,9 @@ subeng video.mp4 --preset longform
87
91
  | `--diarize` | Enable speaker diarization |
88
92
  | `--hf-token` | Hugging Face token for diarization (or set `HF_TOKEN` env var) |
89
93
  | `--caption` | Generate a caption from the transcript via Ollama |
90
- | `--ollama-model` | Ollama model name (required with `--caption`) |
94
+ | `--ollama-model` | Ollama model name. If omitted, installed models are listed and you can pick one. |
91
95
  | `--ollama-host` | Ollama API host (default: `http://localhost:11434`) |
96
+ | `caption` | Generate a caption from an existing SRT file (e.g. `subeng caption file.srt`) |
92
97
  | `--preset`, `-p` | Subtitle style: `shortform` (2-5 words, default) or `longform` (10-14 words) |
93
98
 
94
99
  ## Development
@@ -1,6 +1,7 @@
1
1
  typer>=0.12.0
2
2
  whisperx>=3.8.0
3
3
  requests>=2.32.0
4
+ questionary>=2.0.0
4
5
 
5
6
  [dev]
6
7
  pytest>=8.0.0
@@ -0,0 +1,274 @@
1
+ """Tests for CLI helpers and argument parsing."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+ from unittest.mock import Mock, patch
6
+
7
+ import pytest
8
+ import typer
9
+ from typer.testing import CliRunner
10
+
11
+ from subtitle_engine import __version__
12
+ from subtitle_engine.cli import _select_ollama_model, app, main_entry, update
13
+ from subtitle_engine.updater import UpdateCheckError, UpdateInfo
14
+ from subtitle_engine.utils import resolve_output_path, validate_media_file
15
+
16
+ runner = CliRunner()
17
+
18
+
19
+ @pytest.fixture(autouse=True)
20
+ def disable_update_check():
21
+ """Prevent the CLI from hitting the network during transcription tests."""
22
+ with patch("subtitle_engine.cli.check_for_update", return_value=None):
23
+ yield
24
+
25
+
26
+ def test_resolve_output_path_default():
27
+ input_path = Path("movie.mp4")
28
+ assert resolve_output_path(input_path) == Path("movie.srt")
29
+
30
+
31
+ def test_resolve_output_path_explicit():
32
+ input_path = Path("movie.mp4")
33
+ output = Path("custom.srt")
34
+ assert resolve_output_path(input_path, output) == output
35
+
36
+
37
+ def test_validate_media_file_supported():
38
+ validate_media_file(Path("video.mp4"))
39
+
40
+
41
+ def test_validate_media_file_unsupported():
42
+ with pytest.raises(ValueError, match="Unsupported file type"):
43
+ validate_media_file(Path("file.txt"))
44
+
45
+
46
+ def test_cli_help():
47
+ result = runner.invoke(app, ["--help"])
48
+ assert result.exit_code == 0
49
+ assert "Generate SRT subtitles" in result.output
50
+
51
+
52
+ def test_cli_no_args():
53
+ result = runner.invoke(app)
54
+ assert result.exit_code != 0
55
+ assert "Usage:" in result.output
56
+
57
+
58
+ def test_caption_prompts_for_ollama_model(tmp_path: Path):
59
+ media = tmp_path / "video.mp4"
60
+ media.write_bytes(b"fake")
61
+ with patch("subtitle_engine.cli._select_ollama_model", return_value="qwen3.5:0.8b") as mock_select:
62
+ with patch("subtitle_engine.cli.transcribe", return_value=[{"start": 0.0, "end": 1.0, "text": "hello"}]):
63
+ with patch("subtitle_engine.cli.split_segments", return_value=[{"start": 0.0, "end": 1.0, "text": "hello"}]):
64
+ with patch("subtitle_engine.cli.generate_caption", return_value="A caption"):
65
+ result = runner.invoke(app, ["main", str(media), "--caption"])
66
+ assert result.exit_code == 0
67
+ mock_select.assert_called_once_with("http://localhost:11434")
68
+ assert "Wrote caption" in result.output
69
+
70
+
71
def test_cli_version_long():
    """``main --version`` exits cleanly and reports the package version."""
    result = runner.invoke(app, ["main", "--version"])
    assert result.exit_code == 0
    assert "subeng" in result.output
    # Compare against the package's own version instead of a hard-coded
    # literal, which goes stale (and fails) on every release bump.
    assert __version__ in result.output
76
+
77
+
78
+ def test_cli_version_short():
79
+ result = runner.invoke(app, ["main", "-v"])
80
+ assert result.exit_code == 0
81
+ assert "subeng" in result.output
82
+
83
+
84
+ def test_cli_version_no_extra_output():
85
+ result = runner.invoke(app, ["main", "--version"])
86
+ assert result.exit_code == 0
87
+ assert result.output.strip() == f"subeng {__version__}"
88
+
89
+
90
+ def test_cli_quiet_hides_status_but_keeps_errors(tmp_path: Path):
91
+ media = tmp_path / "video.mp4"
92
+ media.write_bytes(b"fake")
93
+ result = runner.invoke(
94
+ app,
95
+ ["main", str(media), "--caption", "--ollama-model", "qwen3.5:0.8b", "-q"],
96
+ )
97
+ assert result.exit_code != 0
98
+ assert "Transcription failed:" in result.output
99
+ assert "Transcribing:" not in result.output
100
+
101
+
102
+ def test_cli_verbose_accepted(tmp_path: Path):
103
+ media = tmp_path / "video.mp4"
104
+ media.write_bytes(b"fake")
105
+ result = runner.invoke(
106
+ app,
107
+ ["main", str(media), "--caption", "--ollama-model", "qwen3.5:0.8b", "--verbose"],
108
+ )
109
+ assert result.exit_code != 0
110
+ assert "Transcription failed:" in result.output
111
+ assert "Transcribing:" in result.output
112
+
113
+
114
+ def test_update_command_shows_up_to_date():
115
+ with patch("subtitle_engine.cli.check_for_update", return_value=None) as mock_check:
116
+ result = runner.invoke(app, ["update"])
117
+ # The Typer app itself does not register ``update`` as a command; it is
118
+ # routed via ``main_entry``. Invoking the app directly with ``update``
119
+ # should therefore fail as an unknown command.
120
+ assert result.exit_code != 0
121
+ mock_check.assert_not_called()
122
+
123
+
124
+ def test_update_function_runs_upgrade_when_available():
125
+ update_info = UpdateInfo(current=__version__, latest="9.9.9")
126
+ with patch("subtitle_engine.cli.check_for_update", return_value=update_info) as mock_check:
127
+ with patch("subtitle_engine.cli.update_package") as mock_upgrade:
128
+ update()
129
+ mock_check.assert_called_once_with(force=True)
130
+ mock_upgrade.assert_called_once()
131
+
132
+
133
+ def test_update_function_reports_up_to_date():
134
+ with patch("subtitle_engine.cli.check_for_update", return_value=None) as mock_check:
135
+ with patch("subtitle_engine.cli.update_package") as mock_upgrade:
136
+ update()
137
+ mock_check.assert_called_once_with(force=True)
138
+ mock_upgrade.assert_not_called()
139
+
140
+
141
+ def test_update_function_handles_check_error():
142
+ with patch("subtitle_engine.cli.check_for_update", side_effect=UpdateCheckError("no network")):
143
+ with pytest.raises(typer.Exit) as exc_info:
144
+ update()
145
+ assert exc_info.value.exit_code == 1
146
+
147
+
148
+ def test_main_entry_routes_update_command():
149
+ with patch("subtitle_engine.cli.update") as mock_update:
150
+ with patch.object(sys, "argv", ["subeng", "update"]):
151
+ main_entry()
152
+ mock_update.assert_called_once()
153
+
154
+
155
+ def test_main_entry_runs_typer_app_for_transcription():
156
+ with patch("subtitle_engine.cli.app") as mock_app:
157
+ with patch.object(sys, "argv", ["subeng", "video.mp4"]):
158
+ main_entry()
159
+ mock_app.assert_called_once_with(["main", "video.mp4"])
160
+
161
+
162
+ def test_main_entry_routes_caption_command():
163
+ with patch("subtitle_engine.cli.app") as mock_app:
164
+ with patch.object(sys, "argv", ["subeng", "caption", "file.srt"]):
165
+ main_entry()
166
+ mock_app.assert_called_once_with(["caption", "file.srt"])
167
+
168
+
169
+ def test_main_entry_handles_version_flag(capsys):
170
+ with patch.object(sys, "argv", ["subeng", "--version"]):
171
+ assert main_entry() is None
172
+ captured = capsys.readouterr()
173
+ assert captured.out.strip() == f"subeng {__version__}"
174
+
175
+
176
+ def test_cli_preset_shortform_accepted(tmp_path: Path):
177
+ media = tmp_path / "video.mp4"
178
+ media.write_bytes(b"fake")
179
+ result = runner.invoke(app, ["main", str(media), "--preset", "shortform"])
180
+ # Validation passes; transcription fails because the file is fake.
181
+ assert result.exit_code != 0
182
+ assert "Preset: shortform" in result.output
183
+
184
+
185
+ def test_cli_preset_longform_accepted(tmp_path: Path):
186
+ media = tmp_path / "video.mp4"
187
+ media.write_bytes(b"fake")
188
+ result = runner.invoke(app, ["main", str(media), "--preset", "longform"])
189
+ assert result.exit_code != 0
190
+ assert "Preset: longform" in result.output
191
+
192
+
193
+ def test_cli_invalid_preset_rejected(tmp_path: Path):
194
+ media = tmp_path / "video.mp4"
195
+ media.write_bytes(b"fake")
196
+ result = runner.invoke(app, ["main", str(media), "--preset", "invalid"])
197
+ assert result.exit_code != 0
198
+ assert "Unknown preset" in result.output
199
+
200
+
201
+ def test_select_ollama_model_returns_chosen_model():
202
+ mock_select = Mock(ask=Mock(return_value="model-b"))
203
+ with patch("subtitle_engine.cli.list_models", return_value=["model-a", "model-b"]):
204
+ with patch("subtitle_engine.cli.questionary.select", return_value=mock_select):
205
+ assert _select_ollama_model("http://localhost:11434") == "model-b"
206
+
207
+
208
+ def test_select_ollama_model_empty_list_raises():
209
+ with patch("subtitle_engine.cli.list_models", return_value=[]):
210
+ with pytest.raises(ConnectionError, match="No Ollama models"):
211
+ _select_ollama_model("http://localhost:11434")
212
+
213
+
214
+ def test_select_ollama_model_no_selection_raises():
215
+ mock_select = Mock(ask=Mock(return_value=None))
216
+ with patch("subtitle_engine.cli.list_models", return_value=["model-a"]):
217
+ with patch("subtitle_engine.cli.questionary.select", return_value=mock_select):
218
+ with pytest.raises(ValueError, match="No model selected"):
219
+ _select_ollama_model("http://localhost:11434")
220
+
221
+
222
+ def test_caption_command_generates_caption(tmp_path: Path):
223
+ srt = tmp_path / "subs.srt"
224
+ srt.write_text(
225
+ "1\n00:00:00,000 --> 00:00:02,000\nHello world\n",
226
+ encoding="utf-8",
227
+ )
228
+ mock_select = Mock(ask=Mock(return_value="qwen3.5:0.8b"))
229
+ with patch("subtitle_engine.cli.list_models", return_value=["qwen3.5:0.8b"]):
230
+ with patch("subtitle_engine.cli.questionary.select", return_value=mock_select):
231
+ with patch("subtitle_engine.cli.generate_caption", return_value="A caption") as mock_generate:
232
+ result = runner.invoke(app, ["caption", str(srt)])
233
+ assert result.exit_code == 0
234
+ mock_generate.assert_called_once()
235
+ assert (tmp_path / "subs.caption.txt").read_text(encoding="utf-8") == "A caption"
236
+
237
+
238
+ def test_caption_command_uses_explicit_model(tmp_path: Path):
239
+ srt = tmp_path / "subs.srt"
240
+ srt.write_text(
241
+ "1\n00:00:00,000 --> 00:00:02,000\nHello world\n",
242
+ encoding="utf-8",
243
+ )
244
+ with patch("subtitle_engine.cli.generate_caption", return_value="A caption") as mock_generate:
245
+ result = runner.invoke(app, ["caption", str(srt), "--ollama-model", "llama3.2"])
246
+ assert result.exit_code == 0
247
+ mock_generate.assert_called_once()
248
+ _, kwargs = mock_generate.call_args
249
+ assert kwargs["model"] == "llama3.2"
250
+
251
+
252
+ def test_caption_command_custom_output(tmp_path: Path):
253
+ srt = tmp_path / "subs.srt"
254
+ srt.write_text(
255
+ "1\n00:00:00,000 --> 00:00:02,000\nHello world\n",
256
+ encoding="utf-8",
257
+ )
258
+ output = tmp_path / "custom.txt"
259
+ with patch("subtitle_engine.cli.generate_caption", return_value="A caption"):
260
+ result = runner.invoke(app, ["caption", str(srt), "--ollama-model", "llama3.2", "--output", str(output)])
261
+ assert result.exit_code == 0
262
+ assert output.read_text(encoding="utf-8") == "A caption"
263
+
264
+
265
+ def test_caption_command_no_models_raises(tmp_path: Path):
266
+ srt = tmp_path / "subs.srt"
267
+ srt.write_text(
268
+ "1\n00:00:00,000 --> 00:00:02,000\nHello world\n",
269
+ encoding="utf-8",
270
+ )
271
+ with patch("subtitle_engine.cli.list_models", return_value=[]):
272
+ result = runner.invoke(app, ["caption", str(srt)])
273
+ assert result.exit_code != 0
274
+ assert "No Ollama models" in result.output
@@ -2,9 +2,12 @@
2
2
 
3
3
  from pathlib import Path
4
4
 
5
+ import pytest
6
+
5
7
  from subtitle_engine.srt_writer import (
6
8
  _format_segment,
7
9
  _format_time,
10
+ extract_text_from_srt,
8
11
  segments_to_srt,
9
12
  write_srt,
10
13
  )
@@ -64,3 +67,48 @@ def test_write_srt_creates_parent_dirs(tmp_path: Path):
64
67
  output = tmp_path / "nested" / "dir" / "subs.srt"
65
68
  write_srt(segments, output)
66
69
  assert output.exists()
70
+
71
+
72
+ def test_extract_text_from_srt(tmp_path: Path):
73
+ srt = tmp_path / "subs.srt"
74
+ srt.write_text(
75
+ "1\n00:00:00,000 --> 00:00:02,000\nHello world\n\n"
76
+ "2\n00:00:03,000 --> 00:00:05,000\nSecond line\n",
77
+ encoding="utf-8",
78
+ )
79
+ assert extract_text_from_srt(srt) == "Hello world Second line"
80
+
81
+
82
+ def test_extract_text_from_srt_multiline_text(tmp_path: Path):
83
+ srt = tmp_path / "subs.srt"
84
+ srt.write_text(
85
+ "1\n00:00:00,000 --> 00:00:04,000\nFirst line\nSecond line\n",
86
+ encoding="utf-8",
87
+ )
88
+ assert extract_text_from_srt(srt) == "First line Second line"
89
+
90
+
91
+ def test_extract_text_from_srt_ignores_blank_blocks(tmp_path: Path):
92
+ srt = tmp_path / "subs.srt"
93
+ srt.write_text(
94
+ "1\n00:00:00,000 --> 00:00:02,000\nHello\n\n"
95
+ "2\n00:00:03,000 --> 00:00:05,000\n \n",
96
+ encoding="utf-8",
97
+ )
98
+ assert extract_text_from_srt(srt) == "Hello"
99
+
100
+
101
+ def test_extract_text_from_srt_missing_file(tmp_path: Path):
102
+ missing = tmp_path / "missing.srt"
103
+ with pytest.raises(FileNotFoundError):
104
+ extract_text_from_srt(missing)
105
+
106
+
107
+ def test_extract_text_from_srt_no_text(tmp_path: Path):
108
+ srt = tmp_path / "subs.srt"
109
+ srt.write_text(
110
+ "1\n00:00:00,000 --> 00:00:02,000\n \n",
111
+ encoding="utf-8",
112
+ )
113
+ with pytest.raises(ValueError, match="No subtitle text"):
114
+ extract_text_from_srt(srt)
@@ -1,172 +0,0 @@
1
- """Tests for CLI helpers and argument parsing."""
2
-
3
- import sys
4
- from pathlib import Path
5
- from unittest.mock import patch
6
-
7
- import pytest
8
- import typer
9
- from typer.testing import CliRunner
10
-
11
- from subtitle_engine import __version__
12
- from subtitle_engine.cli import app, main_entry, update
13
- from subtitle_engine.updater import UpdateCheckError, UpdateInfo
14
- from subtitle_engine.utils import resolve_output_path, validate_media_file
15
-
16
- runner = CliRunner()
17
-
18
-
19
- @pytest.fixture(autouse=True)
20
- def disable_update_check():
21
- """Prevent the CLI from hitting the network during transcription tests."""
22
- with patch("subtitle_engine.cli.check_for_update", return_value=None):
23
- yield
24
-
25
-
26
- def test_resolve_output_path_default():
27
- input_path = Path("movie.mp4")
28
- assert resolve_output_path(input_path) == Path("movie.srt")
29
-
30
-
31
- def test_resolve_output_path_explicit():
32
- input_path = Path("movie.mp4")
33
- output = Path("custom.srt")
34
- assert resolve_output_path(input_path, output) == output
35
-
36
-
37
- def test_validate_media_file_supported():
38
- validate_media_file(Path("video.mp4"))
39
-
40
-
41
- def test_validate_media_file_unsupported():
42
- with pytest.raises(ValueError, match="Unsupported file type"):
43
- validate_media_file(Path("file.txt"))
44
-
45
-
46
- def test_cli_help():
47
- result = runner.invoke(app, ["--help"])
48
- assert result.exit_code == 0
49
- assert "Generate SRT subtitles" in result.output
50
-
51
-
52
- def test_cli_no_args():
53
- result = runner.invoke(app)
54
- assert result.exit_code != 0
55
- assert "Usage:" in result.output
56
-
57
-
58
- def test_caption_requires_ollama_model(tmp_path: Path):
59
- media = tmp_path / "video.mp4"
60
- media.write_bytes(b"fake")
61
- result = runner.invoke(app, [str(media), "--caption"])
62
- assert result.exit_code != 0
63
- assert "--ollama-model is required" in result.output
64
-
65
-
66
- def test_cli_version_long():
67
- result = runner.invoke(app, ["--version"])
68
- assert result.exit_code == 0
69
- assert "subeng" in result.output
70
- assert "0.1.3" in result.output
71
-
72
-
73
- def test_cli_version_short():
74
- result = runner.invoke(app, ["-v"])
75
- assert result.exit_code == 0
76
- assert "subeng" in result.output
77
-
78
-
79
- def test_cli_version_no_extra_output():
80
- result = runner.invoke(app, ["--version"])
81
- assert result.exit_code == 0
82
- assert result.output.strip() == "subeng 0.1.3"
83
-
84
-
85
- def test_cli_quiet_hides_status_but_keeps_errors(tmp_path: Path):
86
- media = tmp_path / "video.mp4"
87
- media.write_bytes(b"fake")
88
- result = runner.invoke(app, [str(media), "--caption", "-q"])
89
- assert result.exit_code != 0
90
- assert "Error:" in result.output
91
- assert "Transcribing:" not in result.output
92
-
93
-
94
- def test_cli_verbose_accepted(tmp_path: Path):
95
- media = tmp_path / "video.mp4"
96
- media.write_bytes(b"fake")
97
- result = runner.invoke(app, [str(media), "--caption", "--verbose"])
98
- assert result.exit_code != 0
99
- assert "--ollama-model is required" in result.output
100
-
101
-
102
- def test_update_command_shows_up_to_date():
103
- with patch("subtitle_engine.cli.check_for_update", return_value=None) as mock_check:
104
- result = runner.invoke(app, ["update"])
105
- # The Typer app itself does not register ``update`` as a command; it is
106
- # routed via ``main_entry``. Invoking the app directly with ``update``
107
- # should therefore fail as an unknown command.
108
- assert result.exit_code != 0
109
- mock_check.assert_not_called()
110
-
111
-
112
- def test_update_function_runs_upgrade_when_available():
113
- update_info = UpdateInfo(current=__version__, latest="9.9.9")
114
- with patch("subtitle_engine.cli.check_for_update", return_value=update_info) as mock_check:
115
- with patch("subtitle_engine.cli.update_package") as mock_upgrade:
116
- update()
117
- mock_check.assert_called_once_with(force=True)
118
- mock_upgrade.assert_called_once()
119
-
120
-
121
- def test_update_function_reports_up_to_date():
122
- with patch("subtitle_engine.cli.check_for_update", return_value=None) as mock_check:
123
- with patch("subtitle_engine.cli.update_package") as mock_upgrade:
124
- update()
125
- mock_check.assert_called_once_with(force=True)
126
- mock_upgrade.assert_not_called()
127
-
128
-
129
- def test_update_function_handles_check_error():
130
- with patch("subtitle_engine.cli.check_for_update", side_effect=UpdateCheckError("no network")):
131
- with pytest.raises(typer.Exit) as exc_info:
132
- update()
133
- assert exc_info.value.exit_code == 1
134
-
135
-
136
- def test_main_entry_routes_update_command():
137
- with patch("subtitle_engine.cli.update") as mock_update:
138
- with patch.object(sys, "argv", ["subeng", "update"]):
139
- main_entry()
140
- mock_update.assert_called_once()
141
-
142
-
143
- def test_main_entry_runs_typer_app_for_transcription():
144
- with patch("subtitle_engine.cli.app") as mock_app:
145
- with patch.object(sys, "argv", ["subeng", "video.mp4"]):
146
- main_entry()
147
- mock_app.assert_called_once()
148
-
149
-
150
- def test_cli_preset_shortform_accepted(tmp_path: Path):
151
- media = tmp_path / "video.mp4"
152
- media.write_bytes(b"fake")
153
- result = runner.invoke(app, [str(media), "--preset", "shortform"])
154
- # Validation passes; transcription fails because the file is fake.
155
- assert result.exit_code != 0
156
- assert "Preset: shortform" in result.output
157
-
158
-
159
- def test_cli_preset_longform_accepted(tmp_path: Path):
160
- media = tmp_path / "video.mp4"
161
- media.write_bytes(b"fake")
162
- result = runner.invoke(app, [str(media), "--preset", "longform"])
163
- assert result.exit_code != 0
164
- assert "Preset: longform" in result.output
165
-
166
-
167
- def test_cli_invalid_preset_rejected(tmp_path: Path):
168
- media = tmp_path / "video.mp4"
169
- media.write_bytes(b"fake")
170
- result = runner.invoke(app, [str(media), "--preset", "invalid"])
171
- assert result.exit_code != 0
172
- assert "Unknown preset" in result.output