srtforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- srtforge/__init__.py +3 -0
- srtforge/audio.py +57 -0
- srtforge/cli.py +237 -0
- srtforge/merge.py +129 -0
- srtforge/segment.py +371 -0
- srtforge/srt.py +31 -0
- srtforge/transcribe.py +30 -0
- srtforge/translate.py +254 -0
- srtforge-0.1.0.dist-info/METADATA +270 -0
- srtforge-0.1.0.dist-info/RECORD +14 -0
- srtforge-0.1.0.dist-info/WHEEL +5 -0
- srtforge-0.1.0.dist-info/entry_points.txt +2 -0
- srtforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- srtforge-0.1.0.dist-info/top_level.txt +1 -0
srtforge/__init__.py
ADDED
srtforge/audio.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Extract audio from a video file using ffmpeg."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
import tempfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FFmpegError(RuntimeError):
    """Signal that ffmpeg could not be located or exited unsuccessfully."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def ensure_ffmpeg() -> str:
    """Locate the ``ffmpeg`` executable on PATH.

    Returns:
        The executable path reported by ``shutil.which``.

    Raises:
        FFmpegError: If no ``ffmpeg`` executable is found on PATH.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    raise FFmpegError(
        "ffmpeg not found on PATH. Install it (e.g. `nb install ffmpeg`)."
    )
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def extract_audio(video_path: Path, sample_rate: int = 16000) -> Path:
    """Extract mono 16-bit PCM WAV audio from a video into a temp file.

    Args:
        video_path: Path to the input video.
        sample_rate: Output sample rate, passed to ffmpeg's ``-ar`` option.

    Returns:
        Path to the WAV file. The caller is responsible for deleting it.

    Raises:
        FFmpegError: If ffmpeg is missing, exits with a non-zero status, or
            leaves an empty output file.
        FileNotFoundError: If ``video_path`` does not exist.
    """
    ffmpeg = ensure_ffmpeg()
    if not video_path.exists():
        raise FileNotFoundError(f"Video not found: {video_path}")

    # Reserve a unique file name; ffmpeg overwrites the placeholder (-y).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = Path(tmp.name)

    cmd = [
        ffmpeg,
        "-y",
        "-i", str(video_path),
        "-vn",                    # drop video
        "-ac", "1",               # mono
        "-ar", str(sample_rate),  # sample rate
        "-c:a", "pcm_s16le",      # 16-bit PCM
        str(out_path),
    ]
    try:
        proc = subprocess.run(
            cmd,
            # Keep ffmpeg from reading (and consuming) the caller's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg may echo metadata in arbitrary encodings; never let a
            # decode error mask the real outcome.
            errors="replace",
        )
        if proc.returncode != 0:
            raise FFmpegError(
                f"ffmpeg failed (exit {proc.returncode}):\n{proc.stderr.strip()[-2000:]}"
            )
        if out_path.stat().st_size == 0:
            raise FFmpegError("ffmpeg produced no audio. Does the file have an audio track?")
    except BaseException:
        # The caller never receives the path on failure (including Ctrl-C),
        # so the temp file must be removed here or it would leak.
        out_path.unlink(missing_ok=True)
        raise
    return out_path
|
srtforge/cli.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Command-line interface for srtforge."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from . import __version__
|
|
11
|
+
from .audio import FFmpegError, extract_audio
|
|
12
|
+
from .merge import burn_subtitles
|
|
13
|
+
from .segment import Opts, fit_cues, resegment
|
|
14
|
+
from .srt import segments_to_srt
|
|
15
|
+
from .transcribe import DEFAULT_MODEL, transcribe
|
|
16
|
+
from .translate import DEFAULT_TRANSLATE_MODEL, TranslationError, translate_segments
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _log(msg: str) -> None:
    """Write ``msg`` plus a newline to standard error and flush immediately."""
    sys.stderr.write(f"{msg}\n")
    sys.stderr.flush()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _positive_int(value: str) -> int:
    """argparse ``type=`` callable that accepts only integers greater than 0.

    Args:
        value: Raw command-line token.

    Returns:
        The parsed integer.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not an integer or is not
            greater than 0.
    """
    try:
        parsed = int(value)
    except ValueError:
        # A bare ValueError makes argparse report "invalid _positive_int
        # value", exposing this private helper's name to the user.
        raise argparse.ArgumentTypeError(f"invalid integer value: {value!r}") from None
    if parsed <= 0:
        raise argparse.ArgumentTypeError("must be greater than 0")
    return parsed
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _positive_float(value: str) -> float:
    """argparse ``type=`` callable that accepts only finite floats above 0.

    Args:
        value: Raw command-line token.

    Returns:
        The parsed float.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not a number, is not
            finite, or is not greater than 0.
    """
    try:
        parsed = float(value)
    except ValueError:
        # A bare ValueError makes argparse report "invalid _positive_float
        # value", exposing this private helper's name to the user.
        raise argparse.ArgumentTypeError(f"invalid number: {value!r}") from None
    # The chained comparison is False for NaN and for +inf, both of which
    # float() accepts ("nan", "inf") but neither is a usable setting.
    if not 0 < parsed < float("inf"):
        raise argparse.ArgumentTypeError("must be a finite number greater than 0")
    return parsed
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _crf(value: str) -> int:
    """argparse ``type=`` callable for an x264 CRF value in the range 0..51.

    Args:
        value: Raw command-line token.

    Returns:
        The parsed integer.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not an integer or lies
            outside 0..51 (inclusive).
    """
    try:
        parsed = int(value)
    except ValueError:
        # A bare ValueError makes argparse report "invalid _crf value",
        # exposing this private helper's name to the user.
        raise argparse.ArgumentTypeError(f"invalid integer value: {value!r}") from None
    if not 0 <= parsed <= 51:
        raise argparse.ArgumentTypeError("must be between 0 and 51")
    return parsed
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _translated_path(output: Path, language: str) -> Path:
    """Derive the translated-subtitle path: video.srt -> video.<lang>.srt.

    The language is trimmed and lower-cased, every run of characters other
    than word characters, ``.`` and ``-`` becomes a single ``-``, and leading
    or trailing ``.``, ``-`` and ``_`` are removed. If nothing is left, the
    tag ``translated`` is used instead.
    """
    normalized = language.strip().lower()
    slug = re.sub(r"[^\w.-]+", "-", normalized).strip(".-_")
    if not slug:
        slug = "translated"
    return output.with_suffix(f".{slug}{output.suffix}")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the default (subtitle generation) command."""
    parser = argparse.ArgumentParser(
        prog="srtforge",
        description="Generate SRT subtitles from a video using local Whisper (MLX).",
        epilog=(
            "Also: `srtforge merge VIDEO SUBS.srt -o OUT.mp4` to burn subtitles "
            "into a video. Run `srtforge merge -h` for details."
        ),
    )
    add = parser.add_argument

    # Input / output.
    add("video", type=Path, help="Path to the input video (e.g. MP4).")
    add(
        "-o",
        "--output",
        type=Path,
        default=None,
        help="Output .srt path (default: alongside the video).",
    )

    # Transcription.
    add(
        "-m",
        "--model",
        default=DEFAULT_MODEL,
        help=f"Whisper model / HF repo (default: {DEFAULT_MODEL}).",
    )
    add(
        "-l",
        "--language",
        default=None,
        help="Language code (e.g. en, es). Default: auto-detect.",
    )
    add(
        "--word-timestamps",
        action="store_true",
        help="Compute word-level timestamps (slower).",
    )

    # Cue layout.
    add(
        "--no-resegment",
        dest="resegment",
        action="store_false",
        help="Disable sentence-aware re-cueing; keep raw Whisper segments.",
    )
    add(
        "--max-line-length",
        type=_positive_int,
        default=Opts.max_cpl,
        metavar="N",
        help=f"Max characters per subtitle line (default: {Opts.max_cpl}).",
    )
    add(
        "--max-lines",
        type=_positive_int,
        default=Opts.max_lines,
        metavar="N",
        help=f"Max lines per subtitle cue (default: {Opts.max_lines}).",
    )
    add(
        "--reading-speed",
        type=_positive_float,
        default=Opts.reading_cps,
        metavar="CPS",
        help=f"Reading speed in characters/second (default: {Opts.reading_cps}).",
    )

    # Translation.
    add(
        "-t",
        "--translate",
        default=None,
        metavar="LANG",
        help=(
            "Also translate subtitles into LANG (e.g. es, Spanish, Japanese) "
            "using a local MLX LLM. Writes a second .<lang>.srt file."
        ),
    )
    add(
        "--translate-model",
        default=DEFAULT_TRANSLATE_MODEL,
        help=f"MLX LLM used for translation (default: {DEFAULT_TRANSLATE_MODEL}).",
    )

    add("--version", action="version", version=f"srtforge {__version__}")
    return parser
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _gen_main(argv: list[str] | None = None) -> int:
    """Run the subtitle-generation command.

    Extracts audio from the video, transcribes it, optionally re-cues the
    segments, writes the SRT file, and (with ``--translate``) writes a second,
    translated SRT file next to it. The temporary WAV is always removed.

    Args:
        argv: Argument list handed to the parser; ``None`` makes argparse
            fall back to ``sys.argv[1:]``.

    Returns:
        Exit code: 0 on success, 1 on a reported error, 130 when interrupted
        from the keyboard.
    """
    args = build_parser().parse_args(argv)

    output = args.output or args.video.with_suffix(".srt")
    translating = bool(args.translate)
    total = 4 if translating else 3
    opts = Opts(
        max_cpl=args.max_line_length,
        max_lines=args.max_lines,
        reading_cps=args.reading_speed,
    )
    # Sentence-aware re-cueing needs word-level timestamps.
    need_words = args.word_timestamps or args.resegment
    wav_path = None
    try:
        _log(f"[1/{total}] Extracting audio from {args.video} ...")
        wav_path = extract_audio(args.video)

        _log(f"[2/{total}] Transcribing with {args.model} (downloads model on first run) ...")
        segments = transcribe(
            wav_path,
            model=args.model,
            language=args.language,
            word_timestamps=need_words,
        )

        if args.resegment:
            segments = resegment(segments, opts)

        _log(f"[3/{total}] Writing {output} ...")
        srt_text = segments_to_srt(segments)
        output.write_text(srt_text, encoding="utf-8")
        if not srt_text:
            _log("Warning: no speech detected; wrote an empty SRT file.")
        else:
            _log(f" {srt_text.count('-->')} subtitle entries written.")

        if translating:
            if not srt_text:
                _log("Skipping translation: no speech to translate.")
            else:
                t_out = _translated_path(output, args.translate)
                _log(
                    f"[4/{total}] Translating to {args.translate} with "
                    f"{args.translate_model} (downloads model on first run) ..."
                )
                t_segments = translate_segments(
                    segments,
                    target_language=args.translate,
                    model=args.translate_model,
                )
                if args.resegment:
                    # Translated text length differs; fit/split to line limits.
                    t_segments = fit_cues(t_segments, opts)
                t_out.write_text(segments_to_srt(t_segments), encoding="utf-8")
                _log(f" Translated subtitles written to {t_out}")

        _log("Done.")
        return 0

    except (FFmpegError, OSError, TranslationError) as e:
        # OSError includes FileNotFoundError and also covers other filesystem
        # failures (permission denied, output path is a directory, ...) that
        # would otherwise surface as a raw traceback.
        _log(f"Error: {e}")
        return 1
    except KeyboardInterrupt:
        _log("Interrupted.")
        return 130
    finally:
        if wav_path is not None:
            wav_path.unlink(missing_ok=True)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def build_merge_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``srtforge merge`` sub-command."""
    parser = argparse.ArgumentParser(
        prog="srtforge merge",
        description="Burn a subtitle file permanently into a video (hard subs).",
    )
    add = parser.add_argument

    # Positional inputs and the output location.
    add("video", type=Path, help="Input video file.")
    add("subtitles", type=Path, help="Subtitle file (.srt) to burn in.")
    add(
        "-o",
        "--output",
        type=Path,
        default=None,
        help="Output video path (default: <video>.subbed<ext>).",
    )

    # Encoder settings.
    add(
        "--crf",
        type=_crf,
        default=18,
        metavar="N",
        help="x264 quality, lower = better/larger (default: 18, ~visually lossless).",
    )
    add(
        "--preset",
        default="slow",
        help="x264 speed/efficiency preset (default: slow).",
    )

    # Subtitle styling overrides.
    add(
        "--font-size",
        type=_positive_int,
        default=None,
        metavar="N",
        help="Override subtitle font size.",
    )
    add(
        "--font-name",
        default=None,
        metavar="NAME",
        help="Override subtitle font name.",
    )
    return parser
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _merge_main(argv: list[str]) -> int:
    """Run the ``merge`` sub-command: burn a subtitle file into a video.

    Args:
        argv: Arguments that follow the ``merge`` keyword.

    Returns:
        Exit code: 0 on success, 1 on a reported error, 130 when interrupted
        from the keyboard.
    """
    ns = build_merge_parser().parse_args(argv)

    destination = ns.output
    if not destination:
        # Default: same location and extension, with ".subbed" inserted.
        destination = ns.video.with_suffix(f".subbed{ns.video.suffix}")

    try:
        _log(f"Burning {ns.subtitles} into {ns.video} (crf={ns.crf}, preset={ns.preset}) ...")
        burn_subtitles(
            ns.video,
            ns.subtitles,
            destination,
            crf=ns.crf,
            preset=ns.preset,
            font_size=ns.font_size,
            font_name=ns.font_name,
        )
    except (FFmpegError, FileNotFoundError) as exc:
        _log(f"Error: {exc}")
        return 1
    except KeyboardInterrupt:
        _log("Interrupted.")
        return 130
    _log(f"Done. Wrote {destination}")
    return 0
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point: dispatch to ``merge`` or to subtitle generation.

    Args:
        argv: Argument list; ``None`` means use ``sys.argv[1:]``.

    Returns:
        The exit code produced by the selected command.
    """
    tokens = list(sys.argv[1:]) if argv is None else list(argv)
    # A leading "merge" token selects the burn-in command; it is stripped
    # before the remaining arguments are handed to that command's parser.
    if tokens[:1] == ["merge"]:
        return _merge_main(tokens[1:])
    return _gen_main(tokens)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# Script entry: exit the interpreter with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
srtforge/merge.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Burn subtitles permanently into a video using ffmpeg (libass)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import glob
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .audio import FFmpegError
|
|
12
|
+
|
|
13
|
+
# Glob patterns for install locations where fuller ffmpeg builds (with
# libass) tend to live. Patterns are tried in the order listed.
_FFMPEG_GLOBS = (
    # nanobrew prefix: the "ffmpeg-full" formula first, then plain "ffmpeg".
    "/opt/nanobrew/prefix/Cellar/ffmpeg-full/*/bin/ffmpeg",
    "/opt/nanobrew/prefix/Cellar/ffmpeg/*/bin/ffmpeg",
    # Cellar directories under /opt/homebrew and /usr/local; "ffmpeg*"
    # matches any formula whose name starts with "ffmpeg".
    "/opt/homebrew/Cellar/ffmpeg*/*/bin/ffmpeg",
    "/usr/local/Cellar/ffmpeg*/*/bin/ffmpeg",
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _has_subtitles_filter(binary: str) -> bool:
    """Report whether ``binary -filters`` lists a filter named ``subtitles``.

    Any failure to run the probe (missing binary, timeout after 30 seconds,
    undecodable output, ...) is treated as "not available".
    """
    probe = [binary, "-hide_banner", "-filters"]
    try:
        result = subprocess.run(probe, capture_output=True, text=True, timeout=30)
        listing = result.stdout
    except Exception:
        # Best-effort probe: a binary that cannot be queried is unusable.
        return False
    # Matches a listing line of the form "<flags> subtitles <rest>".
    match = re.search(r"(?m)^\s*\S+\s+subtitles\s", listing)
    return match is not None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _resolve_ffmpeg_with_subtitles() -> str:
    """Return the first ffmpeg binary that offers the ``subtitles`` filter.

    The ffmpeg on PATH is probed first, followed by matches of each pattern
    in ``_FFMPEG_GLOBS`` (each pattern's matches in reverse-sorted order).
    Every distinct path is probed at most once.

    Raises:
        FFmpegError: If no candidate exposes the ``subtitles`` filter.
    """
    path_binary = shutil.which("ffmpeg")
    ordered = [path_binary] if path_binary else []
    for pattern in _FFMPEG_GLOBS:
        ordered += sorted(glob.glob(pattern), reverse=True)

    # dict.fromkeys drops duplicates while keeping first-seen order.
    for binary in dict.fromkeys(ordered):
        if _has_subtitles_filter(binary):
            return binary

    raise FFmpegError(
        "No ffmpeg with the 'subtitles' filter (libass) was found. Your default "
        "ffmpeg lacks libass. Install a full build, e.g. `nb install ffmpeg-full`."
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _escape_filter_value(value: str) -> str:
    """Escape a value embedded in an ffmpeg filtergraph option.

    A value inside ``-vf`` is parsed twice by ffmpeg, and each pass strips
    one layer of backslash/quote escaping:

    1. the filtergraph parser, where ``\\ ' [ ] , ;`` are syntax;
    2. the filter's own option parser, where ``\\ ' :`` are syntax.

    The value is therefore escaped for the option parser first and the
    result is escaped again for the filtergraph parser. Escaping only once
    leaves ``:``, ``'`` and ``\\`` live for the second pass, so a path such
    as ``it's.srt`` or ``C:/subs.srt`` would be mis-parsed.

    Args:
        value: Raw option value (for example a file path or a style string).

    Returns:
        The value with both escaping levels applied.
    """

    def _backslash(text: str, special: str) -> str:
        # Prefix every character found in ``special`` with one backslash.
        return "".join(f"\\{ch}" if ch in special else ch for ch in text)

    option_level = _backslash(value, "\\':")
    return _backslash(option_level, "\\'[],;")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _escape_sub_path(path: Path) -> str:
    """Return ``path`` as text escaped for the ``subtitles`` filter value."""
    raw = str(path)
    return _escape_filter_value(raw)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _force_style(font_size: int | None, font_name: str | None) -> str:
    """Build the comma-separated ``Key=Value`` style override string.

    Falsy arguments are skipped; ``FontName`` precedes ``FontSize``. Returns
    an empty string when neither override is given.
    """
    overrides = (("FontName", font_name), ("FontSize", font_size))
    return ",".join(f"{key}={val}" for key, val in overrides if val)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def burn_subtitles(
    video: Path,
    srt: Path,
    output: Path,
    crf: int = 18,
    preset: str = "slow",
    font_size: int | None = None,
    font_name: str | None = None,
) -> Path:
    """Render ``srt`` onto ``video`` and write the result to ``output``.

    The video stream is re-encoded with libx264 at the given CRF/preset and
    the ``yuv420p`` pixel format; the audio stream is copied unchanged. No
    scaling or frame-rate options are passed to ffmpeg.

    Args:
        video: Input video file.
        srt: Subtitle file to burn in.
        output: Destination video path (overwritten if it exists, ``-y``).
        crf: x264 constant rate factor.
        preset: x264 preset name.
        font_size: Optional ``FontSize`` style override.
        font_name: Optional ``FontName`` style override.

    Returns:
        ``output``.

    Raises:
        FFmpegError: If no ffmpeg with the ``subtitles`` filter is found, or
            ffmpeg exits with a non-zero status.
        FileNotFoundError: If ``video`` or ``srt`` does not exist.
    """
    ffmpeg = _resolve_ffmpeg_with_subtitles()
    if not video.exists():
        raise FileNotFoundError(f"Video not found: {video}")
    if not srt.exists():
        raise FileNotFoundError(f"Subtitle file not found: {srt}")

    vf = f"subtitles=filename={_escape_sub_path(srt)}"
    style = _force_style(font_size, font_name)
    if style:
        vf += f":force_style={_escape_filter_value(style)}"

    cmd = [
        ffmpeg,
        "-y",
        "-i", str(video),
        "-vf", vf,
        "-c:v", "libx264",
        "-crf", str(crf),
        "-preset", preset,
        "-pix_fmt", "yuv420p",
        "-c:a", "copy",  # keep original audio losslessly
        "-movflags", "+faststart",
        str(output),
    ]
    proc = subprocess.run(
        cmd,
        # Keep ffmpeg from reading (and consuming) the caller's stdin.
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        # ffmpeg may echo metadata in arbitrary encodings; a decode error
        # must not replace the real result with a UnicodeDecodeError.
        errors="replace",
    )
    if proc.returncode != 0:
        raise FFmpegError(
            f"ffmpeg failed to burn subtitles (exit {proc.returncode}):\n"
            f"{proc.stderr.strip()[-2000:]}"
        )
    return output
|