srtforge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
srtforge-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 rromanv
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,270 @@
1
+ Metadata-Version: 2.4
2
+ Name: srtforge
3
+ Version: 0.1.0
4
+ Summary: Generate, translate, and burn SRT subtitles locally with MLX Whisper.
5
+ Author: rromanv
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/rromanv/srtforge
8
+ Project-URL: Repository, https://github.com/rromanv/srtforge
9
+ Project-URL: Issues, https://github.com/rromanv/srtforge/issues
10
+ Keywords: subtitles,srt,whisper,mlx,transcription,translation
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: End Users/Desktop
14
+ Classifier: Operating System :: MacOS
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Multimedia :: Video
20
+ Classifier: Topic :: Text Processing
21
+ Requires-Python: <3.13,>=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: mlx-whisper>=0.4
25
+ Requires-Dist: mlx-lm>=0.20
26
+ Requires-Dist: mlx-vlm>=0.1
27
+ Dynamic: license-file
28
+
29
+ # srtforge
30
+
31
+ Generate, translate, and burn `.srt` subtitles locally on Apple Silicon.
32
+
33
+ `srtforge` extracts audio with ffmpeg, transcribes it with local MLX Whisper,
34
+ optionally re-cues the captions for readability, and can translate subtitles
35
+ with a local MLX language model. No cloud APIs or keys are required.
36
+
37
+ ## Features
38
+
39
+ - Local Whisper transcription through `mlx-whisper`
40
+ - Sentence-aware subtitle cueing from word timestamps
41
+ - Standard SRT output
42
+ - Optional local LLM translation with timestamp preservation
43
+ - Optional hard-subtitle burn-in through ffmpeg/libass
44
+ - Installable Python CLI: `srtforge`
45
+
46
+ ## Requirements
47
+
48
+ - macOS on Apple Silicon
49
+ - Python 3.10, 3.11, or 3.12
50
+ - `ffmpeg` on your `PATH`
51
+ - Enough disk/RAM for the models you choose
52
+
53
+ The default transcription model is `mlx-community/whisper-large-v3-turbo`.
54
+ The default translation model is `mlx-community/gemma-4-26b-a4b-it-4bit`.
55
+
56
+ First use downloads models from Hugging Face into the local cache. Later runs
57
+ can use the cached models.
58
+
59
+ By default, Hugging Face model files are stored outside this project in your
60
+ user cache, normally under:
61
+
62
+ ```text
63
+ ~/.cache/huggingface/hub
64
+ ```
65
+
66
+ You can move that cache by setting Hugging Face cache environment variables such
67
+ as `HF_HOME` or `HUGGINGFACE_HUB_CACHE`. `srtforge` does not store downloaded
68
+ models in the repository or next to your videos. Temporary extracted WAV files
69
+ are created in the system temp directory and deleted after each run.
70
+
71
+ ## Install
72
+
73
+ From GitHub:
74
+
75
+ ```bash
76
+ pipx install --python python3.11 git+https://github.com/rromanv/srtforge.git
77
+ ```
78
+
79
+ `srtforge` currently supports Python 3.10-3.12. If your default Python is newer
80
+ than that, such as Python 3.14, pass a supported interpreter explicitly with
81
+ `--python`.
82
+
83
+ Or into a virtual environment:
84
+
85
+ ```bash
86
+ python3.11 -m venv .venv
87
+ source .venv/bin/activate
88
+ pip install git+https://github.com/rromanv/srtforge.git
89
+ ```
90
+
91
+ For local development:
92
+
93
+ ```bash
94
+ git clone https://github.com/rromanv/srtforge.git
95
+ cd srtforge
96
+ python3.11 -m venv .venv
97
+ source .venv/bin/activate
98
+ pip install -e .
99
+ ```
100
+
101
+ After a PyPI release:
102
+
103
+ ```bash
104
+ pipx install --python python3.11 srtforge
105
+ ```
106
+
107
+ ## Troubleshooting
108
+
109
+ ### pipx uses Python 3.13 or newer
110
+
111
+ If installation fails because your default Python is outside the supported
112
+ range, install a supported Python and tell `pipx` to use it:
113
+
114
+ ```bash
115
+ python3.11 --version
116
+ pipx install --python python3.11 git+https://github.com/rromanv/srtforge.git
117
+ ```
118
+
119
+ If `python3.11` is not installed, install Python 3.11 or 3.12 first, then rerun
120
+ the `pipx install --python ...` command.
121
+
122
+ ## Usage
123
+
124
+ Generate subtitles next to a video:
125
+
126
+ ```bash
127
+ srtforge video.mp4
128
+ ```
129
+
130
+ Write to a custom path:
131
+
132
+ ```bash
133
+ srtforge video.mp4 -o captions.srt
134
+ ```
135
+
136
+ Force the source language:
137
+
138
+ ```bash
139
+ srtforge video.mp4 -l en
140
+ ```
141
+
142
+ Use another Whisper model:
143
+
144
+ ```bash
145
+ srtforge video.mp4 -m mlx-community/whisper-small
146
+ ```
147
+
148
+ Disable sentence-aware re-cueing:
149
+
150
+ ```bash
151
+ srtforge video.mp4 --no-resegment
152
+ ```
153
+
154
+ Tune subtitle readability:
155
+
156
+ ```bash
157
+ srtforge video.mp4 --max-line-length 37 --max-lines 2 --reading-speed 15
158
+ ```
159
+
160
+ Translate subtitles:
161
+
162
+ ```bash
163
+ srtforge video.mp4 -t Spanish
164
+ srtforge video.mp4 -t "Brazilian Portuguese"
165
+ srtforge video.mp4 -t ja --translate-model mlx-community/Qwen3.5-9B-OptiQ-4bit
166
+ ```
167
+
168
+ Burn subtitles into a video:
169
+
170
+ ```bash
171
+ srtforge merge video.mp4 video.srt -o final.mp4
172
+ srtforge merge video.mp4 video.es.srt --crf 16 --font-size 26
173
+ ```
174
+
175
+ Show the built-in help for the main command and the `merge` subcommand:
176
+
177
+ ```bash
178
+ srtforge --help
179
+ srtforge merge --help
180
+ ```
181
+
182
+ ## Readability
183
+
184
+ By default, `srtforge` asks Whisper for word-level timestamps and rebuilds cues
185
+ so they are easier to read:
186
+
187
+ - cues prefer sentence boundaries
188
+ - lines are wrapped to two lines of 42 characters by default
189
+ - long sentences are split across cue boundaries
190
+ - cues are paced around 17 characters per second
191
+ - cues are adjusted to avoid overlap
192
+
193
+ Translated subtitles are re-fitted after translation because translated text can
194
+ be longer or shorter than the source.
195
+
196
+ ## ffmpeg Notes
197
+
198
+ Audio extraction requires `ffmpeg`.
199
+
200
+ Burn-in uses ffmpeg's `subtitles` filter, which requires libass. If your ffmpeg
201
+ does not include it, install a full build such as:
202
+
203
+ ```bash
204
+ nb install ffmpeg-full
205
+ ```
206
+
207
+ ## Project Layout
208
+
209
+ ```text
210
+ src/srtforge/
211
+ audio.py # ffmpeg audio extraction
212
+ transcribe.py # MLX Whisper transcription
213
+ segment.py # sentence-aware re-cueing, wrapping, pacing
214
+ translate.py # context-aware local-LLM translation
215
+ merge.py # burn subtitles into video with ffmpeg/libass
216
+ srt.py # SRT rendering
217
+ cli.py # argparse CLI
218
+ tests/
219
+ test_cli.py
220
+ test_merge.py
221
+ test_segment.py
222
+ test_srt.py
223
+ ```
224
+
225
+ ## Development
226
+
227
+ Run the test suite:
228
+
229
+ ```bash
230
+ python -m unittest discover -s tests
231
+ ```
232
+
233
+ Build release artifacts:
234
+
235
+ ```bash
236
+ python -m build
237
+ python -m twine check dist/*
238
+ ```
239
+
240
+ Publish to PyPI manually:
241
+
242
+ ```bash
243
+ python -m twine upload dist/*
244
+ ```
245
+
246
+ The repository also includes a GitHub Actions workflow for publishing to PyPI
247
+ when a release is created. It uses PyPI Trusted Publishing, so no PyPI API token
248
+ is needed in GitHub.
249
+
250
+ Before creating a release that should publish to PyPI, configure a pending
251
+ publisher in your PyPI account:
252
+
253
+ ```text
254
+ PyPI project name: srtforge
255
+ Owner: rromanv
256
+ Repository name: srtforge
257
+ Workflow filename: publish.yml
258
+ Environment name: pypi
259
+ ```
260
+
261
+ PyPI docs:
262
+
263
+ - Creating a new project with a pending publisher:
264
+ https://docs.pypi.org/trusted-publishers/creating-a-project-through-oidc/
265
+ - Publishing with a trusted publisher:
266
+ https://docs.pypi.org/trusted-publishers/using-a-publisher/
267
+
268
+ ## License
269
+
270
+ MIT
@@ -0,0 +1,242 @@
1
+ # srtforge
2
+
3
+ Generate, translate, and burn `.srt` subtitles locally on Apple Silicon.
4
+
5
+ `srtforge` extracts audio with ffmpeg, transcribes it with local MLX Whisper,
6
+ optionally re-cues the captions for readability, and can translate subtitles
7
+ with a local MLX language model. No cloud APIs or keys are required.
8
+
9
+ ## Features
10
+
11
+ - Local Whisper transcription through `mlx-whisper`
12
+ - Sentence-aware subtitle cueing from word timestamps
13
+ - Standard SRT output
14
+ - Optional local LLM translation with timestamp preservation
15
+ - Optional hard-subtitle burn-in through ffmpeg/libass
16
+ - Installable Python CLI: `srtforge`
17
+
18
+ ## Requirements
19
+
20
+ - macOS on Apple Silicon
21
+ - Python 3.10, 3.11, or 3.12
22
+ - `ffmpeg` on your `PATH`
23
+ - Enough disk/RAM for the models you choose
24
+
25
+ The default transcription model is `mlx-community/whisper-large-v3-turbo`.
26
+ The default translation model is `mlx-community/gemma-4-26b-a4b-it-4bit`.
27
+
28
+ First use downloads models from Hugging Face into the local cache. Later runs
29
+ can use the cached models.
30
+
31
+ By default, Hugging Face model files are stored outside this project in your
32
+ user cache, normally under:
33
+
34
+ ```text
35
+ ~/.cache/huggingface/hub
36
+ ```
37
+
38
+ You can move that cache by setting Hugging Face cache environment variables such
39
+ as `HF_HOME` or `HUGGINGFACE_HUB_CACHE`. `srtforge` does not store downloaded
40
+ models in the repository or next to your videos. Temporary extracted WAV files
41
+ are created in the system temp directory and deleted after each run.
42
+
43
+ ## Install
44
+
45
+ From GitHub:
46
+
47
+ ```bash
48
+ pipx install --python python3.11 git+https://github.com/rromanv/srtforge.git
49
+ ```
50
+
51
+ `srtforge` currently supports Python 3.10-3.12. If your default Python is newer
52
+ than that, such as Python 3.14, pass a supported interpreter explicitly with
53
+ `--python`.
54
+
55
+ Or into a virtual environment:
56
+
57
+ ```bash
58
+ python3.11 -m venv .venv
59
+ source .venv/bin/activate
60
+ pip install git+https://github.com/rromanv/srtforge.git
61
+ ```
62
+
63
+ For local development:
64
+
65
+ ```bash
66
+ git clone https://github.com/rromanv/srtforge.git
67
+ cd srtforge
68
+ python3.11 -m venv .venv
69
+ source .venv/bin/activate
70
+ pip install -e .
71
+ ```
72
+
73
+ After a PyPI release:
74
+
75
+ ```bash
76
+ pipx install --python python3.11 srtforge
77
+ ```
78
+
79
+ ## Troubleshooting
80
+
81
+ ### pipx uses Python 3.13 or newer
82
+
83
+ If installation fails because your default Python is outside the supported
84
+ range, install a supported Python and tell `pipx` to use it:
85
+
86
+ ```bash
87
+ python3.11 --version
88
+ pipx install --python python3.11 git+https://github.com/rromanv/srtforge.git
89
+ ```
90
+
91
+ If `python3.11` is not installed, install Python 3.11 or 3.12 first, then rerun
92
+ the `pipx install --python ...` command.
93
+
94
+ ## Usage
95
+
96
+ Generate subtitles next to a video:
97
+
98
+ ```bash
99
+ srtforge video.mp4
100
+ ```
101
+
102
+ Write to a custom path:
103
+
104
+ ```bash
105
+ srtforge video.mp4 -o captions.srt
106
+ ```
107
+
108
+ Force the source language:
109
+
110
+ ```bash
111
+ srtforge video.mp4 -l en
112
+ ```
113
+
114
+ Use another Whisper model:
115
+
116
+ ```bash
117
+ srtforge video.mp4 -m mlx-community/whisper-small
118
+ ```
119
+
120
+ Disable sentence-aware re-cueing:
121
+
122
+ ```bash
123
+ srtforge video.mp4 --no-resegment
124
+ ```
125
+
126
+ Tune subtitle readability:
127
+
128
+ ```bash
129
+ srtforge video.mp4 --max-line-length 37 --max-lines 2 --reading-speed 15
130
+ ```
131
+
132
+ Translate subtitles:
133
+
134
+ ```bash
135
+ srtforge video.mp4 -t Spanish
136
+ srtforge video.mp4 -t "Brazilian Portuguese"
137
+ srtforge video.mp4 -t ja --translate-model mlx-community/Qwen3.5-9B-OptiQ-4bit
138
+ ```
139
+
140
+ Burn subtitles into a video:
141
+
142
+ ```bash
143
+ srtforge merge video.mp4 video.srt -o final.mp4
144
+ srtforge merge video.mp4 video.es.srt --crf 16 --font-size 26
145
+ ```
146
+
147
+ Show the built-in help for the main command and the `merge` subcommand:
148
+
149
+ ```bash
150
+ srtforge --help
151
+ srtforge merge --help
152
+ ```
153
+
154
+ ## Readability
155
+
156
+ By default, `srtforge` asks Whisper for word-level timestamps and rebuilds cues
157
+ so they are easier to read:
158
+
159
+ - cues prefer sentence boundaries
160
+ - lines are wrapped to two lines of 42 characters by default
161
+ - long sentences are split across cue boundaries
162
+ - cues are paced around 17 characters per second
163
+ - cues are adjusted to avoid overlap
164
+
165
+ Translated subtitles are re-fitted after translation because translated text can
166
+ be longer or shorter than the source.
167
+
168
+ ## ffmpeg Notes
169
+
170
+ Audio extraction requires `ffmpeg`.
171
+
172
+ Burn-in uses ffmpeg's `subtitles` filter, which requires libass. If your ffmpeg
173
+ does not include it, install a full build such as:
174
+
175
+ ```bash
176
+ nb install ffmpeg-full
177
+ ```
178
+
179
+ ## Project Layout
180
+
181
+ ```text
182
+ src/srtforge/
183
+ audio.py # ffmpeg audio extraction
184
+ transcribe.py # MLX Whisper transcription
185
+ segment.py # sentence-aware re-cueing, wrapping, pacing
186
+ translate.py # context-aware local-LLM translation
187
+ merge.py # burn subtitles into video with ffmpeg/libass
188
+ srt.py # SRT rendering
189
+ cli.py # argparse CLI
190
+ tests/
191
+ test_cli.py
192
+ test_merge.py
193
+ test_segment.py
194
+ test_srt.py
195
+ ```
196
+
197
+ ## Development
198
+
199
+ Run the test suite:
200
+
201
+ ```bash
202
+ python -m unittest discover -s tests
203
+ ```
204
+
205
+ Build release artifacts:
206
+
207
+ ```bash
208
+ python -m build
209
+ python -m twine check dist/*
210
+ ```
211
+
212
+ Publish to PyPI manually:
213
+
214
+ ```bash
215
+ python -m twine upload dist/*
216
+ ```
217
+
218
+ The repository also includes a GitHub Actions workflow for publishing to PyPI
219
+ when a release is created. It uses PyPI Trusted Publishing, so no PyPI API token
220
+ is needed in GitHub.
221
+
222
+ Before creating a release that should publish to PyPI, configure a pending
223
+ publisher in your PyPI account:
224
+
225
+ ```text
226
+ PyPI project name: srtforge
227
+ Owner: rromanv
228
+ Repository name: srtforge
229
+ Workflow filename: publish.yml
230
+ Environment name: pypi
231
+ ```
232
+
233
+ PyPI docs:
234
+
235
+ - Creating a new project with a pending publisher:
236
+ https://docs.pypi.org/trusted-publishers/creating-a-project-through-oidc/
237
+ - Publishing with a trusted publisher:
238
+ https://docs.pypi.org/trusted-publishers/using-a-publisher/
239
+
240
+ ## License
241
+
242
+ MIT
@@ -0,0 +1,43 @@
1
+ [project]
2
+ name = "srtforge"
3
+ version = "0.1.0"
4
+ description = "Generate, translate, and burn SRT subtitles locally with MLX Whisper."
5
+ readme = "README.md"
6
+ requires-python = ">=3.10,<3.13"
7
+ license = "MIT"
8
+ authors = [
9
+ { name = "rromanv" },
10
+ ]
11
+ keywords = ["subtitles", "srt", "whisper", "mlx", "transcription", "translation"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Environment :: Console",
15
+ "Intended Audience :: End Users/Desktop",
16
+ "Operating System :: MacOS",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Topic :: Multimedia :: Video",
22
+ "Topic :: Text Processing",
23
+ ]
24
+ dependencies = [
25
+ "mlx-whisper>=0.4",
26
+ "mlx-lm>=0.20",
27
+ "mlx-vlm>=0.1",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/rromanv/srtforge"
32
+ Repository = "https://github.com/rromanv/srtforge"
33
+ Issues = "https://github.com/rromanv/srtforge/issues"
34
+
35
+ [project.scripts]
36
+ srtforge = "srtforge.cli:main"
37
+
38
[build-system]
# The SPDX string form `license = "MIT"` used in [project] (PEP 639) is only
# understood by setuptools 77.0 and newer; older backends reject the file.
requires = ["setuptools>=77"]
build-backend = "setuptools.build_meta"
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
"""srtforge: generate, translate, and burn SRT subtitles locally with MLX Whisper."""

# Package version string; pyproject.toml declares the same value for the
# distribution metadata.
__version__ = "0.1.0"
@@ -0,0 +1,57 @@
1
+ """Extract audio from a video file using ffmpeg."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ import subprocess
7
+ import tempfile
8
+ from pathlib import Path
9
+
10
+
11
class FFmpegError(RuntimeError):
    """Error signalling that ffmpeg is unavailable or did not succeed."""
13
+
14
+
15
def ensure_ffmpeg() -> str:
    """Locate the ffmpeg executable on PATH.

    Returns:
        The executable path as resolved by ``shutil.which``.

    Raises:
        FFmpegError: If no ``ffmpeg`` executable is found on PATH.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    raise FFmpegError(
        "ffmpeg not found on PATH. Install it (e.g. `nb install ffmpeg`)."
    )
23
+
24
+
25
def extract_audio(video_path: Path, sample_rate: int = 16000) -> Path:
    """Extract a video's audio into a temporary mono 16-bit PCM WAV file.

    Args:
        video_path: Path of the input media file.
        sample_rate: Output sample rate, passed to ffmpeg's ``-ar`` option.

    Returns:
        Path of the WAV file that was written. The caller is responsible for
        deleting it.

    Raises:
        FFmpegError: If ffmpeg is not on PATH, exits with a non-zero status,
            or leaves an empty output file.
        FileNotFoundError: If ``video_path`` does not exist.
    """
    ffmpeg = ensure_ffmpeg()
    if not video_path.exists():
        raise FileNotFoundError(f"Video not found: {video_path}")

    # Only the unique file name is needed here: the handle is closed right
    # away and ffmpeg (with -y) overwrites the empty placeholder itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = Path(tmp.name)

    cmd = [
        ffmpeg,
        "-y",
        "-i", str(video_path),
        "-vn",                    # drop video
        "-ac", "1",               # mono
        "-ar", str(sample_rate),  # sample rate
        "-c:a", "pcm_s16le",      # 16-bit PCM
        str(out_path),
    ]
    try:
        proc = subprocess.run(
            cmd,
            # Keep ffmpeg from reading the parent's stdin/terminal.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg may echo metadata or file names that are not valid in the
            # locale encoding; never let decoding the log abort the run.
            errors="replace",
        )
        if proc.returncode != 0:
            raise FFmpegError(
                f"ffmpeg failed (exit {proc.returncode}):\n{proc.stderr.strip()[-2000:]}"
            )
        if out_path.stat().st_size == 0:
            raise FFmpegError("ffmpeg produced no audio. Does the file have an audio track?")
    except BaseException:
        # Remove the temp file on every failure path, including interrupts
        # and errors raised while launching ffmpeg, then re-raise unchanged.
        out_path.unlink(missing_ok=True)
        raise
    return out_path