subtitle-toolkit 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/__init__.py ADDED
@@ -0,0 +1,45 @@
1
+ """
2
+ Subtitle Toolkit - A collection of utilities for working with subtitle files.
3
+
4
+ This package provides utilities for:
5
+ - Time‑shifting subtitle timestamps
6
+ - Translating subtitles using AI models
7
+ - Extracting subtitles from MKV files
8
+
9
+ Example usage:
10
+ from src.timeshift import shift_timestamp, timestamp_to_seconds
11
+ from src.translate import split_into_units, chunk_units
12
+ from src.mkv2srt import extract_subtitles, clean_srt_content
13
+ """
14
+
15
+ # Package version – kept in sync with pyproject.toml
16
+ __version__: str = "0.9.5"
17
+
18
+ __all__ = [
19
+ # Timeshift functions
20
+ "shift_timestamp",
21
+ "timestamp_to_seconds",
22
+ # Translate functions
23
+ "detect_line_ending",
24
+ "read_file",
25
+ "write_file",
26
+ "split_into_units",
27
+ "chunk_units",
28
+ # MKV2SRT functions
29
+ "extract_subtitles",
30
+ "extract_all_subtitles",
31
+ "clean_srt_content",
32
+ # Version
33
+ "__version__",
34
+ ]
35
+
36
+ # Import public API from submodules
37
+ from .timeshift import shift_timestamp, timestamp_to_seconds
38
+ from .translate import (
39
+ detect_line_ending,
40
+ read_file,
41
+ write_file,
42
+ split_into_units,
43
+ chunk_units,
44
+ )
45
+ from .mkv2srt import extract_subtitles, extract_all_subtitles, clean_srt_content
src/cli.py ADDED
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CLI module - Unified command-line interface for the Subtitle Toolkit.
4
+
5
+ Public API:
6
+ - main: Entry point for the subtitle-tk command
7
+ """
8
+ import argparse
9
+ import subprocess
10
+ import sys
11
+ import os
12
+ from pathlib import Path
13
+ from typing import List
14
+
15
+ __all__ = ["main"]
16
+
17
+
18
def get_script_dir() -> Path:
    """Return the absolute, resolved directory that contains this module."""
    module_file = Path(__file__)
    return module_file.parent.resolve()
21
+
22
+
23
def _run_script(args: List[str], *script_parts: str) -> int:
    """
    Run one of the toolkit's bundled scripts in a child Python process.

    Args:
        args: Command-line arguments forwarded verbatim to the script
        script_parts: Path components of the script, relative to this module's directory

    Returns:
        The exit code of the child process
    """
    script_path = get_script_dir().joinpath(*script_parts)
    # Use the interpreter that is running this CLI so the child sees the
    # same environment / installed packages.
    cmd = [sys.executable, str(script_path), *args]
    return subprocess.run(cmd).returncode


def run_translate(args: List[str]) -> int:
    """Run the translate.py script and return its exit code."""
    return _run_script(args, "translate.py")


def run_timeshift(args: List[str]) -> int:
    """Run the timeshift.py script and return its exit code."""
    return _run_script(args, "timeshift.py")


def run_mkv2srt(args: List[str]) -> int:
    """Run the mkv2srt.py script and return its exit code."""
    return _run_script(args, "mkv2srt.py")


def run_web(args: List[str]) -> int:
    """Run the web interface (web/app.py) and return its exit code."""
    return _run_script(args, "web", "app.py")
49
+
50
+
51
# Sub-commands understood by the dispatcher, in the order shown in the help text.
_COMMANDS = ("translate", "timeshift", "mkv2srt", "web")

_HELP_EPILOG = """
Commands:
  translate    Translate subtitles using AI
  timeshift    Shift timestamps in SRT files
  mkv2srt      Extract subtitles from MKV files
  web          Start the web interface

Examples:
  subtitle-tk translate input.srt --instructions instructions.txt
  subtitle-tk timeshift --shift-seconds 2.5 < input.srt > output.srt
  subtitle-tk mkv2srt --input video.mkv --language en
  subtitle-tk web --host 0.0.0.0 --port 8000
"""


def _build_parser() -> argparse.ArgumentParser:
    """Build the top-level parser; it is only used to render the help text."""
    return argparse.ArgumentParser(
        prog="subtitle-tk",
        usage="subtitle-tk <command> [options]",
        description="Subtitle Toolkit - A collection of utilities for working with subtitle files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_HELP_EPILOG,
    )


def main() -> None:
    """
    Entry point for the subtitle-tk command.

    Reads the sub-command from sys.argv[1] and forwards every remaining
    argument untouched to the matching runner. Always terminates through
    sys.exit:

    - no arguments, "-h" or "--help": print the help text, exit status 0
    - unknown command: report it and print the help text on stderr, exit status 1
    - known command: exit with the status returned by the sub-command
    """
    argv = sys.argv[1:]

    if not argv or argv[0] in ("-h", "--help"):
        _build_parser().print_help()
        sys.exit(0)

    command, remaining_args = argv[0], argv[1:]

    if command not in _COMMANDS:
        # Say what was wrong instead of only dumping the usage text.
        print(f"subtitle-tk: unknown command '{command}'", file=sys.stderr)
        _build_parser().print_help(sys.stderr)
        sys.exit(1)

    # Built only after validation so the help and error paths do not depend
    # on the runner functions.
    runners = {
        "translate": run_translate,
        "timeshift": run_timeshift,
        "mkv2srt": run_mkv2srt,
        "web": run_web,
    }
    sys.exit(runners[command](remaining_args))
117
+
118
+
119
+ if __name__ == "__main__":
120
+ main()
src/mkv2srt.py ADDED
@@ -0,0 +1,329 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MKV to SRT conversion module for extracting subtitles from video files.
4
+
5
+ Public API:
6
+ - extract_subtitles: Extract subtitles from an MKV file to SRT format
7
+ - extract_all_subtitles: Extract all subtitle tracks from an MKV file
8
+ - clean_srt_content: Remove ASS/SSA formatting tags from SRT content
9
+ """
10
+ import argparse
11
+ import os
12
+ import subprocess
13
+ import sys
14
+ from pathlib import Path
15
+ from typing import List, Optional
16
+
17
+ import json
18
+
19
+ __all__ = ["extract_subtitles", "extract_all_subtitles", "clean_srt_content"]
20
+
21
+
22
def extract_subtitles(mkv_file: Path, language: Optional[str] = None, output_file: Optional[Path] = None) -> Path:
    """
    Extract a single subtitle track from an MKV file to SRT using ffmpeg.

    When a language is given, ffprobe is used to find the first subtitle
    stream whose language tag equals it exactly. If no stream matches, or
    probing fails, a warning is printed and the first subtitle stream is
    extracted instead.

    Args:
        mkv_file: Path to the input MKV file
        language: Language tag to select the subtitle track by (optional)
        output_file: Path to output SRT file (optional; defaults to the
            input path with a .srt suffix)

    Returns:
        Path to the extracted SRT file

    Raises:
        FileNotFoundError: If mkv_file does not exist
        RuntimeError: If ffmpeg exits with a non-zero status
    """
    if not mkv_file.exists():
        raise FileNotFoundError(f"MKV file not found: {mkv_file}")

    # If no output file specified, generate one based on input filename
    if output_file is None:
        output_file = mkv_file.with_suffix('.srt')

    # An SRT file holds exactly one subtitle stream, so default to the first
    # subtitle track rather than mapping every subtitle stream ('0:s').
    stream_spec = '0:s:0'

    if language:
        try:
            probe_cmd = ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_streams', str(mkv_file)]
            probe = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
            streams = json.loads(probe.stdout).get('streams', [])

            matching = [
                stream for stream in streams
                if stream.get('codec_type') == 'subtitle'
                and (stream.get('tags') or {}).get('language') == language
            ]

            if matching:
                # ffprobe's 'index' is the absolute stream index within the
                # file, so it must be mapped as '0:<index>'. '0:s:<n>' would
                # count subtitle streams only and pick the wrong track.
                stream_spec = f"0:{matching[0]['index']}"
            else:
                print(f"Warning: No subtitle track found with language '{language}'")
        except Exception as e:
            # Deliberately best-effort: any probing problem falls back to the
            # default track instead of aborting the extraction.
            print(f"Warning: Could not determine subtitle track by language: {e}")

    # -nostdin: output is captured, so an interactive "overwrite?" prompt
    # would be invisible and block forever; fail cleanly instead.
    cmd = [
        'ffmpeg', '-nostdin', '-loglevel', 'error',
        '-i', str(mkv_file),
        '-map', stream_spec,
        '-f', 'srt',
        str(output_file),
    ]

    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error extracting subtitles: {e.stderr}")
        raise RuntimeError("Error extracting subtitles") from e

    print(f"Successfully extracted subtitles to {output_file}")
    return output_file
87
+
88
+
89
def extract_all_subtitles(mkv_file: Path) -> List[Path]:
    """
    Extract all subtitle tracks from MKV file and save each to a separate SRT file.

    Output files are written next to the input file. A track with a usable
    language tag is named "<stem>.<language>.srt"; if that name was already
    taken by an earlier track with the same language, the track number is
    added ("<stem>.<language>.<NN>.srt"). Tracks without a usable language
    tag are named "<stem>_sub<NN>.srt".

    Tracks that ffmpeg fails to convert are reported and skipped.

    Args:
        mkv_file: Path to the input MKV file

    Returns:
        List of paths to extracted SRT files (empty if probing fails or
        the file has no subtitle tracks)
    """
    # First, get information about subtitle tracks using ffprobe
    probe_cmd = ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_streams', str(mkv_file)]

    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        info = json.loads(probe.stdout)
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        print(f"Error getting subtitle information: {e}")
        return []

    subtitle_tracks = [
        stream for stream in info.get('streams', [])
        if stream.get('codec_type') == 'subtitle'
    ]

    if not subtitle_tracks:
        print("No subtitle tracks found in the MKV file")
        return []

    srt_files: List[Path] = []
    used_names = set()

    for i, stream in enumerate(subtitle_tracks):
        # Determine language for naming the output file
        tags = stream.get('tags') or {}
        language = "unknown"
        if 'language' in tags:
            language = str(tags['language'])
        elif 'dvd' in stream.get('codec_name', ''):
            language = "dvd"

        # The tag comes from file metadata: only use it in a file name when it
        # is a plain token (letters, digits, '-' or '_'), never a path fragment.
        usable_tag = (
            language not in ("unknown", "dvd")
            and language.replace('-', '').replace('_', '').isalnum()
        )

        if usable_tag:
            name = f"{mkv_file.stem}.{language}.srt"
            if name in used_names:
                # Several tracks share this language: keep them apart.
                name = f"{mkv_file.stem}.{language}.{i:02d}.srt"
        else:
            name = f"{mkv_file.stem}_sub{i:02d}.srt"
        used_names.add(name)
        output_file = mkv_file.with_name(name)

        # '0:s:<i>' selects the i-th subtitle stream, matching the position in
        # subtitle_tracks. -nostdin: output is captured, so an "overwrite?"
        # prompt would be invisible and block; fail cleanly instead.
        cmd = [
            'ffmpeg', '-nostdin', '-loglevel', 'error',
            '-i', str(mkv_file),
            '-map', f'0:s:{i}',
            '-f', 'srt',
            str(output_file),
        ]

        try:
            # text=True so stderr is reported as readable text, not a bytes repr
            subprocess.run(cmd, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error extracting subtitle track {i}: {e.stderr}")
            continue

        print(f"Successfully extracted subtitle track {i} ({language}) to {output_file}")
        srt_files.append(output_file)

    return srt_files
150
+
151
+
152
def clean_srt_content(content: str) -> str:
    r"""
    Clean SRT content by removing ASS/SSA formatting tags like {\an7} that
    aren't properly interpreted by video players, while preserving SRT structure.

    The first two lines of every cue (sequence number and timecodes) are kept
    untouched. In the remaining text lines, everything between "{" and "}" is
    removed, as are leftover backslash codes such as \N, and text lines that
    end up empty are dropped. Trailing newlines are stripped from the result.

    Both LF and CRLF input are supported; if the input contains CRLF line
    endings the result uses CRLF as well.

    Args:
        content: Raw SRT content

    Returns:
        Cleaned SRT content
    """
    import re

    tag_pattern = re.compile(r'\{[^}]*\}')
    escape_pattern = re.compile(r'\\[a-zA-Z][0-9]*')

    # Work on LF internally. Without this, CRLF content is never split into
    # cues ("\r\n\r\n" contains no "\n\n"), so the blank separator lines
    # would be dropped and all cues merged together.
    uses_crlf = '\r\n' in content
    if uses_crlf:
        content = content.replace('\r\n', '\n')

    cleaned_blocks = []

    # Each cue is separated from the next by an empty line
    for block in content.split('\n\n'):
        if not block.strip():
            # Empty block - keep it as is
            cleaned_blocks.append('')
            continue

        lines = block.split('\n')

        # Sequence number and timecodes are preserved verbatim
        cleaned_lines = lines[:2]

        for line in lines[2:]:
            # Remove ASS/SSA formatting tags like {\an7}, {\b1}, {\i1}, etc.
            cleaned_line = tag_pattern.sub('', line)
            # Remove any remaining backslash codes that might be left over
            cleaned_line = escape_pattern.sub('', cleaned_line)
            # Only keep lines that still carry text
            if cleaned_line.strip():
                cleaned_lines.append(cleaned_line)

        cleaned_block = '\n'.join(cleaned_lines)
        if cleaned_block.strip():  # Only add non-empty blocks
            cleaned_blocks.append(cleaned_block)

    # Join blocks back with an empty line between them, without trailing newlines
    result = '\n\n'.join(cleaned_blocks).rstrip('\n')

    if uses_crlf:
        result = result.replace('\n', '\r\n')

    return result
210
+
211
+
212
def process_srt_files(srt_files: List[Path]) -> None:
    """
    Strip formatting tags from each given SRT file, rewriting it in place.

    Files are read and written as UTF-8. A failure on one file is reported
    and does not stop the remaining files from being processed.

    Args:
        srt_files: List of SRT file paths to process
    """
    for srt_file in srt_files:
        try:
            with open(srt_file, 'r', encoding='utf-8') as source:
                raw_text = source.read()

            tidy_text = clean_srt_content(raw_text)

            with open(srt_file, 'w', encoding='utf-8') as target:
                target.write(tidy_text)

            print(f"Cleaned formatting tags from {srt_file}")
        except Exception as e:
            print(f"Error processing {srt_file}: {e}")
236
+
237
+
238
def check_ffmpeg() -> bool:
    """
    Check if ffmpeg is installed and provide helpful error messages if not.

    Returns:
        True when "ffmpeg -version" runs successfully.

    Raises:
        SystemExit: When ffmpeg cannot be run; the exit message includes
            installation instructions for the current platform.
    """
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        pass

    # Provide helpful installation instructions based on platform
    import platform
    system = platform.system().lower()

    if system == "darwin":  # macOS
        install_msg = "To install ffmpeg on macOS, run: brew install ffmpeg"
    elif system == "linux":
        # Generic advice unless a well-known distribution is recognised
        install_msg = "To install ffmpeg on Linux, use your distribution's package manager (e.g., apt, dnf, pacman)"
        try:
            with open('/etc/os-release', 'r', encoding='utf-8', errors='replace') as f:
                os_release = f.read().lower()
        except OSError:
            # Only a missing/unreadable file is expected here; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            os_release = ""

        if 'ubuntu' in os_release or 'debian' in os_release:
            install_msg = "To install ffmpeg on Ubuntu/Debian, run: sudo apt update && sudo apt install ffmpeg"
        elif 'centos' in os_release or 'red hat' in os_release or 'fedora' in os_release:
            install_msg = "To install ffmpeg on CentOS/RHEL/Fedora, run: sudo dnf install ffmpeg"
    else:  # Windows or other
        install_msg = "To install ffmpeg on Windows, download it from https://ffmpeg.org/download.html or use: choco install ffmpeg"

    sys.exit(f"Error: ffmpeg is required but not found. {install_msg}")
267
+
268
+
269
def main() -> None:
    """
    Command-line entry point for MKV subtitle extraction.

    With --output and/or --language, a single track is extracted with
    extract_subtitles (the output defaults to the input name with a .srt
    suffix). With neither, every subtitle track is extracted to its own
    file and the extracted files are then cleaned of formatting tags.

    Exits with an error message if the input file does not exist, ffmpeg is
    unavailable, or extraction raises.
    """
    parser = argparse.ArgumentParser(
        description="Extract subtitles from MKV files and convert to SRT format"
    )
    parser.add_argument(
        '--input',
        '-i',
        type=Path,
        required=True,
        help='Path to the input MKV file'
    )
    parser.add_argument(
        '--output',
        '-o',
        type=Path,
        help='Output SRT file path (default: input filename with .srt extension)'
    )
    parser.add_argument(
        '--language',
        '-l',
        type=str,
        help='Language code to filter subtitles (e.g., "en", "es")'
    )

    args = parser.parse_args()

    # Validate input file
    if not args.input.is_file():
        sys.exit(f"Error: Input file does not exist: {args.input}")

    # Check if ffmpeg is available
    check_ffmpeg()

    try:
        # A requested language must be honoured even without --output;
        # previously it was silently ignored in that case.
        if args.output is not None or args.language:
            output_file = extract_subtitles(
                args.input,
                args.language,
                args.output
            )
            print(f"Subtitle extraction completed: {output_file}")
        else:
            # By default, extract all subtitles to individual files
            srt_files = extract_all_subtitles(args.input)
            if srt_files:
                print(f"Successfully extracted {len(srt_files)} subtitle tracks:")
                for file in srt_files:
                    print(f"  - {file}")

                # Clean formatting tags from all extracted files
                process_srt_files(srt_files)
            else:
                print("No subtitle tracks were extracted.")

    except Exception as e:
        sys.exit(f"Error: {e}")
326
+
327
+
328
+ if __name__ == "__main__":
329
+ main()
src/py.typed ADDED
File without changes
src/static/empty.txt ADDED
File without changes
src/static/favicon.ico ADDED
Binary file