subtitlekit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- subtitlekit-0.1.0/LICENSE +21 -0
- subtitlekit-0.1.0/MANIFEST.in +15 -0
- subtitlekit-0.1.0/PKG-INFO +206 -0
- subtitlekit-0.1.0/README.md +172 -0
- subtitlekit-0.1.0/pyproject.toml +57 -0
- subtitlekit-0.1.0/requirements.txt +4 -0
- subtitlekit-0.1.0/setup.cfg +4 -0
- subtitlekit-0.1.0/src/subtitlekit/__init__.py +27 -0
- subtitlekit-0.1.0/src/subtitlekit/cli/__init__.py +3 -0
- subtitlekit-0.1.0/src/subtitlekit/cli/main.py +154 -0
- subtitlekit-0.1.0/src/subtitlekit/core/__init__.py +13 -0
- subtitlekit-0.1.0/src/subtitlekit/core/cleaner.py +169 -0
- subtitlekit-0.1.0/src/subtitlekit/core/encoding.py +100 -0
- subtitlekit-0.1.0/src/subtitlekit/core/preprocessor.py +372 -0
- subtitlekit-0.1.0/src/subtitlekit/tools/__init__.py +12 -0
- subtitlekit-0.1.0/src/subtitlekit/tools/corrections.py +302 -0
- subtitlekit-0.1.0/src/subtitlekit/tools/enhanced_matcher.py +543 -0
- subtitlekit-0.1.0/src/subtitlekit/tools/matcher.py +352 -0
- subtitlekit-0.1.0/src/subtitlekit/tools/overlaps.py +366 -0
- subtitlekit-0.1.0/src/subtitlekit/ui/__init__.py +11 -0
- subtitlekit-0.1.0/src/subtitlekit/ui/colab.py +240 -0
- subtitlekit-0.1.0/src/subtitlekit/ui/desktop.py +397 -0
- subtitlekit-0.1.0/src/subtitlekit/updater.py +60 -0
- subtitlekit-0.1.0/src/subtitlekit.egg-info/PKG-INFO +206 -0
- subtitlekit-0.1.0/src/subtitlekit.egg-info/SOURCES.txt +27 -0
- subtitlekit-0.1.0/src/subtitlekit.egg-info/dependency_links.txt +1 -0
- subtitlekit-0.1.0/src/subtitlekit.egg-info/entry_points.txt +2 -0
- subtitlekit-0.1.0/src/subtitlekit.egg-info/requires.txt +10 -0
- subtitlekit-0.1.0/src/subtitlekit.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 SubtitleKit Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# MANIFEST.in - Include additional files in distribution
|
|
2
|
+
|
|
3
|
+
include LICENSE
|
|
4
|
+
include README.md
|
|
5
|
+
include pyproject.toml
|
|
6
|
+
include requirements.txt
|
|
7
|
+
|
|
8
|
+
# Include all locale files
|
|
9
|
+
recursive-include src/subtitlekit/ui/locales *.json
|
|
10
|
+
|
|
11
|
+
# Exclude compiled files and caches
|
|
12
|
+
global-exclude __pycache__
|
|
13
|
+
global-exclude *.py[co]
|
|
14
|
+
global-exclude *.so
|
|
15
|
+
global-exclude *~
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: subtitlekit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Subtitle processing toolkit: merge, sync, fix overlaps, and apply corrections
|
|
5
|
+
Author-email: angelospk <haroldpoigr@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/angelospk/subtitlekit
|
|
8
|
+
Project-URL: Repository, https://github.com/angelospk/subtitlekit
|
|
9
|
+
Project-URL: Issues, https://github.com/angelospk/subtitlekit/issues
|
|
10
|
+
Keywords: subtitles,srt,synchronization,translation,processing
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Multimedia :: Video
|
|
21
|
+
Classifier: Topic :: Text Processing
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: pysrt>=1.1.2
|
|
26
|
+
Requires-Dist: chardet>=5.0.0
|
|
27
|
+
Requires-Dist: ffsubsync>=0.4.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
31
|
+
Provides-Extra: ui
|
|
32
|
+
Requires-Dist: ipywidgets>=8.0.0; extra == "ui"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# SubtitleKit - Subtitle Processing Toolkit
|
|
36
|
+
|
|
37
|
+
[![PyPI version](https://badge.fury.io/py/subtitlekit.svg)](https://badge.fury.io/py/subtitlekit)
|
|
38
|
+
[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
|
|
39
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
40
|
+
|
|
41
|
+
Comprehensive Python library and desktop application for subtitle processing, synchronization, and correction.
|
|
42
|
+
|
|
43
|
+
## ✨ Features
|
|
44
|
+
|
|
45
|
+
- **Merge & Sync**: Combine subtitle files with automatic synchronization
|
|
46
|
+
- **Fix Overlaps**: Detect and correct timing issues and overlaps
|
|
47
|
+
- **Apply Corrections**: Apply text corrections from JSON files
|
|
48
|
+
- **LLM Integration**: Generate optimized JSON for translation workflows
|
|
49
|
+
- **Desktop App**: Cross-platform GUI (Windows, macOS, Linux)
|
|
50
|
+
- **Colab Ready**: Works seamlessly in Google Colab notebooks
|
|
51
|
+
|
|
52
|
+
## 🚀 Quick Start
|
|
53
|
+
|
|
54
|
+
### Installation
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install subtitlekit
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Google Colab
|
|
61
|
+
|
|
62
|
+
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1lvdSX7aNhNknLs9laxfTeKdK_xNUvLOY?usp=sharing)
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
# Install
|
|
66
|
+
!pip install subtitlekit
|
|
67
|
+
|
|
68
|
+
# Launch UI
|
|
69
|
+
from subtitlekit.ui import show_ui
|
|
70
|
+
show_ui(lang='en') # or 'el' for Greek
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### As a Library
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from subtitlekit import merge_subtitles, fix_overlaps, apply_corrections
|
|
77
|
+
|
|
78
|
+
# Merge subtitle files
|
|
79
|
+
merge_subtitles("original.srt", ["helper.srt"], "output.json")
|
|
80
|
+
|
|
81
|
+
# Fix timing overlaps
|
|
82
|
+
fix_overlaps("input.srt", "reference.srt", "fixed.srt")
|
|
83
|
+
|
|
84
|
+
# Apply corrections from JSON
|
|
85
|
+
apply_corrections("input.srt", "corrections.json", "output.srt")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### CLI Usage
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Merge subtitles
|
|
92
|
+
subtitlekit merge --original original.srt --helper helper.srt --output output.json
|
|
93
|
+
|
|
94
|
+
# Fix overlaps
|
|
95
|
+
subtitlekit overlaps --input input.srt --reference ref.srt --output fixed.srt
|
|
96
|
+
|
|
97
|
+
# Apply corrections
|
|
98
|
+
subtitlekit corrections --input input.srt --corrections fixes.json --output corrected.srt
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Desktop App
|
|
102
|
+
|
|
103
|
+
Download the standalone application from [Releases](https://github.com/angelospk/subtitlekit/releases).
|
|
104
|
+
|
|
105
|
+
**Or launch programmatically:**
|
|
106
|
+
```bash
|
|
107
|
+
python -m subtitlekit.ui.desktop
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 📖 Documentation
|
|
111
|
+
|
|
112
|
+
### Merge Subtitles
|
|
113
|
+
|
|
114
|
+
Combines original subtitle file with one or more helper files (different languages) to create JSON output optimized for LLM translation workflows.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
subtitlekit merge \
|
|
118
|
+
--original movie.srt \
|
|
119
|
+
--helper helpful_en.srt \
|
|
120
|
+
--helper helpful_pt.srt \
|
|
121
|
+
--output for_translation.json \
|
|
122
|
+
--skip-sync # optional: skip ffsubsync
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
**Output format:**
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"id": 1,
|
|
129
|
+
"t": "00:00:11,878 --> 00:00:16,130",
|
|
130
|
+
"trans": "Original text to translate",
|
|
131
|
+
"h1": "Helper text (language 1)",
|
|
132
|
+
"h2": "Helper text (language 2)"
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Fix Overlaps
|
|
137
|
+
|
|
138
|
+
Detects and corrects timing issues:
|
|
139
|
+
- Overlapping timestamps
|
|
140
|
+
- Out-of-order entries
|
|
141
|
+
- Unreasonable durations
|
|
142
|
+
- Duplicate timings
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
subtitlekit overlaps \
|
|
146
|
+
--input problematic.srt \
|
|
147
|
+
--reference correct_timings.srt \
|
|
148
|
+
--output fixed.srt \
|
|
149
|
+
--window 5
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Apply Corrections
|
|
153
|
+
|
|
154
|
+
Apply text corrections from JSON file:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
subtitlekit corrections \
|
|
158
|
+
--input subtitle.srt \
|
|
159
|
+
--corrections fixes.json \
|
|
160
|
+
--output corrected.srt
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Corrections JSON format:**
|
|
164
|
+
```json
|
|
165
|
+
[
|
|
166
|
+
{
|
|
167
|
+
"id": 1,
|
|
168
|
+
"rx": "text to find",
|
|
169
|
+
"sb": "replacement text",
|
|
170
|
+
"rate": 8,
|
|
171
|
+
"type": "grammar"
|
|
172
|
+
}
|
|
173
|
+
]
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## 🌍 I18n Support
|
|
177
|
+
|
|
178
|
+
Desktop and Colab UIs support:
|
|
179
|
+
- 🇬🇧 English
|
|
180
|
+
- 🇬🇷 Greek (Ελληνικά)
|
|
181
|
+
|
|
182
|
+
## 📦 Development
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Clone repository
|
|
186
|
+
git clone https://github.com/angelospk/subtitlekit.git
|
|
187
|
+
cd subtitlekit
|
|
188
|
+
|
|
189
|
+
# Install in development mode
|
|
190
|
+
pip install -e .
|
|
191
|
+
|
|
192
|
+
# Run tests
|
|
193
|
+
pytest -v
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## 🤝 Contributing
|
|
197
|
+
|
|
198
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
199
|
+
|
|
200
|
+
## 📄 License
|
|
201
|
+
|
|
202
|
+
MIT License - see [LICENSE](LICENSE) file.
|
|
203
|
+
|
|
204
|
+
## 🙏 Credits
|
|
205
|
+
|
|
206
|
+
Built by [angelospk](https://github.com/angelospk)
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# SubtitleKit - Subtitle Processing Toolkit
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://badge.fury.io/py/subtitlekit.svg)](https://badge.fury.io/py/subtitlekit)
|
|
4
|
+
[![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
|
|
5
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
Comprehensive Python library and desktop application for subtitle processing, synchronization, and correction.
|
|
8
|
+
|
|
9
|
+
## ✨ Features
|
|
10
|
+
|
|
11
|
+
- **Merge & Sync**: Combine subtitle files with automatic synchronization
|
|
12
|
+
- **Fix Overlaps**: Detect and correct timing issues and overlaps
|
|
13
|
+
- **Apply Corrections**: Apply text corrections from JSON files
|
|
14
|
+
- **LLM Integration**: Generate optimized JSON for translation workflows
|
|
15
|
+
- **Desktop App**: Cross-platform GUI (Windows, macOS, Linux)
|
|
16
|
+
- **Colab Ready**: Works seamlessly in Google Colab notebooks
|
|
17
|
+
|
|
18
|
+
## 🚀 Quick Start
|
|
19
|
+
|
|
20
|
+
### Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install subtitlekit
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Google Colab
|
|
27
|
+
|
|
28
|
+
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1lvdSX7aNhNknLs9laxfTeKdK_xNUvLOY?usp=sharing)
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
# Install
|
|
32
|
+
!pip install subtitlekit
|
|
33
|
+
|
|
34
|
+
# Launch UI
|
|
35
|
+
from subtitlekit.ui import show_ui
|
|
36
|
+
show_ui(lang='en') # or 'el' for Greek
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### As a Library
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from subtitlekit import merge_subtitles, fix_overlaps, apply_corrections
|
|
43
|
+
|
|
44
|
+
# Merge subtitle files
|
|
45
|
+
merge_subtitles("original.srt", ["helper.srt"], "output.json")
|
|
46
|
+
|
|
47
|
+
# Fix timing overlaps
|
|
48
|
+
fix_overlaps("input.srt", "reference.srt", "fixed.srt")
|
|
49
|
+
|
|
50
|
+
# Apply corrections from JSON
|
|
51
|
+
apply_corrections("input.srt", "corrections.json", "output.srt")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### CLI Usage
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Merge subtitles
|
|
58
|
+
subtitlekit merge --original original.srt --helper helper.srt --output output.json
|
|
59
|
+
|
|
60
|
+
# Fix overlaps
|
|
61
|
+
subtitlekit overlaps --input input.srt --reference ref.srt --output fixed.srt
|
|
62
|
+
|
|
63
|
+
# Apply corrections
|
|
64
|
+
subtitlekit corrections --input input.srt --corrections fixes.json --output corrected.srt
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Desktop App
|
|
68
|
+
|
|
69
|
+
Download the standalone application from [Releases](https://github.com/angelospk/subtitlekit/releases).
|
|
70
|
+
|
|
71
|
+
**Or launch programmatically:**
|
|
72
|
+
```bash
|
|
73
|
+
python -m subtitlekit.ui.desktop
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## 📖 Documentation
|
|
77
|
+
|
|
78
|
+
### Merge Subtitles
|
|
79
|
+
|
|
80
|
+
Combines original subtitle file with one or more helper files (different languages) to create JSON output optimized for LLM translation workflows.
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
subtitlekit merge \
|
|
84
|
+
--original movie.srt \
|
|
85
|
+
--helper helpful_en.srt \
|
|
86
|
+
--helper helpful_pt.srt \
|
|
87
|
+
--output for_translation.json \
|
|
88
|
+
--skip-sync # optional: skip ffsubsync
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Output format:**
|
|
92
|
+
```json
|
|
93
|
+
{
|
|
94
|
+
"id": 1,
|
|
95
|
+
"t": "00:00:11,878 --> 00:00:16,130",
|
|
96
|
+
"trans": "Original text to translate",
|
|
97
|
+
"h1": "Helper text (language 1)",
|
|
98
|
+
"h2": "Helper text (language 2)"
|
|
99
|
+
}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Fix Overlaps
|
|
103
|
+
|
|
104
|
+
Detects and corrects timing issues:
|
|
105
|
+
- Overlapping timestamps
|
|
106
|
+
- Out-of-order entries
|
|
107
|
+
- Unreasonable durations
|
|
108
|
+
- Duplicate timings
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
subtitlekit overlaps \
|
|
112
|
+
--input problematic.srt \
|
|
113
|
+
--reference correct_timings.srt \
|
|
114
|
+
--output fixed.srt \
|
|
115
|
+
--window 5
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Apply Corrections
|
|
119
|
+
|
|
120
|
+
Apply text corrections from JSON file:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
subtitlekit corrections \
|
|
124
|
+
--input subtitle.srt \
|
|
125
|
+
--corrections fixes.json \
|
|
126
|
+
--output corrected.srt
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Corrections JSON format:**
|
|
130
|
+
```json
|
|
131
|
+
[
|
|
132
|
+
{
|
|
133
|
+
"id": 1,
|
|
134
|
+
"rx": "text to find",
|
|
135
|
+
"sb": "replacement text",
|
|
136
|
+
"rate": 8,
|
|
137
|
+
"type": "grammar"
|
|
138
|
+
}
|
|
139
|
+
]
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## 🌍 I18n Support
|
|
143
|
+
|
|
144
|
+
Desktop and Colab UIs support:
|
|
145
|
+
- 🇬🇧 English
|
|
146
|
+
- 🇬🇷 Greek (Ελληνικά)
|
|
147
|
+
|
|
148
|
+
## 📦 Development
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# Clone repository
|
|
152
|
+
git clone https://github.com/angelospk/subtitlekit.git
|
|
153
|
+
cd subtitlekit
|
|
154
|
+
|
|
155
|
+
# Install in development mode
|
|
156
|
+
pip install -e .
|
|
157
|
+
|
|
158
|
+
# Run tests
|
|
159
|
+
pytest -v
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## 🤝 Contributing
|
|
163
|
+
|
|
164
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
165
|
+
|
|
166
|
+
## 📄 License
|
|
167
|
+
|
|
168
|
+
MIT License - see [LICENSE](LICENSE) file.
|
|
169
|
+
|
|
170
|
+
## 🙏 Credits
|
|
171
|
+
|
|
172
|
+
Built by [angelospk](https://github.com/angelospk)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "subtitlekit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Subtitle processing toolkit: merge, sync, fix overlaps, and apply corrections"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "angelospk", email = "haroldpoigr@gmail.com"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["subtitles", "srt", "synchronization", "translation", "processing"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Topic :: Multimedia :: Video",
|
|
27
|
+
"Topic :: Text Processing",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
dependencies = [
|
|
31
|
+
"pysrt>=1.1.2",
|
|
32
|
+
"chardet>=5.0.0",
|
|
33
|
+
"ffsubsync>=0.4.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=7.0.0",
|
|
39
|
+
"pytest-cov>=4.0.0",
|
|
40
|
+
]
|
|
41
|
+
ui = [
|
|
42
|
+
"ipywidgets>=8.0.0", # For Colab UI
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/angelospk/subtitlekit"
|
|
47
|
+
Repository = "https://github.com/angelospk/subtitlekit"
|
|
48
|
+
Issues = "https://github.com/angelospk/subtitlekit/issues"
|
|
49
|
+
|
|
50
|
+
[project.scripts]
|
|
51
|
+
subtitlekit = "subtitlekit.cli.main:main"
|
|
52
|
+
|
|
53
|
+
[tool.setuptools.packages.find]
|
|
54
|
+
where = ["src"]
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.package-data]
|
|
57
|
+
subtitlekit = ["ui/locales/*.json"]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SubtitleKit - Subtitle Processing Toolkit
|
|
3
|
+
|
|
4
|
+
A comprehensive library for subtitle processing, synchronization, and correction.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
__version__ = "0.1.0"
|
|
8
|
+
|
|
9
|
+
from .tools import merge_subtitles, fix_overlaps, apply_corrections
|
|
10
|
+
from .core import (
|
|
11
|
+
detect_file_encoding,
|
|
12
|
+
read_srt_with_fallback,
|
|
13
|
+
preprocess_srt_file,
|
|
14
|
+
clean_subtitle_file,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
# Main functions
|
|
19
|
+
'merge_subtitles',
|
|
20
|
+
'fix_overlaps',
|
|
21
|
+
'apply_corrections',
|
|
22
|
+
# Utilities
|
|
23
|
+
'detect_file_encoding',
|
|
24
|
+
'read_srt_with_fallback',
|
|
25
|
+
'preprocess_srt_file',
|
|
26
|
+
'clean_subtitle_file',
|
|
27
|
+
]
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
SubtitleKit CLI - Unified command-line interface
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
subtitlekit merge --original FILE --helper FILE [--helper FILE ...] --output FILE
|
|
7
|
+
subtitlekit overlaps --input FILE --reference FILE --output FILE [--window N]
|
|
8
|
+
subtitlekit corrections --input FILE --corrections FILE --output FILE
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def cmd_merge(args):
    """Merge subtitle files and write the result as JSON.

    Prints the chosen settings, cleans the original subtitle file, passes the
    cleaned copy together with the helper files to ``process_subtitles`` and
    dumps the returned entries to ``args.output`` as UTF-8 JSON.

    Args:
        args: Parsed CLI namespace providing ``original``, ``helper`` (a list
            of paths), ``output`` and ``skip_sync``.
    """
    # Imports are kept function-local, as in the other subcommands.
    from subtitlekit.tools.matcher import process_subtitles
    from subtitlekit.core.cleaner import clean_subtitle_file
    import json
    import os

    print("Processing subtitles...")
    print(f" Original: {args.original}")
    for i, helper in enumerate(args.helper, 1):
        print(f" Helper {i}: {helper}")
    print(f" Output: {args.output}")

    if args.skip_sync:
        print(" Skipping synchronization")

    # Clean subtitle formatting
    print(" Cleaning subtitle formatting...")
    cleaned_original = clean_subtitle_file(args.original)

    try:
        # Process with cleaned file
        results = process_subtitles(
            cleaned_original,
            args.helper,
            skip_sync=args.skip_sync,
        )
    finally:
        # Clean up temporary file.
        # NOTE(review): clean_subtitle_file presumably returns a temporary
        # copy; the path comparison guards against deleting the user's own
        # input should it ever hand back the same path -- confirm.
        if cleaned_original != args.original and os.path.exists(cleaned_original):
            os.unlink(cleaned_original)

    # Write output; ensure_ascii=False keeps non-ASCII subtitle text readable.
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Success! Processed {len(results)} subtitle entries.")
    print(f"Output written to: {args.output}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def cmd_overlaps(args):
    """Fix timing overlaps in a subtitle file.

    Prints the chosen settings and delegates the actual work to
    ``fix_problematic_timings``.

    Args:
        args: Parsed CLI namespace providing ``input``, ``reference``,
            ``output``, ``window`` and ``preprocess``.
    """
    from subtitlekit.tools.overlaps import fix_problematic_timings

    print("Fixing overlaps and timing issues...")
    print(f" Input: {args.input}")
    print(f" Reference: {args.reference}")
    print(f" Output: {args.output}")
    print(f" Window: {args.window}")

    fix_problematic_timings(
        args.input,
        args.reference,
        args.output,
        window=args.window,
        preprocess=args.preprocess,
    )

    print(f"\n✅ Done! Fixed file saved to: {args.output}")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def cmd_corrections(args):
    """Apply corrections from a JSON file to a subtitle file.

    Prints the chosen settings and delegates to
    ``apply_corrections_from_file``, passing ``verbose=not args.quiet``.

    Args:
        args: Parsed CLI namespace providing ``input``, ``corrections``,
            ``output`` and ``quiet``.
    """
    from subtitlekit.tools.corrections import apply_corrections_from_file

    print("Applying corrections...")
    print(f" Input: {args.input}")
    print(f" Corrections: {args.corrections}")
    print(f" Output: {args.output}")

    stats = apply_corrections_from_file(
        args.input,
        args.corrections,
        args.output,
        verbose=not args.quiet,
    )

    # In quiet mode the callee runs with verbose=False (presumably printing
    # nothing itself), so a one-line summary is printed here instead.
    if args.quiet:
        print(f"✅ Applied {stats['applied']}/{stats['total']} corrections")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def main(argv=None):
    """Main CLI entry point.

    Builds the ``subtitlekit`` argument parser with the ``merge``,
    ``overlaps`` and ``corrections`` subcommands, parses the arguments and
    runs the selected subcommand.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None``, argparse reads ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` when no subcommand was given or when the
        subcommand raised an exception.

    Raises:
        Exception: Whatever the subcommand raised, but only when
            ``--verbose`` was passed; otherwise the error is printed to
            stderr and ``1`` is returned.
    """
    # --verbose lives on a shared parent parser so that it is accepted both
    # before and after the subcommand name. Previously the flag was looked up
    # in sys.argv without being declared, so argparse rejected it before the
    # check could ever run. default=SUPPRESS stops a sub-parser from
    # resetting a value that was already set at the top level.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument('--verbose', action='store_true',
                        default=argparse.SUPPRESS,
                        help='Re-raise errors to show the full traceback')

    parser = argparse.ArgumentParser(
        prog='subtitlekit',
        description='Subtitle processing toolkit: merge, sync, fix, and correct subtitles',
        parents=[common],
    )

    subparsers = parser.add_subparsers(dest='command', help='Commands')

    # Merge command
    merge_parser = subparsers.add_parser('merge', parents=[common],
                                         help='Merge and synchronize subtitle files')
    merge_parser.add_argument('--original', required=True,
                              help='Original subtitle file (to translate)')
    merge_parser.add_argument('--helper', action='append', required=True,
                              help='Helper subtitle file (can be used multiple times)')
    merge_parser.add_argument('--output', required=True, help='Output JSON file')
    merge_parser.add_argument('--skip-sync', action='store_true',
                              help='Skip ffsubsync synchronization')
    merge_parser.set_defaults(func=cmd_merge)

    # Overlaps command
    overlaps_parser = subparsers.add_parser('overlaps', parents=[common],
                                            help='Fix timing overlaps and issues')
    overlaps_parser.add_argument('--input', required=True, help='Input subtitle file')
    overlaps_parser.add_argument('--reference', required=True, help='Reference subtitle file')
    overlaps_parser.add_argument('--output', required=True, help='Output subtitle file')
    overlaps_parser.add_argument('--window', type=int, default=5,
                                 help='Context window for matching (default: 5)')
    overlaps_parser.add_argument('--preprocess', action='store_true',
                                 help='Preprocess input file first')
    overlaps_parser.set_defaults(func=cmd_overlaps)

    # Corrections command
    corrections_parser = subparsers.add_parser('corrections', parents=[common],
                                               help='Apply corrections from JSON')
    corrections_parser.add_argument('--input', required=True, help='Input subtitle file')
    corrections_parser.add_argument('--corrections', required=True,
                                    help='Corrections JSON file')
    corrections_parser.add_argument('--output', required=True, help='Output subtitle file')
    corrections_parser.add_argument('--quiet', '-q', action='store_true',
                                    help='Quiet mode (minimal output)')
    corrections_parser.set_defaults(func=cmd_corrections)

    # Parse and execute
    args = parser.parse_args(argv)

    if not args.command:
        parser.print_help()
        return 1

    try:
        args.func(args)
        return 0
    except Exception as e:
        # Top-level boundary: report the error briefly; with --verbose,
        # re-raise so the full traceback is shown.
        print(f"\n❌ Error: {e}", file=sys.stderr)
        if getattr(args, 'verbose', False):
            raise
        return 1
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SubtitleKit Core - Encoding, preprocessing, and cleaning utilities
|
|
3
|
+
"""
|
|
4
|
+
from .encoding import *
|
|
5
|
+
from .preprocessor import *
|
|
6
|
+
from .cleaner import *
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
'detect_file_encoding',
|
|
10
|
+
'read_srt_with_fallback',
|
|
11
|
+
'preprocess_srt_file',
|
|
12
|
+
'clean_subtitle_file',
|
|
13
|
+
]
|