subtitlekit 0.1.0__tar.gz → 0.1.3.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- subtitlekit-0.1.3.dev0/.github/workflows/build-executables.yml +74 -0
- subtitlekit-0.1.3.dev0/.github/workflows/publish-pypi.yml +33 -0
- subtitlekit-0.1.3.dev0/.gitignore +31 -0
- subtitlekit-0.1.3.dev0/BUILD_GUIDE.md +77 -0
- {subtitlekit-0.1.0/src/subtitlekit.egg-info → subtitlekit-0.1.3.dev0}/PKG-INFO +1 -1
- subtitlekit-0.1.3.dev0/README_OLD.md +240 -0
- subtitlekit-0.1.3.dev0/SubtitleKit.spec +59 -0
- subtitlekit-0.1.3.dev0/SubtitleKit_Colab.ipynb +144 -0
- subtitlekit-0.1.3.dev0/correction_run.log +27 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/pyproject.toml +7 -2
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/__init__.py +4 -1
- subtitlekit-0.1.3.dev0/src/subtitlekit/_version.py +34 -0
- subtitlekit-0.1.3.dev0/src/subtitlekit/ui/colab.py +379 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/ui/desktop.py +4 -0
- subtitlekit-0.1.3.dev0/src/subtitlekit/ui/locales/el.json +47 -0
- subtitlekit-0.1.3.dev0/src/subtitlekit/ui/locales/en.json +47 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/updater.py +1 -1
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0/src/subtitlekit.egg-info}/PKG-INFO +1 -1
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit.egg-info/SOURCES.txt +25 -1
- subtitlekit-0.1.3.dev0/test_bracket_conversion.py +34 -0
- subtitlekit-0.1.3.dev0/test_enhanced_matcher.py +198 -0
- subtitlekit-0.1.3.dev0/test_fix_overlaps.py +191 -0
- subtitlekit-0.1.3.dev0/test_fixtures/duplicate_edge_case.srt +25 -0
- subtitlekit-0.1.3.dev0/test_fixtures/duplicate_edge_case_cleaned.srt +19 -0
- subtitlekit-0.1.3.dev0/test_fixtures/messy_input.srt +29 -0
- subtitlekit-0.1.3.dev0/test_fixtures/messy_input_cleaned.srt +21 -0
- subtitlekit-0.1.3.dev0/test_fixtures/sample_greek_corrupted.srt +62 -0
- subtitlekit-0.1.3.dev0/test_fixtures/sample_greek_perfect.srt +62 -0
- subtitlekit-0.1.3.dev0/test_fixtures/sample_helpful.srt +26 -0
- subtitlekit-0.1.3.dev0/test_fixtures/sample_original.srt +26 -0
- subtitlekit-0.1.3.dev0/test_fixtures/sample_swedish_15.srt +62 -0
- subtitlekit-0.1.3.dev0/test_subsync_matcher.py +273 -0
- subtitlekit-0.1.0/src/subtitlekit/ui/colab.py +0 -240
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/LICENSE +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/MANIFEST.in +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/README.md +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/requirements.txt +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/setup.cfg +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/cli/__init__.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/cli/main.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/core/__init__.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/core/cleaner.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/core/encoding.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/core/preprocessor.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/tools/__init__.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/tools/corrections.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/tools/enhanced_matcher.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/tools/matcher.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/tools/overlaps.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit/ui/__init__.py +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit.egg-info/dependency_links.txt +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit.egg-info/entry_points.txt +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit.egg-info/requires.txt +0 -0
- {subtitlekit-0.1.0 → subtitlekit-0.1.3.dev0}/src/subtitlekit.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
name: Build Desktop Executables
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
workflow_dispatch: # Allow manual trigger
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
name: Build on ${{ matrix.os }}
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
include:
|
|
15
|
+
- os: ubuntu-latest
|
|
16
|
+
artifact_name: subtitlekit-linux
|
|
17
|
+
asset_name: subtitlekit-linux
|
|
18
|
+
- os: windows-latest
|
|
19
|
+
artifact_name: SubtitleKit.exe
|
|
20
|
+
asset_name: subtitlekit-windows.exe
|
|
21
|
+
- os: macos-latest
|
|
22
|
+
artifact_name: SubtitleKit
|
|
23
|
+
asset_name: subtitlekit-macos
|
|
24
|
+
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
with:
|
|
28
|
+
fetch-depth: 0 # Full history for setuptools-scm version detection
|
|
29
|
+
|
|
30
|
+
- name: Set up Python
|
|
31
|
+
uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: '3.11'
|
|
34
|
+
|
|
35
|
+
- name: Install dependencies
|
|
36
|
+
run: |
|
|
37
|
+
python -m pip install --upgrade pip
|
|
38
|
+
pip install -e .
|
|
39
|
+
pip install pyinstaller
|
|
40
|
+
|
|
41
|
+
- name: Build executable
|
|
42
|
+
run: |
|
|
43
|
+
# Build with spec file for better control
|
|
44
|
+
pyinstaller SubtitleKit.spec
|
|
45
|
+
shell: bash
|
|
46
|
+
|
|
47
|
+
- name: Verify executable (Non-Windows)
|
|
48
|
+
if: runner.os != 'Windows'
|
|
49
|
+
run: |
|
|
50
|
+
ls -lh dist/
|
|
51
|
+
file dist/SubtitleKit || true
|
|
52
|
+
|
|
53
|
+
- name: Verify executable (Windows)
|
|
54
|
+
if: runner.os == 'Windows'
|
|
55
|
+
run: |
|
|
56
|
+
dir dist\
|
|
57
|
+
|
|
58
|
+
- name: Upload artifact
|
|
59
|
+
uses: actions/upload-artifact@v4
|
|
60
|
+
with:
|
|
61
|
+
name: ${{ matrix.artifact_name }}
|
|
62
|
+
path: dist/${{ matrix.artifact_name }}
|
|
63
|
+
if-no-files-found: error
|
|
64
|
+
|
|
65
|
+
- name: Upload to Release
|
|
66
|
+
if: github.event_name == 'release'
|
|
67
|
+
uses: actions/upload-release-asset@v1
|
|
68
|
+
env:
|
|
69
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
70
|
+
with:
|
|
71
|
+
upload_url: ${{ github.event.release.upload_url }}
|
|
72
|
+
asset_path: dist/${{ matrix.artifact_name }}
|
|
73
|
+
asset_name: ${{ matrix.asset_name }}
|
|
74
|
+
asset_content_type: application/octet-stream
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
with:
|
|
14
|
+
fetch-depth: 0 # Full history for setuptools-scm version detection
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: '3.11'
|
|
20
|
+
|
|
21
|
+
- name: Install dependencies
|
|
22
|
+
run: |
|
|
23
|
+
python -m pip install --upgrade pip
|
|
24
|
+
pip install build twine
|
|
25
|
+
|
|
26
|
+
- name: Build package
|
|
27
|
+
run: python -m build
|
|
28
|
+
|
|
29
|
+
- name: Publish to PyPI
|
|
30
|
+
env:
|
|
31
|
+
TWINE_USERNAME: __token__
|
|
32
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
|
33
|
+
run: twine upload dist/*
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Subtitle files (except test fixtures)
|
|
2
|
+
*.srt
|
|
3
|
+
!test_fixtures/*.srt
|
|
4
|
+
|
|
5
|
+
# Generated JSON files (except test fixtures and locales)
|
|
6
|
+
*.json
|
|
7
|
+
!test_fixtures/*.json
|
|
8
|
+
!src/subtitlekit/ui/locales/*.json
|
|
9
|
+
|
|
10
|
+
# Generated version file (created by setuptools-scm)
|
|
11
|
+
src/subtitlekit/_version.py
|
|
12
|
+
|
|
13
|
+
# Python cache
|
|
14
|
+
__pycache__/
|
|
15
|
+
*.pyc
|
|
16
|
+
.pytest_cache/
|
|
17
|
+
old_code_backup/
|
|
18
|
+
|
|
19
|
+
# Virtual environment
|
|
20
|
+
venv/
|
|
21
|
+
|
|
22
|
+
# IDE files
|
|
23
|
+
.vscode/
|
|
24
|
+
.idea/
|
|
25
|
+
*.swp
|
|
26
|
+
*.swo
|
|
27
|
+
*~
|
|
28
|
+
|
|
29
|
+
# OS files
|
|
30
|
+
.DS_Store
|
|
31
|
+
Thumbs.db
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Building Executables Locally
|
|
2
|
+
|
|
3
|
+
Για να δοκιμάσεις το build process τοπικά πριν το push:
|
|
4
|
+
|
|
5
|
+
## Quick Build
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Install PyInstaller
|
|
9
|
+
pip install pyinstaller
|
|
10
|
+
|
|
11
|
+
# Build executable (simple)
|
|
12
|
+
pyinstaller --onefile --windowed --name SubtitleKit src/subtitlekit/ui/desktop.py
|
|
13
|
+
|
|
14
|
+
# Output: dist/SubtitleKit (or SubtitleKit.exe on Windows)
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Advanced Build (με locale files)
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Using the spec file
|
|
21
|
+
pyinstaller SubtitleKit.spec
|
|
22
|
+
|
|
23
|
+
# Test the executable
|
|
24
|
+
./dist/SubtitleKit # macOS/Linux
|
|
25
|
+
./dist/SubtitleKit.exe # Windows
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Τι Περιλαμβάνει το Workflow
|
|
29
|
+
|
|
30
|
+
### Trigger Events:
|
|
31
|
+
- ✅ **On Release**: Αυτόματο build όταν δημιουργείς νέο release στο GitHub
|
|
32
|
+
- ✅ **Manual**: Μπορείς να το τρέξεις χειροκίνητα από το Actions tab
|
|
33
|
+
|
|
34
|
+
### Builds:
|
|
35
|
+
- 🪟 **Windows**: `subtitlekit-windows.exe`
|
|
36
|
+
- 🍎 **macOS**: `subtitlekit-macos`
|
|
37
|
+
- 🐧 **Linux**: `subtitlekit-linux`
|
|
38
|
+
|
|
39
|
+
### Output Location:
|
|
40
|
+
Τα executables ανεβαίνουν:
|
|
41
|
+
1. Ως **artifacts** (για testing) - διαθέσιμα για 90 μέρες
|
|
42
|
+
2. Ως **release assets** (για download) - μόνιμα με το release
|
|
43
|
+
|
|
44
|
+
## Πώς να Δημιουργήσεις Release
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# 1. Tag the version
|
|
48
|
+
git tag v0.1.0
|
|
49
|
+
git push origin v0.1.0
|
|
50
|
+
|
|
51
|
+
# 2. Create release on GitHub
|
|
52
|
+
# Στο GitHub UI: Releases → Draft new release → Choose tag v0.1.0
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Μόλις δημιουργηθεί το release:
|
|
56
|
+
- ✅ Το PyPI workflow δημοσιεύει το πακέτο στο PyPI (εγκατάσταση με `pip install subtitlekit`)
|
|
57
|
+
- ✅ Build workflow δημιουργεί executables για κάθε OS
|
|
58
|
+
- ✅ Executables ανεβαίνουν αυτόματα στο release
|
|
59
|
+
|
|
60
|
+
## Testing Builds
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# After download
|
|
64
|
+
chmod +x subtitlekit-macos # macOS/Linux only
|
|
65
|
+
./subtitlekit-macos
|
|
66
|
+
|
|
67
|
+
# Windows - just double click .exe
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Προσοχή σε macOS
|
|
71
|
+
|
|
72
|
+
Τα unsigned executables μπορεί να εμφανίσουν προειδοποίηση ασφαλείας κατά το άνοιγμα. Ο χρήστης πρέπει να κάνει το εξής:
|
|
73
|
+
```
|
|
74
|
+
Right-click → Open → Open anyway
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Για signed executables χρειάζεται Apple Developer account (πληρωμένο).
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# Subtitle Processing Tools
|
|
2
|
+
|
|
3
|
+
Εργαλεία Python για επεξεργασία και διόρθωση υποτίτλων.
|
|
4
|
+
|
|
5
|
+
## Εγκατάσταση
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Δημιουργία virtual environment
|
|
9
|
+
python3 -m venv venv
|
|
10
|
+
source venv/bin/activate
|
|
11
|
+
|
|
12
|
+
# Εγκατάσταση dependencies
|
|
13
|
+
pip install -r requirements.txt
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Εργαλεία
|
|
17
|
+
|
|
18
|
+
### 1. JSON Generation για LLM Translation
|
|
19
|
+
|
|
20
|
+
Δημιουργεί JSON από SRT αρχεία για επεξεργασία με LLM.
|
|
21
|
+
|
|
22
|
+
**Χρήση:**
|
|
23
|
+
```bash
|
|
24
|
+
python main.py --original original.srt --helper helpful.srt --output output.json
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
**Παράμετροι:**
|
|
28
|
+
- `--original`: Αρχείο υπότιτλου για μετάφραση
|
|
29
|
+
- `--helper`: Βοηθητικό αρχείο υπότιτλου (άλλη γλώσσα)
|
|
30
|
+
- `--output`: Αρχείο εξόδου JSON
|
|
31
|
+
- `--skip-sync`: Παράλειψη συγχρονισμού με ffsubsync (αν είναι ήδη συγχρονισμένο)
|
|
32
|
+
|
|
33
|
+
**JSON Format:**
|
|
34
|
+
```json
|
|
35
|
+
{
|
|
36
|
+
"id": 16,
|
|
37
|
+
"t": "00:02:28,050 --> 00:02:29,385",
|
|
38
|
+
"trans": "<i>-Det gör jag.</i>\n-Det verkar som...",
|
|
39
|
+
"h": "Parece que está fazendo\no oposto do seu trabalho,"
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
### 2. Smart Overlap Detection and Correction
|
|
46
|
+
|
|
47
|
+
Ανιχνεύει και διορθώνει timing προβλήματα στους υποτίτλους.
|
|
48
|
+
|
|
49
|
+
**Τι διορθώνει:**
|
|
50
|
+
- ✅ Overlapping timings (end_time > next_start_time)
|
|
51
|
+
- ✅ Χρονολογικά προβλήματα (start <= previous_end)
|
|
52
|
+
- ✅ Unreasonable durations (> 60 δευτ., π.χ. typos με ώρες)
|
|
53
|
+
- ✅ Duplicate timings
|
|
54
|
+
|
|
55
|
+
**Χρήση:**
|
|
56
|
+
```bash
|
|
57
|
+
python fix_overlaps.py \
|
|
58
|
+
--input greek.srt \
|
|
59
|
+
--reference original.srt \
|
|
60
|
+
--output greek_fixed.srt \
|
|
61
|
+
--window 10
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Παράμετροι:**
|
|
65
|
+
- `--input`: Αρχείο υπότιτλου με προβλήματα
|
|
66
|
+
- `--reference`: Reference αρχείο με σωστά timings
|
|
67
|
+
- `--output`: Αρχείο εξόδου
|
|
68
|
+
- `--window`: Context window για matching (default: 5)
|
|
69
|
+
- `--preprocess`: Καθαρίζει το input αρχείο πρώτα (markdown, duplicates κλπ)
|
|
70
|
+
|
|
71
|
+
**Πώς λειτουργεί:**
|
|
72
|
+
1. **Ανίχνευση**: Βρίσκει overlaps, chronological issues, unreasonable durations
|
|
73
|
+
2. **Matching**: Ταιριάζει προβληματικές γραμμές με το reference (timing-based)
|
|
74
|
+
3. **Διόρθωση**: Αντικαθιστά μόνο τα λάθος timings
|
|
75
|
+
4. **Deduplication**: Αφαιρεί duplicate timings
|
|
76
|
+
5. **Validation**: Επαληθεύει ότι δεν υπάρχουν προβλήματα
|
|
77
|
+
|
|
78
|
+
**Αποτελέσματα:**
|
|
79
|
+
```
|
|
80
|
+
Problems found: 12
|
|
81
|
+
Problems fixed: 12
|
|
82
|
+
Duplicates removed: 1
|
|
83
|
+
|
|
84
|
+
Validation:
|
|
85
|
+
no_overlaps: ✓ PASS
|
|
86
|
+
chronological_order: ✓ PASS
|
|
87
|
+
no_duplicates: ✓ PASS
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
### 3. Text Corrections with JSON
|
|
93
|
+
|
|
94
|
+
Εφαρμόζει διορθώσεις κειμένου από JSON αρχείο σε SRT.
|
|
95
|
+
|
|
96
|
+
**Τι διορθώνει:**
|
|
97
|
+
- ✅ Λεξιλόγιο και φυσικότητα
|
|
98
|
+
- ✅ Αργκό και ιδιωματισμούς
|
|
99
|
+
- ✅ Ροή και σύνταξη
|
|
100
|
+
- ✅ Γραμματική και συστολές
|
|
101
|
+
|
|
102
|
+
**Χρήση:**
|
|
103
|
+
```bash
|
|
104
|
+
python apply_corrections_FIXED.py
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Input Files:**
|
|
108
|
+
- `greek_fixed.srt`: Το SRT αρχείο που θέλουμε να διορθώσουμε
|
|
109
|
+
- `corrections.json`: JSON με τις διορθώσεις
|
|
110
|
+
|
|
111
|
+
**JSON Format:**
|
|
112
|
+
```json
|
|
113
|
+
{
|
|
114
|
+
"id": 43,
|
|
115
|
+
"rx": "δεν έβρισκες στο Λύκειο.",
|
|
116
|
+
"sb": "Στο σχολείο στέγνωνες.",
|
|
117
|
+
"rate": 8,
|
|
118
|
+
"type": "αργκό"
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Πώς λειτουργεί:**
|
|
123
|
+
1. **Global Search**: Αναζητά το `rx` (search text) σε όλα τα subtitles
|
|
124
|
+
2. **Smart Matching**: Δοκιμάζει exact, normalized και newline variants
|
|
125
|
+
3. **Apply**: Αντικαθιστά με το `sb` (replacement text)
|
|
126
|
+
4. **Report**: Αναφέρει ποιες διορθώσεις εφαρμόστηκαν
|
|
127
|
+
|
|
128
|
+
**Output:**
|
|
129
|
+
- `corrected_greek_fixed_FINAL.srt`: Το διορθωμένο αρχείο
|
|
130
|
+
- Console report με applied/not found corrections
|
|
131
|
+
|
|
132
|
+
**Αποτελέσματα:**
|
|
133
|
+
```
|
|
134
|
+
✓ ID 43 → Applied at subtitle #42 (offset: -2)
|
|
135
|
+
✓ ID 62 → Applied at subtitle #60 (offset: -2)
|
|
136
|
+
✗ ID 99: NOT FOUND
|
|
137
|
+
|
|
138
|
+
SUMMARY
|
|
139
|
+
Total corrections: 79
|
|
140
|
+
✓ Applied: 76
|
|
141
|
+
✗ Not found: 3
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Δομή Αρχείων
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
submerge/
|
|
148
|
+
├── main.py # JSON generation για LLM
|
|
149
|
+
├── subsync_matcher.py # Subtitle matching engine
|
|
150
|
+
├── fix_overlaps.py # Smart overlap correction 🆕
|
|
151
|
+
├── apply_corrections_FIXED.py # Text corrections από JSON 🆕
|
|
152
|
+
├── encoding_utils.py # Robust encoding detection
|
|
153
|
+
├── srt_preprocessor.py # SRT cleaning utilities
|
|
154
|
+
├── enhanced_matcher.py # Advanced matching algorithms
|
|
155
|
+
├── corrections.ipynb # Notebook για corrections (Google Colab)
|
|
156
|
+
├── corrections.json # Διορθώσεις κειμένου
|
|
157
|
+
├── test_fix_overlaps.py # Tests για overlap correction
|
|
158
|
+
├── test_subsync_matcher.py # Tests για matching
|
|
159
|
+
├── test_enhanced_matcher.py # Tests για enhanced matching
|
|
160
|
+
├── llm_prompt_greek.md # Οδηγίες για LLM
|
|
161
|
+
├── llm_srt_reading_guide.md # Guide για reading SRT με notes
|
|
162
|
+
├── LLM_USAGE_EXAMPLE.md # Παραδείγματα χρήσης
|
|
163
|
+
├── FIX_TIMINGS_GUIDE.md # Guide για timing correction
|
|
164
|
+
└── requirements.txt # Dependencies
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Requirements
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
pip install -r requirements.txt
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Dependencies:**
|
|
174
|
+
- `pysrt>=1.1.2` - SRT parsing
|
|
175
|
+
- `pytest>=7.0.0` - Testing
|
|
176
|
+
- `ffsubsync>=0.4.0` - Automatic timing sync (optional για main.py)
|
|
177
|
+
- `chardet>=5.0.0` - Encoding detection
|
|
178
|
+
|
|
179
|
+
## Testing
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
# Τρέξε όλα τα tests
|
|
183
|
+
pytest -v
|
|
184
|
+
|
|
185
|
+
# Μόνο overlap correction tests
|
|
186
|
+
pytest test_fix_overlaps.py -v
|
|
187
|
+
|
|
188
|
+
# Μόνο matching tests
|
|
189
|
+
pytest test_subsync_matcher.py -v
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Workflows
|
|
193
|
+
|
|
194
|
+
### Workflow 1: LLM Translation
|
|
195
|
+
```bash
|
|
196
|
+
# 1. Generate JSON
|
|
197
|
+
python main.py --original original.srt --helper helpful.srt --output for_llm.json --skip-sync
|
|
198
|
+
|
|
199
|
+
# 2. Send to LLM για μετάφραση (δες llm_prompt_greek.md)
|
|
200
|
+
|
|
201
|
+
# 3. Αν υπάρχουν timing issues στο output, χρησιμοποίησε fix_overlaps.py
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Workflow 2: Fix Timing Issues
|
|
205
|
+
```bash
|
|
206
|
+
# Διόρθωση overlaps και timing προβλημάτων
|
|
207
|
+
python fix_overlaps.py --input greek.srt --reference original.srt --output greek_fixed.srt
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Workflow 3: Apply Text Corrections
|
|
211
|
+
```bash
|
|
212
|
+
# Εφαρμογή διορθώσεων κειμένου από JSON
|
|
213
|
+
python apply_corrections_FIXED.py
|
|
214
|
+
|
|
215
|
+
# Input: greek_fixed.srt + corrections.json
|
|
216
|
+
# Output: corrected_greek_fixed_FINAL.srt
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### Complete Pipeline
|
|
220
|
+
```bash
|
|
221
|
+
# 1. Generate JSON για LLM
|
|
222
|
+
python main.py --original original.srt --helper helpful.srt --output for_llm.json
|
|
223
|
+
|
|
224
|
+
# 2. LLM μετάφραση → greek.srt
|
|
225
|
+
|
|
226
|
+
# 3. Fix timing issues
|
|
227
|
+
python fix_overlaps.py --input greek.srt --reference original.srt --output greek_fixed.srt
|
|
228
|
+
|
|
229
|
+
# 4. Apply text corrections
|
|
230
|
+
python apply_corrections_FIXED.py
|
|
231
|
+
# → corrected_greek_fixed_FINAL.srt
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Σημειώσεις
|
|
235
|
+
|
|
236
|
+
- Το matching χρησιμοποιεί **temporal overlap** για ακρίβεια
|
|
237
|
+
- Το fix_overlaps.py δουλεύει με **διαφορετικές γλώσσες** (timing-based matching)
|
|
238
|
+
- Το apply_corrections_FIXED.py κάνει **global text search** (δεν βασίζεται σε IDs)
|
|
239
|
+
- **Δεν** χρειάζεται sorting - διατηρεί την αρχική σειρά
|
|
240
|
+
- Η **αρίθμηση γραμμών** δεν έχει σημασία για τα media players
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# -*- mode: python ; coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
PyInstaller spec file for SubtitleKit desktop app
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
block_cipher = None
|
|
8
|
+
|
|
9
|
+
a = Analysis(
|
|
10
|
+
['src/subtitlekit/ui/desktop.py'],
|
|
11
|
+
pathex=[],
|
|
12
|
+
binaries=[],
|
|
13
|
+
datas=[
|
|
14
|
+
('src/subtitlekit/ui/locales/*.json', 'subtitlekit/ui/locales'),
|
|
15
|
+
],
|
|
16
|
+
hiddenimports=[
|
|
17
|
+
'subtitlekit',
|
|
18
|
+
'subtitlekit.core',
|
|
19
|
+
'subtitlekit.tools',
|
|
20
|
+
'subtitlekit.ui',
|
|
21
|
+
],
|
|
22
|
+
hookspath=[],
|
|
23
|
+
hooksconfig={},
|
|
24
|
+
runtime_hooks=[],
|
|
25
|
+
excludes=[
|
|
26
|
+
'ipywidgets', # Not needed for desktop
|
|
27
|
+
'jupyter',
|
|
28
|
+
'notebook',
|
|
29
|
+
],
|
|
30
|
+
win_no_prefer_redirects=False,
|
|
31
|
+
win_private_assemblies=False,
|
|
32
|
+
cipher=block_cipher,
|
|
33
|
+
noarchive=False,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
|
|
37
|
+
|
|
38
|
+
exe = EXE(
|
|
39
|
+
pyz,
|
|
40
|
+
a.scripts,
|
|
41
|
+
a.binaries,
|
|
42
|
+
a.zipfiles,
|
|
43
|
+
a.datas,
|
|
44
|
+
[],
|
|
45
|
+
name='SubtitleKit',
|
|
46
|
+
debug=False,
|
|
47
|
+
bootloader_ignore_signals=False,
|
|
48
|
+
strip=False,
|
|
49
|
+
upx=True,
|
|
50
|
+
upx_exclude=[],
|
|
51
|
+
runtime_tmpdir=None,
|
|
52
|
+
console=False, # No console window
|
|
53
|
+
disable_windowed_traceback=False,
|
|
54
|
+
argv_emulation=False,
|
|
55
|
+
target_arch=None,
|
|
56
|
+
codesign_identity=None,
|
|
57
|
+
entitlements_file=None,
|
|
58
|
+
icon=None, # Add icon path here if you create one
|
|
59
|
+
)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"metadata": {},
|
|
6
|
+
"source": [
|
|
7
|
+
"# SubtitleKit - Subtitle Processing\n",
|
|
8
|
+
"\n",
|
|
9
|
+
"[PyPI](https://pypi.org/project/subtitlekit/)\n",
|
|
10
|
+
"[GitHub](https://github.com/angelospk/subtitlekit)\n",
|
|
11
|
+
"\n",
|
|
12
|
+
"Εργαλεία επεξεργασίας υποτίτλων με γραφικό περιβάλλον."
|
|
13
|
+
]
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"cell_type": "markdown",
|
|
17
|
+
"metadata": {},
|
|
18
|
+
"source": [
|
|
19
|
+
"## 📦 Installation\n",
|
|
20
|
+
"\n",
|
|
21
|
+
"Εγκατάσταση της τελευταίας έκδοσης:"
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"cell_type": "code",
|
|
26
|
+
"execution_count": null,
|
|
27
|
+
"metadata": {},
|
|
28
|
+
"outputs": [],
|
|
29
|
+
"source": [
|
|
30
|
+
"!pip install -q subtitlekit"
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"cell_type": "markdown",
|
|
35
|
+
"metadata": {},
|
|
36
|
+
"source": [
|
|
37
|
+
"## 🚀 Launch UI\n",
|
|
38
|
+
"\n",
|
|
39
|
+
"Εκκίνηση γραφικού περιβάλλοντος (Ελληνικά ή Αγγλικά):"
|
|
40
|
+
]
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"cell_type": "code",
|
|
44
|
+
"execution_count": null,
|
|
45
|
+
"metadata": {},
|
|
46
|
+
"outputs": [],
|
|
47
|
+
"source": [
|
|
48
|
+
"from subtitlekit.ui import show_ui\n",
|
|
49
|
+
"\n",
|
|
50
|
+
"# Ελληνικά\n",
|
|
51
|
+
"show_ui(lang='el')\n",
|
|
52
|
+
"\n",
|
|
53
|
+
"# English\n",
|
|
54
|
+
"# show_ui(lang='en')"
|
|
55
|
+
]
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"cell_type": "markdown",
|
|
59
|
+
"metadata": {},
|
|
60
|
+
"source": [
|
|
61
|
+
"## 💡 Tips\n",
|
|
62
|
+
"\n",
|
|
63
|
+
"### Upload Files\n",
|
|
64
|
+
"Χρησιμοποίησε το file browser (αριστερά) για να ανεβάσεις τα αρχεία σου.\n",
|
|
65
|
+
"\n",
|
|
66
|
+
"### Google Drive\n",
|
|
67
|
+
"Μπορείς να συνδέσεις το Google Drive σου:"
|
|
68
|
+
]
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"cell_type": "code",
|
|
72
|
+
"execution_count": null,
|
|
73
|
+
"metadata": {},
|
|
74
|
+
"outputs": [],
|
|
75
|
+
"source": [
|
|
76
|
+
"from google.colab import drive\n",
|
|
77
|
+
"drive.mount('/content/drive')\n",
|
|
78
|
+
"\n",
|
|
79
|
+
"# Τα αρχεία σου είναι στο: /content/drive/MyDrive/"
|
|
80
|
+
]
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"cell_type": "markdown",
|
|
84
|
+
"metadata": {},
|
|
85
|
+
"source": [
|
|
86
|
+
"## 📚 Library Usage\n",
|
|
87
|
+
"\n",
|
|
88
|
+
"Αν προτιμάς programmatic χρήση:"
|
|
89
|
+
]
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"cell_type": "code",
|
|
93
|
+
"execution_count": null,
|
|
94
|
+
"metadata": {},
|
|
95
|
+
"outputs": [],
|
|
96
|
+
"source": [
|
|
97
|
+
"from subtitlekit import merge_subtitles, fix_overlaps, apply_corrections\n",
|
|
98
|
+
"\n",
|
|
99
|
+
"# Merge example\n",
|
|
100
|
+
"# merge_subtitles(\n",
|
|
101
|
+
"# \"original.srt\",\n",
|
|
102
|
+
"# [\"helper.srt\"],\n",
|
|
103
|
+
"# \"output.json\",\n",
|
|
104
|
+
"# )\n",
|
|
105
|
+
"\n",
|
|
106
|
+
"# Fix overlaps example\n",
|
|
107
|
+
"# fix_overlaps(\n",
|
|
108
|
+
"# \"input.srt\",\n",
|
|
109
|
+
"# \"reference.srt\",\n",
|
|
110
|
+
"# \"fixed.srt\"\n",
|
|
111
|
+
"# )\n",
|
|
112
|
+
"\n",
|
|
113
|
+
"# Apply corrections example\n",
|
|
114
|
+
"# apply_corrections(\n",
|
|
115
|
+
"# \"input.srt\",\n",
|
|
116
|
+
"# \"corrections.json\",\n",
|
|
117
|
+
"# \"corrected.srt\"\n",
|
|
118
|
+
"# )"
|
|
119
|
+
]
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"cell_type": "markdown",
|
|
123
|
+
"metadata": {},
|
|
124
|
+
"source": [
|
|
125
|
+
"## 📖 Documentation\n",
|
|
126
|
+
"\n",
|
|
127
|
+
"Πλήρης τεκμηρίωση: [https://github.com/angelospk/subtitlekit](https://github.com/angelospk/subtitlekit)"
|
|
128
|
+
]
|
|
129
|
+
}
|
|
130
|
+
],
|
|
131
|
+
"metadata": {
|
|
132
|
+
"kernelspec": {
|
|
133
|
+
"display_name": "Python 3",
|
|
134
|
+
"language": "python",
|
|
135
|
+
"name": "python3"
|
|
136
|
+
},
|
|
137
|
+
"language_info": {
|
|
138
|
+
"name": "python",
|
|
139
|
+
"version": "3.11.0"
|
|
140
|
+
}
|
|
141
|
+
},
|
|
142
|
+
"nbformat": 4,
|
|
143
|
+
"nbformat_minor": 2
|
|
144
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Loading files...
|
|
2
|
+
Loaded 1021 subtitles and 89 corrections
|
|
3
|
+
|
|
4
|
+
✓ ID 101 → Applied at subtitle #100 (offset: -1)
|
|
5
|
+
✓ ID 175 → Applied at subtitle #174 (offset: -1)
|
|
6
|
+
✗ ID 514: NOT FOUND - 'Ποια πηδάς;...'
|
|
7
|
+
✗ ID 651: NOT FOUND - 'Η σκέψη... να μπω σ' ένα άδειο διαμέρισμα είναι
|
|
8
|
+
απ...'
|
|
9
|
+
✗ ID 676: NOT FOUND - 'Ε, μπεργκερά....'
|
|
10
|
+
✓ ID 720 → Applied at subtitle #719 (offset: -1)
|
|
11
|
+
✓ ID 738 → Applied at subtitle #739 (offset: +1)
|
|
12
|
+
✗ ID 818: NOT FOUND - '- Καταπίνουν;
|
|
13
|
+
- Γκέιμπ!...'
|
|
14
|
+
✓ ID 950 → Applied at subtitle #949 (offset: -1)
|
|
15
|
+
✓ ID 979 → Applied at subtitle #974 (offset: -5)
|
|
16
|
+
|
|
17
|
+
Saving to corrected_greek_fixed_FINAL.srt...
|
|
18
|
+
|
|
19
|
+
================================================================================
|
|
20
|
+
SUMMARY
|
|
21
|
+
================================================================================
|
|
22
|
+
Total corrections: 89
|
|
23
|
+
✓ Applied: 85
|
|
24
|
+
✗ Not found: 4
|
|
25
|
+
|
|
26
|
+
Output saved to: corrected_greek_fixed_FINAL.srt
|
|
27
|
+
|