subaligner 0.2.4__py3.7.egg → 0.3.0__py3.7.egg
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- EGG-INFO/PKG-INFO +14 -14
- EGG-INFO/SOURCES.txt +1 -0
- EGG-INFO/requires.txt +23 -13
- EGG-INFO/scripts/subaligner +82 -28
- EGG-INFO/scripts/subaligner_1pass +1 -1
- EGG-INFO/scripts/subaligner_2pass +1 -1
- EGG-INFO/scripts/subaligner_batch +1 -1
- EGG-INFO/scripts/subaligner_convert +1 -1
- subaligner/__init__.py +2 -0
- subaligner/__main__.py +82 -28
- subaligner/_version.py +1 -1
- subaligner/exception.py +4 -0
- subaligner/predictor.py +1 -1
- subaligner/subaligner_1pass/__main__.py +1 -1
- subaligner/subaligner_2pass/__main__.py +1 -1
- subaligner/subaligner_batch/__main__.py +1 -1
- subaligner/subaligner_convert/__main__.py +1 -1
- subaligner/subtitle.py +15 -0
- subaligner/trainer.py +2 -2
- subaligner/transcriber.py +118 -0
- subaligner/translator.py +65 -23
EGG-INFO/PKG-INFO
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: subaligner
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Automatically synchronize and translate subtitles with pretrained deep neural networks, forced alignments and transformers.
|
|
5
5
|
Home-page: https://subaligner.readthedocs.io/en/latest/
|
|
6
6
|
Author: Xi Bai
|
|
7
7
|
Author-email: xi.bai.ed@gmail.com
|
|
8
8
|
License: MIT
|
|
9
|
-
Platform: UNKNOWN
|
|
10
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.7
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.8
|
|
@@ -19,6 +18,7 @@ Provides-Extra: dev
|
|
|
19
18
|
Provides-Extra: docs
|
|
20
19
|
Provides-Extra: stretch
|
|
21
20
|
Provides-Extra: translation
|
|
21
|
+
Provides-Extra: llm
|
|
22
22
|
License-File: LICENSE
|
|
23
23
|
|
|
24
24
|
<div align="center">
|
|
@@ -26,11 +26,12 @@ License-File: LICENSE
|
|
|
26
26
|
</div>
|
|
27
27
|
|
|
28
28
|
[](https://github.com/baxtree/subaligner/actions/workflows/ci-pipeline.yml?query=branch%3Amaster) 
|
|
29
|
-
[](https://www.python.org/downloads/release/python-390/) [](https://www.python.org/downloads/release/python-380/) [](https://www.python.org/downloads/release/python-370/)
|
|
29
|
+
[](https://www.python.org/downloads/release/python-3100/) [](https://www.python.org/downloads/release/python-390/) [](https://www.python.org/downloads/release/python-380/) [](https://www.python.org/downloads/release/python-370/)
|
|
30
30
|
[](https://subaligner.readthedocs.io/en/latest/?badge=latest)
|
|
31
31
|
[](https://github.com/baxtree/subaligner/blob/master/LICENSE)
|
|
32
32
|
[](https://badge.fury.io/py/subaligner)
|
|
33
|
-
[](https://hub.docker.com/r/baxtree/subaligner/builds)
|
|
33
|
+
[](https://hub.docker.com/r/baxtree/subaligner/builds)
|
|
34
|
+
[](https://hub.docker.com/r/baxtree/subaligner)
|
|
34
35
|
[](https://doi.org/10.5281/zenodo.5603083)
|
|
35
36
|
|
|
36
37
|
## Supported Formats
|
|
@@ -56,9 +57,9 @@ $ pip install subaligner
|
|
|
56
57
|
|
|
57
58
|
## Installation with Optional Packages Supporting Additional Features
|
|
58
59
|
```
|
|
59
|
-
# Install dependencies for enabling translation
|
|
60
|
+
# Install dependencies for enabling translation and transcription
|
|
60
61
|
|
|
61
|
-
$ pip install 'subaligner[translation]'
|
|
62
|
+
$ pip install 'subaligner[llm]'
|
|
62
63
|
```
|
|
63
64
|
```
|
|
64
65
|
# Install dependencies for enabling forced alignment
|
|
@@ -140,6 +141,10 @@ $ subaligner -m single -v https://example.com/video.mp4 -s https://example.com/s
|
|
|
140
141
|
$ subaligner -m dual -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt
|
|
141
142
|
```
|
|
142
143
|
```
|
|
144
|
+
# Generate subtitles by transcribing audiovisual files
|
|
145
|
+
$ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt
|
|
146
|
+
```
|
|
147
|
+
```
|
|
143
148
|
# Alignment on segmented plain texts (double newlines as the delimiter)
|
|
144
149
|
|
|
145
150
|
$ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt
|
|
@@ -159,15 +164,11 @@ $ subaligner -m dual -v video.mkv -s embedded:stream_index=0 -o subtitle_aligned
|
|
|
159
164
|
```
|
|
160
165
|
```
|
|
161
166
|
# Translative alignment with the ISO 639-3 language code pair (src,tgt)
|
|
162
|
-
|
|
163
|
-
$ subaligner_1pass --languages
|
|
164
|
-
$ subaligner_1pass -v video.mp4 -s subtitle.srt -t src,tgt
|
|
165
|
-
$ subaligner_2pass --languages
|
|
166
|
-
$ subaligner_2pass -v video.mp4 -s subtitle.srt -t src,tgt
|
|
167
167
|
$ subaligner --languages
|
|
168
168
|
$ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt
|
|
169
169
|
$ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt
|
|
170
170
|
$ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt
|
|
171
|
+
$ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt
|
|
171
172
|
```
|
|
172
173
|
```
|
|
173
174
|
# Shift subtitle manually by offset in seconds
|
|
@@ -236,10 +237,9 @@ This tool wouldn't be possible without the following packages:
|
|
|
236
237
|
[pysrt](https://github.com/byroot/pysrt)
|
|
237
238
|
[pysubs2](https://github.com/tkarabela/pysubs2)
|
|
238
239
|
[aeneas](https://www.readbeyond.it/aeneas/)
|
|
239
|
-
[transformers](https://huggingface.co/transformers/)
|
|
240
|
+
[transformers](https://huggingface.co/transformers/)
|
|
241
|
+
[openai-whisper](https://github.com/openai/whisper).
|
|
240
242
|
|
|
241
243
|
Thanks to Alan Robinson and Nigel Megitt for their invaluable feedback.
|
|
242
244
|
|
|
243
245
|
|
|
244
|
-
|
|
245
|
-
|
EGG-INFO/SOURCES.txt
CHANGED
EGG-INFO/requires.txt
CHANGED
|
@@ -6,19 +6,19 @@ tornado==5.1.0
|
|
|
6
6
|
toolz==0.9.0
|
|
7
7
|
toml==0.10.0
|
|
8
8
|
termcolor==1.1.0
|
|
9
|
-
tensorflow<2.
|
|
9
|
+
tensorflow<2.9,>=1.15.5
|
|
10
10
|
tblib==1.3.2
|
|
11
11
|
six~=1.15.0
|
|
12
12
|
setuptools>=41.0.0
|
|
13
13
|
scikit-learn~=0.24.2
|
|
14
|
-
scipy
|
|
14
|
+
scipy<=1.8.1
|
|
15
15
|
rsa==4.7
|
|
16
16
|
requests-oauthlib==1.3.0
|
|
17
17
|
requests~=2.25.1
|
|
18
18
|
PyYAML>=4.2b1
|
|
19
19
|
pytz==2018.4
|
|
20
20
|
pystack-debugger==0.8.0
|
|
21
|
-
pysubs2
|
|
21
|
+
pysubs2<=1.4.2
|
|
22
22
|
pysrt==1.1.1
|
|
23
23
|
pyprof2calltree==1.4.3
|
|
24
24
|
pydotplus==2.0.2
|
|
@@ -31,7 +31,7 @@ psutil==5.6.7
|
|
|
31
31
|
pluggy==0.13.1
|
|
32
32
|
pbr==4.0.2
|
|
33
33
|
oauthlib==3.1.0
|
|
34
|
-
numpy<1.
|
|
34
|
+
numpy<1.24.0
|
|
35
35
|
numba>=0.50.0
|
|
36
36
|
msgpack-python==0.5.6
|
|
37
37
|
networkx>=2.5.1
|
|
@@ -48,13 +48,13 @@ isort==4.3.4
|
|
|
48
48
|
idna==2.8
|
|
49
49
|
hyperopt==0.2.4
|
|
50
50
|
html5lib==1.0b9
|
|
51
|
-
h5py
|
|
51
|
+
h5py<=3.6.0
|
|
52
52
|
HeapDict==1.0.0
|
|
53
53
|
graphviz==0.8.3
|
|
54
54
|
google-pasta~=0.2
|
|
55
55
|
google-auth-oauthlib==0.4.2
|
|
56
56
|
google-auth==1.27.0
|
|
57
|
-
filelock
|
|
57
|
+
filelock<4.0.0
|
|
58
58
|
distributed==1.13.0
|
|
59
59
|
decorator==4.3.0
|
|
60
60
|
dask<2022.1.0
|
|
@@ -81,7 +81,7 @@ typing-extensions<4.0.0
|
|
|
81
81
|
types-setuptools==57.4.9
|
|
82
82
|
types-requests==2.27.9
|
|
83
83
|
mypy==0.931
|
|
84
|
-
pex
|
|
84
|
+
pex<=2.1.80
|
|
85
85
|
radish-bdd~=0.13.3
|
|
86
86
|
scikit-build==0.11.1
|
|
87
87
|
line-profiler==3.1.0
|
|
@@ -92,8 +92,9 @@ tox~=3.23.0
|
|
|
92
92
|
coverage==5.5
|
|
93
93
|
mock==4.0.3
|
|
94
94
|
aeneas~=1.7.3.0
|
|
95
|
-
|
|
96
|
-
|
|
95
|
+
openai-whisper==20230124
|
|
96
|
+
transformers<4.27.0
|
|
97
|
+
torch<1.13.0
|
|
97
98
|
sentencepiece~=0.1.95
|
|
98
99
|
pycountry~=20.7.3
|
|
99
100
|
docutils~=0.17.0
|
|
@@ -107,8 +108,16 @@ sphinx==3.3.1
|
|
|
107
108
|
|
|
108
109
|
[harmony]
|
|
109
110
|
aeneas~=1.7.3.0
|
|
110
|
-
|
|
111
|
-
|
|
111
|
+
openai-whisper==20230124
|
|
112
|
+
transformers<4.27.0
|
|
113
|
+
torch<1.13.0
|
|
114
|
+
sentencepiece~=0.1.95
|
|
115
|
+
pycountry~=20.7.3
|
|
116
|
+
|
|
117
|
+
[llm]
|
|
118
|
+
openai-whisper==20230124
|
|
119
|
+
transformers<4.27.0
|
|
120
|
+
torch<1.13.0
|
|
112
121
|
sentencepiece~=0.1.95
|
|
113
122
|
pycountry~=20.7.3
|
|
114
123
|
|
|
@@ -116,7 +125,8 @@ pycountry~=20.7.3
|
|
|
116
125
|
aeneas~=1.7.3.0
|
|
117
126
|
|
|
118
127
|
[translation]
|
|
119
|
-
|
|
120
|
-
|
|
128
|
+
openai-whisper==20230124
|
|
129
|
+
transformers<4.27.0
|
|
130
|
+
torch<1.13.0
|
|
121
131
|
sentencepiece~=0.1.95
|
|
122
132
|
pycountry~=20.7.3
|
EGG-INFO/scripts/subaligner
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
#!python
|
|
2
2
|
"""
|
|
3
|
-
usage: subaligner [-h] [-m {single,dual,script,shift}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
|
|
3
|
+
usage: subaligner [-h] [-m {single,dual,script,shift,transcribe}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
|
|
4
4
|
[-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
|
|
5
|
-
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
|
|
5
|
+
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
|
|
6
|
+
[-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
|
|
7
|
+
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-lgs] [-d] [-q] [-ver]
|
|
6
8
|
|
|
7
9
|
Subaligner command line interface
|
|
8
10
|
|
|
9
11
|
optional arguments:
|
|
10
12
|
-h, --help show this help message and exit
|
|
13
|
+
-s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
|
|
14
|
+
File path or URL to the subtitle file (Extensions of supported subtitles: .ssa, .vtt, .srt, .txt, .smi, .ytt, .sub, .xml, .sbv, .ass, .sami, .scc, .tmp, .stl, .ttml, .dfxp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
|
|
11
15
|
-l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS
|
|
12
16
|
Max global log loss for alignment
|
|
13
17
|
-so, --stretch_on Switch on stretch on subtitles)
|
|
@@ -23,18 +27,22 @@ optional arguments:
|
|
|
23
27
|
Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho)
|
|
24
28
|
-os OFFSET_SECONDS, --offset_seconds OFFSET_SECONDS
|
|
25
29
|
Offset by which the subtitle will be shifted
|
|
30
|
+
-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}
|
|
31
|
+
Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
|
|
32
|
+
-mr {whisper}, --llm_recipe {whisper}
|
|
33
|
+
LLM recipe used for transcribing video files
|
|
34
|
+
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --llm_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}
|
|
35
|
+
Flavour variation for a specific LLM recipe
|
|
26
36
|
-lgs, --languages Print out language codes used for stretch and translation
|
|
27
37
|
-d, --debug Print out debugging information
|
|
28
38
|
-q, --quiet Switch off logging information
|
|
29
39
|
-ver, --version show program's version number and exit
|
|
30
40
|
|
|
31
41
|
required arguments:
|
|
32
|
-
-m {single,dual,script,shift}, --mode {single,dual,script,shift}
|
|
33
|
-
Alignment mode:
|
|
42
|
+
-m {single,dual,script,shift,transcribe}, --mode {single,dual,script,shift,transcribe}
|
|
43
|
+
Alignment mode: single, dual, script, shift or transcribe
|
|
34
44
|
-v VIDEO_PATH, --video_path VIDEO_PATH
|
|
35
45
|
File path or URL to the video file
|
|
36
|
-
-s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
|
|
37
|
-
File path or URL to the subtitle file (Extensions of supported subtitles: .sami, .ssa, .vtt, .xml, .sub, .smi, .ass, .srt, .tmp, .dfxp, .stl, .ttml, .sbv, .txt, .ytt, .scc) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
|
|
38
46
|
"""
|
|
39
47
|
|
|
40
48
|
import argparse
|
|
@@ -61,10 +69,10 @@ def main():
|
|
|
61
69
|
required_args.add_argument(
|
|
62
70
|
"-m",
|
|
63
71
|
"--mode",
|
|
64
|
-
type=str,
|
|
72
|
+
type=str.lower,
|
|
65
73
|
default="",
|
|
66
|
-
choices=["single", "dual", "script", "shift"],
|
|
67
|
-
help="Alignment mode:
|
|
74
|
+
choices=["single", "dual", "script", "shift", "transcribe"],
|
|
75
|
+
help="Alignment mode: single, dual, script, shift or transcribe",
|
|
68
76
|
)
|
|
69
77
|
required_args.add_argument(
|
|
70
78
|
"-v",
|
|
@@ -74,7 +82,7 @@ def main():
|
|
|
74
82
|
help="File path or URL to the video file",
|
|
75
83
|
)
|
|
76
84
|
from subaligner.subtitle import Subtitle
|
|
77
|
-
|
|
85
|
+
parser.add_argument(
|
|
78
86
|
"-s",
|
|
79
87
|
"--subtitle_path",
|
|
80
88
|
type=str,
|
|
@@ -100,7 +108,7 @@ def main():
|
|
|
100
108
|
parser.add_argument(
|
|
101
109
|
"-sil",
|
|
102
110
|
"--stretch_in_language",
|
|
103
|
-
type=str,
|
|
111
|
+
type=str.lower,
|
|
104
112
|
choices=Utils.get_stretch_language_codes(),
|
|
105
113
|
default="eng",
|
|
106
114
|
help="Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes].\nNB: This will be ignored if neither -so nor --stretch_on is present",
|
|
@@ -137,6 +145,29 @@ def main():
|
|
|
137
145
|
type=float,
|
|
138
146
|
help="Offset by which the subtitle will be shifted"
|
|
139
147
|
)
|
|
148
|
+
parser.add_argument(
|
|
149
|
+
"-ml",
|
|
150
|
+
"--main_language",
|
|
151
|
+
type=str.lower,
|
|
152
|
+
choices=Utils.get_stretch_language_codes(),
|
|
153
|
+
help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]",
|
|
154
|
+
)
|
|
155
|
+
parser.add_argument(
|
|
156
|
+
"-mr",
|
|
157
|
+
"--llm_recipe",
|
|
158
|
+
type=str.lower,
|
|
159
|
+
default="whisper",
|
|
160
|
+
choices=["whisper"],
|
|
161
|
+
help="LLM recipe used for transcribing video files"
|
|
162
|
+
)
|
|
163
|
+
parser.add_argument(
|
|
164
|
+
"-mf",
|
|
165
|
+
"--llm_flavour",
|
|
166
|
+
type=str.lower,
|
|
167
|
+
default="small",
|
|
168
|
+
choices=["tiny", "tiny.en", "small", "medium", "medium.en", "base", "base.en", "large-v1", "large-v2", "large"],
|
|
169
|
+
help="Flavour variation for a specific LLM recipe"
|
|
170
|
+
)
|
|
140
171
|
parser.add_argument("-lgs", "--languages", action="store_true",
|
|
141
172
|
help="Print out language codes used for stretch and translation")
|
|
142
173
|
parser.add_argument("-d", "--debug", action="store_true",
|
|
@@ -153,33 +184,45 @@ def main():
|
|
|
153
184
|
print("ERROR: --mode was not passed in")
|
|
154
185
|
parser.print_usage()
|
|
155
186
|
sys.exit(21)
|
|
187
|
+
|
|
156
188
|
FLAGS.subtitle_path = [path for paths in FLAGS.subtitle_path for path in paths]
|
|
157
189
|
|
|
158
|
-
if not FLAGS.subtitle_path:
|
|
190
|
+
if not FLAGS.subtitle_path and FLAGS.mode != "transcribe":
|
|
159
191
|
print("ERROR: --subtitle_path was not passed in")
|
|
160
192
|
parser.print_usage()
|
|
161
193
|
sys.exit(21)
|
|
162
|
-
|
|
194
|
+
elif FLAGS.mode == "transcribe":
|
|
195
|
+
FLAGS.subtitle_path = ["{}.srt".format(tempfile.mkstemp()[1])]
|
|
196
|
+
if FLAGS.mode in ["single", "dual", "script", "transcribe"]:
|
|
163
197
|
for subtitle_path in FLAGS.subtitle_path:
|
|
164
198
|
if FLAGS.video_path == "":
|
|
165
199
|
print("ERROR: --video_path was not passed in")
|
|
166
200
|
parser.print_usage()
|
|
167
201
|
sys.exit(21)
|
|
168
202
|
if subtitle_path.lower().startswith("http") and FLAGS.output == "":
|
|
169
|
-
print("ERROR: --output was not passed in
|
|
203
|
+
print("ERROR: --output was not passed in but required by alignment on a remote subtitle file")
|
|
170
204
|
parser.print_usage()
|
|
171
205
|
sys.exit(21)
|
|
172
206
|
if subtitle_path.lower().startswith("embedded:") and FLAGS.output == "":
|
|
173
|
-
print("ERROR: --output was not passed in
|
|
207
|
+
print("ERROR: --output was not passed in but required by alignment on embedded subtitles")
|
|
174
208
|
parser.print_usage()
|
|
175
209
|
sys.exit(21)
|
|
176
210
|
if FLAGS.mode == "script" and FLAGS.output == "":
|
|
177
|
-
print("ERROR: --output was not passed in
|
|
211
|
+
print("ERROR: --output was not passed in but required by alignment on plain texts")
|
|
178
212
|
parser.print_usage()
|
|
179
213
|
sys.exit(21)
|
|
180
|
-
if FLAGS.
|
|
214
|
+
if FLAGS.mode == "transcribe":
|
|
215
|
+
if FLAGS.output == "":
|
|
216
|
+
print("ERROR: --output was not passed in but required by mode 'transcribe'")
|
|
217
|
+
parser.print_usage()
|
|
218
|
+
sys.exit(21)
|
|
219
|
+
if FLAGS.main_language is None:
|
|
220
|
+
print("ERROR: --main_language was not passed in but required by mode 'transcribe'")
|
|
221
|
+
parser.print_usage()
|
|
222
|
+
sys.exit(21)
|
|
223
|
+
if FLAGS.translate is not None or FLAGS.mode == "transcribe":
|
|
181
224
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
182
|
-
print('ERROR: Alignment has been configured to
|
|
225
|
+
print('ERROR: Alignment has been configured to use language models. Please install "subaligner[llm]" and run your command again.')
|
|
183
226
|
sys.exit(21)
|
|
184
227
|
if FLAGS.stretch_on or FLAGS.mode == "script":
|
|
185
228
|
if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
@@ -190,13 +233,13 @@ def main():
|
|
|
190
233
|
local_subtitle_path = subtitle_path
|
|
191
234
|
exit_segfail = FLAGS.exit_segfail
|
|
192
235
|
stretch = FLAGS.stretch_on
|
|
193
|
-
stretch_in_lang = FLAGS.stretch_in_language
|
|
236
|
+
stretch_in_lang = FLAGS.main_language or FLAGS.stretch_in_language
|
|
194
237
|
|
|
195
238
|
from subaligner.logger import Logger
|
|
196
239
|
Logger.VERBOSE = FLAGS.debug
|
|
197
240
|
Logger.QUIET = FLAGS.quiet
|
|
198
241
|
from subaligner.predictor import Predictor
|
|
199
|
-
from subaligner.exception import UnsupportedFormatException
|
|
242
|
+
from subaligner.exception import UnsupportedFormatException, TranscriptionException
|
|
200
243
|
from subaligner.exception import TerminalException
|
|
201
244
|
|
|
202
245
|
try:
|
|
@@ -230,6 +273,7 @@ def main():
|
|
|
230
273
|
parser.print_usage()
|
|
231
274
|
sys.exit(21)
|
|
232
275
|
|
|
276
|
+
voice_probabilities = None
|
|
233
277
|
predictor = Predictor()
|
|
234
278
|
if FLAGS.mode == "single":
|
|
235
279
|
aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass(
|
|
@@ -252,6 +296,11 @@ def main():
|
|
|
252
296
|
subtitle_file_path=local_subtitle_path,
|
|
253
297
|
stretch_in_lang=stretch_in_lang,
|
|
254
298
|
)
|
|
299
|
+
elif FLAGS.mode == "transcribe":
|
|
300
|
+
from subaligner.transcriber import Transcriber
|
|
301
|
+
transcriber = Transcriber(recipe=FLAGS.llm_recipe, flavour=FLAGS.llm_flavour)
|
|
302
|
+
subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang)
|
|
303
|
+
aligned_subs = subtitle.subs
|
|
255
304
|
else:
|
|
256
305
|
print("ERROR: Unknown mode {}".format(FLAGS.mode))
|
|
257
306
|
parser.print_usage()
|
|
@@ -267,6 +316,9 @@ def main():
|
|
|
267
316
|
aligned_subs = translator.translate(aligned_subs)
|
|
268
317
|
Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
|
|
269
318
|
frame_rate, "utf-8")
|
|
319
|
+
elif FLAGS.mode == "transcribe":
|
|
320
|
+
Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
|
|
321
|
+
frame_rate, "utf-8")
|
|
270
322
|
else:
|
|
271
323
|
Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
|
|
272
324
|
frame_rate)
|
|
@@ -277,35 +329,35 @@ def main():
|
|
|
277
329
|
print(
|
|
278
330
|
"ERROR: Alignment failed with a too high loss value: {}".format(log_loss)
|
|
279
331
|
)
|
|
280
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
332
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
281
333
|
sys.exit(22)
|
|
282
334
|
|
|
283
335
|
print("Aligned subtitle saved to: {}".format(aligned_subtitle_path))
|
|
284
|
-
except UnsupportedFormatException as e:
|
|
336
|
+
except (UnsupportedFormatException, TranscriptionException) as e:
|
|
285
337
|
print(
|
|
286
338
|
"ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
|
|
287
339
|
)
|
|
288
340
|
traceback.print_tb(e.__traceback__)
|
|
289
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
341
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
290
342
|
sys.exit(23)
|
|
291
343
|
except TerminalException as e:
|
|
292
344
|
print(
|
|
293
345
|
"ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
|
|
294
346
|
)
|
|
295
347
|
traceback.print_tb(e.__traceback__)
|
|
296
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
348
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
297
349
|
sys.exit(24)
|
|
298
350
|
except Exception as e:
|
|
299
351
|
print(
|
|
300
352
|
"ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
|
|
301
353
|
)
|
|
302
354
|
traceback.print_tb(e.__traceback__)
|
|
303
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
355
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
304
356
|
sys.exit(1)
|
|
305
357
|
else:
|
|
306
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
358
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
307
359
|
sys.exit(0)
|
|
308
|
-
|
|
360
|
+
elif FLAGS.mode == "shift":
|
|
309
361
|
if FLAGS.offset_seconds is None:
|
|
310
362
|
print("ERROR: --offset_seconds was not passed in during subtitle shifting")
|
|
311
363
|
sys.exit(21)
|
|
@@ -319,11 +371,13 @@ def main():
|
|
|
319
371
|
sys.exit(0)
|
|
320
372
|
|
|
321
373
|
|
|
322
|
-
def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path):
|
|
374
|
+
def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path, mode):
|
|
323
375
|
if video_path.lower().startswith("http") and os.path.exists(local_video_path):
|
|
324
376
|
os.remove(local_video_path)
|
|
325
377
|
if subtitle_path.lower().startswith("http") and os.path.exists(local_subtitle_path):
|
|
326
378
|
os.remove(local_subtitle_path)
|
|
379
|
+
if mode == "transcribe" and os.path.exists(local_subtitle_path):
|
|
380
|
+
os.remove(local_subtitle_path)
|
|
327
381
|
|
|
328
382
|
|
|
329
383
|
if __name__ == "__main__":
|
|
@@ -120,7 +120,7 @@ def main():
|
|
|
120
120
|
sys.exit(21)
|
|
121
121
|
if FLAGS.translate is not None:
|
|
122
122
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
123
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
123
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
124
124
|
sys.exit(21)
|
|
125
125
|
|
|
126
126
|
local_video_path = FLAGS.video_path
|
|
@@ -147,7 +147,7 @@ def main():
|
|
|
147
147
|
sys.exit(21)
|
|
148
148
|
if FLAGS.translate is not None:
|
|
149
149
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
150
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
150
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
151
151
|
sys.exit(21)
|
|
152
152
|
if FLAGS.stretch_on:
|
|
153
153
|
if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
@@ -173,7 +173,7 @@ Each file pair needs to share the same base filename, the part before the extens
|
|
|
173
173
|
sys.exit(21)
|
|
174
174
|
if FLAGS.translate is not None:
|
|
175
175
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
176
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
176
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
177
177
|
sys.exit(21)
|
|
178
178
|
|
|
179
179
|
video_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in
|
|
@@ -99,7 +99,7 @@ def main():
|
|
|
99
99
|
sys.exit(21)
|
|
100
100
|
if FLAGS.translate is not None:
|
|
101
101
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
102
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
102
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
103
103
|
sys.exit(21)
|
|
104
104
|
|
|
105
105
|
local_subtitle_path = FLAGS.input_subtitle_path
|
subaligner/__init__.py
CHANGED
subaligner/__main__.py
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
#!/usr/bin/env python
|
|
2
2
|
"""
|
|
3
|
-
usage: subaligner [-h] [-m {single,dual,script,shift}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
|
|
3
|
+
usage: subaligner [-h] [-m {single,dual,script,shift,transcribe}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
|
|
4
4
|
[-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
|
|
5
|
-
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
|
|
5
|
+
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
|
|
6
|
+
[-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
|
|
7
|
+
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-lgs] [-d] [-q] [-ver]
|
|
6
8
|
|
|
7
9
|
Subaligner command line interface
|
|
8
10
|
|
|
9
11
|
optional arguments:
|
|
10
12
|
-h, --help show this help message and exit
|
|
13
|
+
-s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
|
|
14
|
+
File path or URL to the subtitle file (Extensions of supported subtitles: .ssa, .vtt, .srt, .txt, .smi, .ytt, .sub, .xml, .sbv, .ass, .sami, .scc, .tmp, .stl, .ttml, .dfxp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
|
|
11
15
|
-l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS
|
|
12
16
|
Max global log loss for alignment
|
|
13
17
|
-so, --stretch_on Switch on stretch on subtitles)
|
|
@@ -23,18 +27,22 @@ optional arguments:
|
|
|
23
27
|
Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho)
|
|
24
28
|
-os OFFSET_SECONDS, --offset_seconds OFFSET_SECONDS
|
|
25
29
|
Offset by which the subtitle will be shifted
|
|
30
|
+
-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}
|
|
31
|
+
Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
|
|
32
|
+
-mr {whisper}, --llm_recipe {whisper}
|
|
33
|
+
LLM recipe used for transcribing video files
|
|
34
|
+
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --llm_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}
|
|
35
|
+
Flavour variation for a specific LLM recipe
|
|
26
36
|
-lgs, --languages Print out language codes used for stretch and translation
|
|
27
37
|
-d, --debug Print out debugging information
|
|
28
38
|
-q, --quiet Switch off logging information
|
|
29
39
|
-ver, --version show program's version number and exit
|
|
30
40
|
|
|
31
41
|
required arguments:
|
|
32
|
-
-m {single,dual,script,shift}, --mode {single,dual,script,shift}
|
|
33
|
-
Alignment mode:
|
|
42
|
+
-m {single,dual,script,shift,transcribe}, --mode {single,dual,script,shift,transcribe}
|
|
43
|
+
Alignment mode: single, dual, script, shift or transcribe
|
|
34
44
|
-v VIDEO_PATH, --video_path VIDEO_PATH
|
|
35
45
|
File path or URL to the video file
|
|
36
|
-
-s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
|
|
37
|
-
File path or URL to the subtitle file (Extensions of supported subtitles: .sami, .ssa, .vtt, .xml, .sub, .smi, .ass, .srt, .tmp, .dfxp, .stl, .ttml, .sbv, .txt, .ytt, .scc) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
|
|
38
46
|
"""
|
|
39
47
|
|
|
40
48
|
import argparse
|
|
@@ -61,10 +69,10 @@ def main():
|
|
|
61
69
|
required_args.add_argument(
|
|
62
70
|
"-m",
|
|
63
71
|
"--mode",
|
|
64
|
-
type=str,
|
|
72
|
+
type=str.lower,
|
|
65
73
|
default="",
|
|
66
|
-
choices=["single", "dual", "script", "shift"],
|
|
67
|
-
help="Alignment mode:
|
|
74
|
+
choices=["single", "dual", "script", "shift", "transcribe"],
|
|
75
|
+
help="Alignment mode: single, dual, script, shift or transcribe",
|
|
68
76
|
)
|
|
69
77
|
required_args.add_argument(
|
|
70
78
|
"-v",
|
|
@@ -74,7 +82,7 @@ def main():
|
|
|
74
82
|
help="File path or URL to the video file",
|
|
75
83
|
)
|
|
76
84
|
from subaligner.subtitle import Subtitle
|
|
77
|
-
|
|
85
|
+
parser.add_argument(
|
|
78
86
|
"-s",
|
|
79
87
|
"--subtitle_path",
|
|
80
88
|
type=str,
|
|
@@ -100,7 +108,7 @@ def main():
|
|
|
100
108
|
parser.add_argument(
|
|
101
109
|
"-sil",
|
|
102
110
|
"--stretch_in_language",
|
|
103
|
-
type=str,
|
|
111
|
+
type=str.lower,
|
|
104
112
|
choices=Utils.get_stretch_language_codes(),
|
|
105
113
|
default="eng",
|
|
106
114
|
help="Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes].\nNB: This will be ignored if neither -so nor --stretch_on is present",
|
|
@@ -137,6 +145,29 @@ def main():
|
|
|
137
145
|
type=float,
|
|
138
146
|
help="Offset by which the subtitle will be shifted"
|
|
139
147
|
)
|
|
148
|
+
parser.add_argument(
|
|
149
|
+
"-ml",
|
|
150
|
+
"--main_language",
|
|
151
|
+
type=str.lower,
|
|
152
|
+
choices=Utils.get_stretch_language_codes(),
|
|
153
|
+
help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]",
|
|
154
|
+
)
|
|
155
|
+
parser.add_argument(
|
|
156
|
+
"-mr",
|
|
157
|
+
"--llm_recipe",
|
|
158
|
+
type=str.lower,
|
|
159
|
+
default="whisper",
|
|
160
|
+
choices=["whisper"],
|
|
161
|
+
help="LLM recipe used for transcribing video files"
|
|
162
|
+
)
|
|
163
|
+
parser.add_argument(
|
|
164
|
+
"-mf",
|
|
165
|
+
"--llm_flavour",
|
|
166
|
+
type=str.lower,
|
|
167
|
+
default="small",
|
|
168
|
+
choices=["tiny", "tiny.en", "small", "medium", "medium.en", "base", "base.en", "large-v1", "large-v2", "large"],
|
|
169
|
+
help="Flavour variation for a specific LLM recipe"
|
|
170
|
+
)
|
|
140
171
|
parser.add_argument("-lgs", "--languages", action="store_true",
|
|
141
172
|
help="Print out language codes used for stretch and translation")
|
|
142
173
|
parser.add_argument("-d", "--debug", action="store_true",
|
|
@@ -153,33 +184,45 @@ def main():
|
|
|
153
184
|
print("ERROR: --mode was not passed in")
|
|
154
185
|
parser.print_usage()
|
|
155
186
|
sys.exit(21)
|
|
187
|
+
|
|
156
188
|
FLAGS.subtitle_path = [path for paths in FLAGS.subtitle_path for path in paths]
|
|
157
189
|
|
|
158
|
-
if not FLAGS.subtitle_path:
|
|
190
|
+
if not FLAGS.subtitle_path and FLAGS.mode != "transcribe":
|
|
159
191
|
print("ERROR: --subtitle_path was not passed in")
|
|
160
192
|
parser.print_usage()
|
|
161
193
|
sys.exit(21)
|
|
162
|
-
|
|
194
|
+
elif FLAGS.mode == "transcribe":
|
|
195
|
+
FLAGS.subtitle_path = ["{}.srt".format(tempfile.mkstemp()[1])]
|
|
196
|
+
if FLAGS.mode in ["single", "dual", "script", "transcribe"]:
|
|
163
197
|
for subtitle_path in FLAGS.subtitle_path:
|
|
164
198
|
if FLAGS.video_path == "":
|
|
165
199
|
print("ERROR: --video_path was not passed in")
|
|
166
200
|
parser.print_usage()
|
|
167
201
|
sys.exit(21)
|
|
168
202
|
if subtitle_path.lower().startswith("http") and FLAGS.output == "":
|
|
169
|
-
print("ERROR: --output was not passed in
|
|
203
|
+
print("ERROR: --output was not passed in but required by alignment on a remote subtitle file")
|
|
170
204
|
parser.print_usage()
|
|
171
205
|
sys.exit(21)
|
|
172
206
|
if subtitle_path.lower().startswith("embedded:") and FLAGS.output == "":
|
|
173
|
-
print("ERROR: --output was not passed in
|
|
207
|
+
print("ERROR: --output was not passed in but required by alignment on embedded subtitles")
|
|
174
208
|
parser.print_usage()
|
|
175
209
|
sys.exit(21)
|
|
176
210
|
if FLAGS.mode == "script" and FLAGS.output == "":
|
|
177
|
-
print("ERROR: --output was not passed in
|
|
211
|
+
print("ERROR: --output was not passed in but required by alignment on plain texts")
|
|
178
212
|
parser.print_usage()
|
|
179
213
|
sys.exit(21)
|
|
180
|
-
if FLAGS.
|
|
214
|
+
if FLAGS.mode == "transcribe":
|
|
215
|
+
if FLAGS.output == "":
|
|
216
|
+
print("ERROR: --output was not passed in but required by mode 'transcribe'")
|
|
217
|
+
parser.print_usage()
|
|
218
|
+
sys.exit(21)
|
|
219
|
+
if FLAGS.main_language is None:
|
|
220
|
+
print("ERROR: --main_language was not passed in but required by mode 'transcribe'")
|
|
221
|
+
parser.print_usage()
|
|
222
|
+
sys.exit(21)
|
|
223
|
+
if FLAGS.translate is not None or FLAGS.mode == "transcribe":
|
|
181
224
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
182
|
-
print('ERROR: Alignment has been configured to
|
|
225
|
+
print('ERROR: Alignment has been configured to use language models. Please install "subaligner[llm]" and run your command again.')
|
|
183
226
|
sys.exit(21)
|
|
184
227
|
if FLAGS.stretch_on or FLAGS.mode == "script":
|
|
185
228
|
if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
@@ -190,13 +233,13 @@ def main():
|
|
|
190
233
|
local_subtitle_path = subtitle_path
|
|
191
234
|
exit_segfail = FLAGS.exit_segfail
|
|
192
235
|
stretch = FLAGS.stretch_on
|
|
193
|
-
stretch_in_lang = FLAGS.stretch_in_language
|
|
236
|
+
stretch_in_lang = FLAGS.main_language or FLAGS.stretch_in_language
|
|
194
237
|
|
|
195
238
|
from subaligner.logger import Logger
|
|
196
239
|
Logger.VERBOSE = FLAGS.debug
|
|
197
240
|
Logger.QUIET = FLAGS.quiet
|
|
198
241
|
from subaligner.predictor import Predictor
|
|
199
|
-
from subaligner.exception import UnsupportedFormatException
|
|
242
|
+
from subaligner.exception import UnsupportedFormatException, TranscriptionException
|
|
200
243
|
from subaligner.exception import TerminalException
|
|
201
244
|
|
|
202
245
|
try:
|
|
@@ -230,6 +273,7 @@ def main():
|
|
|
230
273
|
parser.print_usage()
|
|
231
274
|
sys.exit(21)
|
|
232
275
|
|
|
276
|
+
voice_probabilities = None
|
|
233
277
|
predictor = Predictor()
|
|
234
278
|
if FLAGS.mode == "single":
|
|
235
279
|
aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass(
|
|
@@ -252,6 +296,11 @@ def main():
|
|
|
252
296
|
subtitle_file_path=local_subtitle_path,
|
|
253
297
|
stretch_in_lang=stretch_in_lang,
|
|
254
298
|
)
|
|
299
|
+
elif FLAGS.mode == "transcribe":
|
|
300
|
+
from subaligner.transcriber import Transcriber
|
|
301
|
+
transcriber = Transcriber(recipe=FLAGS.llm_recipe, flavour=FLAGS.llm_flavour)
|
|
302
|
+
subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang)
|
|
303
|
+
aligned_subs = subtitle.subs
|
|
255
304
|
else:
|
|
256
305
|
print("ERROR: Unknown mode {}".format(FLAGS.mode))
|
|
257
306
|
parser.print_usage()
|
|
@@ -267,6 +316,9 @@ def main():
|
|
|
267
316
|
aligned_subs = translator.translate(aligned_subs)
|
|
268
317
|
Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
|
|
269
318
|
frame_rate, "utf-8")
|
|
319
|
+
elif FLAGS.mode == "transcribe":
|
|
320
|
+
Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
|
|
321
|
+
frame_rate, "utf-8")
|
|
270
322
|
else:
|
|
271
323
|
Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
|
|
272
324
|
frame_rate)
|
|
@@ -277,35 +329,35 @@ def main():
|
|
|
277
329
|
print(
|
|
278
330
|
"ERROR: Alignment failed with a too high loss value: {}".format(log_loss)
|
|
279
331
|
)
|
|
280
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
332
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
281
333
|
sys.exit(22)
|
|
282
334
|
|
|
283
335
|
print("Aligned subtitle saved to: {}".format(aligned_subtitle_path))
|
|
284
|
-
except UnsupportedFormatException as e:
|
|
336
|
+
except (UnsupportedFormatException, TranscriptionException) as e:
|
|
285
337
|
print(
|
|
286
338
|
"ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
|
|
287
339
|
)
|
|
288
340
|
traceback.print_tb(e.__traceback__)
|
|
289
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
341
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
290
342
|
sys.exit(23)
|
|
291
343
|
except TerminalException as e:
|
|
292
344
|
print(
|
|
293
345
|
"ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
|
|
294
346
|
)
|
|
295
347
|
traceback.print_tb(e.__traceback__)
|
|
296
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
348
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
297
349
|
sys.exit(24)
|
|
298
350
|
except Exception as e:
|
|
299
351
|
print(
|
|
300
352
|
"ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
|
|
301
353
|
)
|
|
302
354
|
traceback.print_tb(e.__traceback__)
|
|
303
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
355
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
304
356
|
sys.exit(1)
|
|
305
357
|
else:
|
|
306
|
-
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
|
|
358
|
+
_remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
|
|
307
359
|
sys.exit(0)
|
|
308
|
-
|
|
360
|
+
elif FLAGS.mode == "shift":
|
|
309
361
|
if FLAGS.offset_seconds is None:
|
|
310
362
|
print("ERROR: --offset_seconds was not passed in during subtitle shifting")
|
|
311
363
|
sys.exit(21)
|
|
@@ -319,11 +371,13 @@ def main():
|
|
|
319
371
|
sys.exit(0)
|
|
320
372
|
|
|
321
373
|
|
|
322
|
-
def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path):
|
|
374
|
+
def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path, mode):
|
|
323
375
|
if video_path.lower().startswith("http") and os.path.exists(local_video_path):
|
|
324
376
|
os.remove(local_video_path)
|
|
325
377
|
if subtitle_path.lower().startswith("http") and os.path.exists(local_subtitle_path):
|
|
326
378
|
os.remove(local_subtitle_path)
|
|
379
|
+
if mode == "transcribe" and os.path.exists(local_subtitle_path):
|
|
380
|
+
os.remove(local_subtitle_path)
|
|
327
381
|
|
|
328
382
|
|
|
329
383
|
if __name__ == "__main__":
|
subaligner/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""The semver for the current release."""
|
|
2
|
-
__version__ = "0.
|
|
2
|
+
__version__ = "0.3.0"
|
subaligner/exception.py
CHANGED
subaligner/predictor.py
CHANGED
|
@@ -37,7 +37,7 @@ class Predictor(metaclass=Singleton):
|
|
|
37
37
|
__SEGMENT_PREDICTION_TIMEOUT = 60 # Maximum waiting time in seconds when predicting each segment
|
|
38
38
|
|
|
39
39
|
__THREAD_QUEUE_SIZE = 8
|
|
40
|
-
__THREAD_NUMBER =
|
|
40
|
+
__THREAD_NUMBER = 1 # Do not change
|
|
41
41
|
|
|
42
42
|
def __init__(self, **kwargs) -> None:
|
|
43
43
|
"""Feature predictor initialiser.
|
|
@@ -120,7 +120,7 @@ def main():
|
|
|
120
120
|
sys.exit(21)
|
|
121
121
|
if FLAGS.translate is not None:
|
|
122
122
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
123
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
123
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
124
124
|
sys.exit(21)
|
|
125
125
|
|
|
126
126
|
local_video_path = FLAGS.video_path
|
|
@@ -147,7 +147,7 @@ def main():
|
|
|
147
147
|
sys.exit(21)
|
|
148
148
|
if FLAGS.translate is not None:
|
|
149
149
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
150
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
150
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
151
151
|
sys.exit(21)
|
|
152
152
|
if FLAGS.stretch_on:
|
|
153
153
|
if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
@@ -173,7 +173,7 @@ Each file pair needs to share the same base filename, the part before the extens
|
|
|
173
173
|
sys.exit(21)
|
|
174
174
|
if FLAGS.translate is not None:
|
|
175
175
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
176
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
176
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
177
177
|
sys.exit(21)
|
|
178
178
|
|
|
179
179
|
video_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in
|
|
@@ -99,7 +99,7 @@ def main():
|
|
|
99
99
|
sys.exit(21)
|
|
100
100
|
if FLAGS.translate is not None:
|
|
101
101
|
if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
|
|
102
|
-
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[
|
|
102
|
+
print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
|
|
103
103
|
sys.exit(21)
|
|
104
104
|
|
|
105
105
|
local_subtitle_path = FLAGS.input_subtitle_path
|
subaligner/subtitle.py
CHANGED
|
@@ -59,6 +59,8 @@ class Subtitle(object):
|
|
|
59
59
|
|
|
60
60
|
if subtitle_format == "subrip":
|
|
61
61
|
self.__subs = self.__load_subrip(subtitle_file_path)
|
|
62
|
+
elif subtitle_format == "subrip_raw":
|
|
63
|
+
self.__subs = pysrt.SubRipFile().from_string(subtitle_file_path)
|
|
62
64
|
elif subtitle_format == "ttml":
|
|
63
65
|
self.__subs = self.__convert_ttml_to_subs(subtitle_file_path)
|
|
64
66
|
elif subtitle_format == "webvtt":
|
|
@@ -105,6 +107,19 @@ class Subtitle(object):
|
|
|
105
107
|
|
|
106
108
|
return cls(cls.__secret, subtitle_file_path, "subrip")
|
|
107
109
|
|
|
110
|
+
@classmethod
|
|
111
|
+
def load_subrip_str(cls, subrip_raw: str) -> "Subtitle":
|
|
112
|
+
"""Load a SubRip subtitle string.
|
|
113
|
+
|
|
114
|
+
Arguments:
|
|
115
|
+
subrip_str {string} -- The string representation of the SubRip content.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
Subtitle -- Subtitle object.
|
|
119
|
+
"""
|
|
120
|
+
|
|
121
|
+
return cls(cls.__secret, subrip_raw, "subrip_raw")
|
|
122
|
+
|
|
108
123
|
@classmethod
|
|
109
124
|
def load_ttml(cls, subtitle_file_path: str) -> "Subtitle":
|
|
110
125
|
"""Load a TTML subtitle file.
|
subaligner/trainer.py
CHANGED
|
@@ -315,8 +315,8 @@ class Trainer(object):
|
|
|
315
315
|
train_data = [x for x in train_data if x is not None]
|
|
316
316
|
labels = [x for x in labels if x is not None]
|
|
317
317
|
|
|
318
|
-
train_data = np.concatenate(train_data)
|
|
319
|
-
labels = np.concatenate(labels)
|
|
318
|
+
train_data: np.ndarray = np.concatenate(train_data) # type: ignore
|
|
319
|
+
labels: np.ndarray = np.concatenate(labels) # type: ignore
|
|
320
320
|
self.__LOGGER.debug(
|
|
321
321
|
"Data and labels extracted after {} seconds".format(
|
|
322
322
|
str(datetime.datetime.now() - extraction_start)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import whisper
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Tuple, Optional
|
|
5
|
+
from pysrt import SubRipTime
|
|
6
|
+
from whisper.tokenizer import LANGUAGES
|
|
7
|
+
from .translator import Translator
|
|
8
|
+
from .subtitle import Subtitle
|
|
9
|
+
from .media_helper import MediaHelper
|
|
10
|
+
from .logger import Logger
|
|
11
|
+
from .exception import NoFrameRateException, TranscriptionException
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Transcriber(object):
|
|
15
|
+
"""Transcribe audiovisual content for subtitle generation.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
def __init__(self, recipe: str = "whisper", flavour: str = "small") -> None:
|
|
19
|
+
"""Initialiser for the transcribing process.
|
|
20
|
+
|
|
21
|
+
Arguments:
|
|
22
|
+
recipe {string} -- the LLM recipe used for transcribing video files (default: "whisper").
|
|
23
|
+
flavour {string} -- the flavour variation for a specific LLM recipe (default: "small").
|
|
24
|
+
Raises:
|
|
25
|
+
NotImplementedError -- Thrown when the LLM recipe is unknown.
|
|
26
|
+
"""
|
|
27
|
+
if recipe not in [r.value for r in Recipe]:
|
|
28
|
+
raise NotImplementedError(f"Unknown recipe: {recipe}")
|
|
29
|
+
if recipe == Recipe.whisper.value:
|
|
30
|
+
if flavour not in [f.value for f in WhisperFlavour]:
|
|
31
|
+
raise NotImplementedError(f"Unknown {recipe} flavour: {flavour}")
|
|
32
|
+
self.__model = whisper.load_model(flavour)
|
|
33
|
+
self.recipe = recipe
|
|
34
|
+
self.flavour = flavour
|
|
35
|
+
self.__media_helper = MediaHelper()
|
|
36
|
+
self.__LOGGER = Logger().get_logger(__name__)
|
|
37
|
+
|
|
38
|
+
def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle, Optional[float]]:
|
|
39
|
+
"""Transcribe an audiovisual file and generate subtitles.
|
|
40
|
+
|
|
41
|
+
Arguments:
|
|
42
|
+
video_file_path {string} -- The input video file path.
|
|
43
|
+
language_code {string} -- An alpha 3 language code derived from ISO 639-3.
|
|
44
|
+
Raises:
|
|
45
|
+
TranscriptionException -- Thrown when transcription is failed.
|
|
46
|
+
NotImplementedError -- Thrown when the LLM recipe is not supported.
|
|
47
|
+
"""
|
|
48
|
+
if self.recipe == "whisper":
|
|
49
|
+
lang = Translator.get_iso_639_alpha_2(language_code)
|
|
50
|
+
if lang not in LANGUAGES:
|
|
51
|
+
raise TranscriptionException(f'"{language_code}" is not supported by {self.recipe} ({self.flavour})')
|
|
52
|
+
audio_file_path = self.__media_helper.extract_audio(video_file_path, True, 16000)
|
|
53
|
+
try:
|
|
54
|
+
audio = whisper.load_audio(audio_file_path)
|
|
55
|
+
self.__LOGGER.debug("Start transcribing the audio...")
|
|
56
|
+
result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang])
|
|
57
|
+
self.__LOGGER.info("Finished transcribing the audio")
|
|
58
|
+
srt_str = ""
|
|
59
|
+
for i, segment in enumerate(result["segments"], start=1):
|
|
60
|
+
srt_str += f"{i}\n" \
|
|
61
|
+
f"{self.__format_timestamp(segment['start'])} --> {self.__format_timestamp(segment['end'])}\n" \
|
|
62
|
+
f"{segment['text'].strip().replace('-->', '->')}\n" \
|
|
63
|
+
"\n"
|
|
64
|
+
subtitle = Subtitle.load_subrip_str(srt_str)
|
|
65
|
+
subtitle, frame_rate = self.__on_frame_timecodes(subtitle, video_file_path)
|
|
66
|
+
self.__LOGGER.debug("Generated the raw subtitle")
|
|
67
|
+
return subtitle, frame_rate
|
|
68
|
+
finally:
|
|
69
|
+
if os.path.exists(audio_file_path):
|
|
70
|
+
os.remove(audio_file_path)
|
|
71
|
+
else:
|
|
72
|
+
raise NotImplementedError(f"{self.recipe} ({self.flavour}) is not supported")
|
|
73
|
+
|
|
74
|
+
@staticmethod
|
|
75
|
+
def __format_timestamp(seconds: float) -> str:
|
|
76
|
+
assert seconds >= 0, "non-negative timestamp expected"
|
|
77
|
+
milliseconds = round(seconds * 1000.0)
|
|
78
|
+
hours = milliseconds // 3_600_000
|
|
79
|
+
milliseconds -= hours * 3_600_000
|
|
80
|
+
minutes = milliseconds // 60_000
|
|
81
|
+
milliseconds -= minutes * 60_000
|
|
82
|
+
seconds = milliseconds // 1_000
|
|
83
|
+
milliseconds -= seconds * 1_000
|
|
84
|
+
hours_marker = f"{hours:02d}:"
|
|
85
|
+
return f"{hours_marker}{minutes:02d}:{seconds:02d},{milliseconds:03d}"
|
|
86
|
+
|
|
87
|
+
def __on_frame_timecodes(self, subtitle: Subtitle, video_file_path: str) -> Tuple[Subtitle, Optional[float]]:
|
|
88
|
+
frame_rate = None
|
|
89
|
+
try:
|
|
90
|
+
frame_rate = self.__media_helper.get_frame_rate(video_file_path)
|
|
91
|
+
frame_duration = 1.0 / frame_rate
|
|
92
|
+
for sub in subtitle.subs:
|
|
93
|
+
start_seconds = sub.start.hours * 3600 + sub.start.minutes * 60 + sub.start.seconds + sub.start.milliseconds / 1000.0
|
|
94
|
+
end_seconds = sub.end.hours * 3600 + sub.end.minutes * 60 + sub.end.seconds + sub.end.milliseconds / 1000.0
|
|
95
|
+
start_frames = int(start_seconds / frame_duration)
|
|
96
|
+
end_frames = int(end_seconds / frame_duration)
|
|
97
|
+
sub.start = SubRipTime(seconds=start_frames * frame_duration)
|
|
98
|
+
sub.end = SubRipTime(seconds=end_frames * frame_duration)
|
|
99
|
+
except NoFrameRateException:
|
|
100
|
+
self.__LOGGER.warning("Cannot detect the frame rate for %s" % video_file_path)
|
|
101
|
+
return subtitle, frame_rate
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class Recipe(str, Enum):
|
|
105
|
+
whisper = "whisper"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class WhisperFlavour(str, Enum):
|
|
109
|
+
tiny = "tiny"
|
|
110
|
+
tiny_en = "tiny.en"
|
|
111
|
+
small = "small"
|
|
112
|
+
medium = "medium"
|
|
113
|
+
medium_en = "medium.en"
|
|
114
|
+
base = "base"
|
|
115
|
+
base_en = "base.en"
|
|
116
|
+
large_v1 = "large-v1"
|
|
117
|
+
large_v2 = "large-v2"
|
|
118
|
+
large = "large"
|
subaligner/translator.py
CHANGED
|
@@ -16,6 +16,7 @@ class Translator(metaclass=Singleton):
|
|
|
16
16
|
|
|
17
17
|
__TENSOR_TYPE = "pt"
|
|
18
18
|
__OPUS_MT = "Helsinki-NLP/opus-mt-{}-{}"
|
|
19
|
+
__OPUS_MT_TC_BIG = "Helsinki-NLP/opus-mt-tc-big-{}-{}"
|
|
19
20
|
__OPUS_TATOEBA = "Helsinki-NLP/opus-tatoeba-{}-{}"
|
|
20
21
|
__TRANSLATING_BATCH_SIZE = 10
|
|
21
22
|
__LANGUAGE_CODE_MAPPER = {
|
|
@@ -128,8 +129,8 @@ class Translator(metaclass=Singleton):
|
|
|
128
129
|
num_of_batches = math.ceil(len(src_texts) / Translator.__TRANSLATING_BATCH_SIZE)
|
|
129
130
|
self.__LOGGER.info("Translating %s subtitle cue(s)..." % len(src_texts))
|
|
130
131
|
for batch in tqdm(Translator.__batch(src_texts, Translator.__TRANSLATING_BATCH_SIZE), total=num_of_batches):
|
|
131
|
-
|
|
132
|
-
translated = self.lang_model.generate(**
|
|
132
|
+
input_ids = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True)
|
|
133
|
+
translated = self.lang_model.generate(**input_ids)
|
|
133
134
|
translated_texts.extend([self.tokenizer.decode(t, skip_special_tokens=True) for t in translated])
|
|
134
135
|
for index in range(len(new_subs)):
|
|
135
136
|
new_subs[index].text = translated_texts[index]
|
|
@@ -140,59 +141,100 @@ class Translator(metaclass=Singleton):
|
|
|
140
141
|
src_lang = Translator.normalise_single(src_lang)
|
|
141
142
|
tgt_lang = Translator.normalise_single(tgt_lang)
|
|
142
143
|
src_lang, tgt_lang = Translator.normalise_pair(src_lang, tgt_lang)
|
|
144
|
+
|
|
145
|
+
if self.__download_mt_model(src_lang, tgt_lang):
|
|
146
|
+
return
|
|
147
|
+
elif self.__download_mt_tc_big_model(src_lang, tgt_lang):
|
|
148
|
+
return
|
|
149
|
+
elif self.__download_tatoeba_model(src_lang, tgt_lang):
|
|
150
|
+
return
|
|
151
|
+
else:
|
|
152
|
+
message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang)
|
|
153
|
+
self.__LOGGER.error(message)
|
|
154
|
+
raise NotImplementedError(message)
|
|
155
|
+
|
|
156
|
+
def __download_mt_model(self, src_lang: str, tgt_lang: str) -> bool:
|
|
143
157
|
try:
|
|
144
158
|
mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang))
|
|
145
|
-
self.
|
|
146
|
-
return
|
|
159
|
+
self.__download(mt_model_name)
|
|
160
|
+
return True
|
|
147
161
|
except OSError:
|
|
148
162
|
self.__log_and_back_off(mt_model_name)
|
|
149
163
|
try:
|
|
150
164
|
mt_model_name = Translator.__OPUS_MT.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang))
|
|
151
|
-
self.
|
|
152
|
-
return
|
|
165
|
+
self.__download(mt_model_name)
|
|
166
|
+
return True
|
|
153
167
|
except OSError:
|
|
154
168
|
self.__log_and_back_off(mt_model_name)
|
|
155
169
|
try:
|
|
156
170
|
mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang)
|
|
157
|
-
self.
|
|
158
|
-
return
|
|
171
|
+
self.__download(mt_model_name)
|
|
172
|
+
return True
|
|
159
173
|
except OSError:
|
|
160
174
|
self.__log_and_back_off(mt_model_name)
|
|
161
175
|
try:
|
|
162
176
|
mt_model_name = Translator.__OPUS_MT.format(src_lang, tgt_lang)
|
|
163
|
-
self.
|
|
164
|
-
return
|
|
177
|
+
self.__download(mt_model_name)
|
|
178
|
+
return True
|
|
165
179
|
except OSError:
|
|
166
180
|
self.__log_and_back_off(mt_model_name)
|
|
181
|
+
return False
|
|
182
|
+
|
|
183
|
+
def __download_mt_tc_big_model(self, src_lang: str, tgt_lang: str) -> bool:
|
|
184
|
+
try:
|
|
185
|
+
mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang))
|
|
186
|
+
self.__download(mt_tc_model_name)
|
|
187
|
+
return True
|
|
188
|
+
except OSError:
|
|
189
|
+
self.__log_and_back_off(mt_tc_model_name)
|
|
190
|
+
try:
|
|
191
|
+
mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang))
|
|
192
|
+
self.__download(mt_tc_model_name)
|
|
193
|
+
return True
|
|
194
|
+
except OSError:
|
|
195
|
+
self.__log_and_back_off(mt_tc_model_name)
|
|
196
|
+
try:
|
|
197
|
+
mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang)
|
|
198
|
+
self.__download(mt_tc_model_name)
|
|
199
|
+
return True
|
|
200
|
+
except OSError:
|
|
201
|
+
self.__log_and_back_off(mt_tc_model_name)
|
|
202
|
+
try:
|
|
203
|
+
mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, tgt_lang)
|
|
204
|
+
self.__download(mt_tc_model_name)
|
|
205
|
+
return True
|
|
206
|
+
except OSError:
|
|
207
|
+
self.__log_and_back_off(mt_tc_model_name)
|
|
208
|
+
return False
|
|
209
|
+
|
|
210
|
+
def __download_tatoeba_model(self, src_lang: str, tgt_lang: str) -> bool:
|
|
167
211
|
try:
|
|
168
212
|
mt_model_name = Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang))
|
|
169
|
-
self.
|
|
170
|
-
return
|
|
213
|
+
self.__download(mt_model_name)
|
|
214
|
+
return True
|
|
171
215
|
except OSError:
|
|
172
216
|
self.__log_and_back_off(mt_model_name)
|
|
173
217
|
try:
|
|
174
218
|
mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang))
|
|
175
|
-
self.
|
|
176
|
-
return
|
|
219
|
+
self.__download(mt_model_name)
|
|
220
|
+
return True
|
|
177
221
|
except OSError:
|
|
178
222
|
self.__log_and_back_off(mt_model_name)
|
|
179
223
|
try:
|
|
180
224
|
mt_model_name = Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang)
|
|
181
|
-
self.
|
|
182
|
-
return
|
|
225
|
+
self.__download(mt_model_name)
|
|
226
|
+
return True
|
|
183
227
|
except OSError:
|
|
184
228
|
self.__log_and_back_off(mt_model_name)
|
|
185
229
|
try:
|
|
186
230
|
mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, tgt_lang)
|
|
187
|
-
self.
|
|
188
|
-
return
|
|
231
|
+
self.__download(mt_model_name)
|
|
232
|
+
return True
|
|
189
233
|
except OSError:
|
|
190
|
-
self.
|
|
191
|
-
|
|
192
|
-
self.__LOGGER.error(message)
|
|
193
|
-
raise NotImplementedError(message)
|
|
234
|
+
self.__log_and_back_off(mt_model_name)
|
|
235
|
+
return False
|
|
194
236
|
|
|
195
|
-
def
|
|
237
|
+
def __download(self, mt_model_name: str) -> None:
|
|
196
238
|
self.__LOGGER.debug("Trying to download the MT model %s" % mt_model_name)
|
|
197
239
|
self.tokenizer = MarianTokenizer.from_pretrained(mt_model_name)
|
|
198
240
|
self.lang_model = MarianMTModel.from_pretrained(mt_model_name)
|