subaligner 0.2.4__py3.8.egg → 0.3.0__py3.8.egg

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
EGG-INFO/PKG-INFO CHANGED
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: subaligner
3
- Version: 0.2.4
3
+ Version: 0.3.0
4
4
  Summary: Automatically synchronize and translate subtitles with pretrained deep neural networks, forced alignments and transformers.
5
5
  Home-page: https://subaligner.readthedocs.io/en/latest/
6
6
  Author: Xi Bai
7
7
  Author-email: xi.bai.ed@gmail.com
8
8
  License: MIT
9
- Platform: UNKNOWN
10
9
  Classifier: License :: OSI Approved :: MIT License
11
10
  Classifier: Programming Language :: Python :: 3.7
12
11
  Classifier: Programming Language :: Python :: 3.8
@@ -19,6 +18,7 @@ Provides-Extra: dev
19
18
  Provides-Extra: docs
20
19
  Provides-Extra: stretch
21
20
  Provides-Extra: translation
21
+ Provides-Extra: llm
22
22
  License-File: LICENSE
23
23
 
24
24
  <div align="center">
@@ -26,11 +26,12 @@ License-File: LICENSE
26
26
  </div>
27
27
 
28
28
  [![Build Status](https://github.com/baxtree/subaligner/actions/workflows/ci-pipeline.yml/badge.svg?branch=master)](https://github.com/baxtree/subaligner/actions/workflows/ci-pipeline.yml?query=branch%3Amaster) ![Codecov](https://img.shields.io/codecov/c/github/baxtree/subaligner)
29
- [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) [![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/)
29
+ [![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-3100/) [![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) [![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/)
30
30
  [![Documentation Status](https://readthedocs.org/projects/subaligner/badge/?version=latest)](https://subaligner.readthedocs.io/en/latest/?badge=latest)
31
31
  [![GitHub license](https://img.shields.io/github/license/baxtree/subaligner)](https://github.com/baxtree/subaligner/blob/master/LICENSE)
32
32
  [![PyPI](https://badge.fury.io/py/subaligner.svg)](https://badge.fury.io/py/subaligner)
33
- [![Docker](https://img.shields.io/docker/cloud/build/baxtree/subaligner?label=Docker&style=flat)](https://hub.docker.com/r/baxtree/subaligner/builds)
33
+ [![Docker Build](https://img.shields.io/docker/cloud/build/baxtree/subaligner?label=Docker&style=flat)](https://hub.docker.com/r/baxtree/subaligner/builds)
34
+ [![Docker Pulls](https://img.shields.io/docker/pulls/baxtree/subaligner)](https://hub.docker.com/r/baxtree/subaligner)
34
35
  [![Citation](https://zenodo.org/badge/228440472.svg)](https://doi.org/10.5281/zenodo.5603083)
35
36
 
36
37
  ## Supported Formats
@@ -56,9 +57,9 @@ $ pip install subaligner
56
57
 
57
58
  ## Installation with Optional Packages Supporting Additional Features
58
59
  ```
59
- # Install dependencies for enabling translation
60
+ # Install dependencies for enabling translation and transcription
60
61
 
61
- $ pip install 'subaligner[translation]'
62
+ $ pip install 'subaligner[llm]'
62
63
  ```
63
64
  ```
64
65
  # Install dependencies for enabling forced alignment
@@ -140,6 +141,10 @@ $ subaligner -m single -v https://example.com/video.mp4 -s https://example.com/s
140
141
  $ subaligner -m dual -v https://example.com/video.mp4 -s https://example.com/subtitle.srt -o subtitle_aligned.srt
141
142
  ```
142
143
  ```
144
+ # Generate subtitles by transcribing audiovisual files
145
+ $ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt
146
+ ```
147
+ ```
143
148
  # Alignment on segmented plain texts (double newlines as the delimiter)
144
149
 
145
150
  $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt
@@ -159,15 +164,11 @@ $ subaligner -m dual -v video.mkv -s embedded:stream_index=0 -o subtitle_aligned
159
164
  ```
160
165
  ```
161
166
  # Translative alignment with the ISO 639-3 language code pair (src,tgt)
162
-
163
- $ subaligner_1pass --languages
164
- $ subaligner_1pass -v video.mp4 -s subtitle.srt -t src,tgt
165
- $ subaligner_2pass --languages
166
- $ subaligner_2pass -v video.mp4 -s subtitle.srt -t src,tgt
167
167
  $ subaligner --languages
168
168
  $ subaligner -m single -v video.mp4 -s subtitle.srt -t src,tgt
169
169
  $ subaligner -m dual -v video.mp4 -s subtitle.srt -t src,tgt
170
170
  $ subaligner -m script -v test.mp4 -s subtitle.txt -o subtitle_aligned.srt -t src,tgt
171
+ $ subaligner -m transcribe -v video.mp4 -ml eng -mr whisper -mf small -o subtitle_aligned.srt -t src,tgt
171
172
  ```
172
173
  ```
173
174
  # Shift subtitle manually by offset in seconds
@@ -236,10 +237,9 @@ This tool wouldn't be possible without the following packages:
236
237
  [pysrt](https://github.com/byroot/pysrt)
237
238
  [pysubs2](https://github.com/tkarabela/pysubs2)
238
239
  [aeneas](https://www.readbeyond.it/aeneas/)
239
- [transformers](https://huggingface.co/transformers/).
240
+ [transformers](https://huggingface.co/transformers/)
241
+ [openai-whisper](https://github.com/openai/whisper).
240
242
 
241
243
  Thanks to Alan Robinson and Nigel Megitt for their invaluable feedback.
242
244
 
243
245
 
244
-
245
-
EGG-INFO/SOURCES.txt CHANGED
@@ -24,6 +24,7 @@ subaligner/predictor.py
24
24
  subaligner/singleton.py
25
25
  subaligner/subtitle.py
26
26
  subaligner/trainer.py
27
+ subaligner/transcriber.py
27
28
  subaligner/translator.py
28
29
  subaligner/utils.py
29
30
  subaligner.egg-info/PKG-INFO
EGG-INFO/requires.txt CHANGED
@@ -6,19 +6,19 @@ tornado==5.1.0
6
6
  toolz==0.9.0
7
7
  toml==0.10.0
8
8
  termcolor==1.1.0
9
- tensorflow<2.8,>=1.15.5
9
+ tensorflow<2.9,>=1.15.5
10
10
  tblib==1.3.2
11
11
  six~=1.15.0
12
12
  setuptools>=41.0.0
13
13
  scikit-learn~=0.24.2
14
- scipy~=1.5.4
14
+ scipy<=1.8.1
15
15
  rsa==4.7
16
16
  requests-oauthlib==1.3.0
17
17
  requests~=2.25.1
18
18
  PyYAML>=4.2b1
19
19
  pytz==2018.4
20
20
  pystack-debugger==0.8.0
21
- pysubs2==0.2.4
21
+ pysubs2<=1.4.2
22
22
  pysrt==1.1.1
23
23
  pyprof2calltree==1.4.3
24
24
  pydotplus==2.0.2
@@ -31,7 +31,7 @@ psutil==5.6.7
31
31
  pluggy==0.13.1
32
32
  pbr==4.0.2
33
33
  oauthlib==3.1.0
34
- numpy<1.23.0
34
+ numpy<1.24.0
35
35
  numba>=0.50.0
36
36
  msgpack-python==0.5.6
37
37
  networkx>=2.5.1
@@ -48,13 +48,13 @@ isort==4.3.4
48
48
  idna==2.8
49
49
  hyperopt==0.2.4
50
50
  html5lib==1.0b9
51
- h5py~=3.1.0
51
+ h5py<=3.6.0
52
52
  HeapDict==1.0.0
53
53
  graphviz==0.8.3
54
54
  google-pasta~=0.2
55
55
  google-auth-oauthlib==0.4.2
56
56
  google-auth==1.27.0
57
- filelock==3.0.12
57
+ filelock<4.0.0
58
58
  distributed==1.13.0
59
59
  decorator==4.3.0
60
60
  dask<2022.1.0
@@ -81,7 +81,7 @@ typing-extensions<4.0.0
81
81
  types-setuptools==57.4.9
82
82
  types-requests==2.27.9
83
83
  mypy==0.931
84
- pex==2.1.34
84
+ pex<=2.1.80
85
85
  radish-bdd~=0.13.3
86
86
  scikit-build==0.11.1
87
87
  line-profiler==3.1.0
@@ -92,8 +92,9 @@ tox~=3.23.0
92
92
  coverage==5.5
93
93
  mock==4.0.3
94
94
  aeneas~=1.7.3.0
95
- transformers~=4.5.1
96
- torch~=1.8.1
95
+ openai-whisper==20230124
96
+ transformers<4.27.0
97
+ torch<1.13.0
97
98
  sentencepiece~=0.1.95
98
99
  pycountry~=20.7.3
99
100
  docutils~=0.17.0
@@ -107,8 +108,16 @@ sphinx==3.3.1
107
108
 
108
109
  [harmony]
109
110
  aeneas~=1.7.3.0
110
- transformers~=4.5.1
111
- torch~=1.8.1
111
+ openai-whisper==20230124
112
+ transformers<4.27.0
113
+ torch<1.13.0
114
+ sentencepiece~=0.1.95
115
+ pycountry~=20.7.3
116
+
117
+ [llm]
118
+ openai-whisper==20230124
119
+ transformers<4.27.0
120
+ torch<1.13.0
112
121
  sentencepiece~=0.1.95
113
122
  pycountry~=20.7.3
114
123
 
@@ -116,7 +125,8 @@ pycountry~=20.7.3
116
125
  aeneas~=1.7.3.0
117
126
 
118
127
  [translation]
119
- transformers~=4.5.1
120
- torch~=1.8.1
128
+ openai-whisper==20230124
129
+ transformers<4.27.0
130
+ torch<1.13.0
121
131
  sentencepiece~=0.1.95
122
132
  pycountry~=20.7.3
@@ -1,13 +1,17 @@
1
1
  #!python
2
2
  """
3
- usage: subaligner [-h] [-m {single,dual,script,shift}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
3
+ usage: subaligner [-h] [-m {single,dual,script,shift,transcribe}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
4
4
  [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
5
- [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS] [-lgs] [-d] [-q] [-ver]
5
+ [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
6
+ [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
7
+ [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-lgs] [-d] [-q] [-ver]
6
8
 
7
9
  Subaligner command line interface
8
10
 
9
11
  optional arguments:
10
12
  -h, --help show this help message and exit
13
+ -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
14
+ File path or URL to the subtitle file (Extensions of supported subtitles: .ssa, .vtt, .srt, .txt, .smi, .ytt, .sub, .xml, .sbv, .ass, .sami, .scc, .tmp, .stl, .ttml, .dfxp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
11
15
  -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS
12
16
  Max global log loss for alignment
13
17
  -so, --stretch_on Switch on stretch on subtitles)
@@ -23,18 +27,22 @@ optional arguments:
23
27
  Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho)
24
28
  -os OFFSET_SECONDS, --offset_seconds OFFSET_SECONDS
25
29
  Offset by which the subtitle will be shifted
30
+ -ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}
31
+ Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
32
+ -mr {whisper}, --llm_recipe {whisper}
33
+ LLM recipe used for transcribing video files
34
+ -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --llm_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}
35
+ Flavour variation for a specific LLM recipe
26
36
  -lgs, --languages Print out language codes used for stretch and translation
27
37
  -d, --debug Print out debugging information
28
38
  -q, --quiet Switch off logging information
29
39
  -ver, --version show program's version number and exit
30
40
 
31
41
  required arguments:
32
- -m {single,dual,script,shift}, --mode {single,dual,script,shift}
33
- Alignment mode: either single or dual
42
+ -m {single,dual,script,shift,transcribe}, --mode {single,dual,script,shift,transcribe}
43
+ Alignment mode: single, dual, script, shift or transcribe
34
44
  -v VIDEO_PATH, --video_path VIDEO_PATH
35
45
  File path or URL to the video file
36
- -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
37
- File path or URL to the subtitle file (Extensions of supported subtitles: .sami, .ssa, .vtt, .xml, .sub, .smi, .ass, .srt, .tmp, .dfxp, .stl, .ttml, .sbv, .txt, .ytt, .scc) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
38
46
  """
39
47
 
40
48
  import argparse
@@ -61,10 +69,10 @@ def main():
61
69
  required_args.add_argument(
62
70
  "-m",
63
71
  "--mode",
64
- type=str,
72
+ type=str.lower,
65
73
  default="",
66
- choices=["single", "dual", "script", "shift"],
67
- help="Alignment mode: either single or dual",
74
+ choices=["single", "dual", "script", "shift", "transcribe"],
75
+ help="Alignment mode: single, dual, script, shift or transcribe",
68
76
  )
69
77
  required_args.add_argument(
70
78
  "-v",
@@ -74,7 +82,7 @@ def main():
74
82
  help="File path or URL to the video file",
75
83
  )
76
84
  from subaligner.subtitle import Subtitle
77
- required_args.add_argument(
85
+ parser.add_argument(
78
86
  "-s",
79
87
  "--subtitle_path",
80
88
  type=str,
@@ -100,7 +108,7 @@ def main():
100
108
  parser.add_argument(
101
109
  "-sil",
102
110
  "--stretch_in_language",
103
- type=str,
111
+ type=str.lower,
104
112
  choices=Utils.get_stretch_language_codes(),
105
113
  default="eng",
106
114
  help="Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes].\nNB: This will be ignored if neither -so nor --stretch_on is present",
@@ -137,6 +145,29 @@ def main():
137
145
  type=float,
138
146
  help="Offset by which the subtitle will be shifted"
139
147
  )
148
+ parser.add_argument(
149
+ "-ml",
150
+ "--main_language",
151
+ type=str.lower,
152
+ choices=Utils.get_stretch_language_codes(),
153
+ help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]",
154
+ )
155
+ parser.add_argument(
156
+ "-mr",
157
+ "--llm_recipe",
158
+ type=str.lower,
159
+ default="whisper",
160
+ choices=["whisper"],
161
+ help="LLM recipe used for transcribing video files"
162
+ )
163
+ parser.add_argument(
164
+ "-mf",
165
+ "--llm_flavour",
166
+ type=str.lower,
167
+ default="small",
168
+ choices=["tiny", "tiny.en", "small", "medium", "medium.en", "base", "base.en", "large-v1", "large-v2", "large"],
169
+ help="Flavour variation for a specific LLM recipe"
170
+ )
140
171
  parser.add_argument("-lgs", "--languages", action="store_true",
141
172
  help="Print out language codes used for stretch and translation")
142
173
  parser.add_argument("-d", "--debug", action="store_true",
@@ -153,33 +184,45 @@ def main():
153
184
  print("ERROR: --mode was not passed in")
154
185
  parser.print_usage()
155
186
  sys.exit(21)
187
+
156
188
  FLAGS.subtitle_path = [path for paths in FLAGS.subtitle_path for path in paths]
157
189
 
158
- if not FLAGS.subtitle_path:
190
+ if not FLAGS.subtitle_path and FLAGS.mode != "transcribe":
159
191
  print("ERROR: --subtitle_path was not passed in")
160
192
  parser.print_usage()
161
193
  sys.exit(21)
162
- if FLAGS.mode != "shift":
194
+ elif FLAGS.mode == "transcribe":
195
+ FLAGS.subtitle_path = ["{}.srt".format(tempfile.mkstemp()[1])]
196
+ if FLAGS.mode in ["single", "dual", "script", "transcribe"]:
163
197
  for subtitle_path in FLAGS.subtitle_path:
164
198
  if FLAGS.video_path == "":
165
199
  print("ERROR: --video_path was not passed in")
166
200
  parser.print_usage()
167
201
  sys.exit(21)
168
202
  if subtitle_path.lower().startswith("http") and FLAGS.output == "":
169
- print("ERROR: --output was not passed in for alignment on a remote subtitle file")
203
+ print("ERROR: --output was not passed in but required by alignment on a remote subtitle file")
170
204
  parser.print_usage()
171
205
  sys.exit(21)
172
206
  if subtitle_path.lower().startswith("embedded:") and FLAGS.output == "":
173
- print("ERROR: --output was not passed in for alignment on embedded subtitles")
207
+ print("ERROR: --output was not passed in but required by alignment on embedded subtitles")
174
208
  parser.print_usage()
175
209
  sys.exit(21)
176
210
  if FLAGS.mode == "script" and FLAGS.output == "":
177
- print("ERROR: --output was not passed in for alignment on plain texts")
211
+ print("ERROR: --output was not passed in but required by alignment on plain texts")
178
212
  parser.print_usage()
179
213
  sys.exit(21)
180
- if FLAGS.translate is not None:
214
+ if FLAGS.mode == "transcribe":
215
+ if FLAGS.output == "":
216
+ print("ERROR: --output was not passed in but required by mode 'transcribe'")
217
+ parser.print_usage()
218
+ sys.exit(21)
219
+ if FLAGS.main_language is None:
220
+ print("ERROR: --main_language was not passed in but required by mode 'transcribe'")
221
+ parser.print_usage()
222
+ sys.exit(21)
223
+ if FLAGS.translate is not None or FLAGS.mode == "transcribe":
181
224
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
182
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
225
+ print('ERROR: Alignment has been configured to use language models. Please install "subaligner[llm]" and run your command again.')
183
226
  sys.exit(21)
184
227
  if FLAGS.stretch_on or FLAGS.mode == "script":
185
228
  if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
@@ -190,13 +233,13 @@ def main():
190
233
  local_subtitle_path = subtitle_path
191
234
  exit_segfail = FLAGS.exit_segfail
192
235
  stretch = FLAGS.stretch_on
193
- stretch_in_lang = FLAGS.stretch_in_language
236
+ stretch_in_lang = FLAGS.main_language or FLAGS.stretch_in_language
194
237
 
195
238
  from subaligner.logger import Logger
196
239
  Logger.VERBOSE = FLAGS.debug
197
240
  Logger.QUIET = FLAGS.quiet
198
241
  from subaligner.predictor import Predictor
199
- from subaligner.exception import UnsupportedFormatException
242
+ from subaligner.exception import UnsupportedFormatException, TranscriptionException
200
243
  from subaligner.exception import TerminalException
201
244
 
202
245
  try:
@@ -230,6 +273,7 @@ def main():
230
273
  parser.print_usage()
231
274
  sys.exit(21)
232
275
 
276
+ voice_probabilities = None
233
277
  predictor = Predictor()
234
278
  if FLAGS.mode == "single":
235
279
  aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass(
@@ -252,6 +296,11 @@ def main():
252
296
  subtitle_file_path=local_subtitle_path,
253
297
  stretch_in_lang=stretch_in_lang,
254
298
  )
299
+ elif FLAGS.mode == "transcribe":
300
+ from subaligner.transcriber import Transcriber
301
+ transcriber = Transcriber(recipe=FLAGS.llm_recipe, flavour=FLAGS.llm_flavour)
302
+ subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang)
303
+ aligned_subs = subtitle.subs
255
304
  else:
256
305
  print("ERROR: Unknown mode {}".format(FLAGS.mode))
257
306
  parser.print_usage()
@@ -267,6 +316,9 @@ def main():
267
316
  aligned_subs = translator.translate(aligned_subs)
268
317
  Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
269
318
  frame_rate, "utf-8")
319
+ elif FLAGS.mode == "transcribe":
320
+ Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
321
+ frame_rate, "utf-8")
270
322
  else:
271
323
  Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
272
324
  frame_rate)
@@ -277,35 +329,35 @@ def main():
277
329
  print(
278
330
  "ERROR: Alignment failed with a too high loss value: {}".format(log_loss)
279
331
  )
280
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
332
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
281
333
  sys.exit(22)
282
334
 
283
335
  print("Aligned subtitle saved to: {}".format(aligned_subtitle_path))
284
- except UnsupportedFormatException as e:
336
+ except (UnsupportedFormatException, TranscriptionException) as e:
285
337
  print(
286
338
  "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
287
339
  )
288
340
  traceback.print_tb(e.__traceback__)
289
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
341
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
290
342
  sys.exit(23)
291
343
  except TerminalException as e:
292
344
  print(
293
345
  "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
294
346
  )
295
347
  traceback.print_tb(e.__traceback__)
296
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
348
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
297
349
  sys.exit(24)
298
350
  except Exception as e:
299
351
  print(
300
352
  "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
301
353
  )
302
354
  traceback.print_tb(e.__traceback__)
303
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
355
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
304
356
  sys.exit(1)
305
357
  else:
306
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
358
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
307
359
  sys.exit(0)
308
- else:
360
+ elif FLAGS.mode == "shift":
309
361
  if FLAGS.offset_seconds is None:
310
362
  print("ERROR: --offset_seconds was not passed in during subtitle shifting")
311
363
  sys.exit(21)
@@ -319,11 +371,13 @@ def main():
319
371
  sys.exit(0)
320
372
 
321
373
 
322
- def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path):
374
+ def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path, mode):
323
375
  if video_path.lower().startswith("http") and os.path.exists(local_video_path):
324
376
  os.remove(local_video_path)
325
377
  if subtitle_path.lower().startswith("http") and os.path.exists(local_subtitle_path):
326
378
  os.remove(local_subtitle_path)
379
+ if mode == "transcribe" and os.path.exists(local_subtitle_path):
380
+ os.remove(local_subtitle_path)
327
381
 
328
382
 
329
383
  if __name__ == "__main__":
@@ -120,7 +120,7 @@ def main():
120
120
  sys.exit(21)
121
121
  if FLAGS.translate is not None:
122
122
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
123
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
123
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
124
124
  sys.exit(21)
125
125
 
126
126
  local_video_path = FLAGS.video_path
@@ -147,7 +147,7 @@ def main():
147
147
  sys.exit(21)
148
148
  if FLAGS.translate is not None:
149
149
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
150
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
150
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
151
151
  sys.exit(21)
152
152
  if FLAGS.stretch_on:
153
153
  if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
@@ -173,7 +173,7 @@ Each file pair needs to share the same base filename, the part before the extens
173
173
  sys.exit(21)
174
174
  if FLAGS.translate is not None:
175
175
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
176
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
176
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
177
177
  sys.exit(21)
178
178
 
179
179
  video_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in
@@ -99,7 +99,7 @@ def main():
99
99
  sys.exit(21)
100
100
  if FLAGS.translate is not None:
101
101
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
102
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
102
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
103
103
  sys.exit(21)
104
104
 
105
105
  local_subtitle_path = FLAGS.input_subtitle_path
subaligner/__init__.py CHANGED
@@ -1,5 +1,7 @@
1
+ import os
1
2
  import multiprocessing as mp
2
3
  from ._version import __version__
3
4
 
4
5
  __all__ = ["__version__"]
5
6
  mp.set_start_method("spawn", force=True)
7
+ os.environ["KMP_WARNINGS"] = "0"
subaligner/__main__.py CHANGED
@@ -1,13 +1,17 @@
1
1
  #!/usr/bin/env python
2
2
  """
3
- usage: subaligner [-h] [-m {single,dual,script,shift}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
3
+ usage: subaligner [-h] [-m {single,dual,script,shift,transcribe}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
4
4
  [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
5
- [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS] [-lgs] [-d] [-q] [-ver]
5
+ [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
6
+ [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
7
+ [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-lgs] [-d] [-q] [-ver]
6
8
 
7
9
  Subaligner command line interface
8
10
 
9
11
  optional arguments:
10
12
  -h, --help show this help message and exit
13
+ -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
14
+ File path or URL to the subtitle file (Extensions of supported subtitles: .ssa, .vtt, .srt, .txt, .smi, .ytt, .sub, .xml, .sbv, .ass, .sami, .scc, .tmp, .stl, .ttml, .dfxp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
11
15
  -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS
12
16
  Max global log loss for alignment
13
17
  -so, --stretch_on Switch on stretch on subtitles)
@@ -23,18 +27,22 @@ optional arguments:
23
27
  Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho)
24
28
  -os OFFSET_SECONDS, --offset_seconds OFFSET_SECONDS
25
29
  Offset by which the subtitle will be shifted
30
+ -ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}
31
+ Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
32
+ -mr {whisper}, --llm_recipe {whisper}
33
+ LLM recipe used for transcribing video files
34
+ -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --llm_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}
35
+ Flavour variation for a specific LLM recipe
26
36
  -lgs, --languages Print out language codes used for stretch and translation
27
37
  -d, --debug Print out debugging information
28
38
  -q, --quiet Switch off logging information
29
39
  -ver, --version show program's version number and exit
30
40
 
31
41
  required arguments:
32
- -m {single,dual,script,shift}, --mode {single,dual,script,shift}
33
- Alignment mode: either single or dual
42
+ -m {single,dual,script,shift,transcribe}, --mode {single,dual,script,shift,transcribe}
43
+ Alignment mode: single, dual, script, shift or transcribe
34
44
  -v VIDEO_PATH, --video_path VIDEO_PATH
35
45
  File path or URL to the video file
36
- -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
37
- File path or URL to the subtitle file (Extensions of supported subtitles: .sami, .ssa, .vtt, .xml, .sub, .smi, .ass, .srt, .tmp, .dfxp, .stl, .ttml, .sbv, .txt, .ytt, .scc) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
38
46
  """
39
47
 
40
48
  import argparse
@@ -61,10 +69,10 @@ def main():
61
69
  required_args.add_argument(
62
70
  "-m",
63
71
  "--mode",
64
- type=str,
72
+ type=str.lower,
65
73
  default="",
66
- choices=["single", "dual", "script", "shift"],
67
- help="Alignment mode: either single or dual",
74
+ choices=["single", "dual", "script", "shift", "transcribe"],
75
+ help="Alignment mode: single, dual, script, shift or transcribe",
68
76
  )
69
77
  required_args.add_argument(
70
78
  "-v",
@@ -74,7 +82,7 @@ def main():
74
82
  help="File path or URL to the video file",
75
83
  )
76
84
  from subaligner.subtitle import Subtitle
77
- required_args.add_argument(
85
+ parser.add_argument(
78
86
  "-s",
79
87
  "--subtitle_path",
80
88
  type=str,
@@ -100,7 +108,7 @@ def main():
100
108
  parser.add_argument(
101
109
  "-sil",
102
110
  "--stretch_in_language",
103
- type=str,
111
+ type=str.lower,
104
112
  choices=Utils.get_stretch_language_codes(),
105
113
  default="eng",
106
114
  help="Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes].\nNB: This will be ignored if neither -so nor --stretch_on is present",
@@ -137,6 +145,29 @@ def main():
137
145
  type=float,
138
146
  help="Offset by which the subtitle will be shifted"
139
147
  )
148
+ parser.add_argument(
149
+ "-ml",
150
+ "--main_language",
151
+ type=str.lower,
152
+ choices=Utils.get_stretch_language_codes(),
153
+ help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]",
154
+ )
155
+ parser.add_argument(
156
+ "-mr",
157
+ "--llm_recipe",
158
+ type=str.lower,
159
+ default="whisper",
160
+ choices=["whisper"],
161
+ help="LLM recipe used for transcribing video files"
162
+ )
163
+ parser.add_argument(
164
+ "-mf",
165
+ "--llm_flavour",
166
+ type=str.lower,
167
+ default="small",
168
+ choices=["tiny", "tiny.en", "small", "medium", "medium.en", "base", "base.en", "large-v1", "large-v2", "large"],
169
+ help="Flavour variation for a specific LLM recipe"
170
+ )
140
171
  parser.add_argument("-lgs", "--languages", action="store_true",
141
172
  help="Print out language codes used for stretch and translation")
142
173
  parser.add_argument("-d", "--debug", action="store_true",
@@ -153,33 +184,45 @@ def main():
153
184
  print("ERROR: --mode was not passed in")
154
185
  parser.print_usage()
155
186
  sys.exit(21)
187
+
156
188
  FLAGS.subtitle_path = [path for paths in FLAGS.subtitle_path for path in paths]
157
189
 
158
- if not FLAGS.subtitle_path:
190
+ if not FLAGS.subtitle_path and FLAGS.mode != "transcribe":
159
191
  print("ERROR: --subtitle_path was not passed in")
160
192
  parser.print_usage()
161
193
  sys.exit(21)
162
- if FLAGS.mode != "shift":
194
+ elif FLAGS.mode == "transcribe":
195
+ FLAGS.subtitle_path = ["{}.srt".format(tempfile.mkstemp()[1])]
196
+ if FLAGS.mode in ["single", "dual", "script", "transcribe"]:
163
197
  for subtitle_path in FLAGS.subtitle_path:
164
198
  if FLAGS.video_path == "":
165
199
  print("ERROR: --video_path was not passed in")
166
200
  parser.print_usage()
167
201
  sys.exit(21)
168
202
  if subtitle_path.lower().startswith("http") and FLAGS.output == "":
169
- print("ERROR: --output was not passed in for alignment on a remote subtitle file")
203
+ print("ERROR: --output was not passed in but required by alignment on a remote subtitle file")
170
204
  parser.print_usage()
171
205
  sys.exit(21)
172
206
  if subtitle_path.lower().startswith("embedded:") and FLAGS.output == "":
173
- print("ERROR: --output was not passed in for alignment on embedded subtitles")
207
+ print("ERROR: --output was not passed in but required by alignment on embedded subtitles")
174
208
  parser.print_usage()
175
209
  sys.exit(21)
176
210
  if FLAGS.mode == "script" and FLAGS.output == "":
177
- print("ERROR: --output was not passed in for alignment on plain texts")
211
+ print("ERROR: --output was not passed in but required by alignment on plain texts")
178
212
  parser.print_usage()
179
213
  sys.exit(21)
180
- if FLAGS.translate is not None:
214
+ if FLAGS.mode == "transcribe":
215
+ if FLAGS.output == "":
216
+ print("ERROR: --output was not passed in but required by mode 'transcribe'")
217
+ parser.print_usage()
218
+ sys.exit(21)
219
+ if FLAGS.main_language is None:
220
+ print("ERROR: --main_language was not passed in but required by mode 'transcribe'")
221
+ parser.print_usage()
222
+ sys.exit(21)
223
+ if FLAGS.translate is not None or FLAGS.mode == "transcribe":
181
224
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
182
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
225
+ print('ERROR: Alignment has been configured to use language models. Please install "subaligner[llm]" and run your command again.')
183
226
  sys.exit(21)
184
227
  if FLAGS.stretch_on or FLAGS.mode == "script":
185
228
  if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
@@ -190,13 +233,13 @@ def main():
190
233
  local_subtitle_path = subtitle_path
191
234
  exit_segfail = FLAGS.exit_segfail
192
235
  stretch = FLAGS.stretch_on
193
- stretch_in_lang = FLAGS.stretch_in_language
236
+ stretch_in_lang = FLAGS.main_language or FLAGS.stretch_in_language
194
237
 
195
238
  from subaligner.logger import Logger
196
239
  Logger.VERBOSE = FLAGS.debug
197
240
  Logger.QUIET = FLAGS.quiet
198
241
  from subaligner.predictor import Predictor
199
- from subaligner.exception import UnsupportedFormatException
242
+ from subaligner.exception import UnsupportedFormatException, TranscriptionException
200
243
  from subaligner.exception import TerminalException
201
244
 
202
245
  try:
@@ -230,6 +273,7 @@ def main():
230
273
  parser.print_usage()
231
274
  sys.exit(21)
232
275
 
276
+ voice_probabilities = None
233
277
  predictor = Predictor()
234
278
  if FLAGS.mode == "single":
235
279
  aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass(
@@ -252,6 +296,11 @@ def main():
252
296
  subtitle_file_path=local_subtitle_path,
253
297
  stretch_in_lang=stretch_in_lang,
254
298
  )
299
+ elif FLAGS.mode == "transcribe":
300
+ from subaligner.transcriber import Transcriber
301
+ transcriber = Transcriber(recipe=FLAGS.llm_recipe, flavour=FLAGS.llm_flavour)
302
+ subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang)
303
+ aligned_subs = subtitle.subs
255
304
  else:
256
305
  print("ERROR: Unknown mode {}".format(FLAGS.mode))
257
306
  parser.print_usage()
@@ -267,6 +316,9 @@ def main():
267
316
  aligned_subs = translator.translate(aligned_subs)
268
317
  Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
269
318
  frame_rate, "utf-8")
319
+ elif FLAGS.mode == "transcribe":
320
+ Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
321
+ frame_rate, "utf-8")
270
322
  else:
271
323
  Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
272
324
  frame_rate)
@@ -277,35 +329,35 @@ def main():
277
329
  print(
278
330
  "ERROR: Alignment failed with a too high loss value: {}".format(log_loss)
279
331
  )
280
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
332
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
281
333
  sys.exit(22)
282
334
 
283
335
  print("Aligned subtitle saved to: {}".format(aligned_subtitle_path))
284
- except UnsupportedFormatException as e:
336
+ except (UnsupportedFormatException, TranscriptionException) as e:
285
337
  print(
286
338
  "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
287
339
  )
288
340
  traceback.print_tb(e.__traceback__)
289
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
341
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
290
342
  sys.exit(23)
291
343
  except TerminalException as e:
292
344
  print(
293
345
  "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
294
346
  )
295
347
  traceback.print_tb(e.__traceback__)
296
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
348
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
297
349
  sys.exit(24)
298
350
  except Exception as e:
299
351
  print(
300
352
  "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
301
353
  )
302
354
  traceback.print_tb(e.__traceback__)
303
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
355
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
304
356
  sys.exit(1)
305
357
  else:
306
- _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path)
358
+ _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
307
359
  sys.exit(0)
308
- else:
360
+ elif FLAGS.mode == "shift":
309
361
  if FLAGS.offset_seconds is None:
310
362
  print("ERROR: --offset_seconds was not passed in during subtitle shifting")
311
363
  sys.exit(21)
@@ -319,11 +371,13 @@ def main():
319
371
  sys.exit(0)
320
372
 
321
373
 
322
- def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path):
374
+ def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path, mode):
323
375
  if video_path.lower().startswith("http") and os.path.exists(local_video_path):
324
376
  os.remove(local_video_path)
325
377
  if subtitle_path.lower().startswith("http") and os.path.exists(local_subtitle_path):
326
378
  os.remove(local_subtitle_path)
379
+ if mode == "transcribe" and os.path.exists(local_subtitle_path):
380
+ os.remove(local_subtitle_path)
327
381
 
328
382
 
329
383
  if __name__ == "__main__":
subaligner/_version.py CHANGED
@@ -1,2 +1,2 @@
1
1
  """The semver for the current release."""
2
- __version__ = "0.2.4"
2
+ __version__ = "0.3.0"
subaligner/exception.py CHANGED
@@ -8,3 +8,7 @@ class TerminalException(Exception):
8
8
 
9
9
class NoFrameRateException(Exception):
    """An exception raised when a frame rate cannot be found."""
11
+
12
+
13
class TranscriptionException(Exception):
    """An exception raised due to transcription failures.

    For example, the transcriber raises it when the requested language is
    not supported by the selected recipe.
    """
subaligner/predictor.py CHANGED
@@ -37,7 +37,7 @@ class Predictor(metaclass=Singleton):
37
37
  __SEGMENT_PREDICTION_TIMEOUT = 60 # Maximum waiting time in seconds when predicting each segment
38
38
 
39
39
  __THREAD_QUEUE_SIZE = 8
40
- __THREAD_NUMBER = 4
40
+ __THREAD_NUMBER = 1 # Do not change
41
41
 
42
42
  def __init__(self, **kwargs) -> None:
43
43
  """Feature predictor initialiser.
@@ -120,7 +120,7 @@ def main():
120
120
  sys.exit(21)
121
121
  if FLAGS.translate is not None:
122
122
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
123
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
123
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
124
124
  sys.exit(21)
125
125
 
126
126
  local_video_path = FLAGS.video_path
@@ -147,7 +147,7 @@ def main():
147
147
  sys.exit(21)
148
148
  if FLAGS.translate is not None:
149
149
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
150
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
150
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
151
151
  sys.exit(21)
152
152
  if FLAGS.stretch_on:
153
153
  if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
@@ -173,7 +173,7 @@ Each file pair needs to share the same base filename, the part before the extens
173
173
  sys.exit(21)
174
174
  if FLAGS.translate is not None:
175
175
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
176
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
176
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
177
177
  sys.exit(21)
178
178
 
179
179
  video_file_paths = [os.path.abspath(os.path.join(path, p)) for path, _, files in
@@ -99,7 +99,7 @@ def main():
99
99
  sys.exit(21)
100
100
  if FLAGS.translate is not None:
101
101
  if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
102
- print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[translation]" and run your command again.')
102
+ print('ERROR: Alignment has been configured to perform translation. Please install "subaligner[llm]" and run your command again.')
103
103
  sys.exit(21)
104
104
 
105
105
  local_subtitle_path = FLAGS.input_subtitle_path
subaligner/subtitle.py CHANGED
@@ -59,6 +59,8 @@ class Subtitle(object):
59
59
 
60
60
  if subtitle_format == "subrip":
61
61
  self.__subs = self.__load_subrip(subtitle_file_path)
62
+ elif subtitle_format == "subrip_raw":
63
+ self.__subs = pysrt.SubRipFile().from_string(subtitle_file_path)
62
64
  elif subtitle_format == "ttml":
63
65
  self.__subs = self.__convert_ttml_to_subs(subtitle_file_path)
64
66
  elif subtitle_format == "webvtt":
@@ -105,6 +107,19 @@ class Subtitle(object):
105
107
 
106
108
  return cls(cls.__secret, subtitle_file_path, "subrip")
107
109
 
110
    @classmethod
    def load_subrip_str(cls, subrip_raw: str) -> "Subtitle":
        """Load a SubRip subtitle from its string representation.

        Arguments:
            subrip_raw {string} -- The string representation of the SubRip content.

        Returns:
            Subtitle -- Subtitle object.
        """

        # The "subrip_raw" format tells the constructor to parse the argument
        # as SubRip text rather than to treat it as a file path.
        return cls(cls.__secret, subrip_raw, "subrip_raw")
122
+
108
123
  @classmethod
109
124
  def load_ttml(cls, subtitle_file_path: str) -> "Subtitle":
110
125
  """Load a TTML subtitle file.
subaligner/trainer.py CHANGED
@@ -315,8 +315,8 @@ class Trainer(object):
315
315
  train_data = [x for x in train_data if x is not None]
316
316
  labels = [x for x in labels if x is not None]
317
317
 
318
- train_data = np.concatenate(train_data)
319
- labels = np.concatenate(labels)
318
+ train_data: np.ndarray = np.concatenate(train_data) # type: ignore
319
+ labels: np.ndarray = np.concatenate(labels) # type: ignore
320
320
  self.__LOGGER.debug(
321
321
  "Data and labels extracted after {} seconds".format(
322
322
  str(datetime.datetime.now() - extraction_start)
@@ -0,0 +1,118 @@
1
+ import os
2
+ import whisper
3
+ from enum import Enum
4
+ from typing import Tuple, Optional
5
+ from pysrt import SubRipTime
6
+ from whisper.tokenizer import LANGUAGES
7
+ from .translator import Translator
8
+ from .subtitle import Subtitle
9
+ from .media_helper import MediaHelper
10
+ from .logger import Logger
11
+ from .exception import NoFrameRateException, TranscriptionException
12
+
13
+
14
class Transcriber(object):
    """Transcribe audiovisual content for subtitle generation.
    """

    def __init__(self, recipe: str = "whisper", flavour: str = "small") -> None:
        """Initialiser for the transcribing process.

        Arguments:
            recipe {string} -- the LLM recipe used for transcribing video files (default: "whisper").
            flavour {string} -- the flavour variation for a specific LLM recipe (default: "small").
        Raises:
            NotImplementedError -- Thrown when the LLM recipe or its flavour is unknown.
        """
        if recipe not in [r.value for r in Recipe]:
            raise NotImplementedError(f"Unknown recipe: {recipe}")
        if recipe == Recipe.whisper.value:
            if flavour not in [f.value for f in WhisperFlavour]:
                raise NotImplementedError(f"Unknown {recipe} flavour: {flavour}")
            # Validate first, then load: loading the model is the expensive step.
            self.__model = whisper.load_model(flavour)
        self.recipe = recipe
        self.flavour = flavour
        self.__media_helper = MediaHelper()
        self.__LOGGER = Logger().get_logger(__name__)

    def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle, Optional[float]]:
        """Transcribe an audiovisual file and generate subtitles.

        Arguments:
            video_file_path {string} -- The input video file path.
            language_code {string} -- An alpha 3 language code derived from ISO 639-3.
        Returns:
            tuple -- The generated subtitle and the detected frame rate (None if it cannot be detected).
        Raises:
            TranscriptionException -- Thrown when the language is not supported by the recipe.
            NotImplementedError -- Thrown when the LLM recipe is not supported.
            ValueError -- Thrown when a transcribed segment carries a negative timestamp.
        """
        if self.recipe != Recipe.whisper.value:
            raise NotImplementedError(f"{self.recipe} ({self.flavour}) is not supported")

        lang = Translator.get_iso_639_alpha_2(language_code)
        if lang not in LANGUAGES:
            raise TranscriptionException(f'"{language_code}" is not supported by {self.recipe} ({self.flavour})')
        audio_file_path = self.__media_helper.extract_audio(video_file_path, True, 16000)
        try:
            audio = whisper.load_audio(audio_file_path)
            self.__LOGGER.debug("Start transcribing the audio...")
            result = self.__model.transcribe(audio, task="transcribe", language=LANGUAGES[lang])
            self.__LOGGER.info("Finished transcribing the audio")
            # Build the SubRip text with a single join instead of repeated
            # string concatenation. "-->" is the SubRip timecode separator, so
            # it is rewritten when it appears inside the cue text.
            srt_str = "".join(
                f"{i}\n"
                f"{self.__format_timestamp(segment['start'])} --> {self.__format_timestamp(segment['end'])}\n"
                f"{segment['text'].strip().replace('-->', '->')}\n"
                "\n"
                for i, segment in enumerate(result["segments"], start=1)
            )
            subtitle = Subtitle.load_subrip_str(srt_str)
            subtitle, frame_rate = self.__on_frame_timecodes(subtitle, video_file_path)
            self.__LOGGER.debug("Generated the raw subtitle")
            return subtitle, frame_rate
        finally:
            # The extracted audio is only an intermediate artefact; remove it
            # whether or not the transcription succeeded.
            if os.path.exists(audio_file_path):
                os.remove(audio_file_path)

    @staticmethod
    def __format_timestamp(seconds: float) -> str:
        """Format a non-negative offset in seconds as a SubRip timecode (HH:MM:SS,mmm).

        Raises:
            ValueError -- Thrown when the offset is negative or not a number.
        """
        # An explicit check rather than "assert", which is stripped under -O.
        # "not >=" also rejects NaN.
        if not seconds >= 0:
            raise ValueError("non-negative timestamp expected")
        remaining_ms = round(seconds * 1000.0)
        hours, remaining_ms = divmod(remaining_ms, 3_600_000)
        minutes, remaining_ms = divmod(remaining_ms, 60_000)
        whole_seconds, milliseconds = divmod(remaining_ms, 1_000)
        return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"

    def __on_frame_timecodes(self, subtitle: Subtitle, video_file_path: str) -> Tuple[Subtitle, Optional[float]]:
        """Snap every cue's start and end onto frame boundaries of the video.

        Returns the (mutated) subtitle together with the frame rate, or with
        None when the frame rate cannot be detected, in which case the cue
        times are left untouched.
        """
        frame_rate = None
        try:
            frame_rate = self.__media_helper.get_frame_rate(video_file_path)
            frame_duration = 1.0 / frame_rate
            for sub in subtitle.subs:
                start_seconds = sub.start.hours * 3600 + sub.start.minutes * 60 + sub.start.seconds + sub.start.milliseconds / 1000.0
                end_seconds = sub.end.hours * 3600 + sub.end.minutes * 60 + sub.end.seconds + sub.end.milliseconds / 1000.0
                # int() truncates, so each timecode moves back to the start of
                # the frame it falls in.
                start_frames = int(start_seconds / frame_duration)
                end_frames = int(end_seconds / frame_duration)
                sub.start = SubRipTime(seconds=start_frames * frame_duration)
                sub.end = SubRipTime(seconds=end_frames * frame_duration)
        except NoFrameRateException:
            self.__LOGGER.warning("Cannot detect the frame rate for %s", video_file_path)
        return subtitle, frame_rate
102
+
103
+
104
class Recipe(str, Enum):
    """Names of the LLM recipes that Transcriber accepts."""
    whisper = "whisper"
106
+
107
+
108
class WhisperFlavour(str, Enum):
    """Model flavours accepted for the whisper recipe.

    Transcriber validates the requested flavour against these values before
    passing it to whisper.load_model.
    """
    tiny = "tiny"
    tiny_en = "tiny.en"
    small = "small"
    medium = "medium"
    medium_en = "medium.en"
    base = "base"
    base_en = "base.en"
    large_v1 = "large-v1"
    large_v2 = "large-v2"
    large = "large"
subaligner/translator.py CHANGED
@@ -16,6 +16,7 @@ class Translator(metaclass=Singleton):
16
16
 
17
17
  __TENSOR_TYPE = "pt"
18
18
  __OPUS_MT = "Helsinki-NLP/opus-mt-{}-{}"
19
+ __OPUS_MT_TC_BIG = "Helsinki-NLP/opus-mt-tc-big-{}-{}"
19
20
  __OPUS_TATOEBA = "Helsinki-NLP/opus-tatoeba-{}-{}"
20
21
  __TRANSLATING_BATCH_SIZE = 10
21
22
  __LANGUAGE_CODE_MAPPER = {
@@ -128,8 +129,8 @@ class Translator(metaclass=Singleton):
128
129
  num_of_batches = math.ceil(len(src_texts) / Translator.__TRANSLATING_BATCH_SIZE)
129
130
  self.__LOGGER.info("Translating %s subtitle cue(s)..." % len(src_texts))
130
131
  for batch in tqdm(Translator.__batch(src_texts, Translator.__TRANSLATING_BATCH_SIZE), total=num_of_batches):
131
- tokenizer = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True)
132
- translated = self.lang_model.generate(**tokenizer)
132
+ input_ids = self.tokenizer(batch, return_tensors=Translator.__TENSOR_TYPE, padding=True)
133
+ translated = self.lang_model.generate(**input_ids)
133
134
  translated_texts.extend([self.tokenizer.decode(t, skip_special_tokens=True) for t in translated])
134
135
  for index in range(len(new_subs)):
135
136
  new_subs[index].text = translated_texts[index]
@@ -140,59 +141,100 @@ class Translator(metaclass=Singleton):
140
141
  src_lang = Translator.normalise_single(src_lang)
141
142
  tgt_lang = Translator.normalise_single(tgt_lang)
142
143
  src_lang, tgt_lang = Translator.normalise_pair(src_lang, tgt_lang)
144
+
145
+ if self.__download_mt_model(src_lang, tgt_lang):
146
+ return
147
+ elif self.__download_mt_tc_big_model(src_lang, tgt_lang):
148
+ return
149
+ elif self.__download_tatoeba_model(src_lang, tgt_lang):
150
+ return
151
+ else:
152
+ message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang)
153
+ self.__LOGGER.error(message)
154
+ raise NotImplementedError(message)
155
+
156
+ def __download_mt_model(self, src_lang: str, tgt_lang: str) -> bool:
143
157
  try:
144
158
  mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang))
145
- self.__download_mt_model(mt_model_name)
146
- return
159
+ self.__download(mt_model_name)
160
+ return True
147
161
  except OSError:
148
162
  self.__log_and_back_off(mt_model_name)
149
163
  try:
150
164
  mt_model_name = Translator.__OPUS_MT.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang))
151
- self.__download_mt_model(mt_model_name)
152
- return
165
+ self.__download(mt_model_name)
166
+ return True
153
167
  except OSError:
154
168
  self.__log_and_back_off(mt_model_name)
155
169
  try:
156
170
  mt_model_name = Translator.__OPUS_MT.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang)
157
- self.__download_mt_model(mt_model_name)
158
- return
171
+ self.__download(mt_model_name)
172
+ return True
159
173
  except OSError:
160
174
  self.__log_and_back_off(mt_model_name)
161
175
  try:
162
176
  mt_model_name = Translator.__OPUS_MT.format(src_lang, tgt_lang)
163
- self.__download_mt_model(mt_model_name)
164
- return
177
+ self.__download(mt_model_name)
178
+ return True
165
179
  except OSError:
166
180
  self.__log_and_back_off(mt_model_name)
181
+ return False
182
+
183
+ def __download_mt_tc_big_model(self, src_lang: str, tgt_lang: str) -> bool:
184
+ try:
185
+ mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang))
186
+ self.__download(mt_tc_model_name)
187
+ return True
188
+ except OSError:
189
+ self.__log_and_back_off(mt_tc_model_name)
190
+ try:
191
+ mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang))
192
+ self.__download(mt_tc_model_name)
193
+ return True
194
+ except OSError:
195
+ self.__log_and_back_off(mt_tc_model_name)
196
+ try:
197
+ mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang)
198
+ self.__download(mt_tc_model_name)
199
+ return True
200
+ except OSError:
201
+ self.__log_and_back_off(mt_tc_model_name)
202
+ try:
203
+ mt_tc_model_name = Translator.__OPUS_MT_TC_BIG.format(src_lang, tgt_lang)
204
+ self.__download(mt_tc_model_name)
205
+ return True
206
+ except OSError:
207
+ self.__log_and_back_off(mt_tc_model_name)
208
+ return False
209
+
210
+ def __download_tatoeba_model(self, src_lang: str, tgt_lang: str) -> bool:
167
211
  try:
168
212
  mt_model_name = Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), Translator.get_iso_639_alpha_2(tgt_lang))
169
- self.__download_mt_model(mt_model_name)
170
- return
213
+ self.__download(mt_model_name)
214
+ return True
171
215
  except OSError:
172
216
  self.__log_and_back_off(mt_model_name)
173
217
  try:
174
218
  mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, Translator.get_iso_639_alpha_2(tgt_lang))
175
- self.__download_mt_model(mt_model_name)
176
- return
219
+ self.__download(mt_model_name)
220
+ return True
177
221
  except OSError:
178
222
  self.__log_and_back_off(mt_model_name)
179
223
  try:
180
224
  mt_model_name = Translator.__OPUS_TATOEBA.format(Translator.get_iso_639_alpha_2(src_lang), tgt_lang)
181
- self.__download_mt_model(mt_model_name)
182
- return
225
+ self.__download(mt_model_name)
226
+ return True
183
227
  except OSError:
184
228
  self.__log_and_back_off(mt_model_name)
185
229
  try:
186
230
  mt_model_name = Translator.__OPUS_TATOEBA.format(src_lang, tgt_lang)
187
- self.__download_mt_model(mt_model_name)
188
- return
231
+ self.__download(mt_model_name)
232
+ return True
189
233
  except OSError:
190
- self.__LOGGER.debug("Cannot download the MT model %s" % mt_model_name)
191
- message = 'Cannot find the MT model for source language "{}" and destination language "{}"'.format(src_lang, tgt_lang)
192
- self.__LOGGER.error(message)
193
- raise NotImplementedError(message)
234
+ self.__log_and_back_off(mt_model_name)
235
+ return False
194
236
 
195
- def __download_mt_model(self, mt_model_name: str) -> None:
237
+ def __download(self, mt_model_name: str) -> None:
196
238
  self.__LOGGER.debug("Trying to download the MT model %s" % mt_model_name)
197
239
  self.tokenizer = MarianTokenizer.from_pretrained(mt_model_name)
198
240
  self.lang_model = MarianMTModel.from_pretrained(mt_model_name)