subaligner 0.3.4__py39-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. subaligner/__init__.py +12 -0
  2. subaligner/__main__.py +408 -0
  3. subaligner/_version.py +2 -0
  4. subaligner/embedder.py +357 -0
  5. subaligner/exception.py +18 -0
  6. subaligner/hparam_tuner.py +100 -0
  7. subaligner/hyperparameters.py +240 -0
  8. subaligner/lib/__init__.py +0 -0
  9. subaligner/lib/language.py +408 -0
  10. subaligner/lib/to_srt.py +600 -0
  11. subaligner/llm.py +34 -0
  12. subaligner/logger.py +60 -0
  13. subaligner/media_helper.py +417 -0
  14. subaligner/models/training/config/hyperparameters.json +26 -0
  15. subaligner/models/training/model/__init__.py +1 -0
  16. subaligner/models/training/model/model.hdf5 +0 -0
  17. subaligner/models/training/weights/__init__.py +1 -0
  18. subaligner/models/training/weights/weights.hdf5 +0 -0
  19. subaligner/network.py +593 -0
  20. subaligner/predictor.py +913 -0
  21. subaligner/singleton.py +14 -0
  22. subaligner/subaligner_1pass/__init__.py +0 -0
  23. subaligner/subaligner_1pass/__main__.py +227 -0
  24. subaligner/subaligner_2pass/__init__.py +0 -0
  25. subaligner/subaligner_2pass/__main__.py +265 -0
  26. subaligner/subaligner_batch/__init__.py +0 -0
  27. subaligner/subaligner_batch/__main__.py +342 -0
  28. subaligner/subaligner_convert/__init__.py +0 -0
  29. subaligner/subaligner_convert/__main__.py +163 -0
  30. subaligner/subaligner_train/__init__.py +0 -0
  31. subaligner/subaligner_train/__main__.py +351 -0
  32. subaligner/subaligner_tune/__init__.py +0 -0
  33. subaligner/subaligner_tune/__main__.py +165 -0
  34. subaligner/subtitle.py +852 -0
  35. subaligner/trainer.py +380 -0
  36. subaligner/transcriber.py +91 -0
  37. subaligner/translator.py +232 -0
  38. subaligner/utils.py +774 -0
  39. subaligner-0.3.4.data/scripts/subaligner +408 -0
  40. subaligner-0.3.4.data/scripts/subaligner_1pass +227 -0
  41. subaligner-0.3.4.data/scripts/subaligner_2pass +265 -0
  42. subaligner-0.3.4.data/scripts/subaligner_batch +352 -0
  43. subaligner-0.3.4.data/scripts/subaligner_convert +163 -0
  44. subaligner-0.3.4.data/scripts/subaligner_train +351 -0
  45. subaligner-0.3.4.data/scripts/subaligner_tune +165 -0
  46. subaligner-0.3.4.dist-info/LICENSE +21 -0
  47. subaligner-0.3.4.dist-info/METADATA +340 -0
  48. subaligner-0.3.4.dist-info/RECORD +51 -0
  49. subaligner-0.3.4.dist-info/WHEEL +5 -0
  50. subaligner-0.3.4.dist-info/entry_points.txt +8 -0
  51. subaligner-0.3.4.dist-info/top_level.txt +1 -0
subaligner/__init__.py ADDED
@@ -0,0 +1,12 @@
1
"""Package initialisation for Subaligner.

Importing the package has process-wide side effects: all Python warnings are
silenced, the multiprocessing start method is forced to ``spawn`` and the
``KMP_WARNINGS`` environment variable is set to ``"0"``.
"""
import multiprocessing as mp
import os
import warnings

from ._version import __version__

__all__ = ["__version__"]

# Suppress every warning category for the whole process.
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")

# ``force=True`` overrides a start method that may already have been selected.
mp.set_start_method("spawn", force=True)

# NOTE(review): presumably silences Intel OpenMP (KMP) runtime warnings emitted
# by numerical back ends - confirm against the runtime in use.
os.environ["KMP_WARNINGS"] = "0"
subaligner/__main__.py ADDED
@@ -0,0 +1,408 @@
1
+ #!/usr/bin/env python
2
+ """
3
+ usage: subaligner [-h] [-m {single,dual,script,shift,transcribe}] [-v VIDEO_PATH] [-s SUBTITLE_PATH [SUBTITLE_PATH ...]] [-l MAX_LOGLOSS] [-so]
4
+ [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
5
+ [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
6
+ [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
7
+ [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}]
8
+ [-tf TRANSLATION_FLAVOUR] [-lgs] [-d] [-q] [-ver]
9
+
10
+ Subaligner command line interface
11
+
12
+ optional arguments:
13
+ -h, --help show this help message and exit
14
+ -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
15
+ File path or URL to the subtitle file (Extensions of supported subtitles: .ttml, .ssa, .stl, .sbv, .dfxp, .srt, .txt, .ytt, .vtt, .sub, .sami, .xml, .scc, .ass, .smi, .tmp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
16
+ -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS
17
+ Max global log loss for alignment
18
+ -so, --stretch_on Switch on stretch on subtitles)
19
+ -sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --stretch_in_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}
20
+ Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes].
21
+ NB: This will be ignored if neither -so nor --stretch_on is present
22
+ -fos, --exit_segfail Exit on any segment alignment failures
23
+ -tod TRAINING_OUTPUT_DIRECTORY, --training_output_directory TRAINING_OUTPUT_DIRECTORY
24
+ Path to the output directory containing training results
25
+ -o OUTPUT, --output OUTPUT
26
+ Path to the output subtitle file
27
+ -t TRANSLATE, --translate TRANSLATE
28
+ Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho)
29
+ -os OFFSET_SECONDS, --offset_seconds OFFSET_SECONDS
30
+ Offset by which the subtitle will be shifted
31
+ -ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}, --main_language {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}
32
+ Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
33
+ -mr {whisper}, --transcription_recipe {whisper}
34
+ LLM recipe used for transcribing video files
35
+ -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large}
36
+ Flavour variation for a specific LLM recipe supporting transcription
37
+ -tr {helsinki-nlp,whisper,facebook-mbart}, --translation_recipe {helsinki-nlp,whisper,facebook-mbart}
38
+ LLM recipe used for translating subtitles
39
+ -tf TRANSLATION_FLAVOUR, --translation_flavour TRANSLATION_FLAVOUR
40
+ Flavour variation for a specific LLM recipe supporting translation
41
+ -lgs, --languages Print out language codes used for stretch and translation
42
+ -d, --debug Print out debugging information
43
+ -q, --quiet Switch off logging information
44
+ -ver, --version show program's version number and exit
45
+
46
+ required arguments:
47
+ -m {single,dual,script,shift,transcribe}, --mode {single,dual,script,shift,transcribe}
48
+ Alignment mode: single, dual, script, shift or transcribe
49
+ -v VIDEO_PATH, --video_path VIDEO_PATH
50
+ File path or URL to the video file
51
+ """
52
+
53
+ import argparse
54
+ import sys
55
+ import traceback
56
+ import os
57
+ import tempfile
58
+ import pkg_resources
59
+
60
+
61
def main():
    """Run the Subaligner command line interface.

    Parses the command line, validates the arguments required by the selected
    mode and then either processes each subtitle against the video (modes
    ``single``, ``dual``, ``script`` and ``transcribe``) or shifts each
    subtitle by a fixed offset (mode ``shift``).

    The function reports its outcome through ``sys.exit``:
        0  - success, or the language table was printed
        1  - unexpected error
        20 - interpreter is not Python 3 or Subaligner cannot be imported
        21 - missing/invalid arguments or a missing optional extra
        22 - log loss is unavailable or above ``--max_logloss``
        23 - ``UnsupportedFormatException`` or ``TranscriptionException``
        24 - ``TerminalException``
    """
    if sys.version_info.major != 3:
        print("ERROR: Cannot find Python 3")
        sys.exit(20)
    try:
        import subaligner
    except ModuleNotFoundError:
        print("ERROR: Subaligner is not installed")
        sys.exit(20)

    from subaligner._version import __version__
    parser = argparse.ArgumentParser(description="Subaligner command line interface (v%s)" % __version__, formatter_class=argparse.RawTextHelpFormatter)
    required_args = parser.add_argument_group("required arguments")
    required_args.add_argument(
        "-m",
        "--mode",
        type=str.lower,
        default="",
        choices=["single", "dual", "script", "shift", "transcribe"],
        help="Alignment mode: single, dual, script, shift or transcribe",
    )
    required_args.add_argument(
        "-v",
        "--video_path",
        type=str,
        default="",
        help="File path or URL to the video file",
    )
    from subaligner.subtitle import Subtitle
    # "append" combined with nargs="+" yields a list of lists, which is
    # flattened once parsing is done.
    parser.add_argument(
        "-s",
        "--subtitle_path",
        type=str,
        default=[],
        action="append",
        nargs="+",
        help="File path or URL to the subtitle file (Extensions of supported subtitles: {}) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)".format(", ".join(Subtitle.subtitle_extensions())),
    )
    parser.add_argument(
        "-l",
        "--max_logloss",
        type=float,
        default=float("inf"),
        help="Max global log loss for alignment",
    )
    parser.add_argument(
        "-so",
        "--stretch_on",
        action="store_true",
        help="Switch on stretch on subtitles)",
    )
    from subaligner.utils import Utils
    parser.add_argument(
        "-sil",
        "--stretch_in_language",
        type=str.lower,
        choices=Utils.get_stretch_language_codes(),
        default="eng",
        help="Stretch the subtitle with the supported ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes].\nNB: This will be ignored if neither -so nor --stretch_on is present",
    )
    parser.add_argument(
        "-fos",
        "--exit_segfail",
        action="store_true",
        help="Exit on any segment alignment failures",
    )
    parser.add_argument(
        "-tod",
        "--training_output_directory",
        type=str,
        default=os.path.abspath(os.path.dirname(subaligner.__file__)),
        help="Path to the output directory containing training results",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default="",
        help="Path to the output subtitle file",
    )
    parser.add_argument(
        "-t",
        "--translate",
        type=str,
        help="Source and target ISO 639-3 language codes separated by a comma (e.g., eng,zho)",
    )
    parser.add_argument(
        "-os",
        "--offset_seconds",
        type=float,
        help="Offset by which the subtitle will be shifted"
    )
    parser.add_argument(
        "-ml",
        "--main_language",
        type=str.lower,
        choices=Utils.get_stretch_language_codes(),
        help="Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]",
    )
    from subaligner.llm import TranscriptionRecipe
    from subaligner.llm import WhisperFlavour
    parser.add_argument(
        "-mr",
        "--transcription_recipe",
        type=str.lower,
        default=TranscriptionRecipe.WHISPER.value,
        choices=[r.value for r in TranscriptionRecipe],
        help="LLM recipe used for transcribing video files"
    )
    parser.add_argument(
        "-mf",
        "--transcription_flavour",
        type=str.lower,
        default=WhisperFlavour.SMALL.value,
        choices=[wf.value for wf in WhisperFlavour],
        help="Flavour variation for a specific LLM recipe supporting transcription"
    )
    from subaligner.llm import TranslationRecipe
    from subaligner.llm import HelsinkiNLPFlavour
    parser.add_argument(
        "-tr",
        "--translation_recipe",
        type=str.lower,
        default=TranslationRecipe.HELSINKI_NLP.value,
        choices=[r.value for r in TranslationRecipe],
        help="LLM recipe used for translating subtitles"
    )
    parser.add_argument(
        "-tf",
        "--translation_flavour",
        type=str.lower,
        default=None,
        help="Flavour variation for a specific LLM recipe supporting translation"
    )
    parser.add_argument("-lgs", "--languages", action="store_true",
                        help="Print out language codes used for stretch and translation")
    parser.add_argument("-d", "--debug", action="store_true",
                        help="Print out debugging information")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Switch off logging information")
    parser.add_argument("-ver", "--version", action="version", version=__version__)
    FLAGS, _ = parser.parse_known_args()

    if FLAGS.languages:
        print("\n".join(Utils.get_language_table()))
        sys.exit(0)
    if FLAGS.mode == "":
        print("ERROR: --mode was not passed in")
        parser.print_usage()
        sys.exit(21)

    # Flatten the list of lists produced by repeated "-s a b -s c" options.
    FLAGS.subtitle_path = [path for paths in FLAGS.subtitle_path for path in paths]

    if not FLAGS.subtitle_path and FLAGS.mode != "transcribe":
        print("ERROR: --subtitle_path was not passed in")
        parser.print_usage()
        sys.exit(21)
    elif FLAGS.mode == "transcribe":
        # Transcription has no input subtitle; a temporary ".srt" path stands
        # in for it and any user supplied subtitle path is ignored.
        FLAGS.subtitle_path = [_reserve_tmp_path(".srt")]

    if FLAGS.mode in ["single", "dual", "script", "transcribe"]:
        for subtitle_path in FLAGS.subtitle_path:
            if FLAGS.video_path == "":
                print("ERROR: --video_path was not passed in")
                parser.print_usage()
                sys.exit(21)
            if subtitle_path.lower().startswith("http") and FLAGS.output == "":
                print("ERROR: --output was not passed in but required by alignment on a remote subtitle file")
                parser.print_usage()
                sys.exit(21)
            if subtitle_path.lower().startswith("embedded:") and FLAGS.output == "":
                print("ERROR: --output was not passed in but required by alignment on embedded subtitles")
                parser.print_usage()
                sys.exit(21)
            if FLAGS.mode == "script" and FLAGS.output == "":
                print("ERROR: --output was not passed in but required by alignment on plain texts")
                parser.print_usage()
                sys.exit(21)
            if FLAGS.mode == "transcribe":
                if FLAGS.output == "":
                    print("ERROR: --output was not passed in but required by mode 'transcribe'")
                    parser.print_usage()
                    sys.exit(21)
                if FLAGS.main_language is None:
                    print("ERROR: --main_language was not passed in but required by mode 'transcribe'")
                    parser.print_usage()
                    sys.exit(21)
            if FLAGS.translate is not None or FLAGS.mode == "transcribe":
                if "transformers" not in {pkg.key for pkg in pkg_resources.working_set}:
                    print('ERROR: Alignment has been configured to use language models. Please install "subaligner[llm]" and run your command again.')
                    sys.exit(21)
            if FLAGS.stretch_on or FLAGS.mode == "script":
                if "aeneas" not in {pkg.key for pkg in pkg_resources.working_set}:
                    print('ERROR: Alignment has been configured to use extra features. Please install "subaligner[stretch]" and run your command again.')
                    sys.exit(21)

            local_video_path = FLAGS.video_path
            local_subtitle_path = subtitle_path
            exit_segfail = FLAGS.exit_segfail
            stretch = FLAGS.stretch_on
            # The video's main language, when given, takes precedence over
            # the stretch language.
            stretch_in_lang = FLAGS.main_language or FLAGS.stretch_in_language

            from subaligner.logger import Logger
            Logger.VERBOSE = FLAGS.debug
            Logger.QUIET = FLAGS.quiet
            from subaligner.predictor import Predictor
            from subaligner.exception import UnsupportedFormatException, TranscriptionException
            from subaligner.exception import TerminalException

            try:
                if FLAGS.video_path.lower().startswith("http"):
                    _, video_file_extension = os.path.splitext(FLAGS.video_path.lower())
                    local_video_path = _reserve_tmp_path(video_file_extension)
                    Utils.download_file(FLAGS.video_path, local_video_path)

                if subtitle_path.lower().startswith("http"):
                    _, subtitle_file_extension = os.path.splitext(subtitle_path.lower())
                    local_subtitle_path = _reserve_tmp_path(subtitle_file_extension)
                    Utils.download_file(subtitle_path, local_subtitle_path)

                if subtitle_path.lower().startswith("embedded:"):
                    # The extracted subtitle takes the extension of --output.
                    _, subtitle_file_extension = os.path.splitext(FLAGS.output)
                    local_subtitle_path = _reserve_tmp_path(subtitle_file_extension)
                    params = subtitle_path.lower().split(":")[1].split(",")
                    if params and "=" in params[0]:
                        params = {param.split("=")[0]: param.split("=")[1] for param in params}
                        if "page_num" in params:
                            Utils.extract_teletext_as_subtitle(local_video_path, int(params["page_num"]),
                                                               local_subtitle_path)
                        elif "stream_index" in params:
                            Utils.extract_matroska_subtitle(local_video_path, int(params["stream_index"]),
                                                            local_subtitle_path)
                    else:
                        print("ERROR: Embedded subtitle selector cannot be empty")
                        parser.print_usage()
                        # SystemExit bypasses the handler below, so clean up here.
                        _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
                        sys.exit(21)

                voice_probabilities = None
                predictor = Predictor()
                weights_dir = os.path.join(FLAGS.training_output_directory, "models", "training", "weights")
                if FLAGS.mode == "single":
                    aligned_subs, _, voice_probabilities, frame_rate = predictor.predict_single_pass(
                        video_file_path=local_video_path,
                        subtitle_file_path=local_subtitle_path,
                        weights_dir=weights_dir,
                    )
                elif FLAGS.mode == "dual":
                    aligned_subs, _, voice_probabilities, frame_rate = predictor.predict_dual_pass(
                        video_file_path=local_video_path,
                        subtitle_file_path=local_subtitle_path,
                        weights_dir=weights_dir,
                        stretch=stretch,
                        stretch_in_lang=stretch_in_lang,
                        exit_segfail=exit_segfail,
                    )
                elif FLAGS.mode == "script":
                    aligned_subs, _, voice_probabilities, frame_rate = predictor.predict_plain_text(
                        video_file_path=local_video_path,
                        subtitle_file_path=local_subtitle_path,
                        stretch_in_lang=stretch_in_lang,
                    )
                elif FLAGS.mode == "transcribe":
                    from subaligner.transcriber import Transcriber
                    transcriber = Transcriber(recipe=FLAGS.transcription_recipe, flavour=FLAGS.transcription_flavour)
                    subtitle, frame_rate = transcriber.transcribe(local_video_path, stretch_in_lang)
                    aligned_subs = subtitle.subs
                else:
                    print("ERROR: Unknown mode {}".format(FLAGS.mode))
                    parser.print_usage()
                    _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
                    sys.exit(21)

                # Without --output, "<name>.<ext>" becomes "<name>_aligned.<ext>"
                # and ".stl" is replaced by ".srt".
                if FLAGS.output == "":
                    aligned_subtitle_path = "_aligned.".join(subtitle_path.rsplit(".", 1)).replace(".stl", ".srt")
                else:
                    aligned_subtitle_path = FLAGS.output

                if FLAGS.translate is not None:
                    from subaligner.translator import Translator
                    source, target = FLAGS.translate.split(",")
                    translator = Translator(src_language=source, tgt_language=target, recipe=FLAGS.translation_recipe, flavour=FLAGS.translation_flavour)
                    aligned_subs = translator.translate(aligned_subs, local_video_path, (source, target))
                    Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
                                                        frame_rate, "utf-8")
                elif FLAGS.mode == "transcribe":
                    Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
                                                        frame_rate, "utf-8")
                else:
                    Subtitle.save_subs_as_target_format(aligned_subs, local_subtitle_path, aligned_subtitle_path,
                                                        frame_rate)

                if voice_probabilities is not None:
                    log_loss = predictor.get_log_loss(voice_probabilities, aligned_subs)
                    if log_loss is None or log_loss > FLAGS.max_logloss:
                        print(
                            "ERROR: Alignment failed with a too high loss value: {}".format(log_loss)
                        )
                        _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
                        sys.exit(22)

                print("Aligned subtitle saved to: {}".format(aligned_subtitle_path))
            except Exception as e:
                # Top-level boundary: report, clean up, then map the failure
                # type to its documented exit code.
                print(
                    "ERROR: {}\n{}".format(str(e), "".join(traceback.format_stack()) if FLAGS.debug else "")
                )
                traceback.print_tb(e.__traceback__)
                _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
                if isinstance(e, (UnsupportedFormatException, TranscriptionException)):
                    sys.exit(23)
                if isinstance(e, TerminalException):
                    sys.exit(24)
                sys.exit(1)
            else:
                _remove_tmp_files(FLAGS.video_path, subtitle_path, local_video_path, local_subtitle_path, FLAGS.mode)
        sys.exit(0)
    elif FLAGS.mode == "shift":
        if FLAGS.offset_seconds is None:
            print("ERROR: --offset_seconds was not passed in during subtitle shifting")
            sys.exit(21)

        for subtitle_path in FLAGS.subtitle_path:
            shifted_subtitle_file_path = Subtitle.shift_subtitle(subtitle_file_path=subtitle_path,
                                                                 seconds=FLAGS.offset_seconds,
                                                                 shifted_subtitle_file_path=FLAGS.output or None)
            print("Shifted subtitle saved to: {}".format(shifted_subtitle_file_path))
        sys.exit(0)


def _reserve_tmp_path(extension=""):
    """Return a unique, not yet existing temporary path ending in *extension*.

    ``tempfile.mkstemp`` hands back an open file descriptor together with the
    created file; both are released here so that neither a descriptor nor an
    extension-less placeholder file is left behind.
    """
    descriptor, placeholder_path = tempfile.mkstemp()
    os.close(descriptor)
    os.remove(placeholder_path)
    return "{}{}".format(placeholder_path, extension)
396
+
397
+
398
+ def _remove_tmp_files(video_path, subtitle_path, local_video_path, local_subtitle_path, mode):
399
+ if video_path.lower().startswith("http") and os.path.exists(local_video_path):
400
+ os.remove(local_video_path)
401
+ if subtitle_path.lower().startswith("http") and os.path.exists(local_subtitle_path):
402
+ os.remove(local_subtitle_path)
403
+ if mode == "transcribe" and os.path.exists(local_subtitle_path):
404
+ os.remove(local_subtitle_path)
405
+
406
+
407
+ if __name__ == "__main__":
408
+ main()
subaligner/_version.py ADDED
@@ -0,0 +1,2 @@
1
+ """The semver for the current release."""
2
+ __version__ = "0.3.4"