videopython 0.28.3__tar.gz → 0.29.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.28.3 → videopython-0.29.1}/PKG-INFO +7 -4
- {videopython-0.28.3 → videopython-0.29.1}/README.md +4 -2
- {videopython-0.28.3 → videopython-0.29.1}/pyproject.toml +13 -4
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/__init__.py +3 -5
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/cache.py +17 -1
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/dubber.py +8 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/pipeline.py +5 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/transforms.py +2 -478
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/understanding/__init__.py +3 -3
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/understanding/audio.py +97 -11
- videopython-0.29.1/src/videopython/ai/understanding/faces.py +592 -0
- videopython-0.29.1/src/videopython/ai/understanding/image.py +397 -0
- videopython-0.29.1/src/videopython/ai/understanding/temporal.py +218 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/video_analysis.py +217 -37
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/__init__.py +4 -2
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/description.py +82 -52
- videopython-0.28.3/src/videopython/ai/understanding/image.py +0 -215
- videopython-0.28.3/src/videopython/ai/understanding/temporal.py +0 -464
- {videopython-0.28.3 → videopython-0.29.1}/.gitignore +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/LICENSE +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/__init__.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/_device.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/models.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/quality.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/remux.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/generation/qwen3.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/generation/translation.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/registry.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/swapping/__init__.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/swapping/inpainter.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/swapping/models.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/swapping/segmenter.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/swapping/swapper.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/audio/__init__.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/audio/analysis.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/audio/audio.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/combine.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/effects.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/exceptions.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/progress.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/registry.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/scene.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/streaming.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/text/__init__.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/text/overlay.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/text/transcription.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/transforms.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/transitions.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/utils.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/base/video.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/editing/__init__.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/editing/multicam.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/editing/premiere_xml.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/editing/video_edit.py +0 -0
- {videopython-0.28.3 → videopython-0.29.1}/src/videopython/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videopython
|
|
3
|
-
Version: 0.28.3
|
|
3
|
+
Version: 0.29.1
|
|
4
4
|
Summary: Minimal video generation and processing library.
|
|
5
5
|
Project-URL: Homepage, https://videopython.com
|
|
6
6
|
Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
|
|
@@ -27,14 +27,15 @@ Requires-Dist: accelerate>=0.29.2; extra == 'ai'
|
|
|
27
27
|
Requires-Dist: chatterbox-tts>=0.1.7; extra == 'ai'
|
|
28
28
|
Requires-Dist: demucs>=4.0.0; extra == 'ai'
|
|
29
29
|
Requires-Dist: diffusers>=0.30.0; extra == 'ai'
|
|
30
|
-
Requires-Dist: easyocr>=1.7.0; extra == 'ai'
|
|
31
30
|
Requires-Dist: hf-transfer>=0.1.9; extra == 'ai'
|
|
31
|
+
Requires-Dist: imagehash>=4.3; extra == 'ai'
|
|
32
32
|
Requires-Dist: llama-cpp-python>=0.3.0; extra == 'ai'
|
|
33
33
|
Requires-Dist: numba>=0.61.0; extra == 'ai'
|
|
34
34
|
Requires-Dist: ollama>=0.4.5; extra == 'ai'
|
|
35
35
|
Requires-Dist: openai-whisper>=20240930; extra == 'ai'
|
|
36
36
|
Requires-Dist: pyannote-audio>=4.0.0; extra == 'ai'
|
|
37
37
|
Requires-Dist: pyloudnorm>=0.1.1; extra == 'ai'
|
|
38
|
+
Requires-Dist: qwen-vl-utils>=0.0.10; extra == 'ai'
|
|
38
39
|
Requires-Dist: scikit-learn>=1.3.0; extra == 'ai'
|
|
39
40
|
Requires-Dist: scipy>=1.10.0; extra == 'ai'
|
|
40
41
|
Requires-Dist: sentencepiece>=0.1.99; extra == 'ai'
|
|
@@ -56,6 +57,8 @@ Minimal, LLM-friendly Python library for programmatic video editing, processing,
|
|
|
56
57
|
|
|
57
58
|
Full documentation: [videopython.com](https://videopython.com)
|
|
58
59
|
|
|
60
|
+
> **Disclaimer:** This project started as a hand-written hobby project, but most of the code is now produced by LLM agents. Humans still drive direction, approve changes, and own design decisions.
|
|
61
|
+
|
|
59
62
|
## Installation
|
|
60
63
|
|
|
61
64
|
### 1. Install FFmpeg
|
|
@@ -193,10 +196,10 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
|
|
|
193
196
|
| Area | Highlights |
|
|
194
197
|
|---|---|
|
|
195
198
|
| **Generation** | `TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic` |
|
|
196
|
-
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (visual scene description), `
|
|
199
|
+
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
|
|
197
200
|
| **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
|
|
198
201
|
| **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
|
|
199
|
-
| **Transforms** | `
|
|
202
|
+
| **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
|
|
200
203
|
| **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
|
|
201
204
|
| **Object swapping** | `ObjectSwapper` - detect, segment, and inpaint objects in video |
|
|
202
205
|
|
|
@@ -8,6 +8,8 @@ Minimal, LLM-friendly Python library for programmatic video editing, processing,
|
|
|
8
8
|
|
|
9
9
|
Full documentation: [videopython.com](https://videopython.com)
|
|
10
10
|
|
|
11
|
+
> **Disclaimer:** This project started as a hand-written hobby project, but most of the code is now produced by LLM agents. Humans still drive direction, approve changes, and own design decisions.
|
|
12
|
+
|
|
11
13
|
## Installation
|
|
12
14
|
|
|
13
15
|
### 1. Install FFmpeg
|
|
@@ -145,10 +147,10 @@ API docs: [Core](https://videopython.com/api/index/) | [Video](https://videopyth
|
|
|
145
147
|
| Area | Highlights |
|
|
146
148
|
|---|---|
|
|
147
149
|
| **Generation** | `TextToVideo`, `ImageToVideo`, `TextToImage`, `TextToSpeech`, `TextToMusic` |
|
|
148
|
-
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (visual scene description), `
|
|
150
|
+
| **Understanding** | `AudioToText` (transcription), `AudioClassifier`, `SceneVLM` (structured visual scene description), `FaceTracker` (per-shot face tracks) |
|
|
149
151
|
| **Scene detection** | `SemanticSceneDetector` (neural scene boundaries) |
|
|
150
152
|
| **Video analysis** | `VideoAnalyzer` - full-pipeline analysis combining multiple AI capabilities |
|
|
151
|
-
| **Transforms** | `
|
|
153
|
+
| **Transforms** | `FaceTrackingCrop`, `SplitScreenComposite` |
|
|
152
154
|
| **Dubbing** | `VideoDubber` - voice cloning and revoicing with timing sync |
|
|
153
155
|
| **Object swapping** | `ObjectSwapper` - detect, segment, and inpaint objects in video |
|
|
154
156
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "videopython"
|
|
3
|
-
version = "0.28.3"
|
|
3
|
+
version = "0.29.1"
|
|
4
4
|
description = "Minimal video generation and processing library."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
|
|
@@ -70,7 +70,6 @@ ai = [
|
|
|
70
70
|
"scikit-learn>=1.3.0",
|
|
71
71
|
# Detection backends
|
|
72
72
|
"ultralytics>=8.0.0",
|
|
73
|
-
"easyocr>=1.7.0",
|
|
74
73
|
# Audio classification (AST via transformers - no separate dep needed)
|
|
75
74
|
# Scene detection
|
|
76
75
|
"transnetv2-pytorch>=1.0.5",
|
|
@@ -84,6 +83,11 @@ ai = [
|
|
|
84
83
|
"llama-cpp-python>=0.3.0",
|
|
85
84
|
# Loudness measurement (BS.1770) for dub-vs-source loudness matching (M3)
|
|
86
85
|
"pyloudnorm>=0.1.1",
|
|
86
|
+
# Vision-language preprocessing for Qwen3.5 (M5) - documented prerequisite
|
|
87
|
+
# for AutoModelForImageTextToText with image/video chat templates.
|
|
88
|
+
"qwen-vl-utils>=0.0.10",
|
|
89
|
+
# Perceptual hashing for SceneVLM frame dedup (M5)
|
|
90
|
+
"imagehash>=4.3",
|
|
87
91
|
]
|
|
88
92
|
|
|
89
93
|
# Required for pip install videopython[ai] - pip uses optional-dependencies, not dependency-groups
|
|
@@ -105,7 +109,6 @@ ai = [
|
|
|
105
109
|
"scikit-learn>=1.3.0",
|
|
106
110
|
# Detection backends
|
|
107
111
|
"ultralytics>=8.0.0",
|
|
108
|
-
"easyocr>=1.7.0",
|
|
109
112
|
# Audio classification (AST via transformers - no separate dep needed)
|
|
110
113
|
# Scene detection
|
|
111
114
|
"transnetv2-pytorch>=1.0.5",
|
|
@@ -119,6 +122,11 @@ ai = [
|
|
|
119
122
|
"llama-cpp-python>=0.3.0",
|
|
120
123
|
# Loudness measurement (BS.1770) for dub-vs-source loudness matching (M3)
|
|
121
124
|
"pyloudnorm>=0.1.1",
|
|
125
|
+
# Vision-language preprocessing for Qwen3.5 (M5) - documented prerequisite
|
|
126
|
+
# for AutoModelForImageTextToText with image/video chat templates.
|
|
127
|
+
"qwen-vl-utils>=0.0.10",
|
|
128
|
+
# Perceptual hashing for SceneVLM frame dedup (M5)
|
|
129
|
+
"imagehash>=4.3",
|
|
122
130
|
]
|
|
123
131
|
|
|
124
132
|
[project.urls]
|
|
@@ -135,7 +143,6 @@ module = [
|
|
|
135
143
|
"diffusers", "diffusers.*",
|
|
136
144
|
"ollama", "ollama.*",
|
|
137
145
|
"ultralytics", "ultralytics.*",
|
|
138
|
-
"easyocr", "easyocr.*",
|
|
139
146
|
"transformers", "transformers.*",
|
|
140
147
|
"transnetv2_pytorch", "transnetv2_pytorch.*",
|
|
141
148
|
"chatterbox", "chatterbox.*",
|
|
@@ -146,6 +153,8 @@ module = [
|
|
|
146
153
|
"cv2", "cv2.*",
|
|
147
154
|
"llama_cpp", "llama_cpp.*",
|
|
148
155
|
"pyloudnorm", "pyloudnorm.*",
|
|
156
|
+
"qwen_vl_utils", "qwen_vl_utils.*",
|
|
157
|
+
"imagehash", "imagehash.*",
|
|
149
158
|
]
|
|
150
159
|
ignore_missing_imports = true
|
|
151
160
|
|
|
@@ -2,11 +2,11 @@ from videopython.ai import registry as _ai_registry # noqa: F401
|
|
|
2
2
|
|
|
3
3
|
from .generation import ImageToVideo, TextToImage, TextToMusic, TextToSpeech, TextToVideo
|
|
4
4
|
from .swapping import ObjectSwapper
|
|
5
|
-
from .transforms import FaceTracker, FaceTrackingCrop, SplitScreenComposite
|
|
5
|
+
from .transforms import FaceTrackingCrop, SplitScreenComposite
|
|
6
6
|
from .understanding import (
|
|
7
|
-
ActionRecognizer,
|
|
8
7
|
AudioClassifier,
|
|
9
8
|
AudioToText,
|
|
9
|
+
FaceTracker,
|
|
10
10
|
SceneVLM,
|
|
11
11
|
SemanticSceneDetector,
|
|
12
12
|
)
|
|
@@ -22,12 +22,10 @@ __all__ = [
|
|
|
22
22
|
# Understanding
|
|
23
23
|
"AudioToText",
|
|
24
24
|
"AudioClassifier",
|
|
25
|
+
"FaceTracker",
|
|
25
26
|
"SceneVLM",
|
|
26
|
-
# Temporal
|
|
27
|
-
"ActionRecognizer",
|
|
28
27
|
"SemanticSceneDetector",
|
|
29
28
|
# Transforms (AI-powered)
|
|
30
|
-
"FaceTracker",
|
|
31
29
|
"FaceTrackingCrop",
|
|
32
30
|
"SplitScreenComposite",
|
|
33
31
|
# Swapping
|
|
@@ -27,6 +27,8 @@ from dataclasses import dataclass
|
|
|
27
27
|
from pathlib import Path
|
|
28
28
|
from typing import TYPE_CHECKING, Any
|
|
29
29
|
|
|
30
|
+
from videopython.ai.understanding.audio import _normalize_vocabulary
|
|
31
|
+
|
|
30
32
|
if TYPE_CHECKING:
|
|
31
33
|
from videopython.base.audio import Audio
|
|
32
34
|
from videopython.base.text.transcription import Transcription
|
|
@@ -37,7 +39,12 @@ logger = logging.getLogger(__name__)
|
|
|
37
39
|
# Cache schema version. Bump on incompatible changes to any artifact's
|
|
38
40
|
# on-disk format (e.g. TranscriptionSegment field changes that break
|
|
39
41
|
# from_dict). Mismatched cache entries are treated as a miss.
|
|
40
|
-
|
|
42
|
+
#
|
|
43
|
+
# v2 (0.29.1): vocabulary added to transcription_kwargs_hash for M1
|
|
44
|
+
# vocabulary biasing. Pre-v2 transcription artifacts miss on first hit
|
|
45
|
+
# and re-run; translation/TTS artifacts are unaffected (hashed
|
|
46
|
+
# independently and survive).
|
|
47
|
+
SCHEMA_VERSION = 2
|
|
41
48
|
|
|
42
49
|
# Reserved for M4.3 per-speaker voice library. M3.2 does not write here;
|
|
43
50
|
# documented so future code knows the path is taken.
|
|
@@ -126,13 +133,22 @@ class DubCache:
|
|
|
126
133
|
condition_on_previous_text: bool,
|
|
127
134
|
no_speech_threshold: float,
|
|
128
135
|
logprob_threshold: float | None,
|
|
136
|
+
vocabulary: list[str] | None = None,
|
|
129
137
|
) -> str:
|
|
138
|
+
"""Hash captures the kwargs that affect Whisper's output.
|
|
139
|
+
|
|
140
|
+
``vocabulary`` is normalized (case-insensitive dedup, casing
|
|
141
|
+
preserved) before hashing so trivial reordering/casing
|
|
142
|
+
differences don't thrash the cache. Defaults to ``None`` so
|
|
143
|
+
pre-M1 callers keep hashing the same value as before.
|
|
144
|
+
"""
|
|
130
145
|
return _stable_hash(
|
|
131
146
|
whisper_model,
|
|
132
147
|
enable_diarization,
|
|
133
148
|
condition_on_previous_text,
|
|
134
149
|
no_speech_threshold,
|
|
135
150
|
logprob_threshold,
|
|
151
|
+
*_normalize_vocabulary(vocabulary),
|
|
136
152
|
)
|
|
137
153
|
|
|
138
154
|
@staticmethod
|
|
@@ -37,6 +37,11 @@ class VideoDubber:
|
|
|
37
37
|
gate; raise to drop more low-confidence windows.
|
|
38
38
|
logprob_threshold: Forwarded to ``AudioToText``. Whisper's average
|
|
39
39
|
log-probability gate.
|
|
40
|
+
vocabulary: Forwarded to ``AudioToText``. Optional list of brand
|
|
41
|
+
names, product names, or proper nouns to bias Whisper's first-
|
|
42
|
+
window decoder via ``initial_prompt``. Recovers near-mishears
|
|
43
|
+
(e.g. Klarna → "carna") on brand-monitoring inputs without new
|
|
44
|
+
model deps.
|
|
40
45
|
strict_quality: When True, the pipeline raises
|
|
41
46
|
:class:`GarbageTranscriptError` before Demucs/translation/TTS run
|
|
42
47
|
if the transcript-quality heuristic returns ``"reject"``. When
|
|
@@ -67,6 +72,7 @@ class VideoDubber:
|
|
|
67
72
|
condition_on_previous_text: bool = False,
|
|
68
73
|
no_speech_threshold: float = 0.6,
|
|
69
74
|
logprob_threshold: float | None = -1.0,
|
|
75
|
+
vocabulary: list[str] | None = None,
|
|
70
76
|
strict_quality: bool = False,
|
|
71
77
|
translator: TranslatorChoice = "auto",
|
|
72
78
|
cache_dir: str | Path | None = None,
|
|
@@ -77,6 +83,7 @@ class VideoDubber:
|
|
|
77
83
|
self.condition_on_previous_text = condition_on_previous_text
|
|
78
84
|
self.no_speech_threshold = no_speech_threshold
|
|
79
85
|
self.logprob_threshold = logprob_threshold
|
|
86
|
+
self.vocabulary = vocabulary
|
|
80
87
|
self.strict_quality = strict_quality
|
|
81
88
|
self.translator = translator
|
|
82
89
|
self.cache_dir = cache_dir
|
|
@@ -101,6 +108,7 @@ class VideoDubber:
|
|
|
101
108
|
condition_on_previous_text=self.condition_on_previous_text,
|
|
102
109
|
no_speech_threshold=self.no_speech_threshold,
|
|
103
110
|
logprob_threshold=self.logprob_threshold,
|
|
111
|
+
vocabulary=self.vocabulary,
|
|
104
112
|
strict_quality=self.strict_quality,
|
|
105
113
|
translator=self.translator,
|
|
106
114
|
cache_dir=self.cache_dir,
|
|
@@ -170,6 +170,7 @@ class LocalDubbingPipeline:
|
|
|
170
170
|
condition_on_previous_text: bool = False,
|
|
171
171
|
no_speech_threshold: float = 0.6,
|
|
172
172
|
logprob_threshold: float | None = -1.0,
|
|
173
|
+
vocabulary: list[str] | None = None,
|
|
173
174
|
strict_quality: bool = False,
|
|
174
175
|
translator: TranslatorChoice = "auto",
|
|
175
176
|
cache_dir: str | Path | None = None,
|
|
@@ -180,6 +181,7 @@ class LocalDubbingPipeline:
|
|
|
180
181
|
self.condition_on_previous_text = condition_on_previous_text
|
|
181
182
|
self.no_speech_threshold = no_speech_threshold
|
|
182
183
|
self.logprob_threshold = logprob_threshold
|
|
184
|
+
self.vocabulary = vocabulary
|
|
183
185
|
self.strict_quality = strict_quality
|
|
184
186
|
self.translator = translator
|
|
185
187
|
self.cache_dir = Path(cache_dir) if cache_dir is not None else None
|
|
@@ -256,6 +258,7 @@ class LocalDubbingPipeline:
|
|
|
256
258
|
"condition_on_previous_text": self.condition_on_previous_text,
|
|
257
259
|
"no_speech_threshold": self.no_speech_threshold,
|
|
258
260
|
"logprob_threshold": self.logprob_threshold,
|
|
261
|
+
"vocabulary": self.vocabulary,
|
|
259
262
|
},
|
|
260
263
|
)
|
|
261
264
|
return transcription
|
|
@@ -406,6 +409,7 @@ class LocalDubbingPipeline:
|
|
|
406
409
|
condition_on_previous_text=self.condition_on_previous_text,
|
|
407
410
|
no_speech_threshold=self.no_speech_threshold,
|
|
408
411
|
logprob_threshold=self.logprob_threshold,
|
|
412
|
+
vocabulary=self.vocabulary,
|
|
409
413
|
)
|
|
410
414
|
return src_hash, kwargs_hash
|
|
411
415
|
|
|
@@ -420,6 +424,7 @@ class LocalDubbingPipeline:
|
|
|
420
424
|
condition_on_previous_text=self.condition_on_previous_text,
|
|
421
425
|
no_speech_threshold=self.no_speech_threshold,
|
|
422
426
|
logprob_threshold=self.logprob_threshold,
|
|
427
|
+
vocabulary=self.vocabulary,
|
|
423
428
|
)
|
|
424
429
|
|
|
425
430
|
def _init_translator(self, source_lang: str, target_lang: str) -> None:
|