videopython 0.26.1__tar.gz → 0.26.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {videopython-0.26.1 → videopython-0.26.3}/PKG-INFO +1 -1
  2. {videopython-0.26.1 → videopython-0.26.3}/pyproject.toml +1 -1
  3. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/_device.py +27 -0
  4. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/dubbing/dubber.py +97 -7
  5. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/dubbing/pipeline.py +52 -13
  6. videopython-0.26.3/src/videopython/ai/dubbing/remux.py +73 -0
  7. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/generation/audio.py +11 -1
  8. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/generation/translation.py +11 -1
  9. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/understanding/audio.py +10 -1
  10. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/understanding/separation.py +9 -1
  11. {videopython-0.26.1 → videopython-0.26.3}/.gitignore +0 -0
  12. {videopython-0.26.1 → videopython-0.26.3}/LICENSE +0 -0
  13. {videopython-0.26.1 → videopython-0.26.3}/README.md +0 -0
  14. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/__init__.py +0 -0
  15. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/__init__.py +0 -0
  16. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/dubbing/__init__.py +0 -0
  17. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/dubbing/models.py +0 -0
  18. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/dubbing/timing.py +0 -0
  19. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/generation/__init__.py +0 -0
  20. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/generation/image.py +0 -0
  21. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/generation/video.py +0 -0
  22. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/registry.py +0 -0
  23. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/swapping/__init__.py +0 -0
  24. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/swapping/inpainter.py +0 -0
  25. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/swapping/models.py +0 -0
  26. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/swapping/segmenter.py +0 -0
  27. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/swapping/swapper.py +0 -0
  28. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/transforms.py +0 -0
  29. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/understanding/__init__.py +0 -0
  30. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/understanding/image.py +0 -0
  31. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/understanding/temporal.py +0 -0
  32. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/ai/video_analysis.py +0 -0
  33. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/__init__.py +0 -0
  34. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/audio/__init__.py +0 -0
  35. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/audio/analysis.py +0 -0
  36. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/audio/audio.py +0 -0
  37. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/combine.py +0 -0
  38. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/description.py +0 -0
  39. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/effects.py +0 -0
  40. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/exceptions.py +0 -0
  41. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/progress.py +0 -0
  42. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/registry.py +0 -0
  43. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/scene.py +0 -0
  44. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/streaming.py +0 -0
  45. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/text/__init__.py +0 -0
  46. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/text/overlay.py +0 -0
  47. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/text/transcription.py +0 -0
  48. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/transforms.py +0 -0
  49. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/transitions.py +0 -0
  50. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/utils.py +0 -0
  51. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/base/video.py +0 -0
  52. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/editing/__init__.py +0 -0
  53. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/editing/multicam.py +0 -0
  54. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/editing/premiere_xml.py +0 -0
  55. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/editing/video_edit.py +0 -0
  56. {videopython-0.26.1 → videopython-0.26.3}/src/videopython/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videopython
3
- Version: 0.26.1
3
+ Version: 0.26.3
4
4
  Summary: Minimal video generation and processing library.
5
5
  Project-URL: Homepage, https://videopython.com
6
6
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.26.1"
3
+ version = "0.26.3"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -25,6 +25,33 @@ def log_device_initialization(
25
25
  )
26
26
 
27
27
 
28
def release_device_memory(device: str | None) -> None:
    """Release cached allocator memory for the given device.

    Runs a garbage-collection pass and then asks the matching torch backend
    to empty its allocator cache. The device string is matched
    case-insensitively and any index suffix is ignored, so ``"cuda:0"`` and
    ``"CUDA"`` are treated like ``"cuda"``.

    Safe to call when torch is not importable or the device is CPU/None.

    Args:
        device: Device string such as ``"cpu"``, ``"cuda"``, ``"cuda:1"`` or
            ``"mps"``. ``None`` and unrecognised values only trigger the
            garbage-collection pass.
    """
    try:
        import torch
    except ImportError:
        return

    import gc

    # Collect unreachable Python objects first so anything they still hold
    # is dropped before the backend cache is emptied.
    gc.collect()

    # Normalise "CUDA", "cuda:0", " mps " ... down to the bare backend name.
    backend = device.strip().lower().partition(":")[0] if isinstance(device, str) else ""

    if backend == "cuda":
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return

    if backend == "mps":
        # Both torch.backends.mps and torch.mps are looked up defensively
        # because they may be missing from the installed torch build.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            mps_mod = getattr(torch, "mps", None)
            empty_cache = getattr(mps_mod, "empty_cache", None) if mps_mod is not None else None
            if callable(empty_cache):
                empty_cache()
53
+
54
+
28
55
  def select_device(
29
56
  device: str | None,
30
57
  *,
@@ -3,6 +3,8 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
+ import tempfile
7
+ from pathlib import Path
6
8
  from typing import TYPE_CHECKING, Any, Callable
7
9
 
8
10
  from videopython.ai.dubbing.models import DubbingResult, RevoiceResult
@@ -14,18 +16,28 @@ logger = logging.getLogger(__name__)
14
16
 
15
17
 
16
18
  class VideoDubber:
17
- """Dubs videos into different languages using the local pipeline."""
18
-
19
- def __init__(self, device: str | None = None):
19
+ """Dubs videos into different languages using the local pipeline.
20
+
21
+ Args:
22
+ device: Execution device (``cpu``, ``cuda``, ``mps``, or ``None`` for auto).
23
+ low_memory: When True, each pipeline stage (Whisper, Demucs, MarianMT,
24
+ Chatterbox TTS) is unloaded from memory after it runs, so only one
25
+ model is resident at a time. Trades per-run latency (~10-30s of
26
+ extra model loads) for a much lower memory ceiling. Recommended for
27
+ GPUs with <=12GB VRAM or hosts with <32GB RAM. Default False.
28
+ """
29
+
30
+ def __init__(self, device: str | None = None, low_memory: bool = False):
20
31
  self.device = device
32
+ self.low_memory = low_memory
21
33
  self._local_pipeline: Any = None
22
34
  requested = device.lower() if isinstance(device, str) else "auto"
23
- logger.info("VideoDubber initialized with device=%s", requested)
35
+ logger.info("VideoDubber initialized with device=%s low_memory=%s", requested, low_memory)
24
36
 
25
37
  def _init_local_pipeline(self) -> None:
26
38
  from videopython.ai.dubbing.pipeline import LocalDubbingPipeline
27
39
 
28
- self._local_pipeline = LocalDubbingPipeline(device=self.device)
40
+ self._local_pipeline = LocalDubbingPipeline(device=self.device, low_memory=self.low_memory)
29
41
 
30
42
  def dub(
31
43
  self,
@@ -50,7 +62,7 @@ class VideoDubber:
50
62
  self._init_local_pipeline()
51
63
 
52
64
  return self._local_pipeline.process(
53
- video=video,
65
+ source_audio=video.audio,
54
66
  target_lang=target_lang,
55
67
  source_lang=source_lang,
56
68
  preserve_background=preserve_background,
@@ -89,6 +101,84 @@ class VideoDubber:
89
101
  )
90
102
  return video.add_audio(result.dubbed_audio, overlay=False)
91
103
 
104
def dub_file(
    self,
    input_path: str | Path,
    output_path: str | Path,
    target_lang: str,
    source_lang: str | None = None,
    preserve_background: bool = True,
    voice_clone: bool = True,
    enable_diarization: bool = False,
    progress_callback: Callable[[str, float], None] | None = None,
    transcription: Any = None,
) -> DubbingResult:
    """Dub a video file on disk without loading video frames into memory.

    Loads only the audio track of ``input_path``, runs the dubbing pipeline
    on that audio, then writes a new file at ``output_path`` that combines
    the source video stream with the dubbed audio via
    ``replace_audio_stream`` (ffmpeg stream-copy, no video re-encode). The
    source file itself is never modified. Peak memory is bounded by model
    weights and the audio track, independent of video length and resolution.

    Use this instead of ``dub_and_replace`` when the source video is long
    or high-resolution and you don't need frame-level access in Python.

    Args:
        input_path: Path to the source video file.
        output_path: Path to write the dubbed video. Overwritten if it exists.
            Must not refer to the same file as ``input_path``.
        target_lang: Target language code (e.g. ``"es"``, ``"fr"``).
        source_lang: Source language code, or ``None`` to auto-detect.
        preserve_background: Preserve background music/effects via source separation.
        voice_clone: Clone the source speaker's voice for the dubbed track.
        enable_diarization: Enable speaker diarization for per-speaker voice cloning.
        progress_callback: Optional callback ``(stage: str, progress: float) -> None``.
        transcription: Optional pre-computed ``Transcription`` to skip the Whisper step.

    Returns:
        ``DubbingResult`` with the dubbed audio, translated segments, and
        source transcription. The output video is written to ``output_path``.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        ValueError: If ``output_path`` resolves to the same file as
            ``input_path``.
    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    # Validate cheaply before importing heavy modules or running any model.
    if not input_path.exists():
        raise FileNotFoundError(f"Input video not found: {input_path}")
    # ffmpeg cannot read from and write to the same file; catching this here
    # avoids running the whole pipeline only to fail (or clobber the source
    # through a symlink / alternate spelling) at the final mux step.
    if output_path.resolve() == input_path.resolve():
        raise ValueError(f"output_path must differ from input_path: {input_path}")

    from videopython.ai.dubbing.remux import replace_audio_stream
    from videopython.base.audio import Audio

    logger.info("dub_file: loading audio from %s", input_path)
    source_audio = Audio.from_path(input_path)

    if self._local_pipeline is None:
        self._init_local_pipeline()

    result = self._local_pipeline.process(
        source_audio=source_audio,
        target_lang=target_lang,
        source_lang=source_lang,
        preserve_background=preserve_background,
        voice_clone=voice_clone,
        enable_diarization=enable_diarization,
        progress_callback=progress_callback,
        transcription=transcription,
    )

    # Reserve a temp path and close the handle right away; the file is
    # written by ``save`` and read by ffmpeg, then removed in ``finally``.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        dubbed_audio_path = Path(tmp.name)
    try:
        result.dubbed_audio.save(dubbed_audio_path)
        replace_audio_stream(
            video_path=input_path,
            audio_path=dubbed_audio_path,
            output_path=output_path,
        )
    finally:
        dubbed_audio_path.unlink(missing_ok=True)

    return result
181
+
92
182
  def revoice(
93
183
  self,
94
184
  video: Video,
@@ -101,7 +191,7 @@ class VideoDubber:
101
191
  self._init_local_pipeline()
102
192
 
103
193
  return self._local_pipeline.revoice(
104
- video=video,
194
+ source_audio=video.audio,
105
195
  text=text,
106
196
  preserve_background=preserve_background,
107
197
  progress_callback=progress_callback,
@@ -9,18 +9,29 @@ from videopython.ai.dubbing.models import DubbingResult, RevoiceResult, Separate
9
9
  from videopython.ai.dubbing.timing import TimingSynchronizer
10
10
 
11
11
  if TYPE_CHECKING:
12
- from videopython.base.video import Video
12
+ from videopython.base.audio import Audio
13
13
 
14
14
  logger = logging.getLogger(__name__)
15
15
 
16
16
 
17
17
  class LocalDubbingPipeline:
18
- """Local pipeline for video dubbing."""
18
+ """Local pipeline for video dubbing.
19
19
 
20
- def __init__(self, device: str | None = None):
20
+ When ``low_memory=True``, each stage's model is unloaded after it runs, so
21
+ only one model is resident at a time. This trades per-run latency (models
22
+ re-load from disk between stages) for peak memory. Recommended for GPUs
23
+ with <=12GB VRAM or hosts with <32GB RAM.
24
+ """
25
+
26
+ def __init__(self, device: str | None = None, low_memory: bool = False):
21
27
  self.device = device
28
+ self.low_memory = low_memory
22
29
  requested = device.lower() if isinstance(device, str) else "auto"
23
- logger.info("LocalDubbingPipeline initialized with device=%s", requested)
30
+ logger.info(
31
+ "LocalDubbingPipeline initialized with device=%s low_memory=%s",
32
+ requested,
33
+ low_memory,
34
+ )
24
35
 
25
36
  self._transcriber: Any = None
26
37
  self._transcriber_diarization: bool | None = None
@@ -31,6 +42,23 @@ class LocalDubbingPipeline:
31
42
  self._separator: Any = None
32
43
  self._synchronizer: TimingSynchronizer | None = None
33
44
 
45
def _maybe_unload(self, component_name: str) -> None:
    """Free one pipeline stage's model, but only in low-memory mode.

    ``component_name`` is the attribute name that holds the stage object
    (for example ``"_tts"``). The call does nothing when ``low_memory`` is
    off, when that attribute is missing or ``None`` (e.g. the transcriber
    was skipped because a pre-computed transcription was supplied), or when
    the stage object has no callable ``unload``.
    """
    stage = getattr(self, component_name, None) if self.low_memory else None
    release = getattr(stage, "unload", None)
    if not callable(release):
        return
    logger.info("low_memory: unloading %s", component_name.lstrip("_"))
    release()
61
+
34
62
  def _init_transcriber(self, enable_diarization: bool = False) -> None:
35
63
  """Initialize the transcription model."""
36
64
  from videopython.ai.understanding.audio import AudioToText
@@ -74,7 +102,6 @@ class LocalDubbingPipeline:
74
102
  max_duration: float = 10.0,
75
103
  ) -> dict[str, Any]:
76
104
  """Extract voice samples for each speaker from the audio."""
77
- from videopython.base.audio import Audio
78
105
 
79
106
  voice_samples: dict[str, Audio] = {}
80
107
 
@@ -107,7 +134,7 @@ class LocalDubbingPipeline:
107
134
 
108
135
  def process(
109
136
  self,
110
- video: Video,
137
+ source_audio: Audio,
111
138
  target_lang: str,
112
139
  source_lang: str | None = None,
113
140
  preserve_background: bool = True,
@@ -116,22 +143,22 @@ class LocalDubbingPipeline:
116
143
  progress_callback: Callable[[str, float], None] | None = None,
117
144
  transcription: Any | None = None,
118
145
  ) -> DubbingResult:
119
- """Process a video through the local dubbing pipeline.
146
+ """Run the dubbing pipeline against the given source audio.
120
147
 
121
148
  Args:
149
+ source_audio: Source audio track to dub. Callers with a ``Video``
150
+ object should pass ``video.audio``; callers with only a file path
151
+ can use ``Audio.from_path(path)`` to avoid loading video frames.
122
152
  transcription: Optional pre-computed Transcription object. When provided,
123
153
  the internal Whisper transcription step is skipped (saving time and VRAM).
124
154
  Must be a ``videopython.base.text.transcription.Transcription`` instance
125
155
  with populated ``segments``.
126
156
  """
127
- from videopython.base.audio import Audio
128
157
 
129
158
  def report_progress(stage: str, progress: float) -> None:
130
159
  if progress_callback:
131
160
  progress_callback(stage, progress)
132
161
 
133
- source_audio = video.audio
134
-
135
162
  if transcription is not None:
136
163
  report_progress("Using provided transcription", 0.05)
137
164
  else:
@@ -141,6 +168,7 @@ class LocalDubbingPipeline:
141
168
  self._transcriber_diarization = enable_diarization
142
169
 
143
170
  transcription = self._transcriber.transcribe(source_audio)
171
+ self._maybe_unload("_transcriber")
144
172
 
145
173
  if not transcription.segments:
146
174
  return DubbingResult(
@@ -162,6 +190,7 @@ class LocalDubbingPipeline:
162
190
  self._init_separator()
163
191
 
164
192
  separated_audio = self._separator.separate(source_audio)
193
+ self._maybe_unload("_separator")
165
194
  vocal_audio = separated_audio.vocals
166
195
 
167
196
  voice_samples: dict[str, Audio] = {}
@@ -178,6 +207,7 @@ class LocalDubbingPipeline:
178
207
  target_lang=target_lang,
179
208
  source_lang=detected_lang,
180
209
  )
210
+ self._maybe_unload("_translator")
181
211
 
182
212
  report_progress("Generating dubbed speech", 0.50)
183
213
  if self._tts is None or self._tts_voice_clone != voice_clone or self._tts_language != target_lang:
@@ -208,6 +238,8 @@ class LocalDubbingPipeline:
208
238
  target_durations.append(segment.duration)
209
239
  start_times.append(segment.start)
210
240
 
241
+ self._maybe_unload("_tts")
242
+
211
243
  report_progress("Synchronizing timing", 0.85)
212
244
  if self._synchronizer is None:
213
245
  self._init_synchronizer()
@@ -242,19 +274,23 @@ class LocalDubbingPipeline:
242
274
 
243
275
  def revoice(
244
276
  self,
245
- video: Video,
277
+ source_audio: Audio,
246
278
  text: str,
247
279
  preserve_background: bool = True,
248
280
  progress_callback: Callable[[str, float], None] | None = None,
249
281
  ) -> RevoiceResult:
250
- """Replace speech in a video with new text using voice cloning."""
282
+ """Replace speech in audio with new text using voice cloning.
283
+
284
+ Args:
285
+ source_audio: Source audio track to revoice. Callers with a ``Video``
286
+ object should pass ``video.audio``.
287
+ """
251
288
  from videopython.base.audio import Audio
252
289
 
253
290
  def report_progress(stage: str, progress: float) -> None:
254
291
  if progress_callback:
255
292
  progress_callback(stage, progress)
256
293
 
257
- source_audio = video.audio
258
294
  original_duration = source_audio.metadata.duration_seconds
259
295
 
260
296
  report_progress("Analyzing audio", 0.05)
@@ -263,6 +299,7 @@ class LocalDubbingPipeline:
263
299
  self._transcriber_diarization = False
264
300
 
265
301
  transcription = self._transcriber.transcribe(source_audio)
302
+ self._maybe_unload("_transcriber")
266
303
 
267
304
  separated_audio: SeparatedAudio | None = None
268
305
  vocal_audio = source_audio
@@ -273,6 +310,7 @@ class LocalDubbingPipeline:
273
310
  self._init_separator()
274
311
 
275
312
  separated_audio = self._separator.separate(source_audio)
313
+ self._maybe_unload("_separator")
276
314
  vocal_audio = separated_audio.vocals
277
315
 
278
316
  report_progress("Extracting voice sample", 0.40)
@@ -295,6 +333,7 @@ class LocalDubbingPipeline:
295
333
 
296
334
  generated_speech = self._tts.generate_audio(text, voice_sample=voice_sample)
297
335
  speech_duration = generated_speech.metadata.duration_seconds
336
+ self._maybe_unload("_tts")
298
337
 
299
338
  report_progress("Assembling audio", 0.85)
300
339
 
@@ -0,0 +1,73 @@
1
+ """ffmpeg helper for replacing a video file's audio track without re-encoding video."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import subprocess
7
+ from pathlib import Path
8
+
9
logger = logging.getLogger(__name__)


class RemuxError(RuntimeError):
    """ffmpeg failed while replacing an audio stream."""


def replace_audio_stream(
    video_path: str | Path,
    audio_path: str | Path,
    output_path: str | Path,
    audio_codec: str = "aac",
    audio_bitrate: str = "192k",
) -> None:
    """Copy ``video_path``'s video stream and mux in ``audio_path`` as the audio track.

    Uses ffmpeg stream-copy for video (no re-encode) and encodes audio with
    ``audio_codec``. ``-shortest`` ends the output with the shorter of the
    two streams, so the output duration matches the source video when the
    dubbed audio is slightly longer. Note that the same flag also cuts the
    video short if the audio is the shorter stream.

    Args:
        video_path: Source video file (video stream is copied unchanged).
        audio_path: Audio file to use as the new audio track.
        output_path: Destination file. Overwritten if it exists.
        audio_codec: ffmpeg audio codec name. Defaults to ``aac`` (MP4-compatible).
        audio_bitrate: Audio bitrate passed to ffmpeg (``-b:a``).

    Raises:
        FileNotFoundError: If ``video_path`` or ``audio_path`` does not exist.
        RemuxError: If the ffmpeg executable cannot be found, or if ffmpeg
            returns a non-zero exit code.
    """
    video_path = Path(video_path)
    audio_path = Path(audio_path)
    output_path = Path(output_path)

    if not video_path.exists():
        raise FileNotFoundError(f"Video file not found: {video_path}")
    if not audio_path.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    cmd = [
        "ffmpeg",
        "-y",  # overwrite output_path without prompting
        "-i",
        str(video_path),
        "-i",
        str(audio_path),
        "-map",
        "0:v:0",  # first video stream of the first input
        "-map",
        "1:a:0",  # first audio stream of the second input
        "-c:v",
        "copy",
        "-c:a",
        audio_codec,
        "-b:a",
        audio_bitrate,
        "-shortest",
        str(output_path),
    ]

    logger.info("replace_audio_stream: %s + %s -> %s", video_path, audio_path, output_path)
    try:
        # stdin is detached so ffmpeg never reads from the caller's
        # terminal or pipe while it runs.
        result = subprocess.run(cmd, stdin=subprocess.DEVNULL, capture_output=True)
    except FileNotFoundError as err:
        # Without this, a missing ffmpeg binary would surface as a
        # FileNotFoundError that looks like a missing input file.
        raise RemuxError("ffmpeg executable not found; install ffmpeg and make sure it is on PATH") from err
    if result.returncode != 0:
        raise RemuxError(f"ffmpeg failed (exit {result.returncode}): {result.stderr.decode(errors='replace')}")
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Any
6
6
 
7
- from videopython.ai._device import log_device_initialization, select_device
7
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
8
8
  from videopython.base.audio import Audio, AudioMetadata
9
9
 
10
10
 
@@ -151,6 +151,16 @@ class TextToSpeech:
151
151
 
152
152
  return self._generate_local(text, effective_voice)
153
153
 
154
def unload(self) -> None:
    """Drop every cached TTS model handle and release device memory.

    Clears ``_model``, ``_processor`` and ``_chatterbox_model``, then calls
    ``release_device_memory`` for ``self.device``. Intended to make the next
    generate_audio() call re-initialize the models; used by low-memory
    dubbing to free VRAM between pipeline stages.
    """
    for attr_name in ("_model", "_processor", "_chatterbox_model"):
        setattr(self, attr_name, None)
    release_device_memory(self.device)
163
+
154
164
 
155
165
  class TextToMusic:
156
166
  """Generates music from text descriptions using MusicGen."""
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Any
6
6
 
7
- from videopython.ai._device import log_device_initialization, select_device
7
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
8
8
  from videopython.ai.dubbing.models import TranslatedSegment
9
9
  from videopython.base.text.transcription import TranscriptionSegment
10
10
 
@@ -180,6 +180,16 @@ class TextTranslator:
180
180
 
181
181
  return translated_segments
182
182
 
183
def unload(self) -> None:
    """Drop the translation model state and release device memory.

    Clears ``_model``, ``_tokenizer`` and ``_current_lang_pair``, then calls
    ``release_device_memory`` for ``self.device``. Intended to make the next
    translate() call re-initialize the model; used by low-memory dubbing to
    free VRAM between pipeline stages.
    """
    self._model = self._tokenizer = self._current_lang_pair = None
    release_device_memory(self.device)
192
+
183
193
  @staticmethod
184
194
  def get_supported_languages() -> dict[str, str]:
185
195
  return LANGUAGE_NAMES.copy()
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Any, Literal
6
6
 
7
- from videopython.ai._device import log_device_initialization, select_device
7
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
8
8
  from videopython.base.audio import Audio
9
9
  from videopython.base.description import AudioClassification, AudioEvent
10
10
  from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord
@@ -51,6 +51,15 @@ class AudioToText:
51
51
  self._diarization_pipeline = Pipeline.from_pretrained(self.PYANNOTE_DIARIZATION_MODEL)
52
52
  self._diarization_pipeline.to(torch.device(self.device))
53
53
 
54
def unload(self) -> None:
    """Drop the Whisper and diarization handles and release device memory.

    Clears ``_model`` and ``_diarization_pipeline``, then calls
    ``release_device_memory`` for ``self.device``. Intended to make the next
    call re-initialize the models; used by low-memory dubbing to free VRAM
    between pipeline stages.
    """
    for attr_name in ("_model", "_diarization_pipeline"):
        setattr(self, attr_name, None)
    release_device_memory(self.device)
62
+
54
63
  def _process_transcription_result(self, transcription_result: dict) -> Transcription:
55
64
  """Process raw transcription result into a Transcription object."""
56
65
  transcription_segments = []
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Any
6
6
 
7
- from videopython.ai._device import log_device_initialization, select_device
7
+ from videopython.ai._device import log_device_initialization, release_device_memory, select_device
8
8
  from videopython.ai.dubbing.models import SeparatedAudio
9
9
  from videopython.base.audio import Audio, AudioMetadata
10
10
 
@@ -134,3 +134,11 @@ class AudioSeparator:
134
134
  def extract_background(self, audio: Audio) -> Audio:
135
135
  """Convenience method to extract only background from audio."""
136
136
  return self.separate(audio).background
137
+
138
def unload(self) -> None:
    """Drop the Demucs model handle and release device memory.

    Clears ``_model``, then calls ``release_device_memory`` for
    ``self.device``. Intended to make the next separate() call re-initialize
    the model; used by low-memory dubbing to free VRAM between pipeline
    stages.
    """
    target_device = self.device
    self._model = None
    release_device_memory(target_device)
File without changes
File without changes
File without changes