videopython 0.25.8__tar.gz → 0.26.1__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {videopython-0.25.8 → videopython-0.26.1}/PKG-INFO +1 -1
- {videopython-0.25.8 → videopython-0.26.1}/pyproject.toml +1 -1
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/dubber.py +12 -1
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/pipeline.py +19 -7
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/translation.py +12 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/effects.py +216 -45
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/registry.py +12 -3
- videopython-0.26.1/src/videopython/base/streaming.py +280 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/video.py +27 -5
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/video_edit.py +284 -3
- {videopython-0.25.8 → videopython-0.26.1}/.gitignore +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/LICENSE +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/README.md +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/_device.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/models.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/timing.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/registry.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/inpainter.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/models.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/segmenter.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/swapper.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/transforms.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/audio.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/image.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/separation.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/temporal.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/video_analysis.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/audio/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/audio/analysis.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/audio/audio.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/combine.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/description.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/exceptions.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/progress.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/scene.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/text/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/text/overlay.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/text/transcription.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/transforms.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/transitions.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/utils.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/__init__.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/multicam.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/premiere_xml.py +0 -0
- {videopython-0.25.8 → videopython-0.26.1}/src/videopython/py.typed +0 -0
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/dubber.py

```diff
@@ -36,12 +36,15 @@ class VideoDubber:
         voice_clone: bool = True,
         enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
+        transcription: Any = None,
     ) -> DubbingResult:
         """Dub a video into a target language.
 
         Args:
             enable_diarization: Enable speaker diarization to clone each speaker's
                 voice separately. Requires additional VRAM for the diarization model.
+            transcription: Optional pre-computed Transcription object. When provided,
+                the internal Whisper transcription step is skipped.
         """
         if self._local_pipeline is None:
             self._init_local_pipeline()
@@ -54,6 +57,7 @@ class VideoDubber:
             voice_clone=voice_clone,
             enable_diarization=enable_diarization,
             progress_callback=progress_callback,
+            transcription=transcription,
         )
 
     def dub_and_replace(
@@ -65,8 +69,14 @@ class VideoDubber:
         voice_clone: bool = True,
         enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
+        transcription: Any = None,
     ) -> Video:
-        """Dub a video and return a new video with the dubbed audio."""
+        """Dub a video and return a new video with the dubbed audio.
+
+        Args:
+            transcription: Optional pre-computed Transcription object. When provided,
+                the internal Whisper transcription step is skipped.
+        """
         result = self.dub(
             video=video,
             target_lang=target_lang,
@@ -75,6 +85,7 @@ class VideoDubber:
             voice_clone=voice_clone,
             enable_diarization=enable_diarization,
             progress_callback=progress_callback,
+            transcription=transcription,
         )
         return video.add_audio(result.dubbed_audio, overlay=False)
 
```
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/pipeline.py

```diff
@@ -114,21 +114,33 @@ class LocalDubbingPipeline:
         voice_clone: bool = True,
         enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
+        transcription: Any | None = None,
     ) -> DubbingResult:
-        """Process a video through the local dubbing pipeline."""
+        """Process a video through the local dubbing pipeline.
+
+        Args:
+            transcription: Optional pre-computed Transcription object. When provided,
+                the internal Whisper transcription step is skipped (saving time and VRAM).
+                Must be a ``videopython.base.text.transcription.Transcription`` instance
+                with populated ``segments``.
+        """
         from videopython.base.audio import Audio
 
         def report_progress(stage: str, progress: float) -> None:
             if progress_callback:
                 progress_callback(stage, progress)
 
-        report_progress("Transcribing audio", 0.05)
-        if self._transcriber is None or self._transcriber_diarization != enable_diarization:
-            self._init_transcriber(enable_diarization=enable_diarization)
-            self._transcriber_diarization = enable_diarization
-
         source_audio = video.audio
-
+
+        if transcription is not None:
+            report_progress("Using provided transcription", 0.05)
+        else:
+            report_progress("Transcribing audio", 0.05)
+            if self._transcriber is None or self._transcriber_diarization != enable_diarization:
+                self._init_transcriber(enable_diarization=enable_diarization)
+                self._transcriber_diarization = enable_diarization
+
+            transcription = self._transcriber.transcribe(source_audio)
 
         if not transcription.segments:
             return DubbingResult(
```
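Between them, these two files wire one feature end to end: `dub()` and `dub_and_replace()` forward a caller-supplied transcription, and the pipeline only spins up Whisper when none is given. A hedged sketch of the transcribe-once, dub-many pattern this enables; only the keyword arguments shown above are relied on, while the no-argument `VideoDubber()` constructor and the origin of `video` and the cached `Transcription` are assumptions:

```python
from videopython.ai.dubbing.dubber import VideoDubber

def dub_many(video, transcription, languages):
    """transcription: a Transcription with populated segments (see docstring above)."""
    dubber = VideoDubber()  # assumed default constructor
    return {
        lang: dubber.dub_and_replace(
            video=video,
            target_lang=lang,
            transcription=transcription,  # Whisper runs zero times instead of once per language
        )
        for lang in languages
    }
```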
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/translation.py

```diff
@@ -48,6 +48,15 @@ LANGUAGE_NAMES = {
 class TextTranslator:
     """Translates text between languages using local seq2seq models."""
 
+    # Languages without a direct opus-mt-{src}-{tgt} model. Maps (source, target)
+    # to an alternative HuggingFace model identifier.
+    _MODEL_OVERRIDES: dict[tuple[str, str], str] = {
+        ("en", "pt"): "Helsinki-NLP/opus-mt-tc-big-en-pt",
+        ("en", "ko"): "Helsinki-NLP/opus-mt-tc-big-en-ko",
+        ("en", "ja"): "Helsinki-NLP/opus-mt-en-jap",
+        ("en", "pl"): "Helsinki-NLP/opus-mt-en-zlw",
+    }
+
     def __init__(self, model_name: str | None = None, device: str | None = None):
         self.model_name = model_name
         self.device = device
@@ -58,6 +67,9 @@ class TextTranslator:
     def _get_local_model_name(self, source_lang: str, target_lang: str) -> str:
         if self.model_name:
             return self.model_name
+        override = self._MODEL_OVERRIDES.get((source_lang, target_lang))
+        if override:
+            return override
         return f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
 
     def _init_local(self, source_lang: str, target_lang: str) -> None:
```
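The override table fills a concrete gap: some pairs have no direct `opus-mt-{src}-{tgt}` checkpoint, so they are rerouted to `tc-big` or multilingual variants. Resolution order is explicit `model_name`, then the override table, then the default naming scheme. A quick illustration, calling the private helper directly just for demonstration:

```python
from videopython.ai.generation.translation import TextTranslator

t = TextTranslator()
print(t._get_local_model_name("en", "de"))  # Helsinki-NLP/opus-mt-en-de (default scheme)
print(t._get_local_model_name("en", "pt"))  # Helsinki-NLP/opus-mt-tc-big-en-pt (override)

# An explicit model_name still wins over both lookup steps:
t2 = TextTranslator(model_name="facebook/nllb-200-distilled-600M")
print(t2._get_local_model_name("en", "pt"))  # facebook/nllb-200-distilled-600M
```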
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/effects.py

```diff
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, ClassVar, Literal
 
 import cv2
 import numpy as np
@@ -11,6 +11,7 @@ from videopython.base.progress import log, progress_iter
 from videopython.base.video import Video
 
 if TYPE_CHECKING:
+    from videopython.base.audio import Audio
     from videopython.base.description import BoundingBox
 
 __all__ = [
@@ -50,6 +51,27 @@ class Effect(ABC):
     The effect must not change the number of frames and the shape of the frames.
     """
 
+    supports_streaming: ClassVar[bool] = False
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        """Called once before streaming begins to precompute per-frame parameters.
+
+        Override in subclasses that need precomputation (e.g., per-frame alpha
+        arrays, sigma schedules, crop regions).
+        """
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        """Process a single frame in streaming mode.
+
+        Args:
+            frame: Single RGB frame (H, W, 3) uint8.
+            frame_index: 0-based index within this effect's active range.
+
+        Returns:
+            Processed frame, same shape and dtype.
+        """
+        raise NotImplementedError(f"{type(self).__name__} does not support streaming")
+
     def apply(self, video: Video, start: float | None = None, stop: float | None = None) -> Video:
         """Apply the effect to a video, optionally within a time range.
 
```
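These three hooks form a pull-based protocol: check `supports_streaming`, call `streaming_init()` once with the clip geometry, then map `process_frame()` over frames in order. The new `streaming.py` (280 added lines, not shown in this diff) presumably drives it; the sketch below only shows how the protocol composes and is not the package's actual driver:

```python
from typing import Iterable, Iterator, Sequence
import numpy as np

def stream_effects(
    frames: Iterable[np.ndarray],
    effects: Sequence,  # Effect instances with supports_streaming = True
    total_frames: int, fps: float, width: int, height: int,
) -> Iterator[np.ndarray]:
    """Chain streamable effects frame by frame, holding O(1) frames in memory."""
    for effect in effects:
        if not effect.supports_streaming:
            raise ValueError(f"{type(effect).__name__} cannot stream; fall back to .apply()")
        effect.streaming_init(total_frames, fps, width, height)
    for i, frame in enumerate(frames):
        for effect in effects:
            frame = effect.process_frame(frame, i)
        yield frame
```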
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/effects.py (continued)

```diff
@@ -106,6 +128,8 @@ class FullImageOverlay(Effect):
     transparency via RGBA images and an overall opacity control.
     """
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(self, overlay_image: np.ndarray, alpha: float | None = None, fade_time: float = 0.0):
         """Initialize image overlay effect.
 
@@ -139,6 +163,17 @@ class FullImageOverlay(Effect):
         img_pil.paste(overlay_pil, (0, 0), overlay_pil)
         return np.array(img_pil)
 
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_total = total_frames
+        self._stream_fade_frames = round(self.fade_time * fps) if self.fade_time > 0 else 0
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        if self._stream_fade_frames == 0:
+            return self._overlay(frame)
+        dist_from_end = min(frame_index, self._stream_total - 1 - frame_index)
+        fade_alpha = 1.0 if dist_from_end >= self._stream_fade_frames else dist_from_end / self._stream_fade_frames
+        return self._overlay(frame, fade_alpha)
+
     def _apply(self, video: Video) -> Video:
         if not video.frame_shape == self.overlay[:, :, :3].shape:
             raise ValueError(
@@ -164,6 +199,8 @@ class FullImageOverlay(Effect):
 class Blur(Effect):
     """Applies Gaussian blur that can stay constant or ramp up/down over the clip."""
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         mode: Literal["constant", "ascending", "descending"],
@@ -198,29 +235,31 @@ class Blur(Effect):
         """
         return cv2.GaussianBlur(frame, self.kernel_size, sigma)
 
-    def _apply(self, video: Video) -> Video:
-        n_frames = len(video.frames)
-
-        # Calculate base sigma from kernel size (OpenCV formula)
+    def _compute_sigmas(self, n_frames: int) -> np.ndarray:
+        """Compute per-frame sigma values based on mode."""
         base_sigma = 0.3 * ((self.kernel_size[0] - 1) * 0.5 - 1) + 0.8
-
-        # Multiple blur iterations with sigma S approximate single blur with sigma S*sqrt(iterations)
-        # This is much faster than iterative application
         max_sigma = base_sigma * np.sqrt(self.iterations)
 
-        # Calculate sigma for each frame based on mode
         if self.mode == "constant":
-            sigmas = np.full(n_frames, max_sigma)
+            return np.full(n_frames, max_sigma)
         elif self.mode == "ascending":
-
-            iteration_ratios = np.linspace(1 / n_frames, 1.0, n_frames)
-            sigmas = base_sigma * np.sqrt(np.maximum(1, np.round(iteration_ratios * self.iterations)))
+            ratios = np.linspace(1 / n_frames, 1.0, n_frames)
        elif self.mode == "descending":
-
-            iteration_ratios = np.linspace(1.0, 1 / n_frames, n_frames)
-            sigmas = base_sigma * np.sqrt(np.maximum(1, np.round(iteration_ratios * self.iterations)))
+            ratios = np.linspace(1.0, 1 / n_frames, n_frames)
         else:
             raise ValueError(f"Unknown mode: `{self.mode}`.")
+        return base_sigma * np.sqrt(np.maximum(1, np.round(ratios * self.iterations)))
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_sigmas = self._compute_sigmas(total_frames)
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_sigmas) - 1)
+        return self._blur_frame(frame, self._stream_sigmas[idx])
+
+    def _apply(self, video: Video) -> Video:
+        n_frames = len(video.frames)
+        sigmas = self._compute_sigmas(n_frames)
 
         log(f"Applying {self.mode} blur...")
         for i in progress_iter(range(n_frames), desc="Blurring"):
```
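The removed comments encoded the identity the refactor still relies on: Gaussian variances add under convolution, so `iterations` passes at sigma s equal one pass at s·√iterations, which is what lets `_compute_sigmas` emit a single sigma per frame. A quick numerical check of that identity (standalone, not package code):

```python
import cv2
import numpy as np

rng = np.random.default_rng(0)
img = rng.integers(0, 256, (64, 64, 3), dtype=np.uint8)

sigma, n = 2.0, 4
iterated = img.copy()
for _ in range(n):                            # n small blurs...
    iterated = cv2.GaussianBlur(iterated, (0, 0), sigma)
single = cv2.GaussianBlur(img, (0, 0), sigma * np.sqrt(n))  # ...vs one wide blur

# Differences come only from uint8 rounding between passes: a few gray levels.
print(np.abs(iterated.astype(int) - single.astype(int)).max())
```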
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/effects.py (continued)

```diff
@@ -231,6 +270,8 @@ class Blur(Effect):
 class Zoom(Effect):
     """Progressively zooms into or out of the frame center over the clip duration."""
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(self, zoom_factor: float, mode: Literal["in", "out"]):
         """Initialize zoom effect.
 
@@ -245,6 +286,24 @@ class Zoom(Effect):
         self.zoom_factor = zoom_factor
         self.mode = mode
 
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        crop_w = np.linspace(width // self.zoom_factor, width, total_frames)
+        crop_h = np.linspace(height // self.zoom_factor, height, total_frames)
+        if self.mode == "in":
+            crop_w, crop_h = crop_w[::-1], crop_h[::-1]
+        self._stream_crops = np.stack([crop_w, crop_h], axis=1)
+        self._stream_width = width
+        self._stream_height = height
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_crops) - 1)
+        w, h = self._stream_crops[idx]
+        width, height = self._stream_width, self._stream_height
+        x = width / 2 - w / 2
+        y = height / 2 - h / 2
+        cropped = frame[round(y) : round(y + h), round(x) : round(x + w)]
+        return cv2.resize(cropped, (width, height))
+
     def _apply(self, video: Video) -> Video:
         n_frames = len(video.frames)
         width = video.metadata.width
@@ -270,6 +329,8 @@ class Zoom(Effect):
 class ColorGrading(Effect):
     """Adjusts color properties: brightness, contrast, saturation, and temperature."""
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         brightness: float = 0.0,
@@ -335,6 +396,9 @@ class ColorGrading(Effect):
         img = np.clip(img * 255, 0, 255).astype(np.uint8)
         return img
 
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        return self._grade_frame(frame)
+
     def _apply(self, video: Video) -> Video:
         log("Applying color grading...")
         for i in progress_iter(range(len(video.frames)), desc="Color grading"):
@@ -345,6 +409,8 @@ class ColorGrading(Effect):
 class Vignette(Effect):
     """Darkens the edges of the frame, drawing attention to the center."""
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(self, strength: float = 0.5, radius: float = 1.0):
         """Initialize vignette effect.
 
@@ -378,6 +444,14 @@ class Vignette(Effect):
 
         return mask.astype(np.float32)
 
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        if self._mask is None or self._mask.shape != (height, width):
+            self._mask = self._create_mask(height, width)
+        self._stream_mask_3d = self._mask[:, :, np.newaxis]
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        return (frame.astype(np.float32) * self._stream_mask_3d).astype(np.uint8)
+
     def _apply(self, video: Video) -> Video:
         log("Applying vignette effect...")
         height, width = video.frame_shape[:2]
@@ -403,6 +477,8 @@ class KenBurns(Effect):
     across a scene.
     """
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         start_region: "BoundingBox",
@@ -477,6 +553,38 @@ class KenBurns(Effect):
         cropped = frame[y : y + crop_h, x : x + crop_w]
         return cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
 
+    def _precompute_regions(self, n_frames: int, width: int, height: int) -> np.ndarray:
+        """Precompute (x, y, crop_w, crop_h) for each frame."""
+        sx = int(self.start_region.x * width)
+        sy = int(self.start_region.y * height)
+        sw = int(self.start_region.width * width)
+        sh = int(self.start_region.height * height)
+        ex = int(self.end_region.x * width)
+        ey = int(self.end_region.y * height)
+        ew = int(self.end_region.width * width)
+        eh = int(self.end_region.height * height)
+
+        regions = np.empty((n_frames, 4), dtype=np.int32)
+        for i in range(n_frames):
+            t = i / max(1, n_frames - 1)
+            et = self._ease(t)
+            crop_w = int(sw + (ew - sw) * et)
+            crop_h = int(sh + (eh - sh) * et)
+            x = max(0, min(int(sx + (ex - sx) * et), width - crop_w))
+            y = max(0, min(int(sy + (ey - sy) * et), height - crop_h))
+            regions[i] = (x, y, crop_w, crop_h)
+        return regions
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_regions = self._precompute_regions(total_frames, width, height)
+        self._stream_target_w = width
+        self._stream_target_h = height
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_regions) - 1)
+        x, y, cw, ch = self._stream_regions[idx]
+        return self._crop_and_scale_frame(frame, x, y, cw, ch, self._stream_target_w, self._stream_target_h)
+
     def _apply(self, video: Video) -> Video:
         n_frames = len(video.frames)
         height, width = video.frame_shape[:2]
@@ -525,6 +633,8 @@ def _compute_curve(t: np.ndarray, curve: str) -> np.ndarray:
 class Fade(Effect):
     """Fades video and audio to or from black."""
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         mode: Literal["in", "out", "in_out"],
@@ -549,6 +659,28 @@ class Fade(Effect):
         self.duration = duration
         self.curve = curve
 
+    def _compute_alpha(self, n_frames: int, fps: float) -> np.ndarray:
+        """Compute per-frame alpha values for the video fade."""
+        fade_frames = min(round(self.duration * fps), n_frames)
+        alpha = np.ones(n_frames, dtype=np.float32)
+        if self.mode in ("in", "in_out"):
+            t = np.linspace(0, 1, fade_frames, dtype=np.float32)
+            alpha[:fade_frames] = _compute_curve(t, self.curve)
+        if self.mode in ("out", "in_out"):
+            t = np.linspace(1, 0, fade_frames, dtype=np.float32)
+            alpha[-fade_frames:] = np.minimum(alpha[-fade_frames:], _compute_curve(t, self.curve))
+        return alpha
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_alpha = self._compute_alpha(total_frames, fps)
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_alpha) - 1)
+        a = self._stream_alpha[idx]
+        if a == 1.0:
+            return frame
+        return (frame.astype(np.float32) * a).astype(np.uint8)
+
     def apply(self, video: Video, start: float | None = None, stop: float | None = None) -> Video:
         """Apply fade effect to video and audio.
 
@@ -569,16 +701,8 @@ class Fade(Effect):
         effect_start_frame = round(start_s * video.fps)
         effect_end_frame = round(stop_s * video.fps)
         n_effect_frames = effect_end_frame - effect_start_frame
-        fade_frames = min(round(self.duration * video.fps), n_effect_frames)
 
-
-        alpha = np.ones(n_effect_frames, dtype=np.float32)
-        if self.mode in ("in", "in_out"):
-            t = np.linspace(0, 1, fade_frames, dtype=np.float32)
-            alpha[:fade_frames] = _compute_curve(t, self.curve)
-        if self.mode in ("out", "in_out"):
-            t = np.linspace(1, 0, fade_frames, dtype=np.float32)
-            alpha[-fade_frames:] = np.minimum(alpha[-fade_frames:], _compute_curve(t, self.curve))
+        alpha = self._compute_alpha(n_effect_frames, video.fps)
 
         # Apply to video frames in batches to avoid a full float32 copy
         batch_size = 64
@@ -600,29 +724,38 @@ class Fade(Effect):
 
         # Apply to audio
         if video.audio is not None and not video.audio.is_silent:
-            sample_rate = video.audio.metadata.sample_rate
-            audio_start = round(start_s * sample_rate)
-            audio_end = min(round(stop_s * sample_rate), len(video.audio.data))
-            n_audio_samples = audio_end - audio_start
-            fade_samples = min(round(self.duration * sample_rate), n_audio_samples)
-
-            audio_alpha = np.ones(n_audio_samples, dtype=np.float32)
-            if self.mode in ("in", "in_out"):
-                t = np.linspace(0, 1, fade_samples, dtype=np.float32)
-                audio_alpha[:fade_samples] = _compute_curve(t, self.curve)
-            if self.mode in ("out", "in_out"):
-                t = np.linspace(1, 0, fade_samples, dtype=np.float32)
-                audio_alpha[-fade_samples:] = np.minimum(audio_alpha[-fade_samples:], _compute_curve(t, self.curve))
-
-            audio_data = video.audio.data
-            if audio_data.ndim == 1:
-                audio_data[audio_start:audio_end] *= audio_alpha
-            else:
-                audio_data[audio_start:audio_end] *= audio_alpha[:, np.newaxis]
-            np.clip(audio_data, -1.0, 1.0, out=audio_data)
+            self.apply_audio(video.audio, start_s, stop_s)
 
         return video
 
+    def apply_audio(self, audio: Audio, start_s: float, stop_s: float) -> None:
+        """Apply fade to audio data in-place.
+
+        Args:
+            audio: Audio object to modify.
+            start_s: Start time in seconds.
+            stop_s: Stop time in seconds.
+        """
+        sample_rate = audio.metadata.sample_rate
+        audio_start = round(start_s * sample_rate)
+        audio_end = min(round(stop_s * sample_rate), len(audio.data))
+        n_samples = audio_end - audio_start
+        fade_samples = min(round(self.duration * sample_rate), n_samples)
+
+        alpha = np.ones(n_samples, dtype=np.float32)
+        if self.mode in ("in", "in_out"):
+            t = np.linspace(0, 1, fade_samples, dtype=np.float32)
+            alpha[:fade_samples] = _compute_curve(t, self.curve)
+        if self.mode in ("out", "in_out"):
+            t = np.linspace(1, 0, fade_samples, dtype=np.float32)
+            alpha[-fade_samples:] = np.minimum(alpha[-fade_samples:], _compute_curve(t, self.curve))
+
+        if audio.data.ndim == 1:
+            audio.data[audio_start:audio_end] *= alpha
+        else:
+            audio.data[audio_start:audio_end] *= alpha[:, np.newaxis]
+        np.clip(audio.data, -1.0, 1.0, out=audio.data)
+
     def _apply(self, video: Video) -> Video:
         raise NotImplementedError("Fade overrides apply() directly")
 
```
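With `_compute_alpha` and `apply_audio` factored out, the batch path (`apply()`), the streaming path, and the audio fade all share one ramp computation, so the modes cannot drift apart. A short sketch of the streaming side; the default `curve` argument is an assumption (with a linear curve, the first frame of a fade-in is fully dark):

```python
import numpy as np
from videopython.base.effects import Fade

fade = Fade(mode="in_out", duration=1.0)        # curve left at its default
fade.streaming_init(total_frames=90, fps=30.0, width=1280, height=720)

white = np.full((720, 1280, 3), 255, dtype=np.uint8)
first = fade.process_frame(white.copy(), 0)     # inside the fade-in ramp: darkened
middle = fade.process_frame(white.copy(), 45)   # alpha == 1.0: returned untouched
```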
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/effects.py (continued)

```diff
@@ -634,6 +767,11 @@ class AudioEffect(Effect):
     without modification. Overrides apply() to skip frame processing.
     """
 
+    supports_streaming: ClassVar[bool] = True
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        return frame  # Audio effects don't touch frames
+
     def _apply(self, video: Video) -> Video:
         raise NotImplementedError("AudioEffect does not process frames -- use _apply_audio()")
 
@@ -717,6 +855,8 @@ class VolumeAdjust(AudioEffect):
 class TextOverlay(Effect):
     """Draws text on video frames, with auto word-wrap and optional background box."""
 
+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         text: str,
@@ -841,6 +981,37 @@ class TextOverlay(Effect):
             return px - img_w, py - img_h
         return px - img_w // 2, py - img_h // 2
 
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        if self._rendered is None:
+            self._rendered = self._render_text_image(width, height)
+        oh, ow = self._rendered.shape[:2]
+        x, y = self._compute_position(width, height, ow, oh)
+        src_x = max(0, -x)
+        src_y = max(0, -y)
+        dst_x = max(0, x)
+        dst_y = max(0, y)
+        paste_w = min(ow - src_x, width - dst_x)
+        paste_h = min(oh - src_y, height - dst_y)
+        if paste_w <= 0 or paste_h <= 0:
+            self._stream_noop = True
+            return
+        self._stream_noop = False
+        overlay_region = self._rendered[src_y : src_y + paste_h, src_x : src_x + paste_w]
+        self._stream_alpha = overlay_region[:, :, 3:4].astype(np.float32) / 255.0
+        self._stream_rgb = overlay_region[:, :, :3].astype(np.float32)
+        self._stream_dst = (dst_y, dst_x, paste_h, paste_w)
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        if self._stream_noop:
+            return frame
+        dy, dx, ph, pw = self._stream_dst
+        region = frame[dy : dy + ph, dx : dx + pw]
+        blended = (
+            self._stream_rgb * self._stream_alpha + region.astype(np.float32) * (1.0 - self._stream_alpha)
+        ).astype(np.uint8)
+        frame[dy : dy + ph, dx : dx + pw] = blended
+        return frame
+
     def _apply(self, video: Video) -> Video:
         frame_h, frame_w = video.frame_shape[:2]
 
```
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/registry.py

```diff
@@ -530,7 +530,7 @@ def _register_base_operations() -> None:
         Resize,
         op_id="resize",
         category=OperationCategory.TRANSFORMATION,
-        tags={"changes_dimensions"},
+        tags={"changes_dimensions", "streamable"},
         param_overrides={
             "width": {"exclusive_minimum": 0},
             "height": {"exclusive_minimum": 0},
@@ -543,7 +543,7 @@ def _register_base_operations() -> None:
         ResampleFPS,
         op_id="resample_fps",
         category=OperationCategory.TRANSFORMATION,
-        tags={"changes_fps"},
+        tags={"changes_fps", "streamable"},
         param_overrides={"fps": {"minimum": 1}},
         metadata_method="resample_fps",
     )
@@ -553,7 +553,7 @@ def _register_base_operations() -> None:
         Crop,
         op_id="crop",
         category=OperationCategory.TRANSFORMATION,
-        tags={"changes_dimensions"},
+        tags={"changes_dimensions", "streamable"},
         param_overrides={
             "width": {"exclusive_minimum": 0},
             "height": {"exclusive_minimum": 0},
@@ -634,6 +634,7 @@ def _register_base_operations() -> None:
         Blur,
         op_id="blur_effect",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("blur",),
         param_overrides={"iterations": {"minimum": 1}},
         apply_param_overrides=_time_range_apply_overrides,
@@ -644,6 +645,7 @@ def _register_base_operations() -> None:
         Zoom,
         op_id="zoom_effect",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("zoom",),
         param_overrides={"zoom_factor": {"exclusive_minimum": 1}},
         apply_param_overrides=_time_range_apply_overrides,
@@ -654,6 +656,7 @@ def _register_base_operations() -> None:
         ColorGrading,
         op_id="color_adjust",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("color_grading",),
         param_overrides={
             "brightness": {"minimum": -1, "maximum": 1},
@@ -669,6 +672,7 @@ def _register_base_operations() -> None:
         Vignette,
         op_id="vignette",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         param_overrides={
             "strength": {"minimum": 0, "maximum": 1},
             "radius": {"minimum": 0.5, "maximum": 2.0},
@@ -681,6 +685,7 @@ def _register_base_operations() -> None:
         KenBurns,
         op_id="ken_burns",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         exclude_params={"start_region", "end_region"},
         # BoundingBox forward ref breaks get_type_hints, so fix easing type manually.
         param_overrides={
@@ -697,6 +702,7 @@ def _register_base_operations() -> None:
         FullImageOverlay,
         op_id="full_image_overlay",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         exclude_params={"overlay_image"},
         param_overrides={
             "alpha": {"minimum": 0, "maximum": 1},
@@ -710,6 +716,7 @@ def _register_base_operations() -> None:
         Fade,
         op_id="fade",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         param_overrides={"duration": {"exclusive_minimum": 0}},
         apply_param_overrides=_time_range_apply_overrides,
     )
@@ -719,6 +726,7 @@ def _register_base_operations() -> None:
         VolumeAdjust,
        op_id="volume_adjust",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("volume",),
         param_overrides={
             "volume": {"minimum": 0},
```
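The registry edits are mechanical: every operation that implements the new protocol gains a "streamable" tag next to its existing capability tags. The accessor side of the registry is not part of this diff, so the filter below is a hypothetical sketch showing only what the tag makes possible for a planner:

```python
# Hypothetical accessor names (`entries()`, `.op_id`, `.tags`); only the tag
# values themselves ({"streamable"}, {"changes_dimensions", "streamable"}, ...)
# come from this diff.
def streamable_op_ids(registry) -> list[str]:
    return sorted(e.op_id for e in registry.entries() if "streamable" in e.tags)
```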
{videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/registry.py (continued; diff truncated at source)

```diff
@@ -732,6 +740,7 @@ def _register_base_operations() -> None:
         TextOverlay,
         op_id="text_overlay",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("lower_third", "title_card"),
         exclude_params={"font_filename"},
         param_overrides={
```
|