videopython 0.25.8__tar.gz → 0.26.1__tar.gz

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (55)
  1. {videopython-0.25.8 → videopython-0.26.1}/PKG-INFO +1 -1
  2. {videopython-0.25.8 → videopython-0.26.1}/pyproject.toml +1 -1
  3. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/dubber.py +12 -1
  4. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/pipeline.py +19 -7
  5. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/translation.py +12 -0
  6. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/effects.py +216 -45
  7. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/registry.py +12 -3
  8. videopython-0.26.1/src/videopython/base/streaming.py +280 -0
  9. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/video.py +27 -5
  10. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/video_edit.py +284 -3
  11. {videopython-0.25.8 → videopython-0.26.1}/.gitignore +0 -0
  12. {videopython-0.25.8 → videopython-0.26.1}/LICENSE +0 -0
  13. {videopython-0.25.8 → videopython-0.26.1}/README.md +0 -0
  14. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/__init__.py +0 -0
  15. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/__init__.py +0 -0
  16. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/_device.py +0 -0
  17. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/__init__.py +0 -0
  18. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/models.py +0 -0
  19. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/dubbing/timing.py +0 -0
  20. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/__init__.py +0 -0
  21. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/audio.py +0 -0
  22. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/image.py +0 -0
  23. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/generation/video.py +0 -0
  24. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/registry.py +0 -0
  25. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/__init__.py +0 -0
  26. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/inpainter.py +0 -0
  27. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/models.py +0 -0
  28. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/segmenter.py +0 -0
  29. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/swapping/swapper.py +0 -0
  30. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/transforms.py +0 -0
  31. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/__init__.py +0 -0
  32. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/audio.py +0 -0
  33. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/image.py +0 -0
  34. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/separation.py +0 -0
  35. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/understanding/temporal.py +0 -0
  36. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/ai/video_analysis.py +0 -0
  37. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/__init__.py +0 -0
  38. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/audio/__init__.py +0 -0
  39. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/audio/analysis.py +0 -0
  40. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/audio/audio.py +0 -0
  41. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/combine.py +0 -0
  42. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/description.py +0 -0
  43. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/exceptions.py +0 -0
  44. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/progress.py +0 -0
  45. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/scene.py +0 -0
  46. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/text/__init__.py +0 -0
  47. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/text/overlay.py +0 -0
  48. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/text/transcription.py +0 -0
  49. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/transforms.py +0 -0
  50. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/transitions.py +0 -0
  51. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/base/utils.py +0 -0
  52. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/__init__.py +0 -0
  53. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/multicam.py +0 -0
  54. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/editing/premiere_xml.py +0 -0
  55. {videopython-0.25.8 → videopython-0.26.1}/src/videopython/py.typed +0 -0
--- videopython-0.25.8/PKG-INFO
+++ videopython-0.26.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.25.8
+Version: 0.26.1
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://videopython.com
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
--- videopython-0.25.8/pyproject.toml
+++ videopython-0.26.1/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "videopython"
-version = "0.25.8"
+version = "0.26.1"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
--- videopython-0.25.8/src/videopython/ai/dubbing/dubber.py
+++ videopython-0.26.1/src/videopython/ai/dubbing/dubber.py
@@ -36,12 +36,15 @@ class VideoDubber:
         voice_clone: bool = True,
         enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
+        transcription: Any = None,
     ) -> DubbingResult:
         """Dub a video into a target language.

         Args:
             enable_diarization: Enable speaker diarization to clone each speaker's
                 voice separately. Requires additional VRAM for the diarization model.
+            transcription: Optional pre-computed Transcription object. When provided,
+                the internal Whisper transcription step is skipped.
         """
         if self._local_pipeline is None:
             self._init_local_pipeline()
@@ -54,6 +57,7 @@ class VideoDubber:
             voice_clone=voice_clone,
             enable_diarization=enable_diarization,
             progress_callback=progress_callback,
+            transcription=transcription,
         )

     def dub_and_replace(
@@ -65,8 +69,14 @@
         voice_clone: bool = True,
         enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
+        transcription: Any = None,
     ) -> Video:
-        """Dub a video and return a new video with the dubbed audio."""
+        """Dub a video and return a new video with the dubbed audio.
+
+        Args:
+            transcription: Optional pre-computed Transcription object. When provided,
+                the internal Whisper transcription step is skipped.
+        """
         result = self.dub(
             video=video,
             target_lang=target_lang,
@@ -75,6 +85,7 @@
             voice_clone=voice_clone,
             enable_diarization=enable_diarization,
             progress_callback=progress_callback,
+            transcription=transcription,
         )
         return video.add_audio(result.dubbed_audio, overlay=False)

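The new `transcription` parameter lets callers transcribe once and dub many times. A minimal sketch, assuming `VideoDubber` is exported from `videopython.ai.dubbing` and takes no required constructor arguments (neither is confirmed by this diff):

```python
from videopython.ai.dubbing import VideoDubber  # assumed export path

# `video` is an existing Video instance (loading elided); `precomputed` is a
# videopython Transcription with populated segments, produced by an earlier
# Whisper run of your own.
dubber = VideoDubber()  # assuming a no-argument constructor
dubbed_video = dubber.dub_and_replace(
    video=video,
    target_lang="de",
    transcription=precomputed,  # skips the internal Whisper step
)
```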
--- videopython-0.25.8/src/videopython/ai/dubbing/pipeline.py
+++ videopython-0.26.1/src/videopython/ai/dubbing/pipeline.py
@@ -114,21 +114,33 @@ class LocalDubbingPipeline:
         voice_clone: bool = True,
         enable_diarization: bool = False,
         progress_callback: Callable[[str, float], None] | None = None,
+        transcription: Any | None = None,
     ) -> DubbingResult:
-        """Process a video through the local dubbing pipeline."""
+        """Process a video through the local dubbing pipeline.
+
+        Args:
+            transcription: Optional pre-computed Transcription object. When provided,
+                the internal Whisper transcription step is skipped (saving time and VRAM).
+                Must be a ``videopython.base.text.transcription.Transcription`` instance
+                with populated ``segments``.
+        """
        from videopython.base.audio import Audio

        def report_progress(stage: str, progress: float) -> None:
            if progress_callback:
                progress_callback(stage, progress)

-        report_progress("Transcribing audio", 0.05)
-        if self._transcriber is None or self._transcriber_diarization != enable_diarization:
-            self._init_transcriber(enable_diarization=enable_diarization)
-            self._transcriber_diarization = enable_diarization
-
         source_audio = video.audio
-        transcription = self._transcriber.transcribe(source_audio)
+
+        if transcription is not None:
+            report_progress("Using provided transcription", 0.05)
+        else:
+            report_progress("Transcribing audio", 0.05)
+            if self._transcriber is None or self._transcriber_diarization != enable_diarization:
+                self._init_transcriber(enable_diarization=enable_diarization)
+                self._transcriber_diarization = enable_diarization
+
+            transcription = self._transcriber.transcribe(source_audio)

         if not transcription.segments:
             return DubbingResult(
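Because the pipeline now skips transcriber initialization entirely when a transcription is supplied, one Whisper pass can feed several dubbing runs. A sketch building on the example above (same assumptions):

```python
# One transcription, many target languages: Whisper runs zero times here,
# and the transcriber/diarization models are never loaded (saving VRAM).
for lang in ("de", "fr", "pl"):
    dubbed = dubber.dub_and_replace(
        video=video, target_lang=lang, transcription=precomputed
    )
```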
--- videopython-0.25.8/src/videopython/ai/generation/translation.py
+++ videopython-0.26.1/src/videopython/ai/generation/translation.py
@@ -48,6 +48,15 @@ LANGUAGE_NAMES = {
 class TextTranslator:
     """Translates text between languages using local seq2seq models."""

+    # Languages without a direct opus-mt-{src}-{tgt} model. Maps (source, target)
+    # to an alternative HuggingFace model identifier.
+    _MODEL_OVERRIDES: dict[tuple[str, str], str] = {
+        ("en", "pt"): "Helsinki-NLP/opus-mt-tc-big-en-pt",
+        ("en", "ko"): "Helsinki-NLP/opus-mt-tc-big-en-ko",
+        ("en", "ja"): "Helsinki-NLP/opus-mt-en-jap",
+        ("en", "pl"): "Helsinki-NLP/opus-mt-en-zlw",
+    }
+
     def __init__(self, model_name: str | None = None, device: str | None = None):
         self.model_name = model_name
         self.device = device
@@ -58,6 +67,9 @@
     def _get_local_model_name(self, source_lang: str, target_lang: str) -> str:
         if self.model_name:
             return self.model_name
+        override = self._MODEL_OVERRIDES.get((source_lang, target_lang))
+        if override:
+            return override
         return f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"

     def _init_local(self, source_lang: str, target_lang: str) -> None:
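The override table changes which checkpoint `_get_local_model_name` resolves without touching the public API. A small sketch of the resolution order (the method is internal; shown only to illustrate the fallback chain):

```python
from videopython.ai.generation.translation import TextTranslator

translator = TextTranslator()

# Explicit model_name wins, then the override table, then the generic pattern:
translator._get_local_model_name("en", "pt")  # -> "Helsinki-NLP/opus-mt-tc-big-en-pt"
translator._get_local_model_name("en", "de")  # -> "Helsinki-NLP/opus-mt-en-de"
```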
--- videopython-0.25.8/src/videopython/base/effects.py
+++ videopython-0.26.1/src/videopython/base/effects.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, ClassVar, Literal

 import cv2
 import numpy as np
@@ -11,6 +11,7 @@ from videopython.base.progress import log, progress_iter
 from videopython.base.video import Video

 if TYPE_CHECKING:
+    from videopython.base.audio import Audio
     from videopython.base.description import BoundingBox

 __all__ = [
@@ -50,6 +51,27 @@ class Effect(ABC):
     The effect must not change the number of frames and the shape of the frames.
     """

+    supports_streaming: ClassVar[bool] = False
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        """Called once before streaming begins to precompute per-frame parameters.
+
+        Override in subclasses that need precomputation (e.g., per-frame alpha
+        arrays, sigma schedules, crop regions).
+        """
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        """Process a single frame in streaming mode.
+
+        Args:
+            frame: Single RGB frame (H, W, 3) uint8.
+            frame_index: 0-based index within this effect's active range.
+
+        Returns:
+            Processed frame, same shape and dtype.
+        """
+        raise NotImplementedError(f"{type(self).__name__} does not support streaming")
+
     def apply(self, video: Video, start: float | None = None, stop: float | None = None) -> Video:
         """Apply the effect to a video, optionally within a time range.

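With `supports_streaming`, `streaming_init`, and `process_frame` on the base class, a subclass can opt into frame-at-a-time processing. A minimal sketch of a hypothetical custom effect (not part of the library; assumes `Video.frames` is a uint8 ndarray, which this diff suggests but does not confirm):

```python
import numpy as np

from videopython.base.effects import Effect
from videopython.base.video import Video


class Invert(Effect):
    """Hypothetical color-inversion effect; stateless, so trivially streamable."""

    supports_streaming = True

    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
        return 255 - frame  # same shape and dtype, as the contract requires

    def _apply(self, video: Video) -> Video:
        video.frames = 255 - video.frames  # assuming frames is a uint8 ndarray
        return video
```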
@@ -106,6 +128,8 @@ class FullImageOverlay(Effect):
     transparency via RGBA images and an overall opacity control.
     """

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(self, overlay_image: np.ndarray, alpha: float | None = None, fade_time: float = 0.0):
         """Initialize image overlay effect.

@@ -139,6 +163,17 @@
         img_pil.paste(overlay_pil, (0, 0), overlay_pil)
         return np.array(img_pil)

+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_total = total_frames
+        self._stream_fade_frames = round(self.fade_time * fps) if self.fade_time > 0 else 0
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        if self._stream_fade_frames == 0:
+            return self._overlay(frame)
+        dist_from_end = min(frame_index, self._stream_total - 1 - frame_index)
+        fade_alpha = 1.0 if dist_from_end >= self._stream_fade_frames else dist_from_end / self._stream_fade_frames
+        return self._overlay(frame, fade_alpha)
+
     def _apply(self, video: Video) -> Video:
         if not video.frame_shape == self.overlay[:, :, :3].shape:
             raise ValueError(
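For `FullImageOverlay`, `streaming_init` only converts `fade_time` into a frame count; `process_frame` then ramps `fade_alpha` linearly over that many frames at both ends of the clip. A sketch, assuming an RGBA overlay matching the frame size:

```python
import numpy as np

from videopython.base.effects import FullImageOverlay

overlay_rgba = np.zeros((720, 1280, 4), dtype=np.uint8)  # transparent placeholder
effect = FullImageOverlay(overlay_rgba, alpha=0.8, fade_time=0.5)

# 0.5 s at 30 fps -> a 15-frame fade-in at the start and fade-out at the end.
effect.streaming_init(total_frames=90, fps=30.0, width=1280, height=720)
frame = np.zeros((720, 1280, 3), dtype=np.uint8)
first = effect.process_frame(frame.copy(), 0)    # fade_alpha = 0/15, overlay invisible
middle = effect.process_frame(frame.copy(), 45)  # fade_alpha = 1.0, fully applied
```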
@@ -164,6 +199,8 @@
 class Blur(Effect):
     """Applies Gaussian blur that can stay constant or ramp up/down over the clip."""

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         mode: Literal["constant", "ascending", "descending"],
@@ -198,29 +235,31 @@ class Blur(Effect):
         """
         return cv2.GaussianBlur(frame, self.kernel_size, sigma)

-    def _apply(self, video: Video) -> Video:
-        n_frames = len(video.frames)
-
-        # Calculate base sigma from kernel size (OpenCV formula)
+    def _compute_sigmas(self, n_frames: int) -> np.ndarray:
+        """Compute per-frame sigma values based on mode."""
         base_sigma = 0.3 * ((self.kernel_size[0] - 1) * 0.5 - 1) + 0.8
-
-        # Multiple blur iterations with sigma S approximate single blur with sigma S*sqrt(iterations)
-        # This is much faster than iterative application
         max_sigma = base_sigma * np.sqrt(self.iterations)

-        # Calculate sigma for each frame based on mode
         if self.mode == "constant":
-            sigmas = np.full(n_frames, max_sigma)
+            return np.full(n_frames, max_sigma)
         elif self.mode == "ascending":
-            # Linearly increase blur intensity from start to end
-            iteration_ratios = np.linspace(1 / n_frames, 1.0, n_frames)
-            sigmas = base_sigma * np.sqrt(np.maximum(1, np.round(iteration_ratios * self.iterations)))
+            ratios = np.linspace(1 / n_frames, 1.0, n_frames)
         elif self.mode == "descending":
-            # Linearly decrease blur intensity from start to end
-            iteration_ratios = np.linspace(1.0, 1 / n_frames, n_frames)
-            sigmas = base_sigma * np.sqrt(np.maximum(1, np.round(iteration_ratios * self.iterations)))
+            ratios = np.linspace(1.0, 1 / n_frames, n_frames)
         else:
             raise ValueError(f"Unknown mode: `{self.mode}`.")
+        return base_sigma * np.sqrt(np.maximum(1, np.round(ratios * self.iterations)))
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_sigmas = self._compute_sigmas(total_frames)
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_sigmas) - 1)
+        return self._blur_frame(frame, self._stream_sigmas[idx])
+
+    def _apply(self, video: Video) -> Video:
+        n_frames = len(video.frames)
+        sigmas = self._compute_sigmas(n_frames)

         log(f"Applying {self.mode} blur...")
         for i in progress_iter(range(n_frames), desc="Blurring"):
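Since `_apply` and the streaming path now share `_compute_sigmas`, both produce identical per-frame sigmas. A sketch of driving the streaming path directly, assuming `Blur`'s kernel-size and iteration arguments have defaults (their signatures are not shown in this diff):

```python
import numpy as np

from videopython.base.effects import Blur

blur = Blur(mode="ascending")  # assuming default kernel_size / iterations
blur.streaming_init(total_frames=60, fps=30.0, width=320, height=240)

frame = np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8)
weakest = blur.process_frame(frame.copy(), 0)     # smallest sigma in the ramp
strongest = blur.process_frame(frame.copy(), 59)  # largest sigma in the ramp
```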
@@ -231,6 +270,8 @@
 class Zoom(Effect):
     """Progressively zooms into or out of the frame center over the clip duration."""

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(self, zoom_factor: float, mode: Literal["in", "out"]):
         """Initialize zoom effect.

@@ -245,6 +286,24 @@
         self.zoom_factor = zoom_factor
         self.mode = mode

+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        crop_w = np.linspace(width // self.zoom_factor, width, total_frames)
+        crop_h = np.linspace(height // self.zoom_factor, height, total_frames)
+        if self.mode == "in":
+            crop_w, crop_h = crop_w[::-1], crop_h[::-1]
+        self._stream_crops = np.stack([crop_w, crop_h], axis=1)
+        self._stream_width = width
+        self._stream_height = height
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_crops) - 1)
+        w, h = self._stream_crops[idx]
+        width, height = self._stream_width, self._stream_height
+        x = width / 2 - w / 2
+        y = height / 2 - h / 2
+        cropped = frame[round(y) : round(y + h), round(x) : round(x + w)]
+        return cv2.resize(cropped, (width, height))
+
     def _apply(self, video: Video) -> Video:
         n_frames = len(video.frames)
         width = video.metadata.width
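`Zoom.streaming_init` precomputes one crop size per frame with `np.linspace`, reversing the schedule for zoom-in; `process_frame` then center-crops and rescales. For example, with `zoom_factor=2.0` and a 640-pixel-wide clip, the crop width runs from 320 up to 640 for mode `"out"`, and the reverse for `"in"`:

```python
import numpy as np

from videopython.base.effects import Zoom

zoom = Zoom(zoom_factor=2.0, mode="in")
zoom.streaming_init(total_frames=90, fps=30.0, width=640, height=360)

frame = np.zeros((360, 640, 3), dtype=np.uint8)
out = zoom.process_frame(frame, 0)  # first frame: full 640x360 crop, no zoom yet
assert out.shape == (360, 640, 3)   # output size never changes
```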
@@ -270,6 +329,8 @@
 class ColorGrading(Effect):
     """Adjusts color properties: brightness, contrast, saturation, and temperature."""

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         brightness: float = 0.0,
@@ -335,6 +396,9 @@
         img = np.clip(img * 255, 0, 255).astype(np.uint8)
         return img

+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        return self._grade_frame(frame)
+
     def _apply(self, video: Video) -> Video:
         log("Applying color grading...")
         for i in progress_iter(range(len(video.frames)), desc="Color grading"):
@@ -345,6 +409,8 @@
 class Vignette(Effect):
     """Darkens the edges of the frame, drawing attention to the center."""

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(self, strength: float = 0.5, radius: float = 1.0):
         """Initialize vignette effect.

@@ -378,6 +444,14 @@

         return mask.astype(np.float32)

+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        if self._mask is None or self._mask.shape != (height, width):
+            self._mask = self._create_mask(height, width)
+        self._stream_mask_3d = self._mask[:, :, np.newaxis]
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        return (frame.astype(np.float32) * self._stream_mask_3d).astype(np.uint8)
+
     def _apply(self, video: Video) -> Video:
         log("Applying vignette effect...")
         height, width = video.frame_shape[:2]
@@ -403,6 +477,8 @@ class KenBurns(Effect):
     across a scene.
     """

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         start_region: "BoundingBox",
@@ -477,6 +553,38 @@
         cropped = frame[y : y + crop_h, x : x + crop_w]
         return cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

+    def _precompute_regions(self, n_frames: int, width: int, height: int) -> np.ndarray:
+        """Precompute (x, y, crop_w, crop_h) for each frame."""
+        sx = int(self.start_region.x * width)
+        sy = int(self.start_region.y * height)
+        sw = int(self.start_region.width * width)
+        sh = int(self.start_region.height * height)
+        ex = int(self.end_region.x * width)
+        ey = int(self.end_region.y * height)
+        ew = int(self.end_region.width * width)
+        eh = int(self.end_region.height * height)
+
+        regions = np.empty((n_frames, 4), dtype=np.int32)
+        for i in range(n_frames):
+            t = i / max(1, n_frames - 1)
+            et = self._ease(t)
+            crop_w = int(sw + (ew - sw) * et)
+            crop_h = int(sh + (eh - sh) * et)
+            x = max(0, min(int(sx + (ex - sx) * et), width - crop_w))
+            y = max(0, min(int(sy + (ey - sy) * et), height - crop_h))
+            regions[i] = (x, y, crop_w, crop_h)
+        return regions
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_regions = self._precompute_regions(total_frames, width, height)
+        self._stream_target_w = width
+        self._stream_target_h = height
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_regions) - 1)
+        x, y, cw, ch = self._stream_regions[idx]
+        return self._crop_and_scale_frame(frame, x, y, cw, ch, self._stream_target_w, self._stream_target_h)
+
     def _apply(self, video: Video) -> Video:
         n_frames = len(video.frames)
         height, width = video.frame_shape[:2]
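`_precompute_regions` resolves the normalized start/end boxes to pixel crops once, so `process_frame` is a table lookup plus crop-and-resize. A sketch, assuming `BoundingBox` accepts normalized `x`/`y`/`width`/`height` keywords and `KenBurns` needs only the two regions (neither constructor is fully shown in this diff):

```python
from videopython.base.description import BoundingBox
from videopython.base.effects import KenBurns

start = BoundingBox(x=0.0, y=0.0, width=1.0, height=1.0)   # full frame
end = BoundingBox(x=0.25, y=0.25, width=0.5, height=0.5)   # push in to the center
effect = KenBurns(start_region=start, end_region=end)
effect.streaming_init(total_frames=120, fps=30.0, width=1920, height=1080)
```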
@@ -525,6 +633,8 @@ def _compute_curve(t: np.ndarray, curve: str) -> np.ndarray:
 class Fade(Effect):
     """Fades video and audio to or from black."""

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         mode: Literal["in", "out", "in_out"],
@@ -549,6 +659,28 @@
         self.duration = duration
         self.curve = curve

+    def _compute_alpha(self, n_frames: int, fps: float) -> np.ndarray:
+        """Compute per-frame alpha values for the video fade."""
+        fade_frames = min(round(self.duration * fps), n_frames)
+        alpha = np.ones(n_frames, dtype=np.float32)
+        if self.mode in ("in", "in_out"):
+            t = np.linspace(0, 1, fade_frames, dtype=np.float32)
+            alpha[:fade_frames] = _compute_curve(t, self.curve)
+        if self.mode in ("out", "in_out"):
+            t = np.linspace(1, 0, fade_frames, dtype=np.float32)
+            alpha[-fade_frames:] = np.minimum(alpha[-fade_frames:], _compute_curve(t, self.curve))
+        return alpha
+
+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        self._stream_alpha = self._compute_alpha(total_frames, fps)
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        idx = min(frame_index, len(self._stream_alpha) - 1)
+        a = self._stream_alpha[idx]
+        if a == 1.0:
+            return frame
+        return (frame.astype(np.float32) * a).astype(np.uint8)
+
     def apply(self, video: Video, start: float | None = None, stop: float | None = None) -> Video:
         """Apply fade effect to video and audio.

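`_compute_alpha` is now the single source of truth for the fade ramp, and `process_frame` just scales each frame by its alpha. For a linear curve, `Fade(mode="in", duration=1.0)` at 30 fps over 90 frames climbs from 0 to 1 across the first 30 frames and stays at 1.0 afterward. A sketch, assuming the `curve` argument defaults to a linear ramp (its default is not shown in this diff):

```python
from videopython.base.effects import Fade

fade = Fade(mode="in", duration=1.0)  # assuming curve defaults to linear
alpha = fade._compute_alpha(n_frames=90, fps=30.0)  # internal, shown for illustration
# alpha[:30] ramps from 0.0 to 1.0; alpha[30:] is all 1.0
```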
@@ -569,16 +701,8 @@
         effect_start_frame = round(start_s * video.fps)
         effect_end_frame = round(stop_s * video.fps)
         n_effect_frames = effect_end_frame - effect_start_frame
-        fade_frames = min(round(self.duration * video.fps), n_effect_frames)

-        # Build per-frame alpha array (1.0 = fully visible, 0.0 = black)
-        alpha = np.ones(n_effect_frames, dtype=np.float32)
-        if self.mode in ("in", "in_out"):
-            t = np.linspace(0, 1, fade_frames, dtype=np.float32)
-            alpha[:fade_frames] = _compute_curve(t, self.curve)
-        if self.mode in ("out", "in_out"):
-            t = np.linspace(1, 0, fade_frames, dtype=np.float32)
-            alpha[-fade_frames:] = np.minimum(alpha[-fade_frames:], _compute_curve(t, self.curve))
+        alpha = self._compute_alpha(n_effect_frames, video.fps)

         # Apply to video frames in batches to avoid a full float32 copy
         batch_size = 64
@@ -600,29 +724,38 @@

         # Apply to audio
         if video.audio is not None and not video.audio.is_silent:
-            sample_rate = video.audio.metadata.sample_rate
-            audio_start = round(start_s * sample_rate)
-            audio_end = min(round(stop_s * sample_rate), len(video.audio.data))
-            n_audio_samples = audio_end - audio_start
-            fade_samples = min(round(self.duration * sample_rate), n_audio_samples)
-
-            audio_alpha = np.ones(n_audio_samples, dtype=np.float32)
-            if self.mode in ("in", "in_out"):
-                t = np.linspace(0, 1, fade_samples, dtype=np.float32)
-                audio_alpha[:fade_samples] = _compute_curve(t, self.curve)
-            if self.mode in ("out", "in_out"):
-                t = np.linspace(1, 0, fade_samples, dtype=np.float32)
-                audio_alpha[-fade_samples:] = np.minimum(audio_alpha[-fade_samples:], _compute_curve(t, self.curve))
-
-            audio_data = video.audio.data
-            if audio_data.ndim == 1:
-                audio_data[audio_start:audio_end] *= audio_alpha
-            else:
-                audio_data[audio_start:audio_end] *= audio_alpha[:, np.newaxis]
-            np.clip(audio_data, -1.0, 1.0, out=audio_data)
+            self.apply_audio(video.audio, start_s, stop_s)

         return video

+    def apply_audio(self, audio: Audio, start_s: float, stop_s: float) -> None:
+        """Apply fade to audio data in-place.
+
+        Args:
+            audio: Audio object to modify.
+            start_s: Start time in seconds.
+            stop_s: Stop time in seconds.
+        """
+        sample_rate = audio.metadata.sample_rate
+        audio_start = round(start_s * sample_rate)
+        audio_end = min(round(stop_s * sample_rate), len(audio.data))
+        n_samples = audio_end - audio_start
+        fade_samples = min(round(self.duration * sample_rate), n_samples)
+
+        alpha = np.ones(n_samples, dtype=np.float32)
+        if self.mode in ("in", "in_out"):
+            t = np.linspace(0, 1, fade_samples, dtype=np.float32)
+            alpha[:fade_samples] = _compute_curve(t, self.curve)
+        if self.mode in ("out", "in_out"):
+            t = np.linspace(1, 0, fade_samples, dtype=np.float32)
+            alpha[-fade_samples:] = np.minimum(alpha[-fade_samples:], _compute_curve(t, self.curve))
+
+        if audio.data.ndim == 1:
+            audio.data[audio_start:audio_end] *= alpha
+        else:
+            audio.data[audio_start:audio_end] *= alpha[:, np.newaxis]
+        np.clip(audio.data, -1.0, 1.0, out=audio.data)
+
     def _apply(self, video: Video) -> Video:
         raise NotImplementedError("Fade overrides apply() directly")

@@ -634,6 +767,11 @@ class AudioEffect(Effect):
     without modification. Overrides apply() to skip frame processing.
     """

+    supports_streaming: ClassVar[bool] = True
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        return frame  # Audio effects don't touch frames
+
     def _apply(self, video: Video) -> Video:
         raise NotImplementedError("AudioEffect does not process frames -- use _apply_audio()")

@@ -717,6 +855,8 @@
 class TextOverlay(Effect):
     """Draws text on video frames, with auto word-wrap and optional background box."""

+    supports_streaming: ClassVar[bool] = True
+
     def __init__(
         self,
         text: str,
@@ -841,6 +981,37 @@
             return px - img_w, py - img_h
         return px - img_w // 2, py - img_h // 2

+    def streaming_init(self, total_frames: int, fps: float, width: int, height: int) -> None:
+        if self._rendered is None:
+            self._rendered = self._render_text_image(width, height)
+        oh, ow = self._rendered.shape[:2]
+        x, y = self._compute_position(width, height, ow, oh)
+        src_x = max(0, -x)
+        src_y = max(0, -y)
+        dst_x = max(0, x)
+        dst_y = max(0, y)
+        paste_w = min(ow - src_x, width - dst_x)
+        paste_h = min(oh - src_y, height - dst_y)
+        if paste_w <= 0 or paste_h <= 0:
+            self._stream_noop = True
+            return
+        self._stream_noop = False
+        overlay_region = self._rendered[src_y : src_y + paste_h, src_x : src_x + paste_w]
+        self._stream_alpha = overlay_region[:, :, 3:4].astype(np.float32) / 255.0
+        self._stream_rgb = overlay_region[:, :, :3].astype(np.float32)
+        self._stream_dst = (dst_y, dst_x, paste_h, paste_w)
+
+    def process_frame(self, frame: np.ndarray, frame_index: int) -> np.ndarray:
+        if self._stream_noop:
+            return frame
+        dy, dx, ph, pw = self._stream_dst
+        region = frame[dy : dy + ph, dx : dx + pw]
+        blended = (
+            self._stream_rgb * self._stream_alpha + region.astype(np.float32) * (1.0 - self._stream_alpha)
+        ).astype(np.uint8)
+        frame[dy : dy + ph, dx : dx + pw] = blended
+        return frame
+
     def _apply(self, video: Video) -> Video:
         frame_h, frame_w = video.frame_shape[:2]

--- videopython-0.25.8/src/videopython/base/registry.py
+++ videopython-0.26.1/src/videopython/base/registry.py
@@ -530,7 +530,7 @@ def _register_base_operations() -> None:
         Resize,
         op_id="resize",
         category=OperationCategory.TRANSFORMATION,
-        tags={"changes_dimensions"},
+        tags={"changes_dimensions", "streamable"},
         param_overrides={
             "width": {"exclusive_minimum": 0},
             "height": {"exclusive_minimum": 0},
@@ -543,7 +543,7 @@ def _register_base_operations() -> None:
         ResampleFPS,
         op_id="resample_fps",
         category=OperationCategory.TRANSFORMATION,
-        tags={"changes_fps"},
+        tags={"changes_fps", "streamable"},
         param_overrides={"fps": {"minimum": 1}},
         metadata_method="resample_fps",
     )
@@ -553,7 +553,7 @@ def _register_base_operations() -> None:
         Crop,
         op_id="crop",
         category=OperationCategory.TRANSFORMATION,
-        tags={"changes_dimensions"},
+        tags={"changes_dimensions", "streamable"},
         param_overrides={
             "width": {"exclusive_minimum": 0},
             "height": {"exclusive_minimum": 0},
@@ -634,6 +634,7 @@ def _register_base_operations() -> None:
         Blur,
         op_id="blur_effect",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("blur",),
         param_overrides={"iterations": {"minimum": 1}},
         apply_param_overrides=_time_range_apply_overrides,
@@ -644,6 +645,7 @@ def _register_base_operations() -> None:
         Zoom,
         op_id="zoom_effect",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("zoom",),
         param_overrides={"zoom_factor": {"exclusive_minimum": 1}},
         apply_param_overrides=_time_range_apply_overrides,
@@ -654,6 +656,7 @@ def _register_base_operations() -> None:
         ColorGrading,
         op_id="color_adjust",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("color_grading",),
         param_overrides={
             "brightness": {"minimum": -1, "maximum": 1},
@@ -669,6 +672,7 @@ def _register_base_operations() -> None:
         Vignette,
         op_id="vignette",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         param_overrides={
             "strength": {"minimum": 0, "maximum": 1},
             "radius": {"minimum": 0.5, "maximum": 2.0},
@@ -681,6 +685,7 @@ def _register_base_operations() -> None:
         KenBurns,
         op_id="ken_burns",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         exclude_params={"start_region", "end_region"},
         # BoundingBox forward ref breaks get_type_hints, so fix easing type manually.
         param_overrides={
@@ -697,6 +702,7 @@ def _register_base_operations() -> None:
         FullImageOverlay,
         op_id="full_image_overlay",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         exclude_params={"overlay_image"},
         param_overrides={
             "alpha": {"minimum": 0, "maximum": 1},
@@ -710,6 +716,7 @@ def _register_base_operations() -> None:
         Fade,
         op_id="fade",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         param_overrides={"duration": {"exclusive_minimum": 0}},
         apply_param_overrides=_time_range_apply_overrides,
     )
@@ -719,6 +726,7 @@ def _register_base_operations() -> None:
         VolumeAdjust,
         op_id="volume_adjust",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("volume",),
         param_overrides={
             "volume": {"minimum": 0},
@@ -732,6 +740,7 @@ def _register_base_operations() -> None:
         TextOverlay,
         op_id="text_overlay",
         category=OperationCategory.EFFECT,
+        tags={"streamable"},
         aliases=("lower_third", "title_card"),
         exclude_params={"font_filename"},
         param_overrides={
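Taken together, the `streamable` registry tags and the new `Effect` hooks let a caller chain effects frame by frame without materializing the whole clip, which is presumably what the new `streaming.py` module builds on. A generic driver sketch using only the API shown in this diff (same assumption about `Fade`'s curve default as above):

```python
import numpy as np

from videopython.base.effects import Fade, Vignette

effects = [Vignette(strength=0.4), Fade(mode="in_out", duration=1.0)]
total_frames, fps, width, height = 150, 30.0, 1280, 720

for effect in effects:
    if effect.supports_streaming:  # both effects above opt in
        effect.streaming_init(total_frames, fps, width, height)

for i in range(total_frames):
    frame = np.zeros((height, width, 3), dtype=np.uint8)  # stand-in for a decoded frame
    for effect in effects:
        frame = effect.process_frame(frame, i)
    # hand `frame` to an encoder / writer here
```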