videopython 0.31.0__tar.gz → 0.31.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {videopython-0.31.0 → videopython-0.31.3}/PKG-INFO +1 -1
  2. {videopython-0.31.0 → videopython-0.31.3}/pyproject.toml +1 -1
  3. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/dubbing/remux.py +11 -8
  4. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/transforms.py +11 -15
  5. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/video_analysis.py +4 -15
  6. videopython-0.31.3/src/videopython/base/_dimensions.py +41 -0
  7. videopython-0.31.3/src/videopython/base/_ffmpeg.py +152 -0
  8. videopython-0.31.3/src/videopython/base/_video_io.py +289 -0
  9. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/audio/audio.py +16 -34
  10. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/exceptions.py +18 -0
  11. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/streaming.py +33 -47
  12. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/text/__init__.py +2 -1
  13. videopython-0.31.0/src/videopython/base/text/overlay.py → videopython-0.31.3/src/videopython/base/text/image_text.py +7 -149
  14. videopython-0.31.3/src/videopython/base/text/overlay.py +160 -0
  15. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/transforms.py +8 -7
  16. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/video.py +66 -409
  17. {videopython-0.31.0 → videopython-0.31.3}/.gitignore +0 -0
  18. {videopython-0.31.0 → videopython-0.31.3}/LICENSE +0 -0
  19. {videopython-0.31.0 → videopython-0.31.3}/README.md +0 -0
  20. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/__init__.py +0 -0
  21. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/__init__.py +0 -0
  22. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/_device.py +0 -0
  23. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/dubbing/__init__.py +0 -0
  24. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/dubbing/dubber.py +0 -0
  25. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/dubbing/models.py +0 -0
  26. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/dubbing/pipeline.py +0 -0
  27. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/dubbing/quality.py +0 -0
  28. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/dubbing/timing.py +0 -0
  29. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/generation/__init__.py +0 -0
  30. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/generation/audio.py +0 -0
  31. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/generation/image.py +0 -0
  32. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/generation/qwen3.py +0 -0
  33. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/generation/translation.py +0 -0
  34. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/generation/video.py +0 -0
  35. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/understanding/__init__.py +0 -0
  36. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/understanding/audio.py +0 -0
  37. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/understanding/faces.py +0 -0
  38. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/understanding/image.py +0 -0
  39. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/understanding/separation.py +0 -0
  40. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/ai/understanding/temporal.py +0 -0
  41. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/__init__.py +0 -0
  42. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/audio/__init__.py +0 -0
  43. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/audio/analysis.py +0 -0
  44. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/description.py +0 -0
  45. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/effects.py +0 -0
  46. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/operation.py +0 -0
  47. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/scene.py +0 -0
  48. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/base/text/transcription.py +0 -0
  49. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/editing/__init__.py +0 -0
  50. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/editing/video_edit.py +0 -0
  51. {videopython-0.31.0 → videopython-0.31.3}/src/videopython/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videopython
3
- Version: 0.31.0
3
+ Version: 0.31.3
4
4
  Summary: Minimal video generation and processing library.
5
5
  Project-URL: Homepage, https://videopython.com
6
6
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "videopython"
3
- version = "0.31.0"
3
+ version = "0.31.3"
4
4
  description = "Minimal video generation and processing library."
5
5
  authors = [
6
6
  { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -4,13 +4,15 @@ from __future__ import annotations
4
4
 
5
5
  import io
6
6
  import logging
7
- import subprocess
8
7
  import wave
9
8
  from pathlib import Path
10
9
  from typing import TYPE_CHECKING
11
10
 
12
11
  import numpy as np
13
12
 
13
+ from videopython.base import _ffmpeg
14
+ from videopython.base.exceptions import FFmpegRunError
15
+
14
16
  if TYPE_CHECKING:
15
17
  from videopython.base.audio import Audio
16
18
 
@@ -95,9 +97,10 @@ def replace_audio_stream(
95
97
  ]
96
98
 
97
99
  logger.info("replace_audio_stream: %s + %s -> %s", video_path, audio_path, output_path)
98
- result = subprocess.run(cmd, capture_output=True)
99
- if result.returncode != 0:
100
- raise RemuxError(f"ffmpeg failed (exit {result.returncode}): {result.stderr.decode(errors='replace')}")
100
+ try:
101
+ _ffmpeg.run(cmd)
102
+ except FFmpegRunError as e:
103
+ raise RemuxError(str(e)) from e
101
104
 
102
105
 
103
106
  def replace_audio_stream_from_audio(
@@ -175,7 +178,7 @@ def replace_audio_stream_from_audio(
175
178
  len(wav_bytes),
176
179
  output_path,
177
180
  )
178
- process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
179
- _, stderr = process.communicate(wav_bytes)
180
- if process.returncode != 0:
181
- raise RemuxError(f"ffmpeg failed (exit {process.returncode}): {stderr.decode(errors='replace')}")
181
+ try:
182
+ _ffmpeg.run(cmd, stdin=wav_bytes)
183
+ except FFmpegRunError as e:
184
+ raise RemuxError(str(e)) from e
@@ -11,17 +11,13 @@ from pydantic import Field
11
11
  from tqdm import tqdm
12
12
 
13
13
  from videopython.ai.understanding.faces import FaceTracker
14
+ from videopython.base._dimensions import floor_to_even
14
15
  from videopython.base.operation import OpCategory, Operation
15
16
  from videopython.base.video import Video
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
19
20
 
20
- def _make_even(value: int) -> int:
21
- """Round down to nearest even number for H.264 compatibility."""
22
- return value - (value % 2)
23
-
24
-
25
21
  __all__ = [
26
22
  "FaceTrackingCrop",
27
23
  ]
@@ -105,17 +101,17 @@ class FaceTrackingCrop(Operation):
105
101
  frame_ratio = frame_w / frame_h
106
102
 
107
103
  if target_ratio < frame_ratio:
108
- crop_h = _make_even(frame_h)
109
- crop_w = _make_even(int(crop_h * target_ratio))
104
+ crop_h = floor_to_even(frame_h)
105
+ crop_w = floor_to_even(int(crop_h * target_ratio))
110
106
  else:
111
- crop_w = _make_even(frame_w)
112
- crop_h = _make_even(int(crop_w / target_ratio))
107
+ crop_w = floor_to_even(frame_w)
108
+ crop_h = floor_to_even(int(crop_w / target_ratio))
113
109
 
114
110
  min_face_dim = max(face_w * frame_w, face_h * frame_h)
115
111
  min_crop_dim = min_face_dim * (1 + 2 * self.padding)
116
112
  if crop_w < min_crop_dim * target_ratio:
117
- crop_w = _make_even(min(int(min_crop_dim * target_ratio), frame_w))
118
- crop_h = _make_even(min(int(crop_w / target_ratio), frame_h))
113
+ crop_w = floor_to_even(min(int(min_crop_dim * target_ratio), frame_w))
114
+ crop_h = floor_to_even(min(int(crop_w / target_ratio), frame_h))
119
115
 
120
116
  if center_position is None:
121
117
  center_position = self._apply_framing_offset(face_cx, face_cy, face_h)
@@ -141,11 +137,11 @@ class FaceTrackingCrop(Operation):
141
137
  h, w = video.frame_shape[:2]
142
138
  target_ratio = self.target_aspect[0] / self.target_aspect[1]
143
139
  if target_ratio < w / h:
144
- out_h = _make_even(h)
145
- out_w = _make_even(int(out_h * target_ratio))
140
+ out_h = floor_to_even(h)
141
+ out_w = floor_to_even(int(out_h * target_ratio))
146
142
  else:
147
- out_w = _make_even(w)
148
- out_h = _make_even(int(out_w / target_ratio))
143
+ out_w = floor_to_even(w)
144
+ out_h = floor_to_even(int(out_w / target_ratio))
149
145
 
150
146
  default_x = (w - out_w) // 2
151
147
  default_y = (h - out_h) // 2
@@ -5,7 +5,6 @@ import json
5
5
  import logging
6
6
  import math
7
7
  import re
8
- import subprocess
9
8
  import time
10
9
  from collections.abc import Callable, Iterator
11
10
  from concurrent.futures import ThreadPoolExecutor
@@ -26,6 +25,7 @@ from videopython.ai.understanding import (
26
25
  SemanticSceneDetector,
27
26
  )
28
27
  from videopython.ai.understanding.faces import FaceTracker
28
+ from videopython.base import _ffmpeg
29
29
  from videopython.base.audio import Audio
30
30
  from videopython.base.description import (
31
31
  AudioClassification,
@@ -34,6 +34,7 @@ from videopython.base.description import (
34
34
  SceneBoundary,
35
35
  SceneDescription,
36
36
  )
37
+ from videopython.base.exceptions import FFmpegProbeError
37
38
  from videopython.base.text.transcription import Transcription
38
39
  from videopython.base.video import Video, VideoMetadata, extract_frames_at_times
39
40
 
@@ -1032,21 +1033,9 @@ class VideoAnalyzer:
1032
1033
  if path is None:
1033
1034
  return {}
1034
1035
 
1035
- cmd = [
1036
- "ffprobe",
1037
- "-v",
1038
- "error",
1039
- "-show_entries",
1040
- "format_tags:stream_tags",
1041
- "-of",
1042
- "json",
1043
- str(path),
1044
- ]
1045
-
1046
1036
  try:
1047
- result = subprocess.run(cmd, capture_output=True, text=True, check=True)
1048
- payload = json.loads(result.stdout)
1049
- except (subprocess.CalledProcessError, json.JSONDecodeError, OSError):
1037
+ payload = _ffmpeg.probe(path, extra_args=["-show_entries", "format_tags:stream_tags"])
1038
+ except (FFmpegProbeError, OSError):
1050
1039
  return {}
1051
1040
 
1052
1041
  tags: dict[str, str] = {}
@@ -0,0 +1,41 @@
1
+ """Pure helpers for video dimension math.
2
+
3
+ Centralises the libx264+yuv420p even-dimension constraint and the
4
+ two "round to even" calculations that previously lived (with subtly
5
+ different semantics) in ``base/video.py``, ``ai/transforms.py``, and
6
+ ``base/transforms.py``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+
12
def round_to_even(value: int | float) -> int:
    """Snap a dimension to the closest even integer, never going below 2.

    Intended for target sizes derived from a ratio or scale factor, where
    landing slightly above or slightly below the input is equally fine.
    """
    # Work in units of two pixels. Python's round() resolves exact .5 ties
    # towards the even neighbour, so an odd integer input snaps to the
    # nearest multiple of four (3 -> 4, 5 -> 4, 7 -> 8).
    half_steps = round(float(value) / 2.0)
    snapped = int(half_steps * 2)
    return snapped if snapped > 2 else 2
19
+
20
+
21
def floor_to_even(value: int | float) -> int:
    """Truncate a dimension to an even integer at or below it (minimum 2).

    Intended for cases where the result must stay inside the source
    region, such as cropping, where rounding up would read past the frame
    edge.
    """
    truncated = int(value)
    # Clearing the lowest bit drops an odd integer to the even one below it.
    even = truncated & ~1
    return even if even > 2 else 2
29
+
30
+
31
def require_even(width: int, height: int) -> None:
    """Reject frame sizes that libx264 with yuv420p cannot encode.

    Args:
        width: Frame width in pixels.
        height: Frame height in pixels.

    Raises:
        ValueError: If ``width`` or ``height`` is odd.
    """
    if width % 2 == 0 and height % 2 == 0:
        return
    raise ValueError(
        "libx264 with yuv420p requires even frame dimensions. "
        f"Got {width}x{height}. Resize, crop, or pad to even width and height before saving."
    )
@@ -0,0 +1,152 @@
1
+ """Internal wrappers for ffmpeg / ffprobe subprocess calls.
2
+
3
+ Centralises subprocess invocation patterns so that every call site shares
4
+ the same flag boilerplate, JSON parsing, and failure translation. Public
5
+ modules should keep raising their own domain exceptions (VideoLoadError,
6
+ AudioLoadError, etc.) and call into the helpers here, mapping
7
+ ``FFmpegError`` to whichever public exception they document.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import subprocess
14
+ from contextlib import contextmanager
15
+ from pathlib import Path
16
+ from typing import Iterator, Sequence
17
+
18
+ from videopython.base.exceptions import FFmpegProbeError, FFmpegRunError
19
+
20
+
21
def run(cmd: Sequence[str], *, stdin: bytes | None = None) -> bytes:
    """Execute an ffmpeg/ffprobe command to completion and hand back its stdout.

    All failure modes are funnelled into ``FFmpegRunError`` so each call
    site only has to translate one exception type into its own domain
    error.

    Args:
        cmd: Complete argv; the first element is the binary to launch
            (``"ffmpeg"`` or ``"ffprobe"``).
        stdin: Bytes to feed to the child's stdin, or ``None`` to send
            nothing.

    Returns:
        The raw bytes the process wrote to stdout.

    Raises:
        FFmpegRunError: If the binary cannot be found or the process exits
            with a non-zero status. In the latter case the message carries
            the exit code and the decoded stderr.
    """
    try:
        completed = subprocess.run(
            cmd,
            input=stdin,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError as e:
        raise FFmpegRunError(f"binary not found on PATH: {cmd[0]}") from e

    if completed.returncode == 0:
        return completed.stdout

    stderr_text = completed.stderr.decode(errors="replace")
    raise FFmpegRunError(f"ffmpeg failed (exit {completed.returncode}): {stderr_text}")
45
+
46
+
47
def probe(path: str | Path, *, extra_args: Sequence[str] | None = None) -> dict:
    """Invoke ffprobe on a media file and return its JSON report as a dict.

    Args:
        path: Location of the media file to inspect.
        extra_args: ffprobe flags placed ahead of ``-print_format json``.
            When ``None``, ``-show_streams -show_format`` is used.

    Returns:
        The parsed JSON document printed by ffprobe.

    Raises:
        FFmpegProbeError: If ffprobe is not installed, exits with a
            non-zero status, or prints output that is not valid JSON.
    """
    if extra_args is None:
        selector_args = ["-show_streams", "-show_format"]
    else:
        selector_args = list(extra_args)
    cmd = ["ffprobe", "-v", "error"] + selector_args + ["-print_format", "json", str(path)]

    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        raise FFmpegProbeError(f"ffprobe error: {e.stderr}") from e
    except FileNotFoundError as e:
        raise FFmpegProbeError("ffprobe binary not found on PATH") from e

    try:
        payload = json.loads(completed.stdout)
    except json.JSONDecodeError as e:
        raise FFmpegProbeError(f"Error parsing ffprobe output: {e}") from e
    return payload
77
+
78
+
79
def _terminate(proc: subprocess.Popen, *, timeout: float = 5) -> None:
    """Stop ``proc`` if it is still alive, force-killing it after ``timeout`` seconds.

    A process that has already exited is left untouched.
    """
    if proc.poll() is not None:
        return

    # Ask politely first; only fall back to kill() when the process ignores
    # the termination request for longer than ``timeout``.
    proc.terminate()
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()
88
+
89
+
90
@contextmanager
def popen_decode(cmd: Sequence[str], *, bufsize: int = -1) -> Iterator[subprocess.Popen]:
    """Run an ffmpeg decode command and expose its stdout pipe for reading.

    The yielded ``Popen`` has ``stdout=PIPE`` and ``stderr=DEVNULL``;
    callers pull raw bytes from ``proc.stdout``. When the ``with`` block
    ends, normally or through an exception, the process is terminated
    (with a kill fallback) and the stdout pipe is closed.

    Args:
        cmd: Complete ffmpeg argv, typically writing to ``pipe:1``.
        bufsize: Passed through to ``subprocess.Popen``. A large value
            (e.g. ``10**8``) suits batched reads; a frame-sized value
            suits streaming reads.
    """
    decoder = subprocess.Popen(
        list(cmd),
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        bufsize=bufsize,
    )
    try:
        yield decoder
    finally:
        _terminate(decoder)
        out_pipe = decoder.stdout
        if out_pipe is not None and not out_pipe.closed:
            out_pipe.close()
116
+
117
+
118
@contextmanager
def popen_encode(cmd: Sequence[str]) -> Iterator[subprocess.Popen]:
    """Run an ffmpeg encode command that receives its input over stdin.

    The yielded ``Popen`` has ``stdin=PIPE``, ``stdout=DEVNULL`` and
    ``stderr=PIPE``; callers write raw frames to ``proc.stdin``.

    If the ``with`` body finishes normally, ``communicate()`` closes stdin
    and drains stderr, and ``FFmpegRunError`` is raised when ffmpeg's exit
    status is non-zero. If the body raises, the process is killed, its
    pipes are closed, and the caller's exception propagates unchanged.
    """
    encoder = subprocess.Popen(
        list(cmd),
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    try:
        yield encoder
    except BaseException:
        if encoder.poll() is None:
            encoder.kill()
            encoder.wait()
        for stream in (encoder.stdin, encoder.stderr):
            if stream is None or stream.closed:
                continue
            try:
                stream.close()
            except Exception:
                # Best-effort cleanup: a failure while closing a pipe must
                # not mask the exception that is being re-raised below.
                pass
        raise

    _, err_bytes = encoder.communicate()
    if encoder.returncode != 0:
        raise FFmpegRunError(f"ffmpeg failed (exit {encoder.returncode}): {err_bytes.decode(errors='replace')}")
@@ -0,0 +1,289 @@
1
+ """Internal ffmpeg decode/encode helpers for ``Video``.
2
+
3
+ Holds the subprocess-heavy bodies of ``Video.from_path`` (decode an
4
+ ffmpeg pipe into a frame array) and ``Video.save`` (stream a frame
5
+ array to an ffmpeg encode). Keeping these out of ``base/video.py``
6
+ lets the data class stay focused on the in-memory frame/audio
7
+ container.
8
+
9
+ Public callers should keep using ``Video.from_path`` and
10
+ ``Video.save``; this module is internal scaffolding.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import tempfile
16
+ import uuid
17
+ import warnings
18
+ from pathlib import Path
19
+ from typing import Literal, get_args
20
+
21
+ import numpy as np
22
+
23
+ from videopython.base import _ffmpeg
24
+ from videopython.base._dimensions import require_even
25
+ from videopython.base.audio import Audio
26
+ from videopython.base.exceptions import (
27
+ AudioLoadError,
28
+ FFmpegRunError,
29
+ VideoLoadError,
30
+ VideoMetadataError,
31
+ )
32
+
33
# Container formats accepted by ``encode_video``'s ``format`` argument; the
# chosen value also becomes the output file's suffix.
ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]
# Encoder speed presets accepted by ``encode_video``; the chosen value is
# forwarded to ffmpeg's ``-preset`` option.
ALLOWED_VIDEO_PRESETS = Literal[
    "ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"
]

# Pre-allocation safety margin for the decode frame array.
# ``decode_video`` sizes its buffer as
# ``int(duration * fps * FRAME_BUFFER_MULTIPLIER) + FRAME_BUFFER_PADDING`` frames.
FRAME_BUFFER_MULTIPLIER = 1.1
FRAME_BUFFER_PADDING = 10
41
+
42
+
43
def decode_video(
    path: str,
    *,
    read_batch_size: int = 100,
    start_second: float | None = None,
    end_second: float | None = None,
    fps: float | None = None,
    width: int | None = None,
    height: int | None = None,
) -> tuple[np.ndarray, float, Audio]:
    """Decode a video file into an RGB frame array plus its audio track.

    Returns ``(frames, fps, audio)`` ready to feed straight into the
    ``Video`` constructor. Silent audio is substituted when the source
    has no usable audio stream.

    Args:
        path: Path of the video file to decode.
        read_batch_size: Maximum number of frames requested from the
            ffmpeg pipe per read.
        start_second: Optional start of the window to decode; passed to
            ffmpeg as ``-ss`` ahead of the input.
        end_second: Optional end of the window to decode; translated into
            an ffmpeg ``-t`` duration.
        fps: Optional output frame rate. An ``fps`` filter is added only
            when it differs from the source metadata.
        width: Optional output width. A ``scale`` filter is added when
            ``width`` or ``height`` is given; the missing one falls back
            to the source metadata.
        height: Optional output height (see ``width``).

    Returns:
        A tuple ``(frames, fps, audio)`` where ``frames`` is a ``uint8``
        array shaped ``(n_frames, height, width, 3)``.

    Raises:
        ValueError: If the requested time window is invalid, if ffmpeg
            exited with an error before producing any frame, or if no
            frames could be read.
        VideoLoadError: On ffmpeg failure or unreadable I/O (any
            ``OSError`` raised while decoding).
        VideoMetadataError: When ffprobe cannot describe the source.

    NOTE(review): the previous docstring also listed ``FileNotFoundError``
    (via ``VideoMetadata``). The builtin ``FileNotFoundError`` is an
    ``OSError`` and would be re-raised as ``VideoLoadError`` by the handler
    at the bottom of this function -- confirm what
    ``VideoMetadata.from_path`` actually raises for a missing file.
    """
    from videopython.base.video import VideoMetadata

    try:
        metadata = VideoMetadata.from_path(path)

        out_width = width if width is not None else metadata.width
        out_height = height if height is not None else metadata.height
        out_fps = fps if fps is not None else metadata.fps
        total_duration = metadata.total_seconds

        if start_second is not None and start_second < 0:
            raise ValueError("start_second must be non-negative")
        if end_second is not None and end_second > total_duration:
            raise ValueError(f"end_second ({end_second}) exceeds video duration ({total_duration})")
        if start_second is not None and end_second is not None and start_second >= end_second:
            raise ValueError("start_second must be less than end_second")

        if start_second is not None and end_second is not None:
            segment_duration = end_second - start_second
        elif end_second is not None:
            segment_duration = end_second
        elif start_second is not None:
            segment_duration = total_duration - start_second
        else:
            segment_duration = total_duration

        # Warn before allocating: the whole segment is held in memory as raw RGB.
        estimated_bytes = int(segment_duration * out_fps) * out_height * out_width * 3
        estimated_gb = estimated_bytes / (1024**3)
        if estimated_gb > 10:
            warnings.warn(
                f"Loading this video will use ~{estimated_gb:.1f}GB of RAM. "
                f"For large videos, consider using FrameIterator for memory-efficient streaming.",
                ResourceWarning,
                stacklevel=2,
            )

        ffmpeg_cmd = ["ffmpeg"]

        # ``-ss`` is placed before ``-i`` so it applies to the input.
        if start_second is not None:
            ffmpeg_cmd.extend(["-ss", str(start_second)])

        ffmpeg_cmd.extend(["-i", path])

        if end_second is not None and start_second is not None:
            duration = end_second - start_second
            ffmpeg_cmd.extend(["-t", str(duration)])
        elif end_second is not None:
            ffmpeg_cmd.extend(["-t", str(end_second)])

        vf_filters: list[str] = []
        if width is not None or height is not None:
            vf_filters.append(f"scale={out_width}:{out_height}")
        if fps is not None and fps != metadata.fps:
            vf_filters.append(f"fps={out_fps}")
        if vf_filters:
            ffmpeg_cmd.extend(["-vf", ",".join(vf_filters)])

        ffmpeg_cmd.extend(
            [
                "-f",
                "rawvideo",
                "-pix_fmt",
                "rgb24",
                "-vcodec",
                "rawvideo",
                "-avoid_negative_ts",
                "make_zero",
                "-y",
                "pipe:1",
            ]
        )

        frame_size = out_width * out_height * 3
        estimated_frames = int(segment_duration * out_fps * FRAME_BUFFER_MULTIPLIER) + FRAME_BUFFER_PADDING

        # Pre-allocate the destination array; it is trimmed to the number of
        # frames actually read once decoding finishes.
        frames = np.empty((estimated_frames, out_height, out_width, 3), dtype=np.uint8)
        frames_read = 0

        with _ffmpeg.popen_decode(ffmpeg_cmd, bufsize=10**8) as process:
            while frames_read < estimated_frames:
                remaining_frames = estimated_frames - frames_read
                batch_size = min(read_batch_size, remaining_frames)

                batch_data = process.stdout.read(frame_size * batch_size)  # type: ignore[union-attr]
                if not batch_data:
                    break

                # A read may end mid-frame; only whole frames are kept.
                batch_frames = np.frombuffer(batch_data, dtype=np.uint8)
                complete_frames = len(batch_frames) // (out_height * out_width * 3)
                if complete_frames == 0:
                    break

                complete_data = batch_frames[: complete_frames * out_height * out_width * 3]
                batch_frames_array = complete_data.reshape(complete_frames, out_height, out_width, 3)

                if frames_read + complete_frames > estimated_frames:
                    new_size = max(estimated_frames * 2, frames_read + complete_frames + 100)
                    new_frames = np.empty((new_size, out_height, out_width, 3), dtype=np.uint8)
                    new_frames[:frames_read] = frames[:frames_read]
                    frames = new_frames
                    estimated_frames = new_size

                end_idx = frames_read + complete_frames
                frames[frames_read:end_idx] = batch_frames_array
                frames_read += complete_frames

        # ``returncode`` is only populated once the process has been polled or
        # waited on, which ``popen_decode`` does when the ``with`` block exits.
        if process.returncode not in (0, None) and frames_read == 0:
            raise ValueError(f"FFmpeg failed to process video (return code: {process.returncode})")

        if frames_read == 0:
            raise ValueError("No frames were read from the video")

        frames = frames[:frames_read]  # type: ignore

        try:
            audio = Audio.from_path(path)
            if start_second is not None or end_second is not None:
                audio_start = start_second if start_second is not None else 0
                audio_end = end_second if end_second is not None else audio.metadata.duration_seconds
                audio = audio.slice(start_seconds=audio_start, end_seconds=audio_end)
        except (AudioLoadError, FileNotFoundError):
            # No usable audio: synthesise silence matching the decoded length.
            warnings.warn(f"No audio found for `{path}`, adding silent track.")
            segment_duration = frames_read / out_fps
            audio = Audio.create_silent(duration_seconds=round(segment_duration, 2), stereo=True, sample_rate=44100)

        return frames, out_fps, audio

    except VideoMetadataError:
        raise
    except FFmpegRunError as e:
        raise VideoLoadError(f"FFmpeg failed: {e}") from e
    except (OSError, IOError) as e:
        # Chain the original error explicitly, matching the FFmpegRunError
        # branch above, so the underlying cause stays attached.
        raise VideoLoadError(f"I/O error: {e}") from e
197
+
198
+
199
def encode_video(
    frames: np.ndarray,
    fps: float,
    audio: Audio,
    *,
    filename: str | Path | None = None,
    format: ALLOWED_VIDEO_FORMATS = "mp4",
    preset: ALLOWED_VIDEO_PRESETS = "medium",
    crf: int = 23,
) -> Path:
    """Write an RGB frame array and its audio track to a video file via ffmpeg.

    Frames are streamed to ffmpeg over stdin as raw ``rgb24`` data; the
    audio is first saved to a temporary WAV file that ffmpeg reads as a
    second input.

    Args:
        frames: Frame array; height and width are taken from axes 1 and 2.
        fps: Frame rate passed to ffmpeg and used to compute the clip
            duration.
        audio: Audio track to mux alongside the video.
        filename: Destination path. Its suffix is replaced with
            ``format``. When ``None``, a UUID-based name is generated.
        format: Output container; must be one of ``ALLOWED_VIDEO_FORMATS``.
        preset: Encoder preset; must be one of ``ALLOWED_VIDEO_PRESETS``.
        crf: Value passed to ffmpeg's ``-crf`` option.

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: If ``format`` or ``preset`` is not in the allowed set,
            or (via ``require_even``) a frame dimension is odd.
        FFmpegRunError: If ffmpeg fails to encode.
    """
    format_choices = get_args(ALLOWED_VIDEO_FORMATS)
    if format.lower() not in format_choices:
        raise ValueError(f"Unsupported format: {format}. Allowed formats are: {', '.join(format_choices)}")

    preset_choices = get_args(ALLOWED_VIDEO_PRESETS)
    if preset not in preset_choices:
        raise ValueError(f"Unsupported preset: {preset}. Allowed presets are: {', '.join(preset_choices)}")

    frame_height, frame_width = frames.shape[1:3]
    require_even(frame_width, frame_height)

    if filename is not None:
        filename = Path(filename).with_suffix(f".{format}")
        filename.parent.mkdir(parents=True, exist_ok=True)
    else:
        filename = Path(f"{uuid.uuid4()}.{format}")

    with tempfile.NamedTemporaryFile(suffix=".wav") as wav_file:
        audio.save(wav_file.name, format="wav")

        clip_seconds = len(frames) / fps

        # The argv is assembled in sections; their order matches the order
        # ffmpeg must see (global flags, raw video input, audio input,
        # codecs, output options, output path).
        global_flags = ["-y", "-hide_banner", "-loglevel", "error"]
        raw_video_input = [
            "-f",
            "rawvideo",
            "-pixel_format",
            "rgb24",
            "-video_size",
            f"{frame_width}x{frame_height}",
            "-framerate",
            str(fps),
            "-i",
            "pipe:0",
        ]
        audio_input = ["-i", wav_file.name]
        video_codec = ["-c:v", "libx264", "-preset", preset, "-crf", str(crf)]
        audio_codec = ["-c:a", "aac", "-b:a", "192k"]
        output_flags = [
            "-pix_fmt",
            "yuv420p",
            "-movflags",
            "+faststart",
            "-t",
            str(clip_seconds),
            "-vsync",
            "cfr",
        ]
        ffmpeg_command = [
            "ffmpeg",
            *global_flags,
            *raw_video_input,
            *audio_input,
            *video_codec,
            *audio_codec,
            *output_flags,
            str(filename),
        ]

        with _ffmpeg.popen_encode(ffmpeg_command) as encoder:
            # ffmpeg expects tightly packed uint8 bytes; convert only when
            # the array is not already in that layout.
            needs_copy = frames.dtype != np.uint8 or not frames.flags["C_CONTIGUOUS"]
            if needs_copy:
                frames = np.ascontiguousarray(frames, dtype=np.uint8)

            payload = memoryview(frames)
            try:
                encoder.stdin.write(payload)  # type: ignore[union-attr]
            except BrokenPipeError as e:
                # ffmpeg closed its stdin early; surface whatever it wrote
                # to stderr as the failure reason.
                if encoder.stderr is not None:
                    stderr = encoder.stderr.read()
                else:
                    stderr = b""
                raise FFmpegRunError(
                    f"ffmpeg terminated while receiving video data: {stderr.decode(errors='replace')}"
                ) from e

    return filename