videopython 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of videopython might be problematic. Click here for more details.
- videopython/ai/__init__.py +0 -0
- videopython/{generation → ai/generation}/image.py +0 -3
- videopython/ai/understanding/__init__.py +0 -0
- videopython/ai/understanding/transcribe.py +37 -0
- videopython/base/combine.py +45 -0
- videopython/base/effects.py +3 -3
- videopython/base/transcription.py +13 -0
- videopython/base/transforms.py +0 -2
- videopython/base/video.py +298 -158
- videopython/utils/__init__.py +3 -0
- videopython/utils/image.py +0 -232
- videopython/utils/text.py +727 -0
- {videopython-0.3.0.dist-info → videopython-0.4.1.dist-info}/METADATA +26 -13
- videopython-0.4.1.dist-info/RECORD +26 -0
- videopython-0.3.0.dist-info/RECORD +0 -20
- /videopython/{generation → ai/generation}/__init__.py +0 -0
- /videopython/{generation → ai/generation}/audio.py +0 -0
- /videopython/{generation → ai/generation}/video.py +0 -0
- {videopython-0.3.0.dist-info → videopython-0.4.1.dist-info}/WHEEL +0 -0
- {videopython-0.3.0.dist-info → videopython-0.4.1.dist-info}/licenses/LICENSE +0 -0
videopython/base/video.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import subprocess
|
|
4
5
|
import tempfile
|
|
5
6
|
from dataclasses import dataclass
|
|
7
|
+
from fractions import Fraction
|
|
6
8
|
from pathlib import Path
|
|
7
9
|
from typing import Literal, get_args
|
|
8
10
|
|
|
9
|
-
import cv2
|
|
10
11
|
import numpy as np
|
|
11
12
|
from soundpython import Audio
|
|
12
13
|
|
|
@@ -15,6 +16,12 @@ from videopython.utils.common import generate_random_name
|
|
|
15
16
|
ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]
|
|
16
17
|
|
|
17
18
|
|
|
19
|
+
class VideoMetadataError(Exception):
|
|
20
|
+
"""Raised when there's an error getting video metadata"""
|
|
21
|
+
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
|
|
18
25
|
@dataclass
|
|
19
26
|
class VideoMetadata:
|
|
20
27
|
"""Class to store video metadata."""
|
|
@@ -25,37 +32,80 @@ class VideoMetadata:
|
|
|
25
32
|
frame_count: int
|
|
26
33
|
total_seconds: float
|
|
27
34
|
|
|
28
|
-
def __str__(self):
|
|
35
|
+
def __str__(self) -> str:
|
|
29
36
|
return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"
|
|
30
37
|
|
|
31
38
|
def __repr__(self) -> str:
|
|
32
39
|
return self.__str__()
|
|
33
40
|
|
|
34
|
-
def get_frame_shape(self):
|
|
41
|
+
def get_frame_shape(self) -> np.ndarray:
|
|
35
42
|
"""Returns frame shape."""
|
|
36
43
|
return np.array((self.height, self.width, 3))
|
|
37
44
|
|
|
38
|
-
def get_video_shape(self):
|
|
45
|
+
def get_video_shape(self) -> np.ndarray:
|
|
39
46
|
"""Returns video shape."""
|
|
40
47
|
return np.array((self.frame_count, self.height, self.width, 3))
|
|
41
48
|
|
|
49
|
+
@staticmethod
|
|
50
|
+
def _run_ffprobe(video_path: str | Path) -> dict:
|
|
51
|
+
"""Run ffprobe and return parsed JSON output."""
|
|
52
|
+
cmd = [
|
|
53
|
+
"ffprobe",
|
|
54
|
+
"-v",
|
|
55
|
+
"error",
|
|
56
|
+
"-select_streams",
|
|
57
|
+
"v:0",
|
|
58
|
+
"-show_entries",
|
|
59
|
+
"stream=width,height,r_frame_rate,nb_frames",
|
|
60
|
+
"-show_entries",
|
|
61
|
+
"format=duration",
|
|
62
|
+
"-print_format",
|
|
63
|
+
"json",
|
|
64
|
+
str(video_path),
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
try:
|
|
68
|
+
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
69
|
+
return json.loads(result.stdout)
|
|
70
|
+
except subprocess.CalledProcessError as e:
|
|
71
|
+
raise VideoMetadataError(f"FFprobe error: {e.stderr}")
|
|
72
|
+
except json.JSONDecodeError as e:
|
|
73
|
+
raise VideoMetadataError(f"Error parsing FFprobe output: {e}")
|
|
74
|
+
|
|
42
75
|
@classmethod
|
|
43
|
-
def from_path(cls, video_path: str) -> VideoMetadata:
|
|
44
|
-
"""Creates VideoMetadata object from video file."""
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
76
|
+
def from_path(cls, video_path: str | Path) -> VideoMetadata:
|
|
77
|
+
"""Creates VideoMetadata object from video file using ffprobe."""
|
|
78
|
+
if not Path(video_path).exists():
|
|
79
|
+
raise FileNotFoundError(f"Video file not found: {video_path}")
|
|
80
|
+
|
|
81
|
+
probe_data = cls._run_ffprobe(video_path)
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
stream_info = probe_data["streams"][0]
|
|
85
|
+
|
|
86
|
+
width = int(stream_info["width"])
|
|
87
|
+
height = int(stream_info["height"])
|
|
88
|
+
|
|
89
|
+
try:
|
|
90
|
+
fps_fraction = Fraction(stream_info["r_frame_rate"])
|
|
91
|
+
fps = float(fps_fraction)
|
|
92
|
+
except (ValueError, ZeroDivisionError):
|
|
93
|
+
raise VideoMetadataError(f"Invalid frame rate: {stream_info['r_frame_rate']}")
|
|
94
|
+
|
|
95
|
+
if "nb_frames" in stream_info and stream_info["nb_frames"].isdigit():
|
|
96
|
+
frame_count = int(stream_info["nb_frames"])
|
|
97
|
+
else:
|
|
98
|
+
duration = float(probe_data["format"]["duration"])
|
|
99
|
+
frame_count = int(round(duration * fps))
|
|
100
|
+
|
|
101
|
+
total_seconds = round(frame_count / fps, 2)
|
|
102
|
+
|
|
103
|
+
return cls(height=height, width=width, fps=fps, frame_count=frame_count, total_seconds=total_seconds)
|
|
104
|
+
|
|
105
|
+
except KeyError as e:
|
|
106
|
+
raise VideoMetadataError(f"Missing required metadata field: {e}")
|
|
107
|
+
except Exception as e:
|
|
108
|
+
raise VideoMetadataError(f"Error extracting video metadata: {e}")
|
|
59
109
|
|
|
60
110
|
@classmethod
|
|
61
111
|
def from_video(cls, video: Video) -> VideoMetadata:
|
|
@@ -63,15 +113,10 @@ class VideoMetadata:
|
|
|
63
113
|
frame_count, height, width, _ = video.frames.shape
|
|
64
114
|
total_seconds = round(frame_count / video.fps, 2)
|
|
65
115
|
|
|
66
|
-
return cls(
|
|
67
|
-
height=height,
|
|
68
|
-
width=width,
|
|
69
|
-
fps=video.fps,
|
|
70
|
-
frame_count=frame_count,
|
|
71
|
-
total_seconds=total_seconds,
|
|
72
|
-
)
|
|
116
|
+
return cls(height=height, width=width, fps=video.fps, frame_count=frame_count, total_seconds=total_seconds)
|
|
73
117
|
|
|
74
118
|
def can_be_merged_with(self, other_format: VideoMetadata) -> bool:
|
|
119
|
+
"""Check if videos can be merged."""
|
|
75
120
|
return (
|
|
76
121
|
self.height == other_format.height
|
|
77
122
|
and self.width == other_format.width
|
|
@@ -79,14 +124,7 @@ class VideoMetadata:
|
|
|
79
124
|
)
|
|
80
125
|
|
|
81
126
|
def can_be_downsampled_to(self, target_format: VideoMetadata) -> bool:
|
|
82
|
-
"""Checks if video can be downsampled to
|
|
83
|
-
|
|
84
|
-
Args:
|
|
85
|
-
target_format: Desired video format.
|
|
86
|
-
|
|
87
|
-
Returns:
|
|
88
|
-
True if video can be downsampled to `target_format`, False otherwise.
|
|
89
|
-
"""
|
|
127
|
+
"""Checks if video can be downsampled to target_format."""
|
|
90
128
|
return (
|
|
91
129
|
self.height >= target_format.height
|
|
92
130
|
and self.width >= target_format.width
|
|
@@ -96,53 +134,174 @@ class VideoMetadata:
|
|
|
96
134
|
|
|
97
135
|
|
|
98
136
|
class Video:
|
|
99
|
-
def __init__(self):
|
|
100
|
-
self.
|
|
101
|
-
self.
|
|
102
|
-
|
|
137
|
+
def __init__(self, frames: np.ndarray, fps: int | float, audio: Audio | None = None):
|
|
138
|
+
self.frames = frames
|
|
139
|
+
self.fps = fps
|
|
140
|
+
if audio:
|
|
141
|
+
self.audio = audio
|
|
142
|
+
else:
|
|
143
|
+
self.audio = Audio.create_silent(
|
|
144
|
+
duration_seconds=round(self.total_seconds, 2), stereo=True, sample_rate=44100
|
|
145
|
+
)
|
|
103
146
|
|
|
104
147
|
@classmethod
|
|
105
|
-
def from_path(
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
148
|
+
def from_path(
|
|
149
|
+
cls, path: str, read_batch_size: int = 100, start_second: float | None = None, end_second: float | None = None
|
|
150
|
+
) -> Video:
|
|
109
151
|
try:
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
152
|
+
# Get video metadata using VideoMetadata.from_path
|
|
153
|
+
metadata = VideoMetadata.from_path(path)
|
|
154
|
+
|
|
155
|
+
width = metadata.width
|
|
156
|
+
height = metadata.height
|
|
157
|
+
fps = metadata.fps
|
|
158
|
+
total_frames = metadata.frame_count
|
|
159
|
+
total_duration = metadata.total_seconds
|
|
160
|
+
|
|
161
|
+
# Validate time bounds
|
|
162
|
+
if start_second is not None and start_second < 0:
|
|
163
|
+
raise ValueError("start_second must be non-negative")
|
|
164
|
+
if end_second is not None and end_second > total_duration:
|
|
165
|
+
raise ValueError(f"end_second ({end_second}) exceeds video duration ({total_duration})")
|
|
166
|
+
if start_second is not None and end_second is not None and start_second >= end_second:
|
|
167
|
+
raise ValueError("start_second must be less than end_second")
|
|
168
|
+
|
|
169
|
+
# Calculate frame indices for the desired segment
|
|
170
|
+
start_frame = int(start_second * fps) if start_second is not None else 0
|
|
171
|
+
end_frame = int(end_second * fps) if end_second is not None else total_frames
|
|
172
|
+
|
|
173
|
+
# Ensure we don't exceed bounds
|
|
174
|
+
start_frame = max(0, start_frame)
|
|
175
|
+
end_frame = min(total_frames, end_frame)
|
|
176
|
+
segment_frames = end_frame - start_frame
|
|
177
|
+
|
|
178
|
+
# Set up FFmpeg command for raw video extraction with time bounds
|
|
179
|
+
ffmpeg_cmd = [
|
|
180
|
+
"ffmpeg",
|
|
181
|
+
"-i",
|
|
182
|
+
path,
|
|
183
|
+
]
|
|
184
|
+
|
|
185
|
+
# Add seek and duration options if specified
|
|
186
|
+
if start_second is not None:
|
|
187
|
+
ffmpeg_cmd.extend(["-ss", str(start_second)])
|
|
188
|
+
if end_second is not None and start_second is not None:
|
|
189
|
+
duration = end_second - start_second
|
|
190
|
+
ffmpeg_cmd.extend(["-t", str(duration)])
|
|
191
|
+
elif end_second is not None:
|
|
192
|
+
ffmpeg_cmd.extend(["-t", str(end_second)])
|
|
193
|
+
|
|
194
|
+
ffmpeg_cmd.extend(
|
|
195
|
+
[
|
|
196
|
+
"-f",
|
|
197
|
+
"rawvideo",
|
|
198
|
+
"-pix_fmt",
|
|
199
|
+
"rgb24",
|
|
200
|
+
"-vsync",
|
|
201
|
+
"0",
|
|
202
|
+
"-vcodec",
|
|
203
|
+
"rawvideo",
|
|
204
|
+
"-y",
|
|
205
|
+
"pipe:1",
|
|
206
|
+
]
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# Start FFmpeg process
|
|
210
|
+
process = subprocess.Popen(
|
|
211
|
+
ffmpeg_cmd,
|
|
212
|
+
stdout=subprocess.PIPE,
|
|
213
|
+
stderr=subprocess.PIPE,
|
|
214
|
+
bufsize=10**8, # Use large buffer
|
|
115
215
|
)
|
|
116
|
-
|
|
216
|
+
|
|
217
|
+
# Calculate frame size in bytes
|
|
218
|
+
frame_size = width * height * 3 # 3 bytes per pixel for RGB
|
|
219
|
+
|
|
220
|
+
# Pre-allocate numpy array for segment frames
|
|
221
|
+
frames = np.empty((segment_frames, height, width, 3), dtype=np.uint8)
|
|
222
|
+
|
|
223
|
+
# Read frames in batches
|
|
224
|
+
frames_read = 0
|
|
225
|
+
for frame_idx in range(0, segment_frames, read_batch_size):
|
|
226
|
+
batch_end = min(frame_idx + read_batch_size, segment_frames)
|
|
227
|
+
batch_size = batch_end - frame_idx
|
|
228
|
+
|
|
229
|
+
# Read batch of frames
|
|
230
|
+
raw_data = process.stdout.read(frame_size * batch_size) # type: ignore
|
|
231
|
+
if not raw_data:
|
|
232
|
+
break
|
|
233
|
+
|
|
234
|
+
# Convert raw bytes to numpy array and reshape
|
|
235
|
+
batch_frames = np.frombuffer(raw_data, dtype=np.uint8)
|
|
236
|
+
|
|
237
|
+
# Handle case where we might get fewer frames than expected
|
|
238
|
+
actual_frames = len(batch_frames) // (height * width * 3)
|
|
239
|
+
if actual_frames > 0:
|
|
240
|
+
batch_frames = batch_frames[: actual_frames * height * width * 3]
|
|
241
|
+
batch_frames = batch_frames.reshape(-1, height, width, 3)
|
|
242
|
+
|
|
243
|
+
# Store batch in pre-allocated array
|
|
244
|
+
end_idx = frame_idx + actual_frames
|
|
245
|
+
frames[frame_idx:end_idx] = batch_frames
|
|
246
|
+
frames_read += actual_frames
|
|
247
|
+
else:
|
|
248
|
+
break
|
|
249
|
+
|
|
250
|
+
# Clean up FFmpeg process
|
|
251
|
+
process.stdout.close() # type: ignore
|
|
252
|
+
process.stderr.close() # type: ignore
|
|
253
|
+
process.wait()
|
|
254
|
+
|
|
255
|
+
if process.returncode != 0:
|
|
256
|
+
stderr_output = process.stderr.read().decode() if process.stderr else "Unknown error"
|
|
257
|
+
raise ValueError(f"FFmpeg error: {stderr_output}")
|
|
258
|
+
|
|
259
|
+
# Trim frames array if we read fewer frames than expected
|
|
260
|
+
if frames_read < segment_frames:
|
|
261
|
+
frames = frames[:frames_read] # type: ignore[assignment]
|
|
262
|
+
|
|
263
|
+
# Load audio for the specified segment
|
|
264
|
+
try:
|
|
265
|
+
audio = Audio.from_file(path)
|
|
266
|
+
# Slice audio to match the video segment
|
|
267
|
+
if start_second is not None or end_second is not None:
|
|
268
|
+
audio_start = start_second if start_second is not None else 0
|
|
269
|
+
audio_end = end_second if end_second is not None else audio.metadata.duration_seconds
|
|
270
|
+
audio = audio.slice(start_seconds=audio_start, end_seconds=audio_end)
|
|
271
|
+
except Exception:
|
|
272
|
+
print(f"No audio found for `{path}`, adding silent track!")
|
|
273
|
+
# Create silent audio for the segment duration
|
|
274
|
+
segment_duration = len(frames) / fps
|
|
275
|
+
audio = Audio.create_silent(duration_seconds=round(segment_duration, 2), stereo=True, sample_rate=44100)
|
|
276
|
+
|
|
277
|
+
return cls(frames=frames, fps=fps, audio=audio)
|
|
278
|
+
|
|
279
|
+
except VideoMetadataError as e:
|
|
280
|
+
raise ValueError(f"Error getting video metadata: {e}")
|
|
281
|
+
except subprocess.CalledProcessError as e:
|
|
282
|
+
raise ValueError(f"Error processing video file: {e}")
|
|
283
|
+
except Exception as e:
|
|
284
|
+
raise ValueError(f"Error loading video: {e}")
|
|
117
285
|
|
|
118
286
|
@classmethod
|
|
119
287
|
def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
|
|
120
|
-
new_vid = cls()
|
|
121
288
|
if frames.ndim != 4:
|
|
122
289
|
raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
|
|
123
290
|
elif frames.shape[-1] == 4:
|
|
124
291
|
frames = frames[:, :, :, :3]
|
|
125
292
|
elif frames.shape[-1] != 3:
|
|
126
293
|
raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
|
|
127
|
-
|
|
128
|
-
new_vid.fps = fps
|
|
129
|
-
new_vid.audio = Audio.create_silent(
|
|
130
|
-
duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
|
|
131
|
-
)
|
|
132
|
-
return new_vid
|
|
294
|
+
return cls(frames=frames, fps=fps)
|
|
133
295
|
|
|
134
296
|
@classmethod
|
|
135
297
|
def from_image(cls, image: np.ndarray, fps: float = 24.0, length_seconds: float = 1.0) -> Video:
|
|
136
|
-
new_vid = cls()
|
|
137
298
|
if len(image.shape) == 3:
|
|
138
299
|
image = np.expand_dims(image, axis=0)
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
new_vid.audio = Audio.create_silent(duration_seconds=length_seconds, stereo=True, sample_rate=44100)
|
|
142
|
-
return new_vid
|
|
300
|
+
frames = np.repeat(image, round(length_seconds * fps), axis=0)
|
|
301
|
+
return cls(frames=frames, fps=fps)
|
|
143
302
|
|
|
144
303
|
def copy(self) -> Video:
|
|
145
|
-
copied = Video
|
|
304
|
+
copied = Video.from_frames(self.frames.copy(), self.fps)
|
|
146
305
|
copied.audio = self.audio # Audio objects are immutable, no need to copy
|
|
147
306
|
return copied
|
|
148
307
|
|
|
@@ -168,6 +327,19 @@ class Video:
|
|
|
168
327
|
return split_videos
|
|
169
328
|
|
|
170
329
|
def save(self, filename: str | Path | None = None, format: ALLOWED_VIDEO_FORMATS = "mp4") -> Path:
|
|
330
|
+
"""Save video to file with optimized performance.
|
|
331
|
+
|
|
332
|
+
Args:
|
|
333
|
+
filename: Output filename. If None, generates random name
|
|
334
|
+
format: Output format (mp4, avi, mov, mkv, webm)
|
|
335
|
+
|
|
336
|
+
Returns:
|
|
337
|
+
Path to saved video file
|
|
338
|
+
|
|
339
|
+
Raises:
|
|
340
|
+
RuntimeError: If video is not loaded
|
|
341
|
+
ValueError: If format is not supported
|
|
342
|
+
"""
|
|
171
343
|
if not self.is_loaded():
|
|
172
344
|
raise RuntimeError("Video is not loaded, cannot save!")
|
|
173
345
|
|
|
@@ -182,80 +354,71 @@ class Video:
|
|
|
182
354
|
filename = Path(filename).with_suffix(f".{format}")
|
|
183
355
|
filename.parent.mkdir(parents=True, exist_ok=True)
|
|
184
356
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
cv2.imwrite(str(frame_path), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
|
|
192
|
-
|
|
193
|
-
# Calculate exact video duration
|
|
194
|
-
video_duration = len(self.frames) / self.fps
|
|
195
|
-
|
|
196
|
-
# Ensure audio duration matches video duration
|
|
197
|
-
if (
|
|
198
|
-
abs(self.audio.metadata.duration_seconds - video_duration) > 0.001
|
|
199
|
-
): # Small threshold for float comparison
|
|
200
|
-
if self.audio.metadata.duration_seconds < video_duration:
|
|
201
|
-
# Create silent audio for the remaining duration
|
|
202
|
-
remaining_duration = video_duration - self.audio.metadata.duration_seconds
|
|
203
|
-
silent_audio = Audio.create_silent(
|
|
204
|
-
duration_seconds=remaining_duration,
|
|
205
|
-
stereo=(self.audio.metadata.channels == 2),
|
|
206
|
-
sample_rate=self.audio.metadata.sample_rate,
|
|
207
|
-
sample_width=self.audio.metadata.sample_width,
|
|
208
|
-
)
|
|
209
|
-
# Concatenate original audio with silent padding
|
|
210
|
-
padded_audio = self.audio.concat(silent_audio)
|
|
211
|
-
else:
|
|
212
|
-
# Trim audio to match video duration
|
|
213
|
-
padded_audio = self.audio.slice(end_seconds=video_duration)
|
|
214
|
-
else:
|
|
215
|
-
padded_audio = self.audio
|
|
357
|
+
# Create a temporary raw video file
|
|
358
|
+
with tempfile.NamedTemporaryFile(suffix=".raw") as raw_video:
|
|
359
|
+
# Convert frames to raw video data
|
|
360
|
+
raw_data = self.frames.astype(np.uint8).tobytes()
|
|
361
|
+
raw_video.write(raw_data)
|
|
362
|
+
raw_video.flush()
|
|
216
363
|
|
|
217
364
|
# Save audio to temporary WAV file
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
365
|
+
with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio:
|
|
366
|
+
self.audio.save(temp_audio.name, format="wav")
|
|
367
|
+
|
|
368
|
+
# Calculate exact duration
|
|
369
|
+
duration = len(self.frames) / self.fps
|
|
370
|
+
|
|
371
|
+
# Construct FFmpeg command for maximum performance
|
|
372
|
+
ffmpeg_command = [
|
|
373
|
+
"ffmpeg",
|
|
374
|
+
"-y",
|
|
375
|
+
# Raw video input settings
|
|
376
|
+
"-f",
|
|
377
|
+
"rawvideo",
|
|
378
|
+
"-pixel_format",
|
|
379
|
+
"rgb24",
|
|
380
|
+
"-video_size",
|
|
381
|
+
f"{self.frame_shape[1]}x{self.frame_shape[0]}",
|
|
382
|
+
"-framerate",
|
|
383
|
+
str(self.fps),
|
|
384
|
+
"-i",
|
|
385
|
+
raw_video.name,
|
|
386
|
+
# Audio input
|
|
387
|
+
"-i",
|
|
388
|
+
temp_audio.name,
|
|
389
|
+
# Video encoding settings
|
|
390
|
+
"-c:v",
|
|
391
|
+
"libx264",
|
|
392
|
+
"-preset",
|
|
393
|
+
"ultrafast", # Fastest encoding
|
|
394
|
+
"-tune",
|
|
395
|
+
"zerolatency", # Reduce encoding latency
|
|
396
|
+
"-crf",
|
|
397
|
+
"23", # Reasonable quality/size tradeoff
|
|
398
|
+
# Audio settings
|
|
399
|
+
"-c:a",
|
|
400
|
+
"aac",
|
|
401
|
+
"-b:a",
|
|
402
|
+
"192k",
|
|
403
|
+
# Output settings
|
|
404
|
+
"-pix_fmt",
|
|
405
|
+
"yuv420p",
|
|
406
|
+
"-movflags",
|
|
407
|
+
"+faststart", # Enable fast start for web playback
|
|
408
|
+
"-t",
|
|
409
|
+
str(duration),
|
|
410
|
+
"-vsync",
|
|
411
|
+
"cfr",
|
|
412
|
+
str(filename),
|
|
413
|
+
]
|
|
414
|
+
|
|
415
|
+
try:
|
|
416
|
+
subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
|
|
417
|
+
return filename
|
|
418
|
+
except subprocess.CalledProcessError as e:
|
|
419
|
+
print(f"Error saving video: {e}")
|
|
420
|
+
print(f"FFmpeg stderr: {e.stderr}")
|
|
421
|
+
raise
|
|
259
422
|
|
|
260
423
|
def add_audio(self, audio: Audio, overlay: bool = True) -> None:
|
|
261
424
|
if self.audio.is_silent:
|
|
@@ -269,7 +432,7 @@ class Video:
|
|
|
269
432
|
try:
|
|
270
433
|
new_audio = Audio.from_file(path)
|
|
271
434
|
self.add_audio(new_audio, overlay)
|
|
272
|
-
except Exception
|
|
435
|
+
except Exception:
|
|
273
436
|
print(f"Audio file `{path}` not found or invalid, skipping!")
|
|
274
437
|
|
|
275
438
|
def __add__(self, other: Video) -> Video:
|
|
@@ -305,29 +468,6 @@ class Video:
|
|
|
305
468
|
sliced.audio = self.audio.slice(start_seconds=audio_start, end_seconds=audio_end)
|
|
306
469
|
return sliced
|
|
307
470
|
|
|
308
|
-
@staticmethod
|
|
309
|
-
def _load_video_from_path(path: str) -> tuple[np.ndarray, float]:
|
|
310
|
-
cap = cv2.VideoCapture(path)
|
|
311
|
-
if not cap.isOpened():
|
|
312
|
-
raise ValueError(f"Unable to open video file: {path}")
|
|
313
|
-
|
|
314
|
-
fps = cap.get(cv2.CAP_PROP_FPS)
|
|
315
|
-
frames = []
|
|
316
|
-
|
|
317
|
-
while True:
|
|
318
|
-
ret, frame = cap.read()
|
|
319
|
-
if not ret:
|
|
320
|
-
break
|
|
321
|
-
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
|
322
|
-
frames.append(frame)
|
|
323
|
-
|
|
324
|
-
cap.release()
|
|
325
|
-
|
|
326
|
-
if not frames:
|
|
327
|
-
raise ValueError(f"No frames could be read from the video file: {path}")
|
|
328
|
-
|
|
329
|
-
return np.array(frames), fps
|
|
330
|
-
|
|
331
471
|
@property
|
|
332
472
|
def video_shape(self) -> tuple[int, int, int, int]:
|
|
333
473
|
return self.frames.shape
|
videopython/utils/__init__.py
CHANGED