videopython 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of videopython might be problematic.
- videopython/ai/__init__.py +0 -0
- videopython/{generation → ai/generation}/audio.py +25 -13
- videopython/{generation → ai/generation}/image.py +0 -3
- videopython/ai/understanding/__init__.py +0 -0
- videopython/ai/understanding/transcribe.py +37 -0
- videopython/base/effects.py +3 -3
- videopython/base/transcription.py +13 -0
- videopython/base/transforms.py +0 -2
- videopython/base/transitions.py +2 -2
- videopython/base/video.py +269 -187
- videopython/utils/__init__.py +3 -0
- videopython/utils/image.py +0 -228
- videopython/utils/text.py +727 -0
- {videopython-0.2.1.dist-info → videopython-0.4.0.dist-info}/METADATA +13 -25
- videopython-0.4.0.dist-info/RECORD +25 -0
- {videopython-0.2.1.dist-info → videopython-0.4.0.dist-info}/WHEEL +1 -1
- videopython-0.2.1.dist-info/RECORD +0 -20
- /videopython/{generation → ai/generation}/__init__.py +0 -0
- /videopython/{generation → ai/generation}/video.py +0 -0
- {videopython-0.2.1.dist-info → videopython-0.4.0.dist-info}/licenses/LICENSE +0 -0
videopython/base/video.py
CHANGED
@@ -1,21 +1,27 @@
 from __future__ import annotations
 
-import
+import json
 import subprocess
 import tempfile
 from dataclasses import dataclass
+from fractions import Fraction
 from pathlib import Path
 from typing import Literal, get_args
 
-import cv2
 import numpy as np
-from
+from soundpython import Audio
 
 from videopython.utils.common import generate_random_name
 
 ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]
 
 
+class VideoMetadataError(Exception):
+    """Raised when there's an error getting video metadata"""
+
+    pass
+
+
 @dataclass
 class VideoMetadata:
     """Class to store video metadata."""
@@ -26,63 +32,91 @@ class VideoMetadata:
     frame_count: int
     total_seconds: float
 
-    def __str__(self):
+    def __str__(self) -> str:
         return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"
 
     def __repr__(self) -> str:
         return self.__str__()
 
-    def get_frame_shape(self):
+    def get_frame_shape(self) -> np.ndarray:
         """Returns frame shape."""
         return np.array((self.height, self.width, 3))
 
-    def get_video_shape(self):
+    def get_video_shape(self) -> np.ndarray:
         """Returns video shape."""
         return np.array((self.frame_count, self.height, self.width, 3))
 
-    @
-    def
-    """
+    @staticmethod
+    def _run_ffprobe(video_path: str | Path) -> dict:
+        """Run ffprobe and return parsed JSON output."""
+        cmd = [
+            "ffprobe",
+            "-v",
+            "error",
+            "-select_streams",
+            "v:0",
+            "-show_entries",
+            "stream=width,height,r_frame_rate,nb_frames",
+            "-show_entries",
+            "format=duration",
+            "-print_format",
+            "json",
+            str(video_path),
+        ]
 
-
-
-
-
-
-
-
-        width = round(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-        total_seconds = round(frame_count / fps, 2)
-
-        return cls(
-            height=height,
-            width=width,
-            fps=fps,
-            frame_count=frame_count,
-            total_seconds=total_seconds,
-        )
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            return json.loads(result.stdout)
+        except subprocess.CalledProcessError as e:
+            raise VideoMetadataError(f"FFprobe error: {e.stderr}")
+        except json.JSONDecodeError as e:
+            raise VideoMetadataError(f"Error parsing FFprobe output: {e}")
 
     @classmethod
-    def
-    """Creates VideoMetadata object from
+    def from_path(cls, video_path: str | Path) -> VideoMetadata:
+        """Creates VideoMetadata object from video file using ffprobe."""
+        if not Path(video_path).exists():
+            raise FileNotFoundError(f"Video file not found: {video_path}")
 
-
-
-
-
+        probe_data = cls._run_ffprobe(video_path)
+
+        try:
+            stream_info = probe_data["streams"][0]
+
+            width = int(stream_info["width"])
+            height = int(stream_info["height"])
+
+            try:
+                fps_fraction = Fraction(stream_info["r_frame_rate"])
+                fps = float(fps_fraction)
+            except (ValueError, ZeroDivisionError):
+                raise VideoMetadataError(f"Invalid frame rate: {stream_info['r_frame_rate']}")
+
+            if "nb_frames" in stream_info and stream_info["nb_frames"].isdigit():
+                frame_count = int(stream_info["nb_frames"])
+            else:
+                duration = float(probe_data["format"]["duration"])
+                frame_count = int(round(duration * fps))
+
+            total_seconds = round(frame_count / fps, 2)
+
+            return cls(height=height, width=width, fps=fps, frame_count=frame_count, total_seconds=total_seconds)
 
+        except KeyError as e:
+            raise VideoMetadataError(f"Missing required metadata field: {e}")
+        except Exception as e:
+            raise VideoMetadataError(f"Error extracting video metadata: {e}")
+
+    @classmethod
+    def from_video(cls, video: Video) -> VideoMetadata:
+        """Creates VideoMetadata object from Video instance."""
         frame_count, height, width, _ = video.frames.shape
         total_seconds = round(frame_count / video.fps, 2)
 
-        return cls(
-            height=height,
-            width=width,
-            fps=video.fps,
-            frame_count=frame_count,
-            total_seconds=total_seconds,
-        )
+        return cls(height=height, width=width, fps=video.fps, frame_count=frame_count, total_seconds=total_seconds)
 
     def can_be_merged_with(self, other_format: VideoMetadata) -> bool:
+        """Check if videos can be merged."""
         return (
             self.height == other_format.height
             and self.width == other_format.width
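For orientation, the new metadata path shells out to ffprobe rather than OpenCV. A minimal usage sketch, assuming ffprobe is on PATH and using a hypothetical input file name:

from videopython.base.video import VideoMetadata, VideoMetadataError

try:
    meta = VideoMetadata.from_path("sample.mp4")  # hypothetical file name
    print(meta)                    # e.g. "1920x1080 @ 30.0fps, 12.5 seconds"
    print(meta.get_video_shape())  # np.array([frame_count, height, width, 3])
except (FileNotFoundError, VideoMetadataError) as err:
    print(f"Could not read metadata: {err}")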
@@ -90,14 +124,7 @@ class VideoMetadata:
         )
 
     def can_be_downsampled_to(self, target_format: VideoMetadata) -> bool:
-        """Checks if video can be downsampled to
-
-        Args:
-            target_format: Desired video format.
-
-        Returns:
-            True if video can be downsampled to `target_format`, False otherwise.
-        """
+        """Checks if video can be downsampled to target_format."""
         return (
             self.height >= target_format.height
             and self.width >= target_format.width
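A rough illustration of the two compatibility predicates; the field values below are made up, and only the checks visible in this hunk are commented on:

from videopython.base.video import VideoMetadata

source = VideoMetadata(height=1080, width=1920, fps=30.0, frame_count=300, total_seconds=10.0)
target = VideoMetadata(height=720, width=1280, fps=30.0, frame_count=300, total_seconds=10.0)

# Passes the visible width/height checks (source is at least as large in both
# dimensions); any further conditions are outside this hunk.
print(source.can_be_downsampled_to(target))
# False here: merging requires equal width and height.
print(source.can_be_merged_with(target))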
@@ -113,15 +140,94 @@ class Video:
         self.audio = None
 
     @classmethod
-    def from_path(cls, path: str) -> Video:
+    def from_path(cls, path: str, read_batch_size: int = 100) -> Video:
         new_vid = cls()
-
-
-
-
-
-
-
+
+        try:
+            # Get video metadata using VideoMetadata.from_path
+            metadata = VideoMetadata.from_path(path)
+
+            width = metadata.width
+            height = metadata.height
+            fps = metadata.fps
+            total_frames = metadata.frame_count
+
+            # Set up FFmpeg command for raw video extraction
+            ffmpeg_cmd = [
+                "ffmpeg",
+                "-i",
+                path,
+                "-f",
+                "rawvideo",
+                "-pix_fmt",
+                "rgb24",
+                "-vsync",
+                "0",
+                "-vcodec",
+                "rawvideo",
+                "-y",
+                "pipe:1",
+            ]
+
+            # Start FFmpeg process
+            process = subprocess.Popen(
+                ffmpeg_cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                bufsize=10**8,  # Use large buffer
+            )
+
+            # Calculate frame size in bytes
+            frame_size = width * height * 3  # 3 bytes per pixel for RGB
+
+            # Pre-allocate numpy array for all frames
+            frames = np.empty((total_frames, height, width, 3), dtype=np.uint8)
+
+            # Read frames in batches
+            for frame_idx in range(0, total_frames, read_batch_size):
+                batch_end = min(frame_idx + read_batch_size, total_frames)
+                batch_size = batch_end - frame_idx
+
+                # Read batch of frames
+                raw_data = process.stdout.read(frame_size * batch_size)  # type: ignore
+                if not raw_data:
+                    break
+
+                # Convert raw bytes to numpy array and reshape
+                batch_frames = np.frombuffer(raw_data, dtype=np.uint8)
+                batch_frames = batch_frames.reshape(-1, height, width, 3)
+
+                # Store batch in pre-allocated array
+                frames[frame_idx:batch_end] = batch_frames
+
+            # Clean up FFmpeg process
+            process.stdout.close()  # type: ignore
+            process.stderr.close()  # type: ignore
+            process.wait()
+
+            if process.returncode != 0:
+                raise ValueError(f"FFmpeg error: {process.stderr.read().decode()}")  # type: ignore
+
+            new_vid.frames = frames
+            new_vid.fps = fps
+
+            # Load audio
+            try:
+                new_vid.audio = Audio.from_file(path)
+            except Exception:
+                print(f"No audio found for `{path}`, adding silent track!")
+                new_vid.audio = Audio.create_silent(
+                    duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
+                )
+
+            return new_vid
+
+        except VideoMetadataError as e:
+            raise ValueError(f"Error getting video metadata: {e}")
+        except subprocess.CalledProcessError as e:
+            raise ValueError(f"Error processing video file: {e}")
+        except Exception as e:
+            raise ValueError(f"Error loading video: {e}")
 
     @classmethod
     def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
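The batching arithmetic above is easy to verify in isolation: each rgb24 frame on the pipe is exactly width * height * 3 bytes, so a batch read can be reinterpreted with np.frombuffer and reshaped in one step. A standalone sketch with synthetic bytes, no FFmpeg required:

import numpy as np

width, height, batch_size = 4, 3, 2      # toy dimensions
frame_size = width * height * 3          # bytes per rgb24 frame

# Stand-in for process.stdout.read(frame_size * batch_size)
raw_data = bytes(range(frame_size)) * batch_size

batch = np.frombuffer(raw_data, dtype=np.uint8).reshape(-1, height, width, 3)
print(batch.shape)  # (2, 3, 4, 3) -> (frames, height, width, channels)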
@@ -134,7 +240,9 @@ class Video:
             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
         new_vid.frames = frames
         new_vid.fps = fps
-        new_vid.audio =
+        new_vid.audio = Audio.create_silent(
+            duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
+        )
         return new_vid
 
     @classmethod
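A short usage sketch for the updated from_frames, which now attaches a silent track eagerly. It assumes soundpython is installed; the is_silent check mirrors the attribute used later in this diff:

import numpy as np
from videopython.base.video import Video

frames = np.zeros((48, 360, 640, 3), dtype=np.uint8)  # 2 s of black 640x360 at 24 fps
video = Video.from_frames(frames, fps=24.0)

print(video.total_seconds)    # 2.0
print(video.audio.is_silent)  # expected True for a track from Audio.create_silent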
@@ -144,12 +252,12 @@ class Video:
         image = np.expand_dims(image, axis=0)
         new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
         new_vid.fps = fps
-        new_vid.audio =
+        new_vid.audio = Audio.create_silent(duration_seconds=length_seconds, stereo=True, sample_rate=44100)
         return new_vid
 
     def copy(self) -> Video:
         copied = Video().from_frames(self.frames.copy(), self.fps)
-        copied.audio = self.audio
+        copied.audio = self.audio  # Audio objects are immutable, no need to copy
         return copied
 
     def is_loaded(self) -> bool:
@@ -165,25 +273,31 @@ class Video:
             self.from_frames(self.frames[:frame_idx], self.fps),
             self.from_frames(self.frames[frame_idx:], self.fps),
         )
-
-
-
+
+        # Split audio at the corresponding time point
+        split_time = frame_idx / self.fps
+        split_videos[0].audio = self.audio.slice(start_seconds=0, end_seconds=split_time)
+        split_videos[1].audio = self.audio.slice(start_seconds=split_time)
+
         return split_videos
 
     def save(self, filename: str | Path | None = None, format: ALLOWED_VIDEO_FORMATS = "mp4") -> Path:
-        """
+        """Save video to file with optimized performance.
 
         Args:
-            filename:
-            format: Output format (
+            filename: Output filename. If None, generates random name
+            format: Output format (mp4, avi, mov, mkv, webm)
 
         Returns:
-            Path to
+            Path to saved video file
+
+        Raises:
+            RuntimeError: If video is not loaded
+            ValueError: If format is not supported
         """
         if not self.is_loaded():
             raise RuntimeError("Video is not loaded, cannot save!")
 
-        # Check if the format is allowed
         if format.lower() not in get_args(ALLOWED_VIDEO_FORMATS):
             raise ValueError(
                 f"Unsupported format: {format}. Allowed formats are: {', '.join(get_args(ALLOWED_VIDEO_FORMATS))}"
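The split point maps from frame index to seconds with frame_idx / fps, and the audio is cut at the same timestamp. A quick check of the arithmetic, pure Python:

fps = 30.0
frame_idx = 90               # split after the 90th frame

split_time = frame_idx / fps
print(split_time)            # 3.0
# First part keeps audio for [0.0, 3.0) seconds, second part keeps [3.0, end).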
@@ -195,87 +309,94 @@ class Video:
         filename = Path(filename).with_suffix(f".{format}")
         filename.parent.mkdir(parents=True, exist_ok=True)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Create a temporary raw video file
+        with tempfile.NamedTemporaryFile(suffix=".raw") as raw_video:
+            # Convert frames to raw video data
+            raw_data = self.frames.astype(np.uint8).tobytes()
+            raw_video.write(raw_data)
+            raw_video.flush()
+
+            # Save audio to temporary WAV file
+            with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio:
+                self.audio.save(temp_audio.name, format="wav")
+
+                # Calculate exact duration
+                duration = len(self.frames) / self.fps
+
+                # Construct FFmpeg command for maximum performance
+                ffmpeg_command = [
+                    "ffmpeg",
+                    "-y",
+                    # Raw video input settings
+                    "-f",
+                    "rawvideo",
+                    "-pixel_format",
+                    "rgb24",
+                    "-video_size",
+                    f"{self.frame_shape[1]}x{self.frame_shape[0]}",
+                    "-framerate",
+                    str(self.fps),
+                    "-i",
+                    raw_video.name,
+                    # Audio input
+                    "-i",
+                    temp_audio.name,
+                    # Video encoding settings
+                    "-c:v",
+                    "libx264",
+                    "-preset",
+                    "ultrafast",  # Fastest encoding
+                    "-tune",
+                    "zerolatency",  # Reduce encoding latency
+                    "-crf",
+                    "23",  # Reasonable quality/size tradeoff
+                    # Audio settings
+                    "-c:a",
+                    "aac",
+                    "-b:a",
+                    "192k",
+                    # Output settings
+                    "-pix_fmt",
+                    "yuv420p",
+                    "-movflags",
+                    "+faststart",  # Enable fast start for web playback
+                    "-t",
+                    str(duration),
+                    "-vsync",
+                    "cfr",
+                    str(filename),
+                ]
+
+                try:
+                    subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
+                    return filename
+                except subprocess.CalledProcessError as e:
+                    print(f"Error saving video: {e}")
+                    print(f"FFmpeg stderr: {e.stderr}")
+                    raise
+
+    def add_audio(self, audio: Audio, overlay: bool = True) -> None:
+        if self.audio.is_silent:
+            self.audio = audio
+        elif overlay:
+            self.audio = self.audio.overlay(audio, position=0.0)
+        else:
+            self.audio = audio
 
-
-
-
-
-
-
-        print(f"FFmpeg stderr: {e.stderr}")
-        raise
-
-    def add_audio(self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
-        self.audio = self._process_audio(audio=audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
-
-    def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
-        new_audio = self._load_audio_from_path(path)
-        if new_audio is None:
-            print(f"Audio file `{path}` not found, skipping!")
-            return
-
-        self.audio = self._process_audio(audio=new_audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
-
-    def _process_audio(
-        self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False
-    ) -> AudioSegment:
-        if (duration_diff := round(self.total_seconds - audio.duration_seconds)) > 0 and not loop:
-            audio = audio + AudioSegment.silent(duration_diff * 1000)
-        elif audio.duration_seconds > self.total_seconds:
-            audio = audio[: round(self.total_seconds * 1000)]
-
-        if overlay:
-            return self.audio.overlay(audio, loop=loop, gain_during_overlay=overlay_gain)
-        return audio
+    def add_audio_from_file(self, path: str, overlay: bool = True) -> None:
+        try:
+            new_audio = Audio.from_file(path)
+            self.add_audio(new_audio, overlay)
+        except Exception:
+            print(f"Audio file `{path}` not found or invalid, skipping!")
 
     def __add__(self, other: Video) -> Video:
-        # TODO: Should it be class method? How to make it work with sum()?
         if self.fps != other.fps:
             raise ValueError("FPS of videos do not match!")
         elif self.frame_shape != other.frame_shape:
-            raise ValueError(
-                "Resolutions of the images do not match: "
-                f"{self.frame_shape} not compatible with {other.frame_shape}."
-            )
+            raise ValueError(f"Resolutions do not match: {self.frame_shape} vs {other.frame_shape}")
         new_video = self.from_frames(np.r_["0,2", self.frames, other.frames], fps=self.fps)
-        new_video.audio = self.audio
+        new_video.audio = self.audio.concat(other.audio)
        return new_video
 
     def __str__(self) -> str:
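Putting the reworked audio handling and the FFmpeg-based save together, a typical round trip might look like the sketch below. File names are placeholders and ffmpeg must be on PATH; addition requires matching fps and resolution:

from videopython.base.video import Video

clip_a = Video.from_path("clip_a.mp4")   # hypothetical input files
clip_b = Video.from_path("clip_b.mp4")

clip_a.add_audio_from_file("music.mp3")  # overlaid onto existing audio by default
combined = clip_a + clip_b               # frames appended, audio tracks concatenated

out_path = combined.save("combined", format="mp4")
print(out_path)                          # e.g. combined.mp4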
@@ -285,74 +406,35 @@ class Video:
         if not isinstance(val, slice):
             raise ValueError("Only slices are supported for video indexing!")
 
-        # Sub-slice video
+        # Sub-slice video frames
         sliced = self.from_frames(self.frames[val], fps=self.fps)
-
+
+        # Handle slicing bounds for audio
         start = val.start if val.start else 0
         stop = val.stop if val.stop else len(self.frames)
-        # Handle negative values for audio slices
         if start < 0:
             start = len(self.frames) + start
         if stop < 0:
             stop = len(self.frames) + stop
-        # Append audio to the slice
-        audio_start = round(start / self.fps) * 1000
-        audio_end = round(stop / self.fps) * 1000
-        sliced.audio = self.audio[audio_start:audio_end]
-        return sliced
-
-    @staticmethod
-    def _load_audio_from_path(path: str) -> AudioSegment | None:
-        try:
-            audio = AudioSegment.from_file(path)
-            return audio
-        except IndexError:
-            return None
 
-
-
-
-
-
-            path: Path to video file.
-        """
-        cap = cv2.VideoCapture(path)
-        if not cap.isOpened():
-            raise ValueError(f"Unable to open video file: {path}")
-
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        frames = []
-
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            frames.append(frame)
-
-        cap.release()
-
-        if not frames:
-            raise ValueError(f"No frames could be read from the video file: {path}")
-
-        return np.array(frames), fps
+        # Slice audio to match video duration
+        audio_start = start / self.fps
+        audio_end = stop / self.fps
+        sliced.audio = self.audio.slice(start_seconds=audio_start, end_seconds=audio_end)
+        return sliced
 
     @property
     def video_shape(self) -> tuple[int, int, int, int]:
-        """Returns 4D video shape."""
         return self.frames.shape
 
     @property
     def frame_shape(self) -> tuple[int, int, int]:
-        """Returns 3D frame shape."""
         return self.frames.shape[1:]
 
     @property
     def total_seconds(self) -> float:
-        """Returns total seconds of the video."""
         return round(self.frames.shape[0] / self.fps, 4)
 
     @property
     def metadata(self) -> VideoMetadata:
-        """Returns VideoMetadata object."""
         return VideoMetadata.from_video(self)
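Slicing now keeps audio aligned by converting the frame bounds to seconds (start / fps and stop / fps). A brief sketch with a hypothetical 30 fps source file:

from videopython.base.video import Video

video = Video.from_path("input.mp4")  # hypothetical 30 fps source
clip = video[30:120]                  # keeps frames 30..119

# At 30 fps the audio window is 30/30 = 1.0 s through 120/30 = 4.0 s of the original.
print(clip.total_seconds)             # 3.0
print(clip.metadata)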
videopython/utils/__init__.py
CHANGED