videopython 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of videopython might be problematic.

videopython/base/video.py CHANGED
@@ -1,12 +1,13 @@
  from __future__ import annotations

+ import json
  import subprocess
  import tempfile
  from dataclasses import dataclass
+ from fractions import Fraction
  from pathlib import Path
  from typing import Literal, get_args

- import cv2
  import numpy as np
  from soundpython import Audio

@@ -15,6 +16,12 @@ from videopython.utils.common import generate_random_name
  ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]


+ class VideoMetadataError(Exception):
+     """Raised when there's an error getting video metadata"""
+
+     pass
+
+
  @dataclass
  class VideoMetadata:
      """Class to store video metadata."""
@@ -25,37 +32,80 @@ class VideoMetadata:
      frame_count: int
      total_seconds: float

-     def __str__(self):
+     def __str__(self) -> str:
          return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"

      def __repr__(self) -> str:
          return self.__str__()

-     def get_frame_shape(self):
+     def get_frame_shape(self) -> np.ndarray:
          """Returns frame shape."""
          return np.array((self.height, self.width, 3))

-     def get_video_shape(self):
+     def get_video_shape(self) -> np.ndarray:
          """Returns video shape."""
          return np.array((self.frame_count, self.height, self.width, 3))

+     @staticmethod
+     def _run_ffprobe(video_path: str | Path) -> dict:
+         """Run ffprobe and return parsed JSON output."""
+         cmd = [
+             "ffprobe",
+             "-v",
+             "error",
+             "-select_streams",
+             "v:0",
+             "-show_entries",
+             "stream=width,height,r_frame_rate,nb_frames",
+             "-show_entries",
+             "format=duration",
+             "-print_format",
+             "json",
+             str(video_path),
+         ]
+
+         try:
+             result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+             return json.loads(result.stdout)
+         except subprocess.CalledProcessError as e:
+             raise VideoMetadataError(f"FFprobe error: {e.stderr}")
+         except json.JSONDecodeError as e:
+             raise VideoMetadataError(f"Error parsing FFprobe output: {e}")
+
      @classmethod
-     def from_path(cls, video_path: str) -> VideoMetadata:
-         """Creates VideoMetadata object from video file."""
-         video = cv2.VideoCapture(video_path)
-         frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-         fps = round(video.get(cv2.CAP_PROP_FPS), 2)
-         height = round(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-         width = round(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-         total_seconds = round(frame_count / fps, 2)
-
-         return cls(
-             height=height,
-             width=width,
-             fps=fps,
-             frame_count=frame_count,
-             total_seconds=total_seconds,
-         )
+     def from_path(cls, video_path: str | Path) -> VideoMetadata:
+         """Creates VideoMetadata object from video file using ffprobe."""
+         if not Path(video_path).exists():
+             raise FileNotFoundError(f"Video file not found: {video_path}")
+
+         probe_data = cls._run_ffprobe(video_path)
+
+         try:
+             stream_info = probe_data["streams"][0]
+
+             width = int(stream_info["width"])
+             height = int(stream_info["height"])
+
+             try:
+                 fps_fraction = Fraction(stream_info["r_frame_rate"])
+                 fps = float(fps_fraction)
+             except (ValueError, ZeroDivisionError):
+                 raise VideoMetadataError(f"Invalid frame rate: {stream_info['r_frame_rate']}")
+
+             if "nb_frames" in stream_info and stream_info["nb_frames"].isdigit():
+                 frame_count = int(stream_info["nb_frames"])
+             else:
+                 duration = float(probe_data["format"]["duration"])
+                 frame_count = int(round(duration * fps))
+
+             total_seconds = round(frame_count / fps, 2)
+
+             return cls(height=height, width=width, fps=fps, frame_count=frame_count, total_seconds=total_seconds)
+
+         except KeyError as e:
+             raise VideoMetadataError(f"Missing required metadata field: {e}")
+         except Exception as e:
+             raise VideoMetadataError(f"Error extracting video metadata: {e}")

      @classmethod
      def from_video(cls, video: Video) -> VideoMetadata:
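
In 0.4.1, metadata is read by shelling out to ffprobe instead of going through cv2.VideoCapture. A minimal usage sketch of the new path (the file name is a placeholder; ffprobe must be installed and on PATH):

    from videopython.base.video import VideoMetadata

    # "example.mp4" is a hypothetical input file.
    meta = VideoMetadata.from_path("example.mp4")
    print(meta)                    # e.g. "1920x1080 @ 25.0fps, 10.0 seconds"
    print(meta.get_video_shape())  # array([frame_count, height, width, 3])
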
@@ -63,15 +113,10 @@ class VideoMetadata:
          frame_count, height, width, _ = video.frames.shape
          total_seconds = round(frame_count / video.fps, 2)

-         return cls(
-             height=height,
-             width=width,
-             fps=video.fps,
-             frame_count=frame_count,
-             total_seconds=total_seconds,
-         )
+         return cls(height=height, width=width, fps=video.fps, frame_count=frame_count, total_seconds=total_seconds)

      def can_be_merged_with(self, other_format: VideoMetadata) -> bool:
+         """Check if videos can be merged."""
          return (
              self.height == other_format.height
              and self.width == other_format.width
@@ -79,14 +124,7 @@ class VideoMetadata:
          )

      def can_be_downsampled_to(self, target_format: VideoMetadata) -> bool:
-         """Checks if video can be downsampled to `target_format`.
-
-         Args:
-             target_format: Desired video format.
-
-         Returns:
-             True if video can be downsampled to `target_format`, False otherwise.
-         """
+         """Checks if video can be downsampled to target_format."""
          return (
              self.height >= target_format.height
              and self.width >= target_format.width
@@ -96,53 +134,174 @@ class VideoMetadata:


  class Video:
-     def __init__(self):
-         self.fps = None
-         self.frames = None
-         self.audio = None
+     def __init__(self, frames: np.ndarray, fps: int | float, audio: Audio | None = None):
+         self.frames = frames
+         self.fps = fps
+         if audio:
+             self.audio = audio
+         else:
+             self.audio = Audio.create_silent(
+                 duration_seconds=round(self.total_seconds, 2), stereo=True, sample_rate=44100
+             )

      @classmethod
-     def from_path(cls, path: str) -> Video:
-         new_vid = cls()
-         new_vid.frames, new_vid.fps = cls._load_video_from_path(path)
-
+     def from_path(
+         cls, path: str, read_batch_size: int = 100, start_second: float | None = None, end_second: float | None = None
+     ) -> Video:
          try:
-             new_vid.audio = Audio.from_file(path)
-         except Exception as e:
-             print(f"No audio found for `{path}`, adding silent track!")
-             new_vid.audio = Audio.create_silent(
-                 duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
+             # Get video metadata using VideoMetadata.from_path
+             metadata = VideoMetadata.from_path(path)
+
+             width = metadata.width
+             height = metadata.height
+             fps = metadata.fps
+             total_frames = metadata.frame_count
+             total_duration = metadata.total_seconds
+
+             # Validate time bounds
+             if start_second is not None and start_second < 0:
+                 raise ValueError("start_second must be non-negative")
+             if end_second is not None and end_second > total_duration:
+                 raise ValueError(f"end_second ({end_second}) exceeds video duration ({total_duration})")
+             if start_second is not None and end_second is not None and start_second >= end_second:
+                 raise ValueError("start_second must be less than end_second")
+
+             # Calculate frame indices for the desired segment
+             start_frame = int(start_second * fps) if start_second is not None else 0
+             end_frame = int(end_second * fps) if end_second is not None else total_frames
+
+             # Ensure we don't exceed bounds
+             start_frame = max(0, start_frame)
+             end_frame = min(total_frames, end_frame)
+             segment_frames = end_frame - start_frame
+
+             # Set up FFmpeg command for raw video extraction with time bounds
+             ffmpeg_cmd = [
+                 "ffmpeg",
+                 "-i",
+                 path,
+             ]
+
+             # Add seek and duration options if specified
+             if start_second is not None:
+                 ffmpeg_cmd.extend(["-ss", str(start_second)])
+             if end_second is not None and start_second is not None:
+                 duration = end_second - start_second
+                 ffmpeg_cmd.extend(["-t", str(duration)])
+             elif end_second is not None:
+                 ffmpeg_cmd.extend(["-t", str(end_second)])
+
+             ffmpeg_cmd.extend(
+                 [
+                     "-f",
+                     "rawvideo",
+                     "-pix_fmt",
+                     "rgb24",
+                     "-vsync",
+                     "0",
+                     "-vcodec",
+                     "rawvideo",
+                     "-y",
+                     "pipe:1",
+                 ]
+             )
+
+             # Start FFmpeg process
+             process = subprocess.Popen(
+                 ffmpeg_cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 bufsize=10**8,  # Use large buffer
              )
-         return new_vid
+
+             # Calculate frame size in bytes
+             frame_size = width * height * 3  # 3 bytes per pixel for RGB
+
+             # Pre-allocate numpy array for segment frames
+             frames = np.empty((segment_frames, height, width, 3), dtype=np.uint8)
+
+             # Read frames in batches
+             frames_read = 0
+             for frame_idx in range(0, segment_frames, read_batch_size):
+                 batch_end = min(frame_idx + read_batch_size, segment_frames)
+                 batch_size = batch_end - frame_idx
+
+                 # Read batch of frames
+                 raw_data = process.stdout.read(frame_size * batch_size)  # type: ignore
+                 if not raw_data:
+                     break
+
+                 # Convert raw bytes to numpy array and reshape
+                 batch_frames = np.frombuffer(raw_data, dtype=np.uint8)
+
+                 # Handle case where we might get fewer frames than expected
+                 actual_frames = len(batch_frames) // (height * width * 3)
+                 if actual_frames > 0:
+                     batch_frames = batch_frames[: actual_frames * height * width * 3]
+                     batch_frames = batch_frames.reshape(-1, height, width, 3)
+
+                     # Store batch in pre-allocated array
+                     end_idx = frame_idx + actual_frames
+                     frames[frame_idx:end_idx] = batch_frames
+                     frames_read += actual_frames
+                 else:
+                     break
+
+             # Clean up FFmpeg process
+             process.stdout.close()  # type: ignore
+             process.stderr.close()  # type: ignore
+             process.wait()
+
+             if process.returncode != 0:
+                 stderr_output = process.stderr.read().decode() if process.stderr else "Unknown error"
+                 raise ValueError(f"FFmpeg error: {stderr_output}")
+
+             # Trim frames array if we read fewer frames than expected
+             if frames_read < segment_frames:
+                 frames = frames[:frames_read]  # type: ignore[assignment]
+
+             # Load audio for the specified segment
+             try:
+                 audio = Audio.from_file(path)
+                 # Slice audio to match the video segment
+                 if start_second is not None or end_second is not None:
+                     audio_start = start_second if start_second is not None else 0
+                     audio_end = end_second if end_second is not None else audio.metadata.duration_seconds
+                     audio = audio.slice(start_seconds=audio_start, end_seconds=audio_end)
+             except Exception:
+                 print(f"No audio found for `{path}`, adding silent track!")
+                 # Create silent audio for the segment duration
+                 segment_duration = len(frames) / fps
+                 audio = Audio.create_silent(duration_seconds=round(segment_duration, 2), stereo=True, sample_rate=44100)
+
+             return cls(frames=frames, fps=fps, audio=audio)
+
+         except VideoMetadataError as e:
+             raise ValueError(f"Error getting video metadata: {e}")
+         except subprocess.CalledProcessError as e:
+             raise ValueError(f"Error processing video file: {e}")
+         except Exception as e:
+             raise ValueError(f"Error loading video: {e}")

      @classmethod
      def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
-         new_vid = cls()
          if frames.ndim != 4:
              raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
          elif frames.shape[-1] == 4:
              frames = frames[:, :, :, :3]
          elif frames.shape[-1] != 3:
              raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
-         new_vid.frames = frames
-         new_vid.fps = fps
-         new_vid.audio = Audio.create_silent(
-             duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
-         )
-         return new_vid
+         return cls(frames=frames, fps=fps)

      @classmethod
      def from_image(cls, image: np.ndarray, fps: float = 24.0, length_seconds: float = 1.0) -> Video:
-         new_vid = cls()
          if len(image.shape) == 3:
              image = np.expand_dims(image, axis=0)
-         new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
-         new_vid.fps = fps
-         new_vid.audio = Audio.create_silent(duration_seconds=length_seconds, stereo=True, sample_rate=44100)
-         return new_vid
+         frames = np.repeat(image, round(length_seconds * fps), axis=0)
+         return cls(frames=frames, fps=fps)

      def copy(self) -> Video:
-         copied = Video().from_frames(self.frames.copy(), self.fps)
+         copied = Video.from_frames(self.frames.copy(), self.fps)
          copied.audio = self.audio  # Audio objects are immutable, no need to copy
          return copied

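The new loader replaces the cv2 frame loop with a single FFmpeg rawvideo pipe and adds optional start_second/end_second bounds, so only the requested segment is decoded into the pre-allocated array. A rough usage sketch (file name and bounds are placeholders; ffmpeg must be on PATH):

    from videopython.base.video import Video

    # Hypothetical clip and bounds; end_second must not exceed the clip duration.
    clip = Video.from_path("example.mp4", start_second=1.0, end_second=3.0)
    print(clip.frames.shape)  # (segment_frames, height, width, 3), dtype=uint8
    print(clip.fps, clip.audio.metadata.duration_seconds)
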
@@ -168,6 +327,19 @@ class Video:
          return split_videos

      def save(self, filename: str | Path | None = None, format: ALLOWED_VIDEO_FORMATS = "mp4") -> Path:
+         """Save video to file with optimized performance.
+
+         Args:
+             filename: Output filename. If None, generates random name
+             format: Output format (mp4, avi, mov, mkv, webm)
+
+         Returns:
+             Path to saved video file
+
+         Raises:
+             RuntimeError: If video is not loaded
+             ValueError: If format is not supported
+         """
          if not self.is_loaded():
              raise RuntimeError("Video is not loaded, cannot save!")

@@ -182,80 +354,71 @@ class Video:
          filename = Path(filename).with_suffix(f".{format}")
          filename.parent.mkdir(parents=True, exist_ok=True)

-         with tempfile.TemporaryDirectory() as temp_dir:
-             temp_dir_path = Path(temp_dir)
-
-             # Save frames as images
-             for i, frame in enumerate(self.frames):
-                 frame_path = temp_dir_path / f"frame_{i:04d}.png"
-                 cv2.imwrite(str(frame_path), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-
-             # Calculate exact video duration
-             video_duration = len(self.frames) / self.fps
-
-             # Ensure audio duration matches video duration
-             if (
-                 abs(self.audio.metadata.duration_seconds - video_duration) > 0.001
-             ):  # Small threshold for float comparison
-                 if self.audio.metadata.duration_seconds < video_duration:
-                     # Create silent audio for the remaining duration
-                     remaining_duration = video_duration - self.audio.metadata.duration_seconds
-                     silent_audio = Audio.create_silent(
-                         duration_seconds=remaining_duration,
-                         stereo=(self.audio.metadata.channels == 2),
-                         sample_rate=self.audio.metadata.sample_rate,
-                         sample_width=self.audio.metadata.sample_width,
-                     )
-                     # Concatenate original audio with silent padding
-                     padded_audio = self.audio.concat(silent_audio)
-                 else:
-                     # Trim audio to match video duration
-                     padded_audio = self.audio.slice(end_seconds=video_duration)
-             else:
-                 padded_audio = self.audio
+         # Create a temporary raw video file
+         with tempfile.NamedTemporaryFile(suffix=".raw") as raw_video:
+             # Convert frames to raw video data
+             raw_data = self.frames.astype(np.uint8).tobytes()
+             raw_video.write(raw_data)
+             raw_video.flush()

              # Save audio to temporary WAV file
-             temp_audio = temp_dir_path / "temp_audio.wav"
-             padded_audio.save(str(temp_audio), format="wav")
-
-             # Construct FFmpeg command with explicit duration
-             ffmpeg_command = [
-                 "ffmpeg",
-                 "-y",
-                 "-framerate",
-                 str(self.fps),  # Use -framerate instead of -r for input
-                 "-i",
-                 str(temp_dir_path / "frame_%04d.png"),
-                 "-i",
-                 str(temp_audio),
-                 "-c:v",
-                 "libx264",
-                 "-preset",
-                 "medium",
-                 "-crf",
-                 "23",
-                 "-c:a",
-                 "aac",  # Use AAC instead of copy for more reliable audio
-                 "-b:a",
-                 "192k",
-                 "-pix_fmt",
-                 "yuv420p",
-                 "-map",
-                 "0:v:0",  # Map video from first input
-                 "-map",
-                 "1:a:0",  # Map audio from second input
-                 "-vsync",
-                 "cfr",  # Force constant frame rate
-                 str(filename),
-             ]
-
-             try:
-                 subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
-                 return filename
-             except subprocess.CalledProcessError as e:
-                 print(f"Error saving video: {e}")
-                 print(f"FFmpeg stderr: {e.stderr}")
-                 raise
+             with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio:
+                 self.audio.save(temp_audio.name, format="wav")
+
+                 # Calculate exact duration
+                 duration = len(self.frames) / self.fps
+
+                 # Construct FFmpeg command for maximum performance
+                 ffmpeg_command = [
+                     "ffmpeg",
+                     "-y",
+                     # Raw video input settings
+                     "-f",
+                     "rawvideo",
+                     "-pixel_format",
+                     "rgb24",
+                     "-video_size",
+                     f"{self.frame_shape[1]}x{self.frame_shape[0]}",
+                     "-framerate",
+                     str(self.fps),
+                     "-i",
+                     raw_video.name,
+                     # Audio input
+                     "-i",
+                     temp_audio.name,
+                     # Video encoding settings
+                     "-c:v",
+                     "libx264",
+                     "-preset",
+                     "ultrafast",  # Fastest encoding
+                     "-tune",
+                     "zerolatency",  # Reduce encoding latency
+                     "-crf",
+                     "23",  # Reasonable quality/size tradeoff
+                     # Audio settings
+                     "-c:a",
+                     "aac",
+                     "-b:a",
+                     "192k",
+                     # Output settings
+                     "-pix_fmt",
+                     "yuv420p",
+                     "-movflags",
+                     "+faststart",  # Enable fast start for web playback
+                     "-t",
+                     str(duration),
+                     "-vsync",
+                     "cfr",
+                     str(filename),
+                 ]
+
+                 try:
+                     subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
+                     return filename
+                 except subprocess.CalledProcessError as e:
+                     print(f"Error saving video: {e}")
+                     print(f"FFmpeg stderr: {e.stderr}")
+                     raise

      def add_audio(self, audio: Audio, overlay: bool = True) -> None:
          if self.audio.is_silent:
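
With this change, save() no longer writes one PNG per frame; it streams the frames as raw RGB bytes into a temporary file and lets FFmpeg encode them with libx264/AAC in one pass. A hedged sketch of the call (paths are placeholders; ffmpeg must be on PATH):

    from videopython.base.video import Video

    # Hypothetical paths; save() returns the Path it wrote to.
    video = Video.from_path("example.mp4")
    out_path = video.save("output.mp4")
    print(out_path)
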
@@ -269,7 +432,7 @@ class Video:
          try:
              new_audio = Audio.from_file(path)
              self.add_audio(new_audio, overlay)
-         except Exception as e:
+         except Exception:
              print(f"Audio file `{path}` not found or invalid, skipping!")

      def __add__(self, other: Video) -> Video:
@@ -305,29 +468,6 @@ class Video:
          sliced.audio = self.audio.slice(start_seconds=audio_start, end_seconds=audio_end)
          return sliced

-     @staticmethod
-     def _load_video_from_path(path: str) -> tuple[np.ndarray, float]:
-         cap = cv2.VideoCapture(path)
-         if not cap.isOpened():
-             raise ValueError(f"Unable to open video file: {path}")
-
-         fps = cap.get(cv2.CAP_PROP_FPS)
-         frames = []
-
-         while True:
-             ret, frame = cap.read()
-             if not ret:
-                 break
-             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-             frames.append(frame)
-
-         cap.release()
-
-         if not frames:
-             raise ValueError(f"No frames could be read from the video file: {path}")
-
-         return np.array(frames), fps
-
      @property
      def video_shape(self) -> tuple[int, int, int, int]:
          return self.frames.shape
@@ -0,0 +1,3 @@
+ from videopython.utils.text import AnchorPoint, ImageText, TextAlign
+
+ __all__ = ["AnchorPoint", "ImageText", "TextAlign"]