videopython 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

videopython/base/video.py CHANGED
@@ -1,21 +1,27 @@
  from __future__ import annotations

- import shlex
+ import json
  import subprocess
  import tempfile
  from dataclasses import dataclass
+ from fractions import Fraction
  from pathlib import Path
  from typing import Literal, get_args

- import cv2
  import numpy as np
- from pydub import AudioSegment
+ from soundpython import Audio

  from videopython.utils.common import generate_random_name

  ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]


+ class VideoMetadataError(Exception):
+     """Raised when there's an error getting video metadata"""
+
+     pass
+
+
  @dataclass
  class VideoMetadata:
      """Class to store video metadata."""
@@ -26,63 +32,91 @@ class VideoMetadata:
      frame_count: int
      total_seconds: float

-     def __str__(self):
+     def __str__(self) -> str:
          return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"

      def __repr__(self) -> str:
          return self.__str__()

-     def get_frame_shape(self):
+     def get_frame_shape(self) -> np.ndarray:
          """Returns frame shape."""
          return np.array((self.height, self.width, 3))

-     def get_video_shape(self):
+     def get_video_shape(self) -> np.ndarray:
          """Returns video shape."""
          return np.array((self.frame_count, self.height, self.width, 3))

-     @classmethod
-     def from_path(cls, video_path: str) -> VideoMetadata:
-         """Creates VideoMetadata object from video file.
+     @staticmethod
+     def _run_ffprobe(video_path: str | Path) -> dict:
+         """Run ffprobe and return parsed JSON output."""
+         cmd = [
+             "ffprobe",
+             "-v",
+             "error",
+             "-select_streams",
+             "v:0",
+             "-show_entries",
+             "stream=width,height,r_frame_rate,nb_frames",
+             "-show_entries",
+             "format=duration",
+             "-print_format",
+             "json",
+             str(video_path),
+         ]

-         Args:
-             video_path: Path to video file.
-         """
-         video = cv2.VideoCapture(video_path)
-         frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-         fps = round(video.get(cv2.CAP_PROP_FPS), 2)
-         height = round(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-         width = round(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-         total_seconds = round(frame_count / fps, 2)
-
-         return cls(
-             height=height,
-             width=width,
-             fps=fps,
-             frame_count=frame_count,
-             total_seconds=total_seconds,
-         )
+         try:
+             result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+             return json.loads(result.stdout)
+         except subprocess.CalledProcessError as e:
+             raise VideoMetadataError(f"FFprobe error: {e.stderr}")
+         except json.JSONDecodeError as e:
+             raise VideoMetadataError(f"Error parsing FFprobe output: {e}")

      @classmethod
-     def from_video(cls, video: Video) -> VideoMetadata:
-         """Creates VideoMetadata object from frames.
+     def from_path(cls, video_path: str | Path) -> VideoMetadata:
+         """Creates VideoMetadata object from video file using ffprobe."""
+         if not Path(video_path).exists():
+             raise FileNotFoundError(f"Video file not found: {video_path}")

-         Args:
-             frames: Frames of the video.
-             fps: Frames per second of the video.
-         """
+         probe_data = cls._run_ffprobe(video_path)
+
+         try:
+             stream_info = probe_data["streams"][0]
+
+             width = int(stream_info["width"])
+             height = int(stream_info["height"])
+
+             try:
+                 fps_fraction = Fraction(stream_info["r_frame_rate"])
+                 fps = float(fps_fraction)
+             except (ValueError, ZeroDivisionError):
+                 raise VideoMetadataError(f"Invalid frame rate: {stream_info['r_frame_rate']}")
+
+             if "nb_frames" in stream_info and stream_info["nb_frames"].isdigit():
+                 frame_count = int(stream_info["nb_frames"])
+             else:
+                 duration = float(probe_data["format"]["duration"])
+                 frame_count = int(round(duration * fps))
+
+             total_seconds = round(frame_count / fps, 2)
+
+             return cls(height=height, width=width, fps=fps, frame_count=frame_count, total_seconds=total_seconds)

+         except KeyError as e:
+             raise VideoMetadataError(f"Missing required metadata field: {e}")
+         except Exception as e:
+             raise VideoMetadataError(f"Error extracting video metadata: {e}")
+
+     @classmethod
+     def from_video(cls, video: Video) -> VideoMetadata:
+         """Creates VideoMetadata object from Video instance."""
          frame_count, height, width, _ = video.frames.shape
          total_seconds = round(frame_count / video.fps, 2)

-         return cls(
-             height=height,
-             width=width,
-             fps=video.fps,
-             frame_count=frame_count,
-             total_seconds=total_seconds,
-         )
+         return cls(height=height, width=width, fps=video.fps, frame_count=frame_count, total_seconds=total_seconds)

      def can_be_merged_with(self, other_format: VideoMetadata) -> bool:
+         """Check if videos can be merged."""
          return (
              self.height == other_format.height
              and self.width == other_format.width
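
A minimal usage sketch of the new ffprobe-based metadata path shown above (the input file name is hypothetical; ffprobe must be available on PATH):

    from videopython.base.video import VideoMetadata, VideoMetadataError

    try:
        meta = VideoMetadata.from_path("clips/intro.mp4")  # hypothetical path
        print(meta)                    # e.g. "1920x1080 @ 30.0fps, 12.5 seconds"
        print(meta.get_video_shape())  # [frame_count, height, width, 3]
    except VideoMetadataError as exc:
        print(f"ffprobe could not read the file: {exc}")
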
@@ -90,14 +124,7 @@ class VideoMetadata:
          )

      def can_be_downsampled_to(self, target_format: VideoMetadata) -> bool:
-         """Checks if video can be downsampled to `target_format`.
-
-         Args:
-             target_format: Desired video format.
-
-         Returns:
-             True if video can be downsampled to `target_format`, False otherwise.
-         """
+         """Checks if video can be downsampled to target_format."""
          return (
              self.height >= target_format.height
              and self.width >= target_format.width
@@ -113,15 +140,94 @@ class Video:
          self.audio = None

      @classmethod
-     def from_path(cls, path: str) -> Video:
+     def from_path(cls, path: str, read_batch_size: int = 100) -> Video:
          new_vid = cls()
-         new_vid.frames, new_vid.fps = cls._load_video_from_path(path)
-         audio = cls._load_audio_from_path(path)
-         if not audio:
-             print(f"No audio found for `{path}`, adding silent track!")
-             audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
-         new_vid.audio = audio
-         return new_vid
+
+         try:
+             # Get video metadata using VideoMetadata.from_path
+             metadata = VideoMetadata.from_path(path)
+
+             width = metadata.width
+             height = metadata.height
+             fps = metadata.fps
+             total_frames = metadata.frame_count
+
+             # Set up FFmpeg command for raw video extraction
+             ffmpeg_cmd = [
+                 "ffmpeg",
+                 "-i",
+                 path,
+                 "-f",
+                 "rawvideo",
+                 "-pix_fmt",
+                 "rgb24",
+                 "-vsync",
+                 "0",
+                 "-vcodec",
+                 "rawvideo",
+                 "-y",
+                 "pipe:1",
+             ]
+
+             # Start FFmpeg process
+             process = subprocess.Popen(
+                 ffmpeg_cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 bufsize=10**8,  # Use large buffer
+             )
+
+             # Calculate frame size in bytes
+             frame_size = width * height * 3  # 3 bytes per pixel for RGB
+
+             # Pre-allocate numpy array for all frames
+             frames = np.empty((total_frames, height, width, 3), dtype=np.uint8)
+
+             # Read frames in batches
+             for frame_idx in range(0, total_frames, read_batch_size):
+                 batch_end = min(frame_idx + read_batch_size, total_frames)
+                 batch_size = batch_end - frame_idx
+
+                 # Read batch of frames
+                 raw_data = process.stdout.read(frame_size * batch_size)  # type: ignore
+                 if not raw_data:
+                     break
+
+                 # Convert raw bytes to numpy array and reshape
+                 batch_frames = np.frombuffer(raw_data, dtype=np.uint8)
+                 batch_frames = batch_frames.reshape(-1, height, width, 3)
+
+                 # Store batch in pre-allocated array
+                 frames[frame_idx:batch_end] = batch_frames
+
+             # Clean up FFmpeg process
+             process.stdout.close()  # type: ignore
+             process.stderr.close()  # type: ignore
+             process.wait()
+
+             if process.returncode != 0:
+                 raise ValueError(f"FFmpeg error: {process.stderr.read().decode()}")  # type: ignore
+
+             new_vid.frames = frames
+             new_vid.fps = fps
+
+             # Load audio
+             try:
+                 new_vid.audio = Audio.from_file(path)
+             except Exception:
+                 print(f"No audio found for `{path}`, adding silent track!")
+                 new_vid.audio = Audio.create_silent(
+                     duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
+                 )
+
+             return new_vid
+
+         except VideoMetadataError as e:
+             raise ValueError(f"Error getting video metadata: {e}")
+         except subprocess.CalledProcessError as e:
+             raise ValueError(f"Error processing video file: {e}")
+         except Exception as e:
+             raise ValueError(f"Error loading video: {e}")

      @classmethod
      def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
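
For reference, a short sketch of how the rewritten loader above might be called; the path and batch size are illustrative, and ffmpeg/ffprobe are assumed to be installed:

    from videopython.base.video import Video

    # Frames are decoded straight from FFmpeg's rawvideo pipe into a pre-allocated array;
    # a larger read_batch_size trades memory for fewer pipe reads.
    video = Video.from_path("clips/intro.mp4", read_batch_size=250)  # hypothetical path
    print(video.fps, video.video_shape)  # (frame_count, height, width, 3)
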
@@ -134,7 +240,9 @@ class Video:
              raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
          new_vid.frames = frames
          new_vid.fps = fps
-         new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
+         new_vid.audio = Audio.create_silent(
+             duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
+         )
          return new_vid

      @classmethod
@@ -144,12 +252,12 @@ class Video:
              image = np.expand_dims(image, axis=0)
          new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
          new_vid.fps = fps
-         new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
+         new_vid.audio = Audio.create_silent(duration_seconds=length_seconds, stereo=True, sample_rate=44100)
          return new_vid

      def copy(self) -> Video:
          copied = Video().from_frames(self.frames.copy(), self.fps)
-         copied.audio = self.audio
+         copied.audio = self.audio  # Audio objects are immutable, no need to copy
          return copied

      def is_loaded(self) -> bool:
@@ -165,25 +273,31 @@ class Video:
              self.from_frames(self.frames[:frame_idx], self.fps),
              self.from_frames(self.frames[frame_idx:], self.fps),
          )
-         audio_midpoint = (frame_idx / self.fps) * 1000
-         split_videos[0].audio = self.audio[:audio_midpoint]
-         split_videos[1].audio = self.audio[audio_midpoint:]
+
+         # Split audio at the corresponding time point
+         split_time = frame_idx / self.fps
+         split_videos[0].audio = self.audio.slice(start_seconds=0, end_seconds=split_time)
+         split_videos[1].audio = self.audio.slice(start_seconds=split_time)
+
          return split_videos

      def save(self, filename: str | Path | None = None, format: ALLOWED_VIDEO_FORMATS = "mp4") -> Path:
-         """Saves the video with audio.
+         """Save video to file with optimized performance.

          Args:
-             filename: Name of the output video file. Generates random name if not provided.
-             format: Output format (default is 'mp4').
+             filename: Output filename. If None, generates random name
+             format: Output format (mp4, avi, mov, mkv, webm)

          Returns:
-             Path to the saved video file.
+             Path to saved video file
+
+         Raises:
+             RuntimeError: If video is not loaded
+             ValueError: If format is not supported
          """
          if not self.is_loaded():
              raise RuntimeError("Video is not loaded, cannot save!")

-         # Check if the format is allowed
          if format.lower() not in get_args(ALLOWED_VIDEO_FORMATS):
              raise ValueError(
                  f"Unsupported format: {format}. Allowed formats are: {', '.join(get_args(ALLOWED_VIDEO_FORMATS))}"
@@ -195,87 +309,94 @@ class Video:
          filename = Path(filename).with_suffix(f".{format}")
          filename.parent.mkdir(parents=True, exist_ok=True)

-         with tempfile.TemporaryDirectory() as temp_dir:
-             temp_dir_path = Path(temp_dir)
-
-             # Save frames as images
-             for i, frame in enumerate(self.frames):
-                 frame_path = temp_dir_path / f"frame_{i:04d}.png"
-                 cv2.imwrite(str(frame_path), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-
-             # Save audio to a temporary file
-             temp_audio = temp_dir_path / "temp_audio.wav"
-             self.audio.export(str(temp_audio), format="adts", bitrate="192k")
-
-             # Construct FFmpeg command
-             ffmpeg_command = [
-                 "ffmpeg",
-                 "-y",  # Overwrite output file if it exists
-                 "-r",
-                 str(self.fps),  # Set the frame rate
-                 "-i",
-                 str(temp_dir_path / "frame_%04d.png"),  # Input image sequence
-                 "-i",
-                 str(temp_audio),  # Input audio file
-                 "-c:v",
-                 "libx264",  # Video codec
-                 "-preset",
-                 "medium",  # Encoding preset (tradeoff between encoding speed and compression)
-                 "-crf",
-                 "23",  # Constant Rate Factor (lower means better quality, 23 is default)
-                 "-c:a",
-                 "copy",  # Audio codec
-                 "-b:a",
-                 "192k",  # Audio bitrate
-                 "-pix_fmt",
-                 "yuv420p",  # Pixel format
-                 "-shortest",  # Finish encoding when the shortest input stream ends
-                 str(filename),
-             ]
+         # Create a temporary raw video file
+         with tempfile.NamedTemporaryFile(suffix=".raw") as raw_video:
+             # Convert frames to raw video data
+             raw_data = self.frames.astype(np.uint8).tobytes()
+             raw_video.write(raw_data)
+             raw_video.flush()
+
+             # Save audio to temporary WAV file
+             with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio:
+                 self.audio.save(temp_audio.name, format="wav")
+
+                 # Calculate exact duration
+                 duration = len(self.frames) / self.fps
+
+                 # Construct FFmpeg command for maximum performance
+                 ffmpeg_command = [
+                     "ffmpeg",
+                     "-y",
+                     # Raw video input settings
+                     "-f",
+                     "rawvideo",
+                     "-pixel_format",
+                     "rgb24",
+                     "-video_size",
+                     f"{self.frame_shape[1]}x{self.frame_shape[0]}",
+                     "-framerate",
+                     str(self.fps),
+                     "-i",
+                     raw_video.name,
+                     # Audio input
+                     "-i",
+                     temp_audio.name,
+                     # Video encoding settings
+                     "-c:v",
+                     "libx264",
+                     "-preset",
+                     "ultrafast",  # Fastest encoding
+                     "-tune",
+                     "zerolatency",  # Reduce encoding latency
+                     "-crf",
+                     "23",  # Reasonable quality/size tradeoff
+                     # Audio settings
+                     "-c:a",
+                     "aac",
+                     "-b:a",
+                     "192k",
+                     # Output settings
+                     "-pix_fmt",
+                     "yuv420p",
+                     "-movflags",
+                     "+faststart",  # Enable fast start for web playback
+                     "-t",
+                     str(duration),
+                     "-vsync",
+                     "cfr",
+                     str(filename),
+                 ]
+
+                 try:
+                     subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
+                     return filename
+                 except subprocess.CalledProcessError as e:
+                     print(f"Error saving video: {e}")
+                     print(f"FFmpeg stderr: {e.stderr}")
+                     raise
+
+     def add_audio(self, audio: Audio, overlay: bool = True) -> None:
+         if self.audio.is_silent:
+             self.audio = audio
+         elif overlay:
+             self.audio = self.audio.overlay(audio, position=0.0)
+         else:
+             self.audio = audio

-             try:
-                 subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
-                 print(f"Video saved successfully to: {filename}")
-                 return filename
-             except subprocess.CalledProcessError as e:
-                 print(f"Error saving video: {e}")
-                 print(f"FFmpeg stderr: {e.stderr}")
-                 raise
-
-     def add_audio(self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
-         self.audio = self._process_audio(audio=audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
-
-     def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
-         new_audio = self._load_audio_from_path(path)
-         if new_audio is None:
-             print(f"Audio file `{path}` not found, skipping!")
-             return
-
-         self.audio = self._process_audio(audio=new_audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
-
-     def _process_audio(
-         self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False
-     ) -> AudioSegment:
-         if (duration_diff := round(self.total_seconds - audio.duration_seconds)) > 0 and not loop:
-             audio = audio + AudioSegment.silent(duration_diff * 1000)
-         elif audio.duration_seconds > self.total_seconds:
-             audio = audio[: round(self.total_seconds * 1000)]
-
-         if overlay:
-             return self.audio.overlay(audio, loop=loop, gain_during_overlay=overlay_gain)
-         return audio
+     def add_audio_from_file(self, path: str, overlay: bool = True) -> None:
+         try:
+             new_audio = Audio.from_file(path)
+             self.add_audio(new_audio, overlay)
+         except Exception:
+             print(f"Audio file `{path}` not found or invalid, skipping!")

      def __add__(self, other: Video) -> Video:
-         # TODO: Should it be class method? How to make it work with sum()?
          if self.fps != other.fps:
              raise ValueError("FPS of videos do not match!")
          elif self.frame_shape != other.frame_shape:
-             raise ValueError(
-                 "Resolutions of the images do not match: "
-                 f"{self.frame_shape} not compatible with {other.frame_shape}."
-             )
+             raise ValueError(f"Resolutions do not match: {self.frame_shape} vs {other.frame_shape}")
          new_video = self.from_frames(np.r_["0,2", self.frames, other.frames], fps=self.fps)
-         new_video.audio = self.audio + other.audio
+         new_video.audio = self.audio.concat(other.audio)
          return new_video

      def __str__(self) -> str:
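
An illustrative round trip through the new audio and save APIs, using only calls that appear in this diff (file names are hypothetical):

    from videopython.base.video import Video

    video = Video.from_path("clips/intro.mp4")             # hypothetical input
    video.add_audio_from_file("music.mp3", overlay=True)   # overlaid on the existing track at position 0.0
    out_path = video.save("out/intro_with_music", format="mp4")
    print(f"Saved to {out_path}")
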
@@ -285,74 +406,35 @@ class Video:
          if not isinstance(val, slice):
              raise ValueError("Only slices are supported for video indexing!")

-         # Sub-slice video if given a slice
+         # Sub-slice video frames
          sliced = self.from_frames(self.frames[val], fps=self.fps)
-         # Handle slicing without value for audio
+
+         # Handle slicing bounds for audio
          start = val.start if val.start else 0
          stop = val.stop if val.stop else len(self.frames)
-         # Handle negative values for audio slices
          if start < 0:
              start = len(self.frames) + start
          if stop < 0:
              stop = len(self.frames) + stop
-         # Append audio to the slice
-         audio_start = round(start / self.fps) * 1000
-         audio_end = round(stop / self.fps) * 1000
-         sliced.audio = self.audio[audio_start:audio_end]
-         return sliced
-
-     @staticmethod
-     def _load_audio_from_path(path: str) -> AudioSegment | None:
-         try:
-             audio = AudioSegment.from_file(path)
-             return audio
-         except IndexError:
-             return None

-     @staticmethod
-     def _load_video_from_path(path: str) -> tuple[np.ndarray, float]:
-         """Loads frames and fps information from video file.
-
-         Args:
-             path: Path to video file.
-         """
-         cap = cv2.VideoCapture(path)
-         if not cap.isOpened():
-             raise ValueError(f"Unable to open video file: {path}")
-
-         fps = cap.get(cv2.CAP_PROP_FPS)
-         frames = []
-
-         while True:
-             ret, frame = cap.read()
-             if not ret:
-                 break
-             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-             frames.append(frame)
-
-         cap.release()
-
-         if not frames:
-             raise ValueError(f"No frames could be read from the video file: {path}")
-
-         return np.array(frames), fps
+         # Slice audio to match video duration
+         audio_start = start / self.fps
+         audio_end = stop / self.fps
+         sliced.audio = self.audio.slice(start_seconds=audio_start, end_seconds=audio_end)
+         return sliced

      @property
      def video_shape(self) -> tuple[int, int, int, int]:
-         """Returns 4D video shape."""
          return self.frames.shape

      @property
      def frame_shape(self) -> tuple[int, int, int]:
-         """Returns 3D frame shape."""
          return self.frames.shape[1:]

      @property
      def total_seconds(self) -> float:
-         """Returns total seconds of the video."""
          return round(self.frames.shape[0] / self.fps, 4)

      @property
      def metadata(self) -> VideoMetadata:
-         """Returns VideoMetadata object."""
          return VideoMetadata.from_video(self)
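
A sketch of the slicing behaviour above: the audio is now cut with Audio.slice to the same time window as the selected frames (input path hypothetical):

    from videopython.base.video import Video

    video = Video.from_path("clips/intro.mp4")  # hypothetical path
    clip = video[: int(2 * video.fps)]          # first ~2 seconds of frames and audio
    print(clip.total_seconds, clip.metadata)
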
@@ -0,0 +1,3 @@
+ from videopython.utils.text import AnchorPoint, ImageText, TextAlign
+
+ __all__ = ["AnchorPoint", "ImageText", "TextAlign"]