videopython 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

videopython/base/video.py CHANGED
@@ -1,21 +1,27 @@
  from __future__ import annotations

- import shlex
+ import json
  import subprocess
  import tempfile
  from dataclasses import dataclass
+ from fractions import Fraction
  from pathlib import Path
  from typing import Literal, get_args

- import cv2
  import numpy as np
- from pydub import AudioSegment
+ from soundpython import Audio

  from videopython.utils.common import generate_random_name

  ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]


+ class VideoMetadataError(Exception):
+     """Raised when there's an error getting video metadata"""
+
+     pass
+
+
  @dataclass
  class VideoMetadata:
      """Class to store video metadata."""
@@ -26,63 +32,91 @@ class VideoMetadata:
      frame_count: int
      total_seconds: float

-     def __str__(self):
+     def __str__(self) -> str:
          return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"

      def __repr__(self) -> str:
          return self.__str__()

-     def get_frame_shape(self):
+     def get_frame_shape(self) -> np.ndarray:
          """Returns frame shape."""
          return np.array((self.height, self.width, 3))

-     def get_video_shape(self):
+     def get_video_shape(self) -> np.ndarray:
          """Returns video shape."""
          return np.array((self.frame_count, self.height, self.width, 3))

-     @classmethod
-     def from_path(cls, video_path: str) -> VideoMetadata:
-         """Creates VideoMetadata object from video file.
+     @staticmethod
+     def _run_ffprobe(video_path: str | Path) -> dict:
+         """Run ffprobe and return parsed JSON output."""
+         cmd = [
+             "ffprobe",
+             "-v",
+             "error",
+             "-select_streams",
+             "v:0",
+             "-show_entries",
+             "stream=width,height,r_frame_rate,nb_frames",
+             "-show_entries",
+             "format=duration",
+             "-print_format",
+             "json",
+             str(video_path),
+         ]

-         Args:
-             video_path: Path to video file.
-         """
-         video = cv2.VideoCapture(video_path)
-         frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-         fps = round(video.get(cv2.CAP_PROP_FPS), 2)
-         height = round(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-         width = round(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-         total_seconds = round(frame_count / fps, 2)
-
-         return cls(
-             height=height,
-             width=width,
-             fps=fps,
-             frame_count=frame_count,
-             total_seconds=total_seconds,
-         )
+         try:
+             result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+             return json.loads(result.stdout)
+         except subprocess.CalledProcessError as e:
+             raise VideoMetadataError(f"FFprobe error: {e.stderr}")
+         except json.JSONDecodeError as e:
+             raise VideoMetadataError(f"Error parsing FFprobe output: {e}")

      @classmethod
-     def from_video(cls, video: Video) -> VideoMetadata:
-         """Creates VideoMetadata object from frames.
+     def from_path(cls, video_path: str | Path) -> VideoMetadata:
+         """Creates VideoMetadata object from video file using ffprobe."""
+         if not Path(video_path).exists():
+             raise FileNotFoundError(f"Video file not found: {video_path}")

-         Args:
-             frames: Frames of the video.
-             fps: Frames per second of the video.
-         """
+         probe_data = cls._run_ffprobe(video_path)
+
+         try:
+             stream_info = probe_data["streams"][0]
+
+             width = int(stream_info["width"])
+             height = int(stream_info["height"])
+
+             try:
+                 fps_fraction = Fraction(stream_info["r_frame_rate"])
+                 fps = float(fps_fraction)
+             except (ValueError, ZeroDivisionError):
+                 raise VideoMetadataError(f"Invalid frame rate: {stream_info['r_frame_rate']}")
+
+             if "nb_frames" in stream_info and stream_info["nb_frames"].isdigit():
+                 frame_count = int(stream_info["nb_frames"])
+             else:
+                 duration = float(probe_data["format"]["duration"])
+                 frame_count = int(round(duration * fps))
+
+             total_seconds = round(frame_count / fps, 2)
+
+             return cls(height=height, width=width, fps=fps, frame_count=frame_count, total_seconds=total_seconds)

+         except KeyError as e:
+             raise VideoMetadataError(f"Missing required metadata field: {e}")
+         except Exception as e:
+             raise VideoMetadataError(f"Error extracting video metadata: {e}")
+
+     @classmethod
+     def from_video(cls, video: Video) -> VideoMetadata:
+         """Creates VideoMetadata object from Video instance."""
          frame_count, height, width, _ = video.frames.shape
          total_seconds = round(frame_count / video.fps, 2)

-         return cls(
-             height=height,
-             width=width,
-             fps=video.fps,
-             frame_count=frame_count,
-             total_seconds=total_seconds,
-         )
+         return cls(height=height, width=width, fps=video.fps, frame_count=frame_count, total_seconds=total_seconds)

      def can_be_merged_with(self, other_format: VideoMetadata) -> bool:
+         """Check if videos can be merged."""
          return (
              self.height == other_format.height
              and self.width == other_format.width
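
A minimal usage sketch of the new ffprobe-based metadata path shown above (the input file name is hypothetical; ffprobe must be available on PATH):

    from videopython.base.video import VideoMetadata, VideoMetadataError

    try:
        meta = VideoMetadata.from_path("clips/intro.mp4")  # hypothetical path
        print(meta)                    # e.g. "1920x1080 @ 30.0fps, 12.5 seconds"
        print(meta.get_video_shape())  # [frame_count, height, width, 3]
    except VideoMetadataError as exc:
        print(f"ffprobe could not read the file: {exc}")
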
@@ -90,14 +124,7 @@ class VideoMetadata:
          )

      def can_be_downsampled_to(self, target_format: VideoMetadata) -> bool:
-         """Checks if video can be downsampled to `target_format`.
-
-         Args:
-             target_format: Desired video format.
-
-         Returns:
-             True if video can be downsampled to `target_format`, False otherwise.
-         """
+         """Checks if video can be downsampled to target_format."""
          return (
              self.height >= target_format.height
              and self.width >= target_format.width
@@ -113,15 +140,94 @@ class Video:
          self.audio = None

      @classmethod
-     def from_path(cls, path: str) -> Video:
+     def from_path(cls, path: str, read_batch_size: int = 100) -> Video:
          new_vid = cls()
-         new_vid.frames, new_vid.fps = cls._load_video_from_path(path)
-         audio = cls._load_audio_from_path(path)
-         if not audio:
-             print(f"No audio found for `{path}`, adding silent track!")
-             audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
-         new_vid.audio = audio
-         return new_vid
+
+         try:
+             # Get video metadata using VideoMetadata.from_path
+             metadata = VideoMetadata.from_path(path)
+
+             width = metadata.width
+             height = metadata.height
+             fps = metadata.fps
+             total_frames = metadata.frame_count
+
+             # Set up FFmpeg command for raw video extraction
+             ffmpeg_cmd = [
+                 "ffmpeg",
+                 "-i",
+                 path,
+                 "-f",
+                 "rawvideo",
+                 "-pix_fmt",
+                 "rgb24",
+                 "-vsync",
+                 "0",
+                 "-vcodec",
+                 "rawvideo",
+                 "-y",
+                 "pipe:1",
+             ]
+
+             # Start FFmpeg process
+             process = subprocess.Popen(
+                 ffmpeg_cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 bufsize=10**8,  # Use large buffer
+             )
+
+             # Calculate frame size in bytes
+             frame_size = width * height * 3  # 3 bytes per pixel for RGB
+
+             # Pre-allocate numpy array for all frames
+             frames = np.empty((total_frames, height, width, 3), dtype=np.uint8)
+
+             # Read frames in batches
+             for frame_idx in range(0, total_frames, read_batch_size):
+                 batch_end = min(frame_idx + read_batch_size, total_frames)
+                 batch_size = batch_end - frame_idx
+
+                 # Read batch of frames
+                 raw_data = process.stdout.read(frame_size * batch_size)  # type: ignore
+                 if not raw_data:
+                     break
+
+                 # Convert raw bytes to numpy array and reshape
+                 batch_frames = np.frombuffer(raw_data, dtype=np.uint8)
+                 batch_frames = batch_frames.reshape(-1, height, width, 3)
+
+                 # Store batch in pre-allocated array
+                 frames[frame_idx:batch_end] = batch_frames
+
+             # Clean up FFmpeg process
+             process.stdout.close()  # type: ignore
+             process.stderr.close()  # type: ignore
+             process.wait()
+
+             if process.returncode != 0:
+                 raise ValueError(f"FFmpeg error: {process.stderr.read().decode()}")  # type: ignore
+
+             new_vid.frames = frames
+             new_vid.fps = fps
+
+             # Load audio
+             try:
+                 new_vid.audio = Audio.from_file(path)
+             except Exception:
+                 print(f"No audio found for `{path}`, adding silent track!")
+                 new_vid.audio = Audio.create_silent(
+                     duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
+                 )
+
+             return new_vid
+
+         except VideoMetadataError as e:
+             raise ValueError(f"Error getting video metadata: {e}")
+         except subprocess.CalledProcessError as e:
+             raise ValueError(f"Error processing video file: {e}")
+         except Exception as e:
+             raise ValueError(f"Error loading video: {e}")

      @classmethod
      def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
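
For reference, a short sketch of how the rewritten loader above might be called; the path and batch size are illustrative, and ffmpeg/ffprobe are assumed to be installed:

    from videopython.base.video import Video

    # Frames are decoded straight from FFmpeg's rawvideo pipe into a pre-allocated array;
    # a larger read_batch_size trades memory for fewer pipe reads.
    video = Video.from_path("clips/intro.mp4", read_batch_size=250)  # hypothetical path
    print(video.fps, video.video_shape)  # (frame_count, height, width, 3)
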
@@ -134,7 +240,9 @@ class Video:
              raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
          new_vid.frames = frames
          new_vid.fps = fps
-         new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
+         new_vid.audio = Audio.create_silent(
+             duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
+         )
          return new_vid

      @classmethod
@@ -144,12 +252,12 @@ class Video:
              image = np.expand_dims(image, axis=0)
          new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
          new_vid.fps = fps
-         new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
+         new_vid.audio = Audio.create_silent(duration_seconds=length_seconds, stereo=True, sample_rate=44100)
          return new_vid

      def copy(self) -> Video:
          copied = Video().from_frames(self.frames.copy(), self.fps)
-         copied.audio = self.audio
+         copied.audio = self.audio  # Audio objects are immutable, no need to copy
          return copied

      def is_loaded(self) -> bool:
@@ -165,25 +273,31 @@ class Video:
              self.from_frames(self.frames[:frame_idx], self.fps),
              self.from_frames(self.frames[frame_idx:], self.fps),
          )
-         audio_midpoint = (frame_idx / self.fps) * 1000
-         split_videos[0].audio = self.audio[:audio_midpoint]
-         split_videos[1].audio = self.audio[audio_midpoint:]
+
+         # Split audio at the corresponding time point
+         split_time = frame_idx / self.fps
+         split_videos[0].audio = self.audio.slice(start_seconds=0, end_seconds=split_time)
+         split_videos[1].audio = self.audio.slice(start_seconds=split_time)
+
          return split_videos

      def save(self, filename: str | Path | None = None, format: ALLOWED_VIDEO_FORMATS = "mp4") -> Path:
-         """Saves the video with audio.
+         """Save video to file with optimized performance.

          Args:
-             filename: Name of the output video file. Generates random name if not provided.
-             format: Output format (default is 'mp4').
+             filename: Output filename. If None, generates random name
+             format: Output format (mp4, avi, mov, mkv, webm)

          Returns:
-             Path to the saved video file.
+             Path to saved video file
+
+         Raises:
+             RuntimeError: If video is not loaded
+             ValueError: If format is not supported
          """
          if not self.is_loaded():
              raise RuntimeError("Video is not loaded, cannot save!")

-         # Check if the format is allowed
          if format.lower() not in get_args(ALLOWED_VIDEO_FORMATS):
              raise ValueError(
                  f"Unsupported format: {format}. Allowed formats are: {', '.join(get_args(ALLOWED_VIDEO_FORMATS))}"
@@ -195,87 +309,94 @@ class Video:
          filename = Path(filename).with_suffix(f".{format}")
          filename.parent.mkdir(parents=True, exist_ok=True)

-         with tempfile.TemporaryDirectory() as temp_dir:
-             temp_dir_path = Path(temp_dir)
-
-             # Save frames as images
-             for i, frame in enumerate(self.frames):
-                 frame_path = temp_dir_path / f"frame_{i:04d}.png"
-                 cv2.imwrite(str(frame_path), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-
-             # Save audio to a temporary file
-             temp_audio = temp_dir_path / "temp_audio.wav"
-             self.audio.export(str(temp_audio), format="adts", bitrate="192k")
-
-             # Construct FFmpeg command
-             ffmpeg_command = [
-                 "ffmpeg",
-                 "-y",  # Overwrite output file if it exists
-                 "-r",
-                 str(self.fps),  # Set the frame rate
-                 "-i",
-                 str(temp_dir_path / "frame_%04d.png"),  # Input image sequence
-                 "-i",
-                 str(temp_audio),  # Input audio file
-                 "-c:v",
-                 "libx264",  # Video codec
-                 "-preset",
-                 "medium",  # Encoding preset (tradeoff between encoding speed and compression)
-                 "-crf",
-                 "23",  # Constant Rate Factor (lower means better quality, 23 is default)
-                 "-c:a",
-                 "copy",  # Audio codec
-                 "-b:a",
-                 "192k",  # Audio bitrate
-                 "-pix_fmt",
-                 "yuv420p",  # Pixel format
-                 "-shortest",  # Finish encoding when the shortest input stream ends
-                 str(filename),
-             ]
+         # Create a temporary raw video file
+         with tempfile.NamedTemporaryFile(suffix=".raw") as raw_video:
+             # Convert frames to raw video data
+             raw_data = self.frames.astype(np.uint8).tobytes()
+             raw_video.write(raw_data)
+             raw_video.flush()
+
+             # Save audio to temporary WAV file
+             with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio:
+                 self.audio.save(temp_audio.name, format="wav")
+
+                 # Calculate exact duration
+                 duration = len(self.frames) / self.fps
+
+                 # Construct FFmpeg command for maximum performance
+                 ffmpeg_command = [
+                     "ffmpeg",
+                     "-y",
+                     # Raw video input settings
+                     "-f",
+                     "rawvideo",
+                     "-pixel_format",
+                     "rgb24",
+                     "-video_size",
+                     f"{self.frame_shape[1]}x{self.frame_shape[0]}",
+                     "-framerate",
+                     str(self.fps),
+                     "-i",
+                     raw_video.name,
+                     # Audio input
+                     "-i",
+                     temp_audio.name,
+                     # Video encoding settings
+                     "-c:v",
+                     "libx264",
+                     "-preset",
+                     "ultrafast",  # Fastest encoding
+                     "-tune",
+                     "zerolatency",  # Reduce encoding latency
+                     "-crf",
+                     "23",  # Reasonable quality/size tradeoff
+                     # Audio settings
+                     "-c:a",
+                     "aac",
+                     "-b:a",
+                     "192k",
+                     # Output settings
+                     "-pix_fmt",
+                     "yuv420p",
+                     "-movflags",
+                     "+faststart",  # Enable fast start for web playback
+                     "-t",
+                     str(duration),
+                     "-vsync",
+                     "cfr",
+                     str(filename),
+                 ]
+
+                 try:
+                     subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
+                     return filename
+                 except subprocess.CalledProcessError as e:
+                     print(f"Error saving video: {e}")
+                     print(f"FFmpeg stderr: {e.stderr}")
+                     raise
+
+     def add_audio(self, audio: Audio, overlay: bool = True) -> None:
+         if self.audio.is_silent:
+             self.audio = audio
+         elif overlay:
+             self.audio = self.audio.overlay(audio, position=0.0)
+         else:
+             self.audio = audio

-             try:
-                 subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
-                 print(f"Video saved successfully to: {filename}")
-                 return filename
-             except subprocess.CalledProcessError as e:
-                 print(f"Error saving video: {e}")
-                 print(f"FFmpeg stderr: {e.stderr}")
-                 raise
-
-     def add_audio(self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
-         self.audio = self._process_audio(audio=audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
-
-     def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
-         new_audio = self._load_audio_from_path(path)
-         if new_audio is None:
-             print(f"Audio file `{path}` not found, skipping!")
-             return
-
-         self.audio = self._process_audio(audio=new_audio, overlay=overlay, overlay_gain=overlay_gain, loop=loop)
-
-     def _process_audio(
-         self, audio: AudioSegment, overlay: bool = True, overlay_gain: int = 0, loop: bool = False
-     ) -> AudioSegment:
-         if (duration_diff := round(self.total_seconds - audio.duration_seconds)) > 0 and not loop:
-             audio = audio + AudioSegment.silent(duration_diff * 1000)
-         elif audio.duration_seconds > self.total_seconds:
-             audio = audio[: round(self.total_seconds * 1000)]
-
-         if overlay:
-             return self.audio.overlay(audio, loop=loop, gain_during_overlay=overlay_gain)
-         return audio
+     def add_audio_from_file(self, path: str, overlay: bool = True) -> None:
+         try:
+             new_audio = Audio.from_file(path)
+             self.add_audio(new_audio, overlay)
+         except Exception:
+             print(f"Audio file `{path}` not found or invalid, skipping!")

      def __add__(self, other: Video) -> Video:
-         # TODO: Should it be class method? How to make it work with sum()?
          if self.fps != other.fps:
              raise ValueError("FPS of videos do not match!")
          elif self.frame_shape != other.frame_shape:
-             raise ValueError(
-                 "Resolutions of the images do not match: "
-                 f"{self.frame_shape} not compatible with {other.frame_shape}."
-             )
+             raise ValueError(f"Resolutions do not match: {self.frame_shape} vs {other.frame_shape}")
          new_video = self.from_frames(np.r_["0,2", self.frames, other.frames], fps=self.fps)
-         new_video.audio = self.audio + other.audio
+         new_video.audio = self.audio.concat(other.audio)
          return new_video

      def __str__(self) -> str:
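
An illustrative round trip through the new audio and save APIs, using only calls that appear in this diff (file names are hypothetical):

    from videopython.base.video import Video

    video = Video.from_path("clips/intro.mp4")             # hypothetical input
    video.add_audio_from_file("music.mp3", overlay=True)   # overlaid on the existing track at position 0.0
    out_path = video.save("out/intro_with_music", format="mp4")
    print(f"Saved to {out_path}")
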
@@ -285,74 +406,35 @@ class Video:
          if not isinstance(val, slice):
              raise ValueError("Only slices are supported for video indexing!")

-         # Sub-slice video if given a slice
+         # Sub-slice video frames
          sliced = self.from_frames(self.frames[val], fps=self.fps)
-         # Handle slicing without value for audio
+
+         # Handle slicing bounds for audio
          start = val.start if val.start else 0
          stop = val.stop if val.stop else len(self.frames)
-         # Handle negative values for audio slices
          if start < 0:
              start = len(self.frames) + start
          if stop < 0:
              stop = len(self.frames) + stop
-         # Append audio to the slice
-         audio_start = round(start / self.fps) * 1000
-         audio_end = round(stop / self.fps) * 1000
-         sliced.audio = self.audio[audio_start:audio_end]
-         return sliced
-
-     @staticmethod
-     def _load_audio_from_path(path: str) -> AudioSegment | None:
-         try:
-             audio = AudioSegment.from_file(path)
-             return audio
-         except IndexError:
-             return None

-     @staticmethod
-     def _load_video_from_path(path: str) -> tuple[np.ndarray, float]:
-         """Loads frames and fps information from video file.
-
-         Args:
-             path: Path to video file.
-         """
-         cap = cv2.VideoCapture(path)
-         if not cap.isOpened():
-             raise ValueError(f"Unable to open video file: {path}")
-
-         fps = cap.get(cv2.CAP_PROP_FPS)
-         frames = []
-
-         while True:
-             ret, frame = cap.read()
-             if not ret:
-                 break
-             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-             frames.append(frame)
-
-         cap.release()
-
-         if not frames:
-             raise ValueError(f"No frames could be read from the video file: {path}")
-
-         return np.array(frames), fps
+         # Slice audio to match video duration
+         audio_start = start / self.fps
+         audio_end = stop / self.fps
+         sliced.audio = self.audio.slice(start_seconds=audio_start, end_seconds=audio_end)
+         return sliced

      @property
      def video_shape(self) -> tuple[int, int, int, int]:
-         """Returns 4D video shape."""
          return self.frames.shape

      @property
      def frame_shape(self) -> tuple[int, int, int]:
-         """Returns 3D frame shape."""
          return self.frames.shape[1:]

      @property
      def total_seconds(self) -> float:
-         """Returns total seconds of the video."""
          return round(self.frames.shape[0] / self.fps, 4)

      @property
      def metadata(self) -> VideoMetadata:
-         """Returns VideoMetadata object."""
          return VideoMetadata.from_video(self)
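
A sketch of the slicing behaviour above: the audio is now cut with Audio.slice to the same time window as the selected frames (input path hypothetical):

    from videopython.base.video import Video

    video = Video.from_path("clips/intro.mp4")  # hypothetical path
    clip = video[: int(2 * video.fps)]          # first ~2 seconds of frames and audio
    print(clip.total_seconds, clip.metadata)
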
@@ -0,0 +1,3 @@
+ from videopython.utils.text import AnchorPoint, ImageText, TextAlign
+
+ __all__ = ["AnchorPoint", "ImageText", "TextAlign"]