videopython 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {videopython-0.4.0 → videopython-0.4.1}/PKG-INFO +15 -2
- {videopython-0.4.0 → videopython-0.4.1}/pyproject.toml +34 -9
- videopython-0.4.1/src/videopython/base/combine.py +45 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/video.py +93 -48
- {videopython-0.4.0 → videopython-0.4.1}/.gitignore +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/LICENSE +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/README.md +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/ai/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/ai/generation/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/ai/generation/audio.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/ai/generation/image.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/ai/generation/video.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/ai/understanding/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/ai/understanding/transcribe.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/compose.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/effects.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/exceptions.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/transcription.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/transforms.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/transitions.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/py.typed +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/utils/__init__.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/utils/common.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/utils/image.py +0 -0
- {videopython-0.4.0 → videopython-0.4.1}/src/videopython/utils/text.py +0 -0
{videopython-0.4.0 → videopython-0.4.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.4.0
+Version: 0.4.1
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -18,9 +18,22 @@ Requires-Python: <3.13,>=3.10
 Requires-Dist: numpy>=1.25.2
 Requires-Dist: opencv-python>=4.9.0.80
 Requires-Dist: pillow>=10.3.0
-Requires-Dist: pydub>=0.25.1
 Requires-Dist: soundpython>=0.1.11
 Requires-Dist: tqdm>=4.66.3
+Provides-Extra: ai
+Requires-Dist: accelerate>=0.29.2; extra == 'ai'
+Requires-Dist: diffusers>=0.26.3; extra == 'ai'
+Requires-Dist: numba>=0.61.0; extra == 'ai'
+Requires-Dist: openai-whisper>=20240930; extra == 'ai'
+Requires-Dist: torch>=2.1.0; extra == 'ai'
+Requires-Dist: transformers>=4.38.1; extra == 'ai'
+Provides-Extra: dev
+Requires-Dist: mypy>=1.8.0; extra == 'dev'
+Requires-Dist: pytest-cov>=6.1.1; extra == 'dev'
+Requires-Dist: pytest>=7.4.0; extra == 'dev'
+Requires-Dist: ruff>=0.1.14; extra == 'dev'
+Requires-Dist: types-pillow>=10.2.0.20240213; extra == 'dev'
+Requires-Dist: types-tqdm>=4.66.0.20240106; extra == 'dev'
 Description-Content-Type: text/markdown
 
 # About
{videopython-0.4.0 → videopython-0.4.1}/pyproject.toml

@@ -1,16 +1,24 @@
 [project]
 name = "videopython"
-version = "0.4.0"
+version = "0.4.1"
 description = "Minimal video generation and processing library."
 authors = [
     { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
     { name = "Bartosz Rudnikowicz", email = "bartoszrudnikowicz840@gmail.com" },
-    { name = "Piotr Pukisz", email = "piotr.pukisz@gmail.com" }
+    { name = "Piotr Pukisz", email = "piotr.pukisz@gmail.com" },
 ]
 license = { text = "Apache-2.0" }
 readme = "README.md"
 requires-python = ">=3.10, <3.13"
-keywords = [
+keywords = [
+    "python",
+    "videopython",
+    "video",
+    "movie",
+    "opencv",
+    "generation",
+    "editing",
+]
 classifiers = [
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3",
@@ -23,10 +31,8 @@ dependencies = [
     "numpy>=1.25.2",
     "opencv-python>=4.9.0.80",
     "pillow>=10.3.0",
-    "pydub>=0.25.1",
-    "soundpython>=0.1.11",
     "tqdm>=4.66.3",
-
+    "soundpython>=0.1.11",
 ]
 
 [dependency-groups]
@@ -47,6 +53,25 @@ ai = [
     "numba>=0.61.0",
 ]
 
+# We have to keep it to make PIP use those dependency groups, not only UV
+[project.optional-dependencies]
+dev = [
+    "ruff>=0.1.14",
+    "mypy>=1.8.0",
+    "pytest>=7.4.0",
+    "types-Pillow>=10.2.0.20240213",
+    "types-tqdm>=4.66.0.20240106",
+    "pytest-cov>=6.1.1",
+]
+ai = [
+    "accelerate>=0.29.2",
+    "diffusers>=0.26.3",
+    "torch>=2.1.0",
+    "transformers>=4.38.1",
+    "openai-whisper>=20240930",
+    "numba>=0.61.0",
+]
+
 [project.urls]
 Homepage = "https://github.com/bartwojtowicz/videopython/"
 Repository = "https://github.com/bartwojtowicz/videopython/"
@@ -77,9 +102,9 @@ target-version = "py310"
 
 [tool.ruff.lint]
 select = [
-    "E",
-    "F",
-    "I",
+    "E", # pycodestyle errors
+    "F", # pyflakes
+    "I", # isort
 ]
 isort.known-first-party = ["videopython"]
 
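Note: because the `ai` and `dev` groups are now also declared under `[project.optional-dependencies]` (the diff's own comment explains this is kept so pip, not only uv, sees them), they are installable as standard extras, e.g. `pip install "videopython[ai]"` or `pip install "videopython[dev]"`.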
videopython-0.4.1/src/videopython/base/combine.py (new file)

@@ -0,0 +1,45 @@
+from typing import Literal
+
+import numpy as np
+
+from videopython.base.transforms import ResampleFPS, Resize
+from videopython.base.video import Video
+
+
+class StackVideos:
+    def __init__(self, mode: Literal["horizontal", "vertical"]) -> None:
+        self.mode = mode
+
+    def _validate(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        video1, video2 = self._align_shapes(video1, video2)
+        video1, video2 = self._align_fps(video1, video2)
+        video1, video2 = self._align_duration(video1, video2)
+        return video1, video2
+
+    def _align_fps(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        if video1.fps > video2.fps:
+            video1 = ResampleFPS(fps=video2.fps).apply(video1)
+        elif video1.fps < video2.fps:
+            video2 = ResampleFPS(fps=video1.fps).apply(video2)
+        return (video1, video2)
+
+    def _align_shapes(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        if self.mode == "horizontal":
+            video2 = Resize(height=video1.metadata.height).apply(video2)
+        elif self.mode == "vertical":
+            video2 = Resize(width=video1.metadata.width).apply(video2)
+        return (video1, video2)
+
+    def _align_duration(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+        if len(video1.frames) > len(video2.frames):
+            video1 = video1[: len(video2.frames)]
+        elif len(video1.frames) < len(video2.frames):
+            video2 = video2[: len(video1.frames)]
+        return (video1, video2)
+
+    def apply(self, videos: tuple[Video, Video]) -> Video:
+        videos = self._validate(*videos)
+        axis = 1 if self.mode == "vertical" else 2
+        new_frames = np.concatenate((videos[0].frames, videos[1].frames), axis=axis)
+        new_audio = videos[0].audio.overlay(videos[1].audio)
+        return Video(frames=new_frames, fps=videos[0].fps, audio=new_audio)
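For orientation, a rough usage sketch of the new `StackVideos` class, based only on the code added above (the input paths are placeholders):

```python
from videopython.base.combine import StackVideos
from videopython.base.video import Video

# Two arbitrary clips (placeholder paths).
left = Video.from_path("clip_a.mp4")
right = Video.from_path("clip_b.mp4")

# StackVideos first aligns the two clips (resampling to the lower FPS,
# resizing the second clip to a matching height/width, trimming to the
# shorter duration), then concatenates frames along the width axis for
# "horizontal" or the height axis for "vertical", overlaying both audio tracks.
stacked = StackVideos(mode="horizontal").apply((left, right))

print(stacked.frames.shape)  # (n_frames, height, combined_width, 3)
print(stacked.fps)
```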
{videopython-0.4.0 → videopython-0.4.1}/src/videopython/base/video.py

@@ -134,15 +134,20 @@ class VideoMetadata:
 
 
 class Video:
-    def __init__(self):
-        self.
-        self.
-
+    def __init__(self, frames: np.ndarray, fps: int | float, audio: Audio | None = None):
+        self.frames = frames
+        self.fps = fps
+        if audio:
+            self.audio = audio
+        else:
+            self.audio = Audio.create_silent(
+                duration_seconds=round(self.total_seconds, 2), stereo=True, sample_rate=44100
+            )
 
     @classmethod
-    def from_path(
-
-
+    def from_path(
+        cls, path: str, read_batch_size: int = 100, start_second: float | None = None, end_second: float | None = None
+    ) -> Video:
         try:
             # Get video metadata using VideoMetadata.from_path
             metadata = VideoMetadata.from_path(path)
@@ -151,24 +156,56 @@ class Video:
             height = metadata.height
             fps = metadata.fps
             total_frames = metadata.frame_count
-
-
+            total_duration = metadata.total_seconds
+
+            # Validate time bounds
+            if start_second is not None and start_second < 0:
+                raise ValueError("start_second must be non-negative")
+            if end_second is not None and end_second > total_duration:
+                raise ValueError(f"end_second ({end_second}) exceeds video duration ({total_duration})")
+            if start_second is not None and end_second is not None and start_second >= end_second:
+                raise ValueError("start_second must be less than end_second")
+
+            # Calculate frame indices for the desired segment
+            start_frame = int(start_second * fps) if start_second is not None else 0
+            end_frame = int(end_second * fps) if end_second is not None else total_frames
+
+            # Ensure we don't exceed bounds
+            start_frame = max(0, start_frame)
+            end_frame = min(total_frames, end_frame)
+            segment_frames = end_frame - start_frame
+
+            # Set up FFmpeg command for raw video extraction with time bounds
             ffmpeg_cmd = [
                 "ffmpeg",
                 "-i",
                 path,
-                "-f",
-                "rawvideo",
-                "-pix_fmt",
-                "rgb24",
-                "-vsync",
-                "0",
-                "-vcodec",
-                "rawvideo",
-                "-y",
-                "pipe:1",
             ]
 
+            # Add seek and duration options if specified
+            if start_second is not None:
+                ffmpeg_cmd.extend(["-ss", str(start_second)])
+            if end_second is not None and start_second is not None:
+                duration = end_second - start_second
+                ffmpeg_cmd.extend(["-t", str(duration)])
+            elif end_second is not None:
+                ffmpeg_cmd.extend(["-t", str(end_second)])
+
+            ffmpeg_cmd.extend(
+                [
+                    "-f",
+                    "rawvideo",
+                    "-pix_fmt",
+                    "rgb24",
+                    "-vsync",
+                    "0",
+                    "-vcodec",
+                    "rawvideo",
+                    "-y",
+                    "pipe:1",
+                ]
+            )
+
             # Start FFmpeg process
             process = subprocess.Popen(
                 ffmpeg_cmd,
@@ -180,12 +217,13 @@ class Video:
             # Calculate frame size in bytes
             frame_size = width * height * 3  # 3 bytes per pixel for RGB
 
-            # Pre-allocate numpy array for
-            frames = np.empty((
+            # Pre-allocate numpy array for segment frames
+            frames = np.empty((segment_frames, height, width, 3), dtype=np.uint8)
 
             # Read frames in batches
-
-
+            frames_read = 0
+            for frame_idx in range(0, segment_frames, read_batch_size):
+                batch_end = min(frame_idx + read_batch_size, segment_frames)
                 batch_size = batch_end - frame_idx
 
                 # Read batch of frames
@@ -195,10 +233,19 @@ class Video:
 
                 # Convert raw bytes to numpy array and reshape
                 batch_frames = np.frombuffer(raw_data, dtype=np.uint8)
-                batch_frames = batch_frames.reshape(-1, height, width, 3)
 
-                #
-
+                # Handle case where we might get fewer frames than expected
+                actual_frames = len(batch_frames) // (height * width * 3)
+                if actual_frames > 0:
+                    batch_frames = batch_frames[: actual_frames * height * width * 3]
+                    batch_frames = batch_frames.reshape(-1, height, width, 3)
+
+                    # Store batch in pre-allocated array
+                    end_idx = frame_idx + actual_frames
+                    frames[frame_idx:end_idx] = batch_frames
+                    frames_read += actual_frames
+                else:
+                    break
 
             # Clean up FFmpeg process
             process.stdout.close()  # type: ignore
@@ -206,21 +253,28 @@ class Video:
             process.wait()
 
             if process.returncode != 0:
-
+                stderr_output = process.stderr.read().decode() if process.stderr else "Unknown error"
+                raise ValueError(f"FFmpeg error: {stderr_output}")
 
-
-
+            # Trim frames array if we read fewer frames than expected
+            if frames_read < segment_frames:
+                frames = frames[:frames_read]  # type: ignore[assignment]
 
-            # Load audio
+            # Load audio for the specified segment
            try:
-
+                audio = Audio.from_file(path)
+                # Slice audio to match the video segment
+                if start_second is not None or end_second is not None:
+                    audio_start = start_second if start_second is not None else 0
+                    audio_end = end_second if end_second is not None else audio.metadata.duration_seconds
+                    audio = audio.slice(start_seconds=audio_start, end_seconds=audio_end)
             except Exception:
                 print(f"No audio found for `{path}`, adding silent track!")
-
-
-                )
+                # Create silent audio for the segment duration
+                segment_duration = len(frames) / fps
+                audio = Audio.create_silent(duration_seconds=round(segment_duration, 2), stereo=True, sample_rate=44100)
 
-            return
+            return cls(frames=frames, fps=fps, audio=audio)
 
         except VideoMetadataError as e:
             raise ValueError(f"Error getting video metadata: {e}")
@@ -231,32 +285,23 @@ class Video:
 
     @classmethod
     def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
-        new_vid = cls()
         if frames.ndim != 4:
             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
         elif frames.shape[-1] == 4:
             frames = frames[:, :, :, :3]
         elif frames.shape[-1] != 3:
             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
-
-        new_vid.fps = fps
-        new_vid.audio = Audio.create_silent(
-            duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
-        )
-        return new_vid
+        return cls(frames=frames, fps=fps)
 
     @classmethod
     def from_image(cls, image: np.ndarray, fps: float = 24.0, length_seconds: float = 1.0) -> Video:
-        new_vid = cls()
         if len(image.shape) == 3:
             image = np.expand_dims(image, axis=0)
-
-
-        new_vid.audio = Audio.create_silent(duration_seconds=length_seconds, stereo=True, sample_rate=44100)
-        return new_vid
+        frames = np.repeat(image, round(length_seconds * fps), axis=0)
+        return cls(frames=frames, fps=fps)
 
     def copy(self) -> Video:
-        copied = Video
+        copied = Video.from_frames(self.frames.copy(), self.fps)
         copied.audio = self.audio  # Audio objects are immutable, no need to copy
         return copied
 
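And a minimal sketch of the reworked `Video` API from this diff: the constructor now takes `frames`/`fps`/`audio` directly, and `from_path` accepts optional `start_second`/`end_second` bounds (the input path is a placeholder):

```python
import numpy as np

from videopython.base.video import Video

# Read only the 5s-15s segment; FFmpeg is seeked with -ss/-t, the audio
# track is sliced to the same window, and a silent track is substituted
# when the file has no audio stream.
clip = Video.from_path("input.mp4", start_second=5.0, end_second=15.0)
print(clip.fps, len(clip.frames))

# The constructor can also be called directly now; omitting `audio`
# attaches a silent stereo 44.1 kHz track matching the video duration.
blank = Video(frames=np.zeros((48, 720, 1280, 3), dtype=np.uint8), fps=24)
print(blank.audio.metadata.duration_seconds)
```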