videopython 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package, exactly as they appear in their public registry, and is provided for informational purposes only.
- videopython/ai/understanding/transcribe.py +48 -19
- videopython/base/text/__init__.py +0 -0
- videopython/{utils/text.py → base/text/overlay.py} +383 -8
- videopython/base/text/transcription.py +121 -0
- videopython/base/utils.py +6 -0
- videopython/base/video.py +100 -58
- {videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/METADATA +91 -28
- {videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/RECORD +10 -12
- videopython/base/compose.py +0 -55
- videopython/base/transcription.py +0 -13
- videopython/utils/__init__.py +0 -3
- videopython/utils/common.py +0 -31
- videopython/utils/image.py +0 -47
- {videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/WHEEL +0 -0
- {videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/licenses/LICENSE +0 -0
videopython/ai/understanding/transcribe.py CHANGED

@@ -1,37 +1,66 @@
-from typing import Literal
+from typing import Literal, Union
 
 import whisper
+from soundpython import Audio
 
-from videopython.base.transcription import Transcription, TranscriptionSegment
+from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord
 from videopython.base.video import Video
 
 
-class
+class CreateTranscription:
+    """Unified transcription service for both audio and video."""
+
     def __init__(self, model_name: Literal["tiny", "base", "small", "medium", "large", "turbo"] = "small") -> None:
         self.model = whisper.load_model(name=model_name)
 
-    def
-    """
+    def _process_transcription_result(self, transcription_result: dict) -> Transcription:
+        """Process raw transcription result into Transcription object.
 
         Args:
-
+            transcription_result: Raw result from whisper model
 
         Returns:
-
+            Processed Transcription object
         """
-
-
+        transcription_segments = []
+        for segment in transcription_result["segments"]:
+            transcription_words = [
+                TranscriptionWord(word=word["word"], start=float(word["start"]), end=float(word["end"]))
+                for word in segment["words"]
+            ]
+            transcription_segment = TranscriptionSegment(
+                start=segment["start"], end=segment["end"], text=segment["text"], words=transcription_words
+            )
+            transcription_segments.append(transcription_segment)
+
+        return Transcription(segments=transcription_segments)
+
+    def transcribe(self, media: Union[Audio, Video]) -> Transcription:
+        """Transcribe audio or video to text.
+
+        Args:
+            media: Audio or Video to transcribe.
+
+        Returns:
+            Transcription object with segments of text and their timestamps.
+        """
+        if isinstance(media, Video):
+            # Handle video transcription
+            if media.audio.is_silent:
+                return Transcription(segments=[])
+
+            audio = media.audio.to_mono().resample(whisper.audio.SAMPLE_RATE)
+            transcription_result = self.model.transcribe(audio=audio.data, word_timestamps=True)
 
-
-
-
+        elif isinstance(media, Audio):
+            # Handle audio transcription
+            if media.is_silent:
+                return Transcription(segments=[])
 
-
+            audio = media.to_mono().resample(whisper.audio.SAMPLE_RATE)
+            transcription_result = self.model.transcribe(audio=audio.data, word_timestamps=True)
 
-
-
-            for segment in transcription["segments"]
-        ]
-        result = Transcription(segments=transcription_segments)
+        else:
+            raise TypeError(f"Unsupported media type: {type(media)}. Expected Audio or Video.")
 
-        return
+        return self._process_transcription_result(transcription_result)

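The new `CreateTranscription` class accepts either a `soundpython` `Audio` object or a `Video`, returns word-level timestamps, and yields an empty `Transcription` for silent media. A minimal usage sketch under those assumptions (the video path is a placeholder):

```python
from videopython.ai.understanding.transcribe import CreateTranscription
from videopython.base.video import Video

video = Video.from_path("<PATH_TO_VIDEO>")  # placeholder path

# "base" trades accuracy for speed; "small" is the class default.
transcriber = CreateTranscription(model_name="base")
transcription = transcriber.transcribe(video)  # a soundpython Audio object would also work

for segment in transcription.segments:
    print(f"[{segment.start:6.2f}s - {segment.end:6.2f}s] {segment.text}")
    for word in segment.words:
        print(f"    {word.word!r}: {word.start:.2f}-{word.end:.2f}")
```
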
videopython/base/text/__init__.py — File without changes

videopython/{utils/text.py → base/text/overlay.py} RENAMED

@@ -1,10 +1,21 @@
+"""
+Beware, the code below was heavily "vibe-coded".
+
+The main purpose of this file are 2 classes:
+1. `ImageText` class for creating RGBA image with rendered subtitles
+2. `TranscriptionOverlay` class, which takes the `Transcription` and `Video` objects and overlays subtitles on `Video`.
+"""
+
 from enum import Enum
 from typing import TypeAlias, Union
 
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
+from tqdm import tqdm
 
 from videopython.base.exceptions import OutOfBoundsError
+from videopython.base.text.transcription import Transcription, TranscriptionSegment
+from videopython.base.video import Video
 
 # Type aliases for clarity
 MarginType: TypeAlias = Union[int, tuple[int, int, int, int]]
@@ -319,6 +330,7 @@ class ImageText:
         font_filename: str,
         xy: PositionType,
         font_size: int | None = 11,
+        font_border_size: int = 0,
         color: RGBColor = (0, 0, 0),
         max_width: int | None = None,
         max_height: int | None = None,
@@ -333,6 +345,7 @@ class ImageText:
             font_filename: Path to the font file
             xy: Position (x,y) either as absolute pixels (int) or relative to frame (float 0-1)
            font_size: Size of the font in points, or None to auto-calculate
+            font_border_size: Size of border around text in pixels (0 for no border)
            color: RGB color of the text
            max_width: Maximum width for auto font sizing
            max_height: Maximum height for auto font sizing
@@ -355,6 +368,9 @@ class ImageText:
         if font_size is not None and font_size <= 0:
             raise ValueError("Font size must be positive")
 
+        if font_border_size < 0:
+            raise ValueError("Font border size cannot be negative")
+
         if font_size is None and (max_width is None or max_height is None):
             raise ValueError("Must set either `font_size`, or both `max_width` and `max_height`!")
         elif font_size is None:
@@ -371,6 +387,15 @@ class ImageText:
         if x < 0 or y < 0 or x + text_dimensions[0] > self.image_size[0] or y + text_dimensions[1] > self.image_size[1]:
             raise OutOfBoundsError(f"Text with size {text_dimensions} at position ({x}, {y}) is out of bounds!")
 
+        # Draw border if requested
+        if font_border_size > 0:
+            # Draw text border by drawing text in multiple positions around the main text
+            for border_x in range(-font_border_size, font_border_size + 1):
+                for border_y in range(-font_border_size, font_border_size + 1):
+                    if border_x != 0 or border_y != 0:  # Skip the center position
+                        self._draw.text((x + border_x, y + border_y), text, font=font, fill=(0, 0, 0))
+
+        # Draw the main text on top
         self._draw.text((x, y), text, font=font, fill=color)
         return text_dimensions
 
@@ -423,6 +448,46 @@ class ImageText:
         except Exception as e:
             raise ValueError(f"Error measuring text: {str(e)}")
 
+    def _get_font_baseline_offset(
+        self, base_font_filename: str, base_font_size: int, highlight_font_filename: str, highlight_font_size: int
+    ) -> int:
+        """
+        Calculate the vertical offset needed to align baselines of different fonts and sizes.
+
+        Args:
+            base_font_filename: Path to the base font file
+            base_font_size: Font size of normal text
+            highlight_font_filename: Path to the highlight font file
+            highlight_font_size: Font size of highlighted text
+
+        Returns:
+            Vertical offset in pixels to align highlighted text baseline with normal text baseline
+        """
+        base_font = self._get_font(base_font_filename, base_font_size)
+        highlight_font = self._get_font(highlight_font_filename, highlight_font_size)
+
+        # Use a reference character to get baseline metrics
+        # We use 'A' as it's a good reference for ascender height
+        ref_char = "A"
+
+        # Get bounding boxes for the reference character
+        base_bbox = base_font.getbbox(ref_char)
+        highlight_bbox = highlight_font.getbbox(ref_char)
+
+        if base_bbox is None or highlight_bbox is None:
+            return 0  # Fallback if bbox calculation fails
+
+        # The baseline offset is the difference in the top of the bounding box
+        # since getbbox returns (left, top, right, bottom) where top is negative for ascenders
+        base_ascent = -base_bbox[1]  # Distance from baseline to top of character
+        highlight_ascent = -highlight_bbox[1]  # Distance from baseline to top of character
+
+        # Calculate the offset needed to align baselines
+        # If highlighted text has a larger ascent, we need to move it down
+        baseline_offset = highlight_ascent - base_ascent
+
+        return baseline_offset
+
     def _split_lines_by_width(
         self,
         text: str,
@@ -499,12 +564,18 @@ class ImageText:
         xy: PositionType,
         box_width: Union[int, float] | None = None,
         font_size: int = 11,
+        font_border_size: int = 0,
         text_color: RGBColor = (0, 0, 0),
         background_color: RGBAColor | None = None,
         background_padding: int = 0,
         place: TextAlign = TextAlign.LEFT,
         anchor: AnchorPoint = AnchorPoint.TOP_LEFT,
         margin: MarginType = 0,
+        words: list[str] | None = None,
+        highlight_word_index: int | None = None,
+        highlight_color: RGBColor | None = None,
+        highlight_size_multiplier: float = 1.5,
+        highlight_bold_font: str | None = None,
     ) -> tuple[int, int]:
         """
         Write text in a box with advanced positioning and alignment options.
@@ -515,12 +586,18 @@ class ImageText:
             xy: Position (x,y) either as absolute pixels (int) or relative to frame (float 0-1)
             box_width: Width of the box in pixels (int) or relative to frame width (float 0-1)
             font_size: Font size in points
+            font_border_size: Size of border around text in pixels (0 for no border)
             text_color: RGB color of the text
             background_color: If set, adds background color to the text box. Expects RGBA values.
             background_padding: Number of padding pixels to add when adding text background color
             place: Text alignment within the box (TextAlign.LEFT, TextAlign.RIGHT, TextAlign.CENTER)
             anchor: Which part of the text box to anchor at the position
             margin: Margin in pixels (single value or [top, right, bottom, left])
+            words: All words occuring in text, helpful for highlighting.
+            highlight_word_index: Index of word to highlight (0-based, None to disable highlighting)
+            highlight_color: RGB color for the highlighted word (defaults to text_color if None)
+            highlight_size_multiplier: Font size multiplier for highlighted word
+            highlight_bold_font: Path to bold font file for highlighted word (defaults to font_filename if None)
 
         Returns:
             Coordinates of the lower-right corner of the written text box (x, y)
@@ -541,6 +618,25 @@ class ImageText:
         if background_padding < 0:
             raise ValueError("Background padding cannot be negative")
 
+        if font_border_size < 0:
+            raise ValueError("Font border size cannot be negative")
+
+        # Validate highlighting parameters
+        if highlight_word_index is not None:
+            if not words:
+                words = text.split()
+            if highlight_word_index < 0 or highlight_word_index >= len(words):
+                raise ValueError(
+                    f"highlight_word_index {highlight_word_index} out of range for text with {len(words)} words"
+                )
+
+            if highlight_size_multiplier <= 0:
+                raise ValueError("highlight_size_multiplier must be positive")
+
+        # Set default highlight color if not provided
+        if highlight_word_index is not None and highlight_color is None:
+            highlight_color = text_color
+
         # Process margins to determine available area
         margin_top, margin_right, margin_bottom, margin_left = self._process_margin(margin)
         available_width = self.image_size[0] - margin_left - margin_right
@@ -590,6 +686,7 @@ class ImageText:
 
         # Write lines
         current_text_height = y_pos
+        word_index_offset = 0  # Track global word index across lines
         for line in lines:
             line_dimensions = self.get_text_dimensions(font_filename, font_size, line)
 
@@ -604,14 +701,49 @@ class ImageText:
                 valid_places = [e.value for e in TextAlign]
                 raise ValueError(f"Place '{place}' is not supported. Must be one of: {', '.join(valid_places)}")
 
-            #
-
-
-
-
-
-
-
+            # Check if highlighting is needed for this line
+            if highlight_word_index is not None:
+                line_words = line.split()
+                line_start_word_index = word_index_offset
+                line_end_word_index = word_index_offset + len(line_words) - 1
+
+                # Check if the highlighted word is in this line
+                if line_start_word_index <= highlight_word_index <= line_end_word_index:
+                    self._write_line_with_highlight(
+                        line=line,
+                        font_filename=font_filename,
+                        font_size=font_size,
+                        font_border_size=font_border_size,
+                        text_color=text_color,
+                        highlight_color=highlight_color or (255, 255, 255),
+                        highlight_size_multiplier=highlight_size_multiplier,
+                        highlight_word_local_index=highlight_word_index - line_start_word_index,
+                        highlight_bold_font=highlight_bold_font,
+                        x_left=int(x_left),
+                        y_top=int(current_text_height),
+                    )
+                else:
+                    # Write normal line without highlighting
+                    self.write_text(
+                        text=line,
+                        font_filename=font_filename,
+                        xy=(x_left, current_text_height),
+                        font_size=font_size,
+                        font_border_size=font_border_size,
+                        color=text_color,
+                    )
+
+                word_index_offset += len(line_words)
+            else:
+                # Write normal line without highlighting
+                self.write_text(
+                    text=line,
+                    font_filename=font_filename,
+                    xy=(x_left, current_text_height),
+                    font_size=font_size,
+                    font_border_size=font_border_size,
+                    color=text_color,
+                )
 
             # Increment vertical position for next line
             current_text_height += line_dimensions[1]
@@ -690,6 +822,88 @@ class ImageText:
 
         return (int(x_pos + box_width), int(current_text_height))
 
+    def _write_line_with_highlight(
+        self,
+        line: str,
+        font_filename: str,
+        font_size: int,
+        font_border_size: int,
+        text_color: RGBColor,
+        highlight_color: RGBColor,
+        highlight_size_multiplier: float,
+        highlight_word_local_index: int,
+        highlight_bold_font: str | None,
+        x_left: int,
+        y_top: int,
+    ) -> None:
+        """
+        Write a line of text with one word highlighted using word-by-word rendering with baseline alignment.
+
+        Args:
+            line: The text line to render
+            font_filename: Path to the font file
+            font_size: Base font size in points
+            font_border_size: Size of border around text in pixels (0 for no border)
+            text_color: RGB color for normal text
+            highlight_color: RGB color for highlighted word
+            highlight_size_multiplier: Font size multiplier for highlighted word
+            highlight_word_local_index: Index of word to highlight within this line (0-based)
+            highlight_bold_font: Path to bold font file for highlighted word (defaults to font_filename if None)
+            x_left: Left x position for the line
+            y_top: Top y position for the line
+        """
+        # Split line into words
+        words = line.split()
+        if highlight_word_local_index >= len(words):
+            return  # Safety check
+
+        # Calculate highlighted font size and determine font files
+        highlight_font_size = int(font_size * highlight_size_multiplier)
+        highlight_font_file = highlight_bold_font if highlight_bold_font is not None else font_filename
+
+        # Calculate baseline offset for highlighted words (using the appropriate font files)
+        baseline_offset = self._get_font_baseline_offset(
+            font_filename, font_size, highlight_font_file, highlight_font_size
+        )
+
+        # Render words one by one with proper spacing
+        current_x = x_left
+
+        for i, word in enumerate(words):
+            # Determine if this is the highlighted word
+            is_highlighted = i == highlight_word_local_index
+
+            # Choose font file, size, and color based on highlighting
+            word_font_file = highlight_font_file if is_highlighted else font_filename
+            word_font_size = highlight_font_size if is_highlighted else font_size
+            word_color = highlight_color if is_highlighted else text_color
+
+            # Calculate y position with baseline alignment
+            word_y = y_top
+            if is_highlighted:
+                word_y += baseline_offset
+
+            # Render the word
+            self.write_text(
+                text=word,
+                font_filename=word_font_file,
+                xy=(current_x, word_y),
+                font_size=word_font_size,
+                font_border_size=font_border_size,
+                color=word_color,
+            )
+
+            # Calculate the width of this word for spacing
+            word_width = self.get_text_dimensions(word_font_file, word_font_size, word)[0]
+
+            # Update current_x for next word (add word width plus space)
+            current_x += word_width
+
+            # Add space between words (except after the last word)
+            if i < len(words) - 1:
+                space_width = self.get_text_dimensions(font_filename, font_size, " ")[0]
+                current_x += space_width
+
     def _find_smallest_bounding_rect(self, mask: np.ndarray) -> tuple[int, int, int, int]:
         """
         Find the smallest bounding rectangle containing non-zero values in the mask.
@@ -725,3 +939,164 @@ class ImageText:
         xmin, xmax = col_indices[[0, -1]]
 
         return xmin, xmax, ymin, ymax
+
+
+class TranscriptionOverlay:
+    def __init__(
+        self,
+        font_filename: str,
+        font_size: int = 40,
+        font_border_size: int = 2,
+        text_color: RGBColor = (255, 235, 59),
+        background_color: RGBAColor | None = (0, 0, 0, 100),
+        background_padding: int = 15,
+        position: PositionType = (0.5, 0.7),
+        box_width: Union[int, float] = 0.6,
+        text_align: TextAlign = TextAlign.CENTER,
+        anchor: AnchorPoint = AnchorPoint.CENTER,
+        margin: MarginType = 20,
+        highlight_color: RGBColor = (76, 175, 80),
+        highlight_size_multiplier: float = 1.2,
+        highlight_bold_font: str | None = None,
+    ):
+        """
+        Initialize TranscriptionOverlay effect.
+
+        Args:
+            font_filename: Path to font file for text rendering
+            font_size: Base font size for text
+            text_color: RGB color for normal text
+            font_border_size: Size of border around text in pixels (0 for no border)
+            background_color: RGBA background color (None for no background)
+            background_padding: Padding around text background
+            position: Position of text box (relative 0-1 or absolute pixels)
+            box_width: Width of text box (relative 0-1 or absolute pixels)
+            text_align: Text alignment within box
+            anchor: Anchor point for text positioning
+            margin: Margin around text box
+            highlight_color: RGB color for highlighted words
+            highlight_size_multiplier: Size multiplier for highlighted words
+            highlight_bold_font: Optional bold font for highlighting
+        """
+        self.font_filename = font_filename
+        self.font_size = font_size
+        self.text_color = text_color
+        self.font_border_size = font_border_size
+        self.background_color = background_color
+        self.background_padding = background_padding
+        self.position = position
+        self.box_width = box_width
+        self.text_align = text_align
+        self.anchor = anchor
+        self.margin = margin
+        self.highlight_color = highlight_color
+        self.highlight_size_multiplier = highlight_size_multiplier
+        self.highlight_bold_font = highlight_bold_font
+
+        # Cache for text overlays to avoid regenerating identical frames
+        self._overlay_cache: dict[tuple[str, int | None], np.ndarray] = {}
+
+    def _get_active_segment(self, transcription: Transcription, timestamp: float) -> TranscriptionSegment | None:
+        """Get the transcription segment active at the given timestamp."""
+        for segment in transcription.segments:
+            if segment.start <= timestamp <= segment.end:
+                return segment
+        return None
+
+    def _get_active_word_index(self, segment: TranscriptionSegment, timestamp: float) -> int | None:
+        """Get the index of the word being spoken at the given timestamp within a segment."""
+        for i, word in enumerate(segment.words):
+            if word.start <= timestamp <= word.end:
+                return i
+        return None
+
+    def _create_text_overlay(
+        self, video_shape: tuple[int, int, int], segment: TranscriptionSegment, highlight_word_index: int | None
+    ) -> np.ndarray:
+        """Create a text overlay image for the given segment and highlight."""
+        # Use video frame dimensions for overlay
+        height, width = video_shape[:2]
+
+        # Create cache key based on segment text and highlight
+        cache_key = (segment.text, highlight_word_index)
+        if cache_key in self._overlay_cache:
+            return self._overlay_cache[cache_key]
+
+        # Create ImageText with video dimensions
+        img_text = ImageText(image_size=(width, height), background=(0, 0, 0, 0))
+
+        # Write text with highlighting
+        img_text.write_text_box(
+            text=segment.text,
+            font_filename=self.font_filename,
+            xy=self.position,
+            box_width=self.box_width,
+            font_size=self.font_size,
+            font_border_size=self.font_border_size,
+            text_color=self.text_color,
+            background_color=self.background_color,
+            background_padding=self.background_padding,
+            place=self.text_align,
+            anchor=self.anchor,
+            margin=self.margin,
+            words=[w.word for w in segment.words],
+            highlight_word_index=highlight_word_index,
+            highlight_color=self.highlight_color,
+            highlight_size_multiplier=self.highlight_size_multiplier,
+            highlight_bold_font=self.highlight_bold_font,
+        )
+
+        overlay_image = img_text.img_array
+
+        # Cache the overlay
+        self._overlay_cache[cache_key] = overlay_image
+
+        return overlay_image
+
+    def apply(self, video: Video, transcription: Transcription) -> Video:
+        """Apply transcription overlay to video frames."""
+        print("Applying transcription overlay...")
+
+        new_frames = []
+
+        for frame_idx, frame in enumerate(tqdm(video.frames)):
+            # Calculate timestamp for this frame
+            timestamp = frame_idx / video.fps
+
+            # Get active segment at this timestamp
+            active_segment = self._get_active_segment(transcription, timestamp)
+
+            if active_segment is None:
+                # No active transcription, keep original frame
+                new_frames.append(frame)
+                continue
+
+            # Get active word index for highlighting
+            highlight_word_index = self._get_active_word_index(active_segment, timestamp)
+
+            # Create text overlay
+            text_overlay = self._create_text_overlay(video.frame_shape, active_segment, highlight_word_index)
+
+            # Apply overlay to frame
+            overlaid_frame = self._apply_overlay_to_frame(frame, text_overlay)
+            new_frames.append(overlaid_frame)
+
+        # Create new video with overlaid frames
+        new_video = Video.from_frames(np.array(new_frames), fps=video.fps)
+        new_video.audio = video.audio  # Preserve audio
+
+        return new_video
+
+    def _apply_overlay_to_frame(self, frame: np.ndarray, overlay: np.ndarray) -> np.ndarray:
+        """Apply a text overlay to a single frame."""
+
+        # Convert frame to PIL Image
+        frame_pil = Image.fromarray(frame)
+
+        # Convert overlay to PIL Image
+        overlay_pil = Image.fromarray(overlay)
+
+        # Paste overlay onto frame using alpha channel
+        frame_pil.paste(overlay_pil, (0, 0), overlay_pil)
+
+        return np.array(frame_pil)

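The `font_border_size` option implements the outline as a manual stroke: the text is stamped in black at every offset within the border radius, then the colored text is drawn on top. A standalone Pillow sketch of the same idea (font path, sizes, and colors are illustrative assumptions, not values from the package; recent Pillow releases can also do this natively via the `stroke_width` argument of `ImageDraw.text`):

```python
from PIL import Image, ImageDraw, ImageFont

image = Image.new("RGBA", (640, 160), (0, 0, 0, 0))  # transparent canvas, like the overlay frames
draw = ImageDraw.Draw(image)
font = ImageFont.truetype("DejaVuSans.ttf", 48)  # assumed font file

text, x, y, border = "Subtitles", 20, 40, 2
for dx in range(-border, border + 1):
    for dy in range(-border, border + 1):
        if dx or dy:  # skip the center position
            draw.text((x + dx, y + dy), text, font=font, fill=(0, 0, 0))  # black border pass
draw.text((x, y), text, font=font, fill=(255, 235, 59))  # main text on top

image.save("bordered_text.png")
```
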
videopython/base/text/transcription.py ADDED

@@ -0,0 +1,121 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass
+class TranscriptionWord:
+    start: float
+    end: float
+    word: str
+
+
+@dataclass
+class TranscriptionSegment:
+    start: float
+    end: float
+    text: str
+    words: list[TranscriptionWord]
+
+
+@dataclass
+class Transcription:
+    segments: list[TranscriptionSegment]
+
+    def offset(self, time: float) -> Transcription:
+        """Return a new Transcription with all timings offset by the provided time value."""
+        offset_segments = []
+
+        for segment in self.segments:
+            offset_words = []
+            for word in segment.words:
+                offset_words.append(TranscriptionWord(start=word.start + time, end=word.end + time, word=word.word))
+
+            offset_segments.append(
+                TranscriptionSegment(
+                    start=segment.start + time, end=segment.end + time, text=segment.text, words=offset_words
+                )
+            )
+
+        return Transcription(segments=offset_segments)
+
+    def standardize_segments(self, *, time: float | None = None, num_words: int | None = None) -> Transcription:
+        """Return a new Transcription with standardized segments.
+
+        Args:
+            time: Maximum duration in seconds for each segment
+            num_words: Exact number of words per segment
+
+        Raises:
+            ValueError: If both time and num_words are provided or if neither is provided
+        """
+        if (time is None) == (num_words is None):
+            raise ValueError("Exactly one of 'time' or 'num_words' must be provided")
+
+        if time is not None and time <= 0:
+            raise ValueError("Time must be positive")
+
+        if num_words is not None and num_words <= 0:
+            raise ValueError("Number of words must be positive")
+
+        # Collect all words from all segments
+        all_words = []
+        for segment in self.segments:
+            all_words.extend(segment.words)
+
+        if not all_words:
+            return Transcription(segments=[])
+
+        standardized_segments = []
+
+        if time is not None:
+            # Group words by time constraint
+            current_words = []
+            current_start = None
+
+            for word in all_words:
+                if current_start is None:
+                    current_start = word.start
+                    current_words = [word]
+                elif word.end - current_start <= time:
+                    current_words.append(word)
+                else:
+                    # Create segment from current words
+                    if current_words:
+                        segment_text = " ".join(w.word for w in current_words)
+                        standardized_segments.append(
+                            TranscriptionSegment(
+                                start=current_start,
+                                end=current_words[-1].end,
+                                text=segment_text,
+                                words=current_words.copy(),
+                            )
+                        )
+
+                    # Start new segment
+                    current_start = word.start
+                    current_words = [word]
+
+            # Add final segment
+            if current_words:
+                segment_text = " ".join(w.word for w in current_words)
+                standardized_segments.append(
+                    TranscriptionSegment(
+                        start=current_start,  # type: ignore
+                        end=current_words[-1].end,
+                        text=segment_text,
+                        words=current_words.copy(),
+                    )
+                )
+        elif num_words is not None:
+            # Group words by word count constraint
+            for i in range(0, len(all_words), num_words):
+                segment_words = all_words[i : i + num_words]
+                segment_text = " ".join(w.word for w in segment_words)
+                standardized_segments.append(
+                    TranscriptionSegment(
+                        start=segment_words[0].start, end=segment_words[-1].end, text=segment_text, words=segment_words
+                    )
+                )
+
+        return Transcription(segments=standardized_segments)

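Since `Transcription`, `TranscriptionSegment`, and `TranscriptionWord` are plain dataclasses, transcriptions can also be built or reshaped by hand before overlaying. A small sketch with made-up timings:

```python
from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord

words = [
    TranscriptionWord(start=0.0, end=0.4, word="Dogs"),
    TranscriptionWord(start=0.4, end=0.8, word="playing"),
    TranscriptionWord(start=0.8, end=1.1, word="in"),
    TranscriptionWord(start=1.1, end=1.3, word="the"),
    TranscriptionWord(start=1.3, end=1.8, word="park"),
]
transcription = Transcription(
    segments=[TranscriptionSegment(start=0.0, end=1.8, text="Dogs playing in the park", words=words)]
)

# Re-chunk into two-word segments, e.g. for snappier subtitles.
short_segments = transcription.standardize_segments(num_words=2)

# Shift every timestamp by 5 seconds, e.g. after prepending a 5-second intro clip.
shifted = transcription.offset(5.0)
```
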
videopython/base/video.py CHANGED

@@ -11,7 +11,7 @@ from typing import Literal, get_args
 import numpy as np
 from soundpython import Audio
 
-from videopython.utils
+from videopython.base.utils import generate_random_name
 
 ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]
 
@@ -155,7 +155,6 @@ class Video:
         width = metadata.width
         height = metadata.height
         fps = metadata.fps
-        total_frames = metadata.frame_count
         total_duration = metadata.total_seconds
 
         # Validate time bounds
@@ -166,99 +165,128 @@ class Video:
         if start_second is not None and end_second is not None and start_second >= end_second:
             raise ValueError("start_second must be less than end_second")
 
-        #
-
-        end_frame = int(end_second * fps) if end_second is not None else total_frames
+        # Build FFmpeg command with improved segment handling
+        ffmpeg_cmd = ["ffmpeg"]
 
-        #
-        start_frame = max(0, start_frame)
-        end_frame = min(total_frames, end_frame)
-        segment_frames = end_frame - start_frame
-
-        # Set up FFmpeg command for raw video extraction with time bounds
-        ffmpeg_cmd = [
-            "ffmpeg",
-            "-i",
-            path,
-        ]
-
-        # Add seek and duration options if specified
+        # Add seek option BEFORE input for more efficient seeking
         if start_second is not None:
             ffmpeg_cmd.extend(["-ss", str(start_second)])
+
+        ffmpeg_cmd.extend(["-i", path])
+
+        # Add duration AFTER input for more precise timing
         if end_second is not None and start_second is not None:
             duration = end_second - start_second
             ffmpeg_cmd.extend(["-t", str(duration)])
         elif end_second is not None:
             ffmpeg_cmd.extend(["-t", str(end_second)])
 
+        # Output format settings - removed problematic -vsync 0
         ffmpeg_cmd.extend(
             [
                 "-f",
                 "rawvideo",
                 "-pix_fmt",
                 "rgb24",
-                "-vsync",
-                "0",
                 "-vcodec",
                 "rawvideo",
+                "-avoid_negative_ts",
+                "make_zero",  # Handle timing issues
                 "-y",
                 "pipe:1",
             ]
         )
 
-        # Start FFmpeg process
+        # Start FFmpeg process with stderr redirected to avoid deadlock
        process = subprocess.Popen(
            ffmpeg_cmd,
            stdout=subprocess.PIPE,
-            stderr=subprocess.
-            bufsize=10**8,  # Use large buffer
+            stderr=subprocess.DEVNULL,  # Redirect stderr to avoid deadlock
+            bufsize=10**8,  # Use large buffer for efficient I/O
        )
 
         # Calculate frame size in bytes
         frame_size = width * height * 3  # 3 bytes per pixel for RGB
 
-        #
-
+        # Estimate frame count for pre-allocation
+        if start_second is not None and end_second is not None:
+            estimated_duration = end_second - start_second
+        elif end_second is not None:
+            estimated_duration = end_second
+        elif start_second is not None:
+            estimated_duration = total_duration - start_second
+        else:
+            estimated_duration = total_duration
+
+        # Add 10% buffer to handle frame rate variations and rounding
+        estimated_frames = int(estimated_duration * fps * 1.1) + 10
 
-        #
+        # Pre-allocate numpy array
+        frames = np.empty((estimated_frames, height, width, 3), dtype=np.uint8)
         frames_read = 0
-        for frame_idx in range(0, segment_frames, read_batch_size):
-            batch_end = min(frame_idx + read_batch_size, segment_frames)
-            batch_size = batch_end - frame_idx
 
-
-
-
-
+        try:
+            while frames_read < estimated_frames:
+                # Calculate remaining frames to read
+                remaining_frames = estimated_frames - frames_read
+                batch_size = min(read_batch_size, remaining_frames)
 
-
-
+                # Read batch of data
+                batch_data = process.stdout.read(frame_size * batch_size)  # type: ignore
 
-
-
-            if actual_frames > 0:
-                batch_frames = batch_frames[: actual_frames * height * width * 3]
-                batch_frames = batch_frames.reshape(-1, height, width, 3)
+                if not batch_data:
+                    break
 
-                #
-
-                frames[frame_idx:end_idx] = batch_frames
-                frames_read += actual_frames
-            else:
-                break
+                # Convert to numpy array
+                batch_frames = np.frombuffer(batch_data, dtype=np.uint8)
 
-
-
-        process.stderr.close()  # type: ignore
-        process.wait()
+                # Calculate how many complete frames we got
+                complete_frames = len(batch_frames) // (height * width * 3)
 
-
-
-            raise ValueError(f"FFmpeg error: {stderr_output}")
+                if complete_frames == 0:
+                    break
 
-
-
-
+                # Only keep complete frames
+                complete_data = batch_frames[: complete_frames * height * width * 3]
+                batch_frames_array = complete_data.reshape(complete_frames, height, width, 3)
+
+                # Check if we have room in pre-allocated array
+                if frames_read + complete_frames > estimated_frames:
+                    # Need to expand array - this should be rare with our buffer
+                    new_size = max(estimated_frames * 2, frames_read + complete_frames + 100)
+                    new_frames = np.empty((new_size, height, width, 3), dtype=np.uint8)
+                    new_frames[:frames_read] = frames[:frames_read]
+                    frames = new_frames
+                    estimated_frames = new_size
+
+                # Store batch in pre-allocated array
+                end_idx = frames_read + complete_frames
+                frames[frames_read:end_idx] = batch_frames_array
+                frames_read += complete_frames
+
+        finally:
+            # Ensure process is properly terminated
+            if process.poll() is None:
+                process.terminate()
+                try:
+                    process.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    process.kill()
+                    process.wait()
+
+            # Clean up pipes
+            if process.stdout:
+                process.stdout.close()
+
+        # Check if FFmpeg had an error (non-zero return code)
+        if process.returncode not in (0, None) and frames_read == 0:
+            raise ValueError(f"FFmpeg failed to process video (return code: {process.returncode})")
+
+        if frames_read == 0:
+            raise ValueError("No frames were read from the video")
+
+        # Trim the pre-allocated array to actual frames read
+        frames = frames[:frames_read]  # type: ignore
 
         # Load audio for the specified segment
         try:
@@ -270,8 +298,8 @@ class Video:
             audio = audio.slice(start_seconds=audio_start, end_seconds=audio_end)
         except Exception:
             print(f"No audio found for `{path}`, adding silent track!")
-            # Create silent audio
-            segment_duration =
+            # Create silent audio based on actual frames read
+            segment_duration = frames_read / fps
             audio = Audio.create_silent(duration_seconds=round(segment_duration, 2), stereo=True, sample_rate=44100)
 
         return cls(frames=frames, fps=fps, audio=audio)
@@ -421,6 +449,20 @@ class Video:
             raise
 
     def add_audio(self, audio: Audio, overlay: bool = True) -> None:
+        video_duration = self.total_seconds
+        audio_duration = audio.metadata.duration_seconds
+
+        if audio_duration > video_duration:
+            audio = audio.slice(start_seconds=0, end_seconds=video_duration)
+        elif audio_duration < video_duration:
+            silence_duration = video_duration - audio_duration
+            silence = Audio.create_silent(
+                duration_seconds=silence_duration,
+                stereo=audio.metadata.channels == 2,
+                sample_rate=audio.metadata.sample_rate,
+            )
+            audio = audio.concat(silence)
+
         if self.audio.is_silent:
             self.audio = audio
         elif overlay:

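The rewritten `from_path` decoding loop seeks with `-ss` before the input, pipes raw RGB24 frames out of FFmpeg, and keeps only complete frames from each read. A simplified standalone sketch of that decoding approach, without the segment bounds or array pre-allocation (the input path and dimensions are assumptions):

```python
import subprocess

import numpy as np

path, width, height = "input.mp4", 1280, 720  # assumed file and frame dimensions
frame_size = width * height * 3  # bytes per RGB24 frame
batch_frames = 64  # frames requested per read

cmd = ["ffmpeg", "-i", path, "-f", "rawvideo", "-pix_fmt", "rgb24", "-vcodec", "rawvideo", "-y", "pipe:1"]
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, bufsize=10**8)

chunks = []
try:
    while True:
        data = process.stdout.read(frame_size * batch_frames)
        if not data:
            break
        complete = len(data) // frame_size  # only keep whole frames
        if complete == 0:
            break
        batch = np.frombuffer(data, dtype=np.uint8)[: complete * frame_size]
        chunks.append(batch.reshape(complete, height, width, 3))
finally:
    # Mirror the diff's cleanup: terminate a still-running process and close the pipe.
    if process.poll() is None:
        process.terminate()
        process.wait()
    if process.stdout:
        process.stdout.close()

frames = np.concatenate(chunks) if chunks else np.empty((0, height, width, 3), dtype=np.uint8)
print(f"Decoded {len(frames)} frames")
```
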
{videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: videopython
-Version: 0.4.1
+Version: 0.5.0
 Summary: Minimal video generation and processing library.
 Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
 Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -8,12 +8,13 @@ Project-URL: Documentation, https://github.com/bartwojtowicz/videopython/
 Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
 License: Apache-2.0
 License-File: LICENSE
-Keywords: editing,generation,movie,opencv,python,video,videopython
+Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Requires-Python: <3.13,>=3.10
 Requires-Dist: numpy>=1.25.2
 Requires-Dist: opencv-python>=4.9.0.80
@@ -38,11 +39,11 @@ Description-Content-Type: text/markdown
 
 # About
 
-
+Videopython is a minimal video generation and processing library designed with short-form videos in mind, with focus on simplicity and ease of use for both humans and AI agents.
 
-
+# Setup
 
-
+## Install ffmpeg
 ```bash
 # Install with brew for MacOS:
 brew install ffmpeg
@@ -50,16 +51,22 @@ brew install ffmpeg
 sudo apt-get install ffmpeg
 ```
 
-
+## Install library
+
 ```bash
+# Install with your favourite package manager
+uv add videopython --extra ai
+
+# pip install works as well :)
 pip install videopython[ai]
 ```
-> You can install without `[ai]` dependencies for basic video handling and processing.
-> The funcionalities found in `videopython.ai` won't work.
 
-
+> You can install without `[ai]` dependencies for basic video handling and processing.
+> The functionalities found in `videopython.ai` won't work.
+
+# Usage examples
 
-
+## Basic video editing
 
 ```python
 from videopython.base.video import Video
@@ -90,6 +97,8 @@ video.add_audio_from_file("tests/test_data/test_audio.mp3")
 savepath = video.save()
 ```
 
+## AI powered examples
+
 ### Video Generation
 
 > Using Nvidia A40 or better is recommended for the `videopython.ai` module.
@@ -97,7 +106,6 @@ savepath = video.save()
 # Generate image and animate it
 from videopython.ai.generation import ImageToVideo
 from videopython.ai.generation import TextToImage
-from videopython.ai.generation import TextToMusic
 
 image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
 video = ImageToVideo().generate_video(image=image, fps=24)
@@ -105,27 +113,82 @@ video = ImageToVideo().generate_video(image=image, fps=24)
 # Video generation directly from prompt
 from videopython.ai.generation import TextToVideo
 video_gen = TextToVideo()
-video = video_gen.generate_video("Dogs playing in the
+video = video_gen.generate_video("Dogs playing in the park")
 for _ in range(10):
-    video += video_gen.generate_video("Dogs playing in the
-
-# Cut the first 2 seconds
-from videopython.base.transforms import CutSeconds
-transformed_video = CutSeconds(start_second=0, end_second=2).apply(video.copy())
-
-# Upsample to 30 FPS
-from videopython.base.transforms import ResampleFPS
-transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)
+    video += video_gen.generate_video("Dogs playing in the park")
+```
 
-
-
-
+### Audio generation
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
 
-#
-
+# Generate music on top of video
+from videopython.ai.generation import TextToMusic
 text_to_music = TextToMusic()
 audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
-
+video.add_audio(audio=audio)
+
+# Add TTS on top of video
+from videopython.ai.generation import TextToSpeech
+text_to_speech = TextToSpeech()
+audio = text_to_speech.generate_audio("Woof woof woof! Woooooof!")
+video.add_audio(audio=audio)
+```
+
+### Generate and overlay subtitles
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate transcription with timestamps
+from videopython.ai.understanding.transcribe import CreateTranscription
+transcription = CreateTranscription("base").transcribe(video)
+# Initialise object for overlaying. See `TranscriptionOverlay` to see detailed configuration options.
+from videopython.base.text.overlay import TranscriptionOverlay
+transcription_overlay = TranscriptionOverlay(font_filename="src/tests/test_data/test_font.ttf")
 
-
+video = transcription_overlay.apply(video, transcription)
+video.save()
+```
+
+# Development notes
+
+## Project structure
+
+Source code of the project can be found under `src/` directory, along with separate directories for unit tests and mypy stubs.
+```
+.
+└── src
+    ├── stubs # Contains stubs for mypy
+    ├── tests # Unit tests
+    └── videopython # Library code
+```
+
+----
+
+The `videopython` library is divided into 2 separate high-level modules:
+* `videopython.base`: Contains base classes for handling videos and for basic video editing. There are no imports from `videopython.ai` within the `base` module, which allows users to install light-weight base dependencies to do simple video operations.
+* `videopython.ai`: Contains AI-powered functionalities for video generation. It has its own `ai` dependency group, which contains all dependencies required to run AI models.
+
+## Running locally
+
+We are using [uv](https://docs.astral.sh/uv/) as project and package manager. Once you clone the repo and install uv locally, you can use it to sync the dependencies.
+```bash
+uv sync --all-extras
+```
+
+To run the unit tests, you can simply run:
+```bash
+uv run pytest
+```
+
+We also use [Ruff](https://docs.astral.sh/ruff/) for linting/formatting and [mypy](https://github.com/python/mypy) as type checker.
+```bash
+# Run formatting
+uv run ruff format
+# Run linting and apply fixes
+uv run ruff check --fix
+# Run type checks
+uv run mypy src/
 ```

{videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/RECORD CHANGED

@@ -6,21 +6,19 @@ videopython/ai/generation/audio.py,sha256=CNf6ZeV3iU4CU0Kq8HtDLwLPP2ABq9AGQD1TBO
 videopython/ai/generation/image.py,sha256=gS0zqzyIoCvjTjfks31ApG8lX0nUKXWRRgFGGLN4RjM,654
 videopython/ai/generation/video.py,sha256=206YON_XjPTYyjIJ3j5uBgd_yHmCDg7SqbkIU9GzEgw,1831
 videopython/ai/understanding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/ai/understanding/transcribe.py,sha256=
+videopython/ai/understanding/transcribe.py,sha256=hm2f5Fm1O_tMrSmUlcUdl_rQRhc5Sz_kaV4tnJ4IxbQ,2557
 videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 videopython/base/combine.py,sha256=XC_pzyhbIh6h0fmxX1LhhhtlmOBbUQX9Y4EtDJqQn8g,1900
-videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
 videopython/base/effects.py,sha256=1RbRLTQD0V26DBc4jbRCDI4eGr6-TyBdX-Ia2USKxmc,7554
 videopython/base/exceptions.py,sha256=68_16lUPOR9_zhWdeBGS8_NFI32VbrcoDbN5KHHg0_w,44
-videopython/base/transcription.py,sha256=FloqvY-OlBQPOCkPnSx6R7azn4smD5-JYd-pMNssuYw,196
 videopython/base/transforms.py,sha256=FDh-8EgQoZxB6Gv-T15kZGctcu9_4XHsTy_n7kgxlQw,5828
 videopython/base/transitions.py,sha256=P1bBsxugf5i0JEtx7MoRgxWSIDcBli-0QucRwBIFGqs,3687
-videopython/base/
-videopython/
-videopython/
-videopython/
-videopython/
-videopython-0.
-videopython-0.
-videopython-0.
-videopython-0.
+videopython/base/utils.py,sha256=bAwIagHvd1NWu8UYAsS-pDm38E4R8qRfeHvWk-O2__0,125
+videopython/base/video.py,sha256=RxKHmR39EEvBa5m2xFDNj4_mq213RUG3NQ_lhk5U-PA,20462
+videopython/base/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/base/text/overlay.py,sha256=EiBDSsnn2pSGeWGajblUxovcP_IdA6gk2zZ5rsjhdI8,44434
+videopython/base/text/transcription.py,sha256=9c3FRBr7RkialHhdfSwEX303QnIt1sCSiXoId9_DRkk,4246
+videopython-0.5.0.dist-info/METADATA,sha256=FTo8Bo3YLhp9bGTrctiehMMksQwecH1DN84JO5RydyU,6574
+videopython-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+videopython-0.5.0.dist-info/licenses/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
+videopython-0.5.0.dist-info/RECORD,,

videopython/base/compose.py DELETED

@@ -1,55 +0,0 @@
-from itertools import repeat
-from multiprocessing import Pool
-
-from videopython.base.transforms import TransformationPipeline
-from videopython.base.transitions import InstantTransition, Transition
-from videopython.base.video import Video
-
-
-class VideoComposer:
-    """
-    Composes multiple Videos into single video using selected transformations
-    on each video and applies transitions.
-    """
-
-    def __init__(
-        self,
-        transformation_pipeline: TransformationPipeline | None = None,
-        transition: Transition = InstantTransition(),
-    ):
-        """Initializes VideoComposer.
-
-        Args:
-            transformation_pipeline: Pipeline of transformations to apply on each video.
-            transition: Transition to apply between videos
-        """
-        self.transition = transition
-        self.transformation_pipeline = transformation_pipeline
-
-    def _apply_transformation(self, video: Video, transformation_pipeline: TransformationPipeline) -> Video:
-        return transformation_pipeline(video)
-
-    def compose(self, videos: list[Video]) -> Video:
-        # Apply transformation on each video using multiprocessing pool:
-        if self.transformation_pipeline:
-            transformed_videos = []
-            with Pool() as pool:
-                transformed_videos = pool.starmap(
-                    self._apply_transformation,
-                    zip(videos, repeat(self.transformation_pipeline)),
-                )
-            videos = transformed_videos
-
-        # Check if videos are compatible:
-        self._compatibility_check(videos)
-
-        # Apply transition:
-        final_video = videos.pop(0)
-        for _ in range(len(videos)):
-            final_video = self.transition.apply((final_video, videos.pop(0)))
-
-        return final_video
-
-    @staticmethod
-    def _compatibility_check(videos: list[Video]):
-        assert all([videos[0].metadata.can_be_merged_with(other_video.metadata) for other_video in videos])

videopython/utils/__init__.py DELETED
videopython/utils/common.py DELETED

@@ -1,31 +0,0 @@
-import time
-import uuid
-from pathlib import Path
-from typing import Callable
-
-
-def generate_random_name(suffix=".mp4"):
-    """Generates random name."""
-    return f"{uuid.uuid4()}{suffix}"
-
-
-def timeit(func: Callable):
-    """Decorator to measure execution time of a function."""
-
-    def timed(*args, **kwargs):
-        start = time.time()
-        result = func(*args, **kwargs)
-        end = time.time()
-        print(f"Execution time: {end - start:.3f} seconds.")
-        return result
-
-    return timed
-
-
-def check_path(path: str, dir_exists: bool = True, suffix: str | None = None) -> str:
-    fullpath = Path(path).resolve()
-    if dir_exists and not fullpath.parent.exists():
-        raise ValueError(f"Directory `{fullpath.parent}` does not exist!")
-    if suffix and suffix != fullpath.suffix:
-        raise ValueError(f"Required suffix `{suffix}` does not match the file suffix `{fullpath.suffix}`")
-    return str(fullpath)

videopython/utils/image.py DELETED

@@ -1,47 +0,0 @@
-from typing import Literal
-
-import cv2
-import numpy as np
-
-from videopython.base.video import Video
-
-
-class SlideOverImage:
-    def __init__(
-        self,
-        direction: Literal["left", "right"],
-        video_shape: tuple[int, int] = (1080, 1920),
-        fps: float = 24.0,
-        length_seconds: float = 1.0,
-    ) -> None:
-        self.direction = direction
-        self.video_width, self.video_height = video_shape
-        self.fps = fps
-        self.length_seconds = length_seconds
-
-    def apply(self, image: np.ndarray) -> Video:
-        image = self._resize(image)
-        max_offset = image.shape[1] - self.video_width
-        frame_count = round(self.fps * self.length_seconds)
-
-        deltas = np.linspace(0, max_offset, frame_count)
-        frames = []
-
-        for delta in deltas:
-            if self.direction == "right":
-                frame = image[:, round(delta) : round(delta) + self.video_width]
-            elif self.direction == "left":
-                frame = image[:, image.shape[1] - round(delta) - self.video_width : image.shape[1] - round(delta)]
-            frames.append(frame)
-
-        return Video.from_frames(frames=np.stack(frames, axis=0), fps=self.fps)
-
-    def _resize(self, image: np.ndarray) -> np.ndarray:
-        resize_factor = image.shape[0] / self.video_height
-        resize_dims = (round(image.shape[1] / resize_factor), round(image.shape[0] / resize_factor))  # width, height
-        image = cv2.resize(image, resize_dims)
-        if self.video_height > image.shape[0] or self.video_width > image.shape[1]:
-            raise ValueError(
-                f"Image `{image.shape}` is too small for the video frame `({self.video_width}, {self.video_height})`!"
-            )
-        return image

{videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/WHEEL — File without changes
{videopython-0.4.1.dist-info → videopython-0.5.0.dist-info}/licenses/LICENSE — File without changes