videopython 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


@@ -1,6 +1,3 @@
- import io
- import os
-
  import torch
  from diffusers import DiffusionPipeline
  from PIL import Image
@@ -0,0 +1,37 @@
+ from typing import Literal
+
+ import whisper
+
+ from videopython.base.transcription import Transcription, TranscriptionSegment
+ from videopython.base.video import Video
+
+
+ class VideoTranscription:
+     def __init__(self, model_name: Literal["tiny", "base", "small", "medium", "large", "turbo"] = "small") -> None:
+         self.model = whisper.load_model(name=model_name)
+
+     def transcribe_video(self, video: Video) -> Transcription:
+         """Transcribes video to text.
+
+         Args:
+             video: Video to transcribe.
+
+         Returns:
+             Transcription with segments of text and their start and end times.
+         """
+         if video.audio.is_silent:
+             return Transcription(segments=[])
+
+         audio = video.audio.to_mono()
+         audio = audio.resample(whisper.audio.SAMPLE_RATE)
+         audio_data = audio.data
+
+         transcription = self.model.transcribe(audio=audio_data, word_timestamps=True)
+
+         transcription_segments = [
+             TranscriptionSegment(start=segment["start"], end=segment["end"], text=segment["text"])
+             for segment in transcription["segments"]
+         ]
+         result = Transcription(segments=transcription_segments)
+
+         return result
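
The new VideoTranscription class wraps OpenAI Whisper: it downmixes the video's audio to mono, resamples it to Whisper's expected sample rate, and maps Whisper's segments onto the new Transcription dataclasses. A minimal usage sketch follows; the file path is a placeholder, and Video.from_path is assumed to be the existing loader in videopython.base.video (the import path of VideoTranscription itself is not shown in this diff).

from videopython.base.video import Video

video = Video.from_path("input.mp4")  # placeholder path; assumes the existing Video.from_path loader
transcription = VideoTranscription(model_name="small").transcribe_video(video)
for segment in transcription.segments:
    print(f"[{segment.start:.2f}s-{segment.end:.2f}s] {segment.text}")
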
@@ -0,0 +1,45 @@
+ from typing import Literal
+
+ import numpy as np
+
+ from videopython.base.transforms import ResampleFPS, Resize
+ from videopython.base.video import Video
+
+
+ class StackVideos:
+     def __init__(self, mode: Literal["horizontal", "vertical"]) -> None:
+         self.mode = mode
+
+     def _validate(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+         video1, video2 = self._align_shapes(video1, video2)
+         video1, video2 = self._align_fps(video1, video2)
+         video1, video2 = self._align_duration(video1, video2)
+         return video1, video2
+
+     def _align_fps(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+         if video1.fps > video2.fps:
+             video1 = ResampleFPS(fps=video2.fps).apply(video1)
+         elif video1.fps < video2.fps:
+             video2 = ResampleFPS(fps=video1.fps).apply(video2)
+         return (video1, video2)
+
+     def _align_shapes(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+         if self.mode == "horizontal":
+             video2 = Resize(height=video1.metadata.height).apply(video2)
+         elif self.mode == "vertical":
+             video2 = Resize(width=video1.metadata.width).apply(video2)
+         return (video1, video2)
+
+     def _align_duration(self, video1: Video, video2: Video) -> tuple[Video, Video]:
+         if len(video1.frames) > len(video2.frames):
+             video1 = video1[: len(video2.frames)]
+         elif len(video1.frames) < len(video2.frames):
+             video2 = video2[: len(video1.frames)]
+         return (video1, video2)
+
+     def apply(self, videos: tuple[Video, Video]) -> Video:
+         videos = self._validate(*videos)
+         axis = 1 if self.mode == "vertical" else 2
+         new_frames = np.concatenate((videos[0].frames, videos[1].frames), axis=axis)
+         new_audio = videos[0].audio.overlay(videos[1].audio)
+         return Video(frames=new_frames, fps=videos[0].fps, audio=new_audio)
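
StackVideos concatenates two clips along the width axis (axis 2, "horizontal") or the height axis (axis 1, "vertical") after resizing the second clip to match, resampling both to the lower FPS, trimming both to the shorter duration, and overlaying the audio tracks. A minimal usage sketch; the paths are placeholders and Video.from_path is assumed to be the existing loader (StackVideos' own import path is not shown in this diff).

from videopython.base.video import Video

left = Video.from_path("left.mp4")    # placeholder inputs
right = Video.from_path("right.mp4")
stacked = StackVideos(mode="horizontal").apply((left, right))  # right is resized to left's height before stacking
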
@@ -156,13 +156,13 @@ class Zoom(Effect):

          width = video.metadata.width
          height = video.metadata.height
-         crop_sizes_w, crop_sizes_h = np.linspace(width // self.zoom_factor, width, n_frames), np.linspace(
-             height // self.zoom_factor, height, n_frames
+         crop_sizes_w, crop_sizes_h = (
+             np.linspace(width // self.zoom_factor, width, n_frames),
+             np.linspace(height // self.zoom_factor, height, n_frames),
          )

          if self.mode == "in":
              for frame, w, h in tqdm(zip(video.frames, reversed(crop_sizes_w), reversed(crop_sizes_h))):
-
                  x = width / 2 - w / 2
                  y = height / 2 - h / 2

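The Zoom change is purely cosmetic: both the old and the new form build the same per-frame crop-size schedules with np.linspace, running from width // zoom_factor up to the full width (and likewise for height). For illustration, with made-up values:

import numpy as np

width, zoom_factor, n_frames = 1920, 2.0, 5
crop_sizes_w = np.linspace(width // zoom_factor, width, n_frames)
# array([ 960., 1200., 1440., 1680., 1920.])
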
@@ -0,0 +1,13 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class TranscriptionSegment:
+     start: float
+     end: float
+     text: str
+
+
+ @dataclass
+ class Transcription:
+     segments: list[TranscriptionSegment]
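
The transcription result types are plain dataclasses, so they can be constructed and inspected directly; for example (values are made up):

from videopython.base.transcription import Transcription, TranscriptionSegment

segment = TranscriptionSegment(start=0.0, end=1.5, text="Hello world")  # made-up values
transcription = Transcription(segments=[segment])
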
@@ -1,7 +1,6 @@
  from abc import ABC, abstractmethod
  from enum import Enum
  from multiprocessing import Pool
- from typing import Literal

  import cv2
  import numpy as np
@@ -154,7 +153,6 @@ class CropMode(Enum):


  class Crop(Transformation):
-
      def __init__(self, width: int, height: int, mode: CropMode = CropMode.CENTER):
          self.width = width
          self.height = height