videopython 0.2.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (28)
  1. {videopython-0.2.1 → videopython-0.4.0}/.gitignore +4 -1
  2. {videopython-0.2.1 → videopython-0.4.0}/PKG-INFO +13 -25
  3. {videopython-0.2.1 → videopython-0.4.0}/README.md +10 -10
  4. {videopython-0.2.1 → videopython-0.4.0}/pyproject.toml +32 -32
  5. {videopython-0.2.1/src/videopython → videopython-0.4.0/src/videopython/ai}/generation/audio.py +25 -13
  6. {videopython-0.2.1/src/videopython → videopython-0.4.0/src/videopython/ai}/generation/image.py +0 -3
  7. videopython-0.4.0/src/videopython/ai/understanding/transcribe.py +37 -0
  8. {videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/effects.py +3 -3
  9. videopython-0.4.0/src/videopython/base/transcription.py +13 -0
  10. {videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/transforms.py +0 -2
  11. {videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/transitions.py +2 -2
  12. videopython-0.4.0/src/videopython/base/video.py +440 -0
  13. videopython-0.4.0/src/videopython/py.typed +0 -0
  14. videopython-0.4.0/src/videopython/utils/__init__.py +3 -0
  15. videopython-0.4.0/src/videopython/utils/image.py +47 -0
  16. videopython-0.4.0/src/videopython/utils/text.py +727 -0
  17. videopython-0.2.1/src/videopython/base/video.py +0 -358
  18. videopython-0.2.1/src/videopython/utils/image.py +0 -275
  19. {videopython-0.2.1 → videopython-0.4.0}/LICENSE +0 -0
  20. {videopython-0.2.1 → videopython-0.4.0}/src/videopython/__init__.py +0 -0
  21. {videopython-0.2.1/src/videopython/base → videopython-0.4.0/src/videopython/ai}/__init__.py +0 -0
  22. {videopython-0.2.1/src/videopython → videopython-0.4.0/src/videopython/ai}/generation/__init__.py +0 -0
  23. {videopython-0.2.1/src/videopython → videopython-0.4.0/src/videopython/ai}/generation/video.py +0 -0
  24. {videopython-0.2.1/src/videopython/utils → videopython-0.4.0/src/videopython/ai/understanding}/__init__.py +0 -0
  25. /videopython-0.2.1/src/videopython/py.typed → /videopython-0.4.0/src/videopython/base/__init__.py +0 -0
  26. {videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/compose.py +0 -0
  27. {videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/exceptions.py +0 -0
  28. {videopython-0.2.1 → videopython-0.4.0}/src/videopython/utils/common.py +0 -0

{videopython-0.2.1 → videopython-0.4.0}/.gitignore
@@ -137,4 +137,7 @@ dmypy.json
  # Data directories
  data/downloaded/*.mp4
  data/exported/*.mp4
- !data/exported/example.mp4
+ !data/exported/example.mp4
+
+ # Mac
+ *.DS_Store

{videopython-0.2.1 → videopython-0.4.0}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: videopython
- Version: 0.2.1
+ Version: 0.4.0
  Summary: Minimal video generation and processing library.
  Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
  Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
@@ -19,20 +19,8 @@ Requires-Dist: numpy>=1.25.2
  Requires-Dist: opencv-python>=4.9.0.80
  Requires-Dist: pillow>=10.3.0
  Requires-Dist: pydub>=0.25.1
+ Requires-Dist: soundpython>=0.1.11
  Requires-Dist: tqdm>=4.66.3
- Provides-Extra: dev
- Requires-Dist: black==24.3.0; extra == 'dev'
- Requires-Dist: isort==5.12.0; extra == 'dev'
- Requires-Dist: mypy==1.8.0; extra == 'dev'
- Requires-Dist: pydub-stubs==0.25.1.1; extra == 'dev'
- Requires-Dist: pytest==7.4.0; extra == 'dev'
- Requires-Dist: types-pillow==10.2.0.20240213; extra == 'dev'
- Requires-Dist: types-tqdm==4.66.0.20240106; extra == 'dev'
- Provides-Extra: generation
- Requires-Dist: accelerate>=0.29.2; extra == 'generation'
- Requires-Dist: diffusers>=0.26.3; extra == 'generation'
- Requires-Dist: torch>=2.1.0; extra == 'generation'
- Requires-Dist: transformers>=4.38.1; extra == 'generation'
  Description-Content-Type: text/markdown

  # About
@@ -51,10 +39,10 @@ sudo apt-get install ffmpeg

  ### Install with pip
  ```bash
- pip install videopython[generation]
+ pip install videopython[ai]
  ```
- > You can install without `[generation]` dependencies for basic video handling and processing.
- > The funcionalities found in `videopython.generation` won't work.
+ > You can install without `[ai]` dependencies for basic video handling and processing.
+ > The funcionalities found in `videopython.ai` won't work.

  ## Basic Usage

@@ -64,10 +52,10 @@ pip install videopython[generation]
  from videopython.base.video import Video

  # Load videos and print metadata
- video1 = Video.from_path("tests/test_data/fast_benchmark.mp4")
+ video1 = Video.from_path("tests/test_data/small_video.mp4")
  print(video1)

- video2 = Video.from_path("tests/test_data/slow_benchmark.mp4")
+ video2 = Video.from_path("tests/test_data/big_video.mp4")
  print(video2)

  # Define the transformations
@@ -91,18 +79,18 @@ savepath = video.save()

  ### Video Generation

- > Using Nvidia A40 or better is recommended for the `videopython.generation` module.
+ > Using Nvidia A40 or better is recommended for the `videopython.ai` module.
  ```python
  # Generate image and animate it
- from videopython.generation import ImageToVideo
- from videopython.generation import TextToImage
- from videopython.generation import TextToMusic
+ from videopython.ai.generation import ImageToVideo
+ from videopython.ai.generation import TextToImage
+ from videopython.ai.generation import TextToMusic

  image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
  video = ImageToVideo().generate_video(image=image, fps=24)

  # Video generation directly from prompt
- from videopython.generation import TextToVideo
+ from videopython.ai.generation import TextToVideo
  video_gen = TextToVideo()
  video = video_gen.generate_video("Dogs playing in the snow")
  for _ in range(10):

{videopython-0.2.1 → videopython-0.4.0}/README.md
@@ -14,10 +14,10 @@ sudo apt-get install ffmpeg

  ### Install with pip
  ```bash
- pip install videopython[generation]
+ pip install videopython[ai]
  ```
- > You can install without `[generation]` dependencies for basic video handling and processing.
- > The funcionalities found in `videopython.generation` won't work.
+ > You can install without `[ai]` dependencies for basic video handling and processing.
+ > The funcionalities found in `videopython.ai` won't work.

  ## Basic Usage

@@ -27,10 +27,10 @@ pip install videopython[generation]
  from videopython.base.video import Video

  # Load videos and print metadata
- video1 = Video.from_path("tests/test_data/fast_benchmark.mp4")
+ video1 = Video.from_path("tests/test_data/small_video.mp4")
  print(video1)

- video2 = Video.from_path("tests/test_data/slow_benchmark.mp4")
+ video2 = Video.from_path("tests/test_data/big_video.mp4")
  print(video2)

  # Define the transformations
@@ -54,18 +54,18 @@ savepath = video.save()

  ### Video Generation

- > Using Nvidia A40 or better is recommended for the `videopython.generation` module.
+ > Using Nvidia A40 or better is recommended for the `videopython.ai` module.
  ```python
  # Generate image and animate it
- from videopython.generation import ImageToVideo
- from videopython.generation import TextToImage
- from videopython.generation import TextToMusic
+ from videopython.ai.generation import ImageToVideo
+ from videopython.ai.generation import TextToImage
+ from videopython.ai.generation import TextToMusic

  image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
  video = ImageToVideo().generate_video(image=image, fps=24)

  # Video generation directly from prompt
- from videopython.generation import TextToVideo
+ from videopython.ai.generation import TextToVideo
  video_gen = TextToVideo()
  video = video_gen.generate_video("Dogs playing in the snow")
  for _ in range(10):
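
For existing users, the README hunks above boil down to a one-line import migration. A sketch for illustration (not part of the diff):

```python
# videopython 0.2.1 (removed):
#   from videopython.generation import TextToVideo
# videopython 0.4.0: the module now lives under the `ai` package
from videopython.ai.generation import TextToVideo
```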

{videopython-0.2.1 → videopython-0.4.0}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "videopython"
- version = "0.2.1"
+ version = "0.4.0"
  description = "Minimal video generation and processing library."
  authors = [
      { name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -24,24 +24,27 @@ dependencies = [
      "opencv-python>=4.9.0.80",
      "pillow>=10.3.0",
      "pydub>=0.25.1",
+     "soundpython>=0.1.11",
      "tqdm>=4.66.3",
+
  ]

- [project.optional-dependencies]
+ [dependency-groups]
  dev = [
-     "black==24.3.0",
-     "isort==5.12.0",
-     "mypy==1.8.0",
-     "pytest==7.4.0",
-     "types-Pillow==10.2.0.20240213",
-     "types-tqdm==4.66.0.20240106",
-     "pydub-stubs==0.25.1.1",
+     "ruff>=0.1.14",
+     "mypy>=1.8.0",
+     "pytest>=7.4.0",
+     "types-Pillow>=10.2.0.20240213",
+     "types-tqdm>=4.66.0.20240106",
+     "pytest-cov>=6.1.1",
  ]
- generation = [
+ ai = [
      "accelerate>=0.29.2",
      "diffusers>=0.26.3",
      "torch>=2.1.0",
      "transformers>=4.38.1",
+     "openai-whisper>=20240930",
+     "numba>=0.61.0",
  ]

  [project.urls]
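
Two packaging changes above are visible to downstream users: the `generation` extra is renamed to `ai`, and the dev dependencies move from `[project.optional-dependencies]` to a PEP 735 `[dependency-groups]` table, so `pip install videopython[dev]` no longer resolves. A sketch of the corresponding install commands, assuming pip 25.1+ for `--group` support (uv also understands dependency groups):

```bash
# Renamed extra (was: pip install videopython[generation])
pip install "videopython[ai]"

# Dev tools are now a dependency group, installable from a checkout with e.g.:
pip install --group dev .   # pip >= 25.1
uv sync                     # uv includes the dev group by default
```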
@@ -49,25 +52,8 @@ Homepage = "https://github.com/bartwojtowicz/videopython/"
  Repository = "https://github.com/bartwojtowicz/videopython/"
  Documentation = "https://github.com/bartwojtowicz/videopython/"

- [tool.rye]
- managed = true
- dev-dependencies = [
-     "black==24.3.0",
-     "isort==5.12.0",
-     "mypy==1.8.0",
-     "pytest==7.4.0",
-     "types-Pillow==10.2.0.20240213",
-     "types-tqdm==4.66.0.20240106",
-     "pydub-stubs==0.25.1.1",
- ]
-
- [tool.rye.scripts]
- test-unit = "pytest"
- test-type = "mypy src"
- test-static = { chain = [
-     "black src -l 120 --check",
-     "isort src --profile black --check"
- ]}
+ [tool.mypy]
+ mypy_path = "stubs"

  [build-system]
  requires = ["hatchling"]
@@ -79,10 +65,24 @@ packages = ["src/videopython"]
  [tool.hatch.build.targets.sdist]
  include = ["src/videopython", "src/videopython/py.typed"]

- [tool.mypy]
- mypy_path = "stubs"
-
  [tool.pytest]
+ pythonpath = [".src/"]
  testpaths = ["src/tests"]
  python_files = ["test_*.py"]
  addopts = "-v --tb=short"
+
+ [tool.ruff]
+ line-length = 120
+ target-version = "py310"
+
+ [tool.ruff.lint]
+ select = [
+     "E", # pycodestyle errors
+     "F", # pyflakes
+     "I", # isort
+ ]
+ isort.known-first-party = ["videopython"]
+
+ [tool.ruff.format]
+ indent-style = "space"
+ quote-style = "double"
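
The deleted `[tool.rye.scripts]` block has no direct replacement, but equivalent commands follow from the new tooling, with ruff taking over both black and isort duties. A sketch, assuming the dev group is installed:

```bash
pytest                    # was: rye run test-unit
mypy src                  # was: rye run test-type
ruff check src            # E/F rules plus import sorting (replaces black/isort --check)
ruff format --check src   # formatting check at line-length 120
```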

{videopython-0.2.1/src/videopython → videopython-0.4.0/src/videopython/ai}/generation/audio.py
@@ -1,6 +1,5 @@
- import numpy as np
  import torch
- from pydub import AudioSegment
+ from soundpython import Audio, AudioMetadata
  from transformers import (
      AutoProcessor,
      AutoTokenizer,
@@ -17,15 +16,24 @@ class TextToSpeech:
          self.pipeline = VitsModel.from_pretrained(TEXT_TO_SPEECH_MODEL)
          self.tokenizer = AutoTokenizer.from_pretrained(TEXT_TO_SPEECH_MODEL)

-     def generate_audio(self, text: str) -> AudioSegment:
+     def generate_audio(self, text: str) -> Audio:
          tokenized = self.tokenizer(text, return_tensors="pt")

          with torch.no_grad():
              output = self.pipeline(**tokenized).waveform

-         output = (output.T.float().numpy() * (2**31 - 1)).astype(np.int32)
-         audio = AudioSegment(data=output, frame_rate=self.pipeline.config.sampling_rate, sample_width=4, channels=1)
-         return audio
+         # Convert to float32 and normalize to [-1, 1]
+         audio_data = output.T.float().numpy()
+
+         metadata = AudioMetadata(
+             sample_rate=self.pipeline.config.sampling_rate,
+             channels=1,
+             sample_width=4,
+             duration_seconds=len(audio_data) / self.pipeline.config.sampling_rate,
+             frame_count=len(audio_data),
+         )
+
+         return Audio(audio_data, metadata)


  class TextToMusic:
@@ -37,7 +45,7 @@ class TextToMusic:
          self.processor = AutoProcessor.from_pretrained(MUSIC_GENERATION_MODEL_SMALL)
          self.model = MusicgenForConditionalGeneration.from_pretrained(MUSIC_GENERATION_MODEL_SMALL)

-     def generate_audio(self, text: str, max_new_tokens: int) -> AudioSegment:
+     def generate_audio(self, text: str, max_new_tokens: int) -> Audio:
          inputs = self.processor(
              text=[text],
              padding=True,
@@ -45,12 +53,16 @@ class TextToMusic:
          )
          audio_values = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
          sampling_rate = self.model.config.audio_encoder.sampling_rate
-         output = (audio_values[0, 0].float().numpy() * (2**31 - 1)).astype(np.int32)

-         audio = AudioSegment(
-             data=output.tobytes(),
-             frame_rate=sampling_rate,
-             sample_width=4,
+         # Convert to float32 and normalize to [-1, 1]
+         audio_data = audio_values[0, 0].float().numpy()
+
+         metadata = AudioMetadata(
+             sample_rate=sampling_rate,
              channels=1,
+             sample_width=4,
+             duration_seconds=len(audio_data) / sampling_rate,
+             frame_count=len(audio_data),
          )
-         return audio
+
+         return Audio(audio_data, metadata)
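
Both generators now hand back a `soundpython.Audio` instead of a `pydub.AudioSegment`. A minimal usage sketch, deliberately restricted to the soundpython surface visible elsewhere in this diff (`to_mono`, `resample`, `.data`, `.is_silent`); anything beyond that is not documented here:

```python
from videopython.ai.generation import TextToMusic, TextToSpeech

speech = TextToSpeech().generate_audio("Hello from videopython")        # soundpython.Audio
music = TextToMusic().generate_audio("calm piano", max_new_tokens=256)  # soundpython.Audio

# Downmix and resample, as the new transcriber does before calling whisper
mono = speech.to_mono().resample(16_000)
print(mono.data.shape)   # .data exposes the raw NumPy samples
print(music.is_silent)   # the flag the transcriber checks before running whisper
```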

{videopython-0.2.1/src/videopython → videopython-0.4.0/src/videopython/ai}/generation/image.py
@@ -1,6 +1,3 @@
- import io
- import os
-
  import torch
  from diffusers import DiffusionPipeline
  from PIL import Image

videopython-0.4.0/src/videopython/ai/understanding/transcribe.py (new file)
@@ -0,0 +1,37 @@
+ from typing import Literal
+
+ import whisper
+
+ from videopython.base.transcription import Transcription, TranscriptionSegment
+ from videopython.base.video import Video
+
+
+ class VideoTranscription:
+     def __init__(self, model_name: Literal["tiny", "base", "small", "medium", "large", "turbo"] = "small") -> None:
+         self.model = whisper.load_model(name=model_name)
+
+     def transcribe_video(self, video: Video) -> Transcription:
+         """Transcribes video to text.
+
+         Args:
+             video: Video to transcribe.
+
+         Returns:
+             List of dictionaries with segments of text and their start and end times.
+         """
+         if video.audio.is_silent:
+             return Transcription(segments=[])
+
+         audio = video.audio.to_mono()
+         audio = audio.resample(whisper.audio.SAMPLE_RATE)
+         audio_data = audio.data
+
+         transcription = self.model.transcribe(audio=audio_data, word_timestamps=True)
+
+         transcription_segments = [
+             TranscriptionSegment(start=segment["start"], end=segment["end"], text=segment["text"])
+             for segment in transcription["segments"]
+         ]
+         result = Transcription(segments=transcription_segments)
+
+         return result
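
A short usage sketch for the new transcriber; the sample path is the one used in the README above, and the printed format is illustrative:

```python
from videopython.ai.understanding.transcribe import VideoTranscription
from videopython.base.video import Video

video = Video.from_path("tests/test_data/small_video.mp4")
transcription = VideoTranscription(model_name="base").transcribe_video(video)

for segment in transcription.segments:
    print(f"[{segment.start:6.2f} -> {segment.end:6.2f}] {segment.text.strip()}")
```

Note that despite its docstring, the method returns a `Transcription` dataclass rather than a list of dictionaries.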

{videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/effects.py
@@ -156,13 +156,13 @@ class Zoom(Effect):

          width = video.metadata.width
          height = video.metadata.height
-         crop_sizes_w, crop_sizes_h = np.linspace(width // self.zoom_factor, width, n_frames), np.linspace(
-             height // self.zoom_factor, height, n_frames
+         crop_sizes_w, crop_sizes_h = (
+             np.linspace(width // self.zoom_factor, width, n_frames),
+             np.linspace(height // self.zoom_factor, height, n_frames),
          )

          if self.mode == "in":
              for frame, w, h in tqdm(zip(video.frames, reversed(crop_sizes_w), reversed(crop_sizes_h))):
-
                  x = width / 2 - w / 2
                  y = height / 2 - h / 2


videopython-0.4.0/src/videopython/base/transcription.py (new file)
@@ -0,0 +1,13 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class TranscriptionSegment:
+     start: float
+     end: float
+     text: str
+
+
+ @dataclass
+ class Transcription:
+     segments: list[TranscriptionSegment]
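
Because `Transcription` is a plain dataclass, downstream formatting needs no library support. A hypothetical helper (not part of the package) that renders segments as subtitle-style lines:

```python
from videopython.base.transcription import Transcription, TranscriptionSegment


def to_subtitle_lines(transcription: Transcription) -> str:
    """Render each segment as `start --> end  text` (illustrative only)."""
    return "\n".join(
        f"{s.start:7.2f} --> {s.end:7.2f}  {s.text.strip()}" for s in transcription.segments
    )


demo = Transcription(segments=[TranscriptionSegment(start=0.0, end=1.4, text="Hello world")])
print(to_subtitle_lines(demo))
```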

{videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/transforms.py
@@ -1,7 +1,6 @@
  from abc import ABC, abstractmethod
  from enum import Enum
  from multiprocessing import Pool
- from typing import Literal

  import cv2
  import numpy as np
@@ -154,7 +153,6 @@ class CropMode(Enum):


  class Crop(Transformation):
-
      def __init__(self, width: int, height: int, mode: CropMode = CropMode.CENTER):
          self.width = width
          self.height = height

{videopython-0.2.1 → videopython-0.4.0}/src/videopython/base/transitions.py
@@ -67,7 +67,7 @@ class FadeTransition(Transition):
              ],
              fps=video_fps,
          )
-         faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
+         faded_videos.audio = videos[0].audio.concat(videos[1].audio, crossfade=(effect_time_fps / video_fps))
          return faded_videos


@@ -102,5 +102,5 @@ class BlurTransition(Transition):
              ],
              fps=video_fps,
          )
-         blurred_videos.audio = videos[0].audio.append(videos[1].audio)
+         blurred_videos.audio = videos[0].audio.concat(videos[1].audio)
          return blurred_videos
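
One unit change hides in the FadeTransition hunk: pydub's `append()` took `crossfade` in milliseconds (hence the removed `* 1000`), while soundpython's `concat()` takes it in seconds. A self-contained sketch of the before/after call, using the generators from this package:

```python
from videopython.ai.generation import TextToSpeech

tts = TextToSpeech()
first = tts.generate_audio("First clip")
second = tts.generate_audio("Second clip")

# pydub (0.2.1) expected milliseconds:
#   mixed = first.append(second, crossfade=250)
# soundpython (0.4.0) expects seconds:
mixed = first.concat(second, crossfade=0.25)
```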