videopython 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of videopython might be problematic.

videopython/base/effects.py ADDED
@@ -0,0 +1,57 @@
+from abc import ABC, abstractmethod
+from typing import final
+
+import numpy as np
+from tqdm import tqdm
+
+from videopython.base.video import Video
+
+
+class Effect(ABC):
+    """Abstract class for effect on frames of video.
+
+    The effect must not change the number of frames and the shape of the frames.
+    """
+
+    @final
+    def apply(self, video: Video) -> Video:
+        original_shape = video.video_shape
+        video_with_effect = self._apply(video)
+        if not video_with_effect.video_shape == original_shape:
+            raise RuntimeError("The effect must not change the number of frames and the shape of the frames!")
+        return video_with_effect
+
+    @abstractmethod
+    def _apply(self, video: Video) -> Video:
+        pass
+
+
+class FullImageOverlay(Effect):
+    def __init__(self, overlay_image: np.ndarray, alpha: float | None = None):
+        if alpha is not None and not 0 <= alpha <= 1:
+            raise ValueError("Alpha must be in range [0, 1]!")
+        elif not (overlay_image.ndim == 3 and overlay_image.shape[-1] in [3, 4]):
+            raise ValueError("Only RGB and RGBA images are supported as an overlay!")
+        elif alpha is None:
+            alpha = 1.0
+
+        if overlay_image.shape[-1] == 3:
+            overlay_image = np.dstack([overlay_image, np.full(overlay_image.shape[:2], 255, dtype=np.uint8)])
+        overlay_image[:, :, 3] = overlay_image[:, :, 3] * alpha
+
+        self._overlay_alpha = (overlay_image[:, :, 3] / 255.0)[:, :, np.newaxis]
+        self._base_transparency = 1 - self._overlay_alpha
+
+        self.overlay = overlay_image[:, :, :3] * self._overlay_alpha
+
+    def _overlay(self, img: np.ndarray) -> np.ndarray:
+        return self.overlay + (img * self._base_transparency)
+
+    def _apply(self, video: Video) -> Video:
+        if not video.frame_shape == self.overlay.shape:
+            raise ValueError(
+                f"Mismatch of overlay shape `{self.overlay.shape}` with video shape: `{video.frame_shape}`!"
+            )
+        print("Overlaying video...")
+        video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
+        return video
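A minimal usage sketch for the new FullImageOverlay effect (illustration only, not part of the diff; the file names are placeholders):

    import numpy as np

    from videopython.base.effects import FullImageOverlay
    from videopython.base.video import Video

    video = Video.from_path("input.mp4")  # hypothetical input file
    # Build a frame-sized RGB overlay; an RGBA image with its own alpha channel also works.
    overlay = np.zeros((video.metadata.height, video.metadata.width, 3), dtype=np.uint8)
    overlay[..., 2] = 255  # solid blue layer, blended at 30% opacity below
    video = FullImageOverlay(overlay_image=overlay, alpha=0.3).apply(video)
    video.save("overlayed.mp4")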
videopython/base/transforms.py CHANGED
@@ -3,6 +3,7 @@ from multiprocessing import Pool
 
 import cv2
 import numpy as np
+from tqdm import tqdm
 
 from videopython.base.video import Video
 
@@ -14,9 +15,6 @@ class Transformation(ABC):
     def apply(self, video: Video) -> Video:
         pass
 
-    def __call__(self, video: Video) -> Video:
-        return self.apply(video)
-
 
 class TransformationPipeline:
     def __init__(self, transformations: list[Transformation] | None):
@@ -58,7 +56,7 @@ class CutFrames(Transformation):
         self.end_frame = end_frame
 
     def apply(self, video: Video) -> Video:
-        video.frames = video.frames[self.start_frame : self.end_frame]
+        video = video[self.start_frame : self.end_frame]
         return video
 
 
@@ -68,7 +66,7 @@ class CutSeconds(Transformation):
         self.end_second = end_second
 
     def apply(self, video: Video) -> Video:
-        video.frames = video.frames[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
+        video = video[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
         return video
 
 
@@ -92,3 +90,41 @@ class Resize(Transformation):
         )
         video.frames = np.array(frames_copy)
         return video
+
+
+class ResampleFPS(Transformation):
+    def __init__(self, new_fps: int | float):
+        self.new_fps = float(new_fps)
+
+    def _downsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.round(np.linspace(0, len(video.frames) - 1, target_frame_count)).astype(int)
+        video.frames = video.frames[new_frame_indices]
+        video.fps = self.new_fps
+        return video
+
+    def _upsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.linspace(0, len(video.frames) - 1, target_frame_count)
+        new_frames = []
+        for i in tqdm(range(len(new_frame_indices) - 1)):
+            # Interpolate between the two nearest frames
+            ratio = new_frame_indices[i] % 1
+            new_frame = (1 - ratio) * video.frames[int(new_frame_indices[i])] + ratio * video.frames[
+                int(np.ceil(new_frame_indices[i]))
+            ]
+            new_frames.append(new_frame.astype(np.uint8))
+        video.frames = np.array(new_frames, dtype=np.uint8)
+        video.fps = self.new_fps
+        return video
+
+    def apply(self, video: Video) -> Video:
+        if video.fps == self.new_fps:
+            return video
+        elif video.fps > self.new_fps:
+            print(f"Downsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._downsample(video)
+        else:
+            print(f"Upsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._upsample(video)
+        return video
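An illustrative sketch of the new ResampleFPS transformation combined with the slice-based cutting that CutFrames and CutSeconds now delegate to (paths are placeholders, not part of the diff):

    from videopython.base.transforms import ResampleFPS
    from videopython.base.video import Video

    video = Video.from_path("input.mp4")        # hypothetical input file
    clip = video[: round(5 * video.fps)]        # roughly the first 5 seconds, audio included
    clip = ResampleFPS(new_fps=30).apply(clip)  # drops or interpolates frames as needed
    clip.save("resampled_30fps.mp4")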
videopython/base/transitions.py CHANGED
@@ -15,19 +15,19 @@ class Transition(ABC):
     """
 
     @final
-    def apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
+    def apply(self, videos: tuple[Video, Video]) -> Video:
         assert videos[0].metadata.can_be_merged_with(videos[1].metadata)
-        return self._apply(videos, **kwargs)
+        return self._apply(videos)
 
     @abstractmethod
-    def _apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         pass
 
 
 class InstantTransition(Transition):
     """Instant cut without any transition."""
 
-    def _apply(self, videos: list[Video] | tuple[Video]) -> Video:
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         return videos[0] + videos[1]
 
 
@@ -57,7 +57,7 @@ class FadeTransition(Transition):
         effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
         transition = self.fade(videos[0].frames[-effect_time_fps:], videos[1].frames[:effect_time_fps])
 
-        return Video.from_frames(
+        faded_videos = Video.from_frames(
             np.r_[
                 "0,2",
                 videos[0].frames[:-effect_time_fps],
@@ -66,3 +66,5 @@ class FadeTransition(Transition):
             ],
             fps=video_fps,
         )
+        faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
+        return faded_videos
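A sketch of how the narrowed Transition API might be used; it assumes FadeTransition is constructed with the fade length in seconds, and the file names are placeholders:

    from videopython.base.transitions import FadeTransition
    from videopython.base.video import Video

    first = Video.from_path("scene_1.mp4")
    second = Video.from_path("scene_2.mp4")  # must match the first clip's fps and resolution
    # The audio tracks are now crossfaded over the same window as the visual fade.
    joined = FadeTransition(effect_time_seconds=1.0).apply((first, second))
    joined.save("joined.mp4")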
videopython/base/video.py CHANGED
@@ -1,17 +1,15 @@
 from __future__ import annotations
 
+import shlex
 import subprocess
-import tempfile
 from dataclasses import dataclass
 from pathlib import Path
 
 import cv2
 import numpy as np
-import torch
-from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
 from pydub import AudioSegment
 
-from videopython.utils.common import generate_random_name
+from videopython.utils.common import check_path, generate_random_name
 
 
 @dataclass
@@ -20,10 +18,9 @@ class VideoMetadata:
 
     height: int
     width: int
-    fps: int
+    fps: float
     frame_count: int
     total_seconds: float
-    with_audio: bool = False
 
     def __str__(self):
         return f"{self.height}x{self.width} @ {self.fps}fps, {self.total_seconds} seconds"
@@ -40,7 +37,7 @@
         return np.array((self.frame_count, self.height, self.width, 3))
 
     @classmethod
-    def from_path(cls, video_path: str):
+    def from_path(cls, video_path: str) -> VideoMetadata:
         """Creates VideoMetadata object from video file.
 
         Args:
@@ -48,7 +45,7 @@
         """
         video = cv2.VideoCapture(video_path)
         frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps = round(video.get(cv2.CAP_PROP_FPS))
+        fps = round(video.get(cv2.CAP_PROP_FPS), 2)
         height = round(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
         width = round(video.get(cv2.CAP_PROP_FRAME_WIDTH))
         total_seconds = round(frame_count / fps, 2)
@@ -62,7 +59,7 @@
         )
 
     @classmethod
-    def from_video(cls, video: Video):
+    def from_video(cls, video: Video) -> VideoMetadata:
         """Creates VideoMetadata object from frames.
 
         Args:
@@ -73,15 +70,12 @@
         frame_count, height, width, _ = video.frames.shape
         total_seconds = round(frame_count / video.fps, 2)
 
-        with_audio = bool(video.audio)
-
         return cls(
             height=height,
             width=width,
             fps=video.fps,
             frame_count=frame_count,
             total_seconds=total_seconds,
-            with_audio=with_audio,
         )
 
     def can_be_merged_with(self, other_format: VideoMetadata) -> bool:
@@ -115,146 +109,123 @@ class Video:
         self.audio = None
 
     @classmethod
-    def from_path(cls, path):
+    def from_path(cls, path: str) -> Video:
         new_vid = cls()
         new_vid.frames, new_vid.fps = cls._load_video_from_path(path)
+        audio = cls._load_audio_from_path(path)
+        if not audio:
+            print(f"No audio found for `{path}`, adding silent track!")
+            audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
+        new_vid.audio = audio
         return new_vid
 
     @classmethod
-    def from_frames(cls, frames, fps):
+    def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
         new_vid = cls()
         new_vid.frames = frames
         new_vid.fps = fps
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         return new_vid
 
     @classmethod
-    def from_image(cls, image: np.ndarray, fps: int = 24, length_seconds: float = 1.0):
+    def from_image(cls, image: np.ndarray, fps: float = 24.0, length_seconds: float = 1.0) -> Video:
         new_vid = cls()
         if len(image.shape) == 3:
             image = np.expand_dims(image, axis=0)
-
         new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
         new_vid.fps = fps
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
        return new_vid
 
-    @classmethod
-    def from_prompt(
-        cls,
-        prompt: str,
-        num_steps: int = 25,
-        height: int = 320,
-        width: int = 576,
-        num_frames: int = 24,
-        gpu_optimized: bool = False,
-    ):
-        torch_dtype = torch.float16 if gpu_optimized else torch.float32
-        # TODO: Make it model independent
-        pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch_dtype)
-        if gpu_optimized:
-            pipe.enable_model_cpu_offload()
-        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-        video_frames = np.asarray(
-            pipe(
-                prompt,
-                num_inference_steps=num_steps,
-                height=height,
-                width=width,
-                num_frames=num_frames,
-            ).frames
-        )
-        return Video.from_frames(video_frames, fps=24)
-
-    def add_audio_from_file(self, audio_path: str):
-        self.audio = AudioSegment.from_file(audio_path)
-
-    def __getitem__(self, val):
-        if isinstance(val, slice):
-            return self.from_frames(self.frames[val], fps=self.fps)
-        elif isinstance(val, int):
-            return self.frames[val]
-
-    def copy(self):
-        return Video().from_frames(self.frames.copy(), self.fps)
+    def copy(self) -> Video:
+        copied = Video().from_frames(self.frames.copy(), self.fps)
+        copied.audio = self.audio
+        return copied
 
     def is_loaded(self) -> bool:
-        return self.fps and self.frames
+        return self.fps is not None and self.frames is not None and self.audio is not None
 
-    def split(self, frame_idx: int | None = None):
+    def split(self, frame_idx: int | None = None) -> tuple[Video, Video]:
         if frame_idx:
             assert 0 <= frame_idx <= len(self.frames)
         else:
             frame_idx = len(self.frames) // 2
 
-        return (
+        split_videos = (
             self.from_frames(self.frames[:frame_idx], self.fps),
             self.from_frames(self.frames[frame_idx:], self.fps),
         )
+        audio_midpoint = (frame_idx / self.fps) * 1000
+        split_videos[0].audio = self.audio[:audio_midpoint]
+        split_videos[1].audio = self.audio[audio_midpoint:]
+        return split_videos
 
-    def _prepare_new_canvas(self, output_path: str):
-        """Prepares a new `self._transformed_video` canvas for cut video."""
-        canvas = cv2.VideoWriter(
-            filename=output_path,
-            fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
-            fps=self.fps,
-            frameSize=(self.video_shape[2], self.video_shape[1]),
-        )
-        return canvas
-
-    def save(self, filename: str = None) -> str:
-        """Transforms the video and saves as `filename`.
+    def save(self, filename: str | None = None) -> str:
+        """Saves the video.
 
         Args:
-            filename: Name of the output video file.
+            filename: Name of the output video file. Generates random UUID name if not provided.
         """
-        # Check correctness
-        if not filename:
-            filename = Path(generate_random_name()).resolve()
-            directory = filename.parent
-        elif not Path(filename).suffix == ".mp4":
-            raise ValueError("Only .mp4 save option is supported.")
-        else:
-            filename = Path(filename)
-            directory = filename.parent
-            if not directory.exists():
-                raise ValueError(f"Selected directory `{directory}` does not exist!")
-
-        filename, directory = str(filename), str(directory)
-        # Save video video opencv
-        canvas = self._prepare_new_canvas(filename)
-        for frame in self.frames[:, :, :, ::-1]:
-            canvas.write(frame)
-        cv2.destroyAllWindows()
-        canvas.release()
-        # If Video has audio, overlaay audio using ffmpeg
-        if self.audio:
-            filename_with_audio = tempfile.NamedTemporaryFile(suffix=".mp4").name
-
-            if len(self.audio) > self.total_seconds * 1000:
-                self.audio = self.audio[: self.total_seconds * 1000]
-            else:
-                self.audio += AudioSegment.silent(duration=self.total_seconds * 1000 - len(self.audio))
-
-            raw_audio = self.audio.raw_data
-            channels = self.audio.channels
-            frame_rate = self.audio.frame_rate
-
-            ffmpeg_command = (
-                f"ffmpeg -loglevel error -y -i {filename} -f s16le -acodec pcm_s16le -ar {frame_rate} -ac "
-                f"{channels} -i pipe:0 -c:v copy -c:a aac -strict experimental {filename_with_audio}"
-            )
+        if not self.is_loaded():
+            raise RuntimeError(f"Video is not loaded, cannot save!")
 
-            try:
-                subprocess.run(ffmpeg_command, input=raw_audio, check=True, shell=True)
-                print("Video with audio saved successfully.")
-            except subprocess.CalledProcessError as e:
-                print(f"Error saving video with audio: {e}")
+        if filename is None:
+            filename = generate_random_name(suffix=".mp4")
+        filename = check_path(filename, dir_exists=True, suffix=".mp4")
 
+        ffmpeg_video_command = (
+            f"ffmpeg -loglevel error -y -framerate {self.fps} -f rawvideo -pix_fmt rgb24"
+            f" -s {self.metadata.width}x{self.metadata.height} "
+            f"-i pipe:0 -c:v libx264 -pix_fmt yuv420p {filename}"
+        )
+
+        ffmpeg_audio_command = (
+            f"ffmpeg -loglevel error -y -i {filename} -f s16le -acodec pcm_s16le "
+            f"-ar {self.audio.frame_rate} -ac {self.audio.channels} -i pipe:0 "
+            f"-c:v copy -c:a aac -strict experimental {filename}_temp.mp4"
+        )
+
+        try:
+            print("Saving frames to video...")
+            subprocess.run(
+                ffmpeg_video_command,
+                input=self.frames.tobytes(),
+                check=True,
+                shell=True,
+            )
+        except subprocess.CalledProcessError as e:
+            print("Error saving frames to video!")
+            raise e
+
+        try:
+            print("Adding audio track...")
+            subprocess.run(ffmpeg_audio_command, input=self.audio.raw_data, check=True, shell=True)
             Path(filename).unlink()
-            Path(filename_with_audio).rename(filename)
+            Path(filename + "_temp.mp4").rename(filename)
+        except subprocess.CalledProcessError as e:
+            print(f"Error adding audio track!")
+            raise e
 
+        print(f"Video saved into `{filename}`!")
         return filename
 
-    def __add__(self, other):
+    def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
+        new_audio = self._load_audio_from_path(path)
+        if new_audio is None:
+            print(f"Audio file `{path}` not found, skipping!")
+            return
+
+        if (duration_diff := round(self.total_seconds - new_audio.duration_seconds)) > 0 and not loop:
+            new_audio = new_audio + AudioSegment.silent(duration_diff * 1000)
+        elif new_audio.duration_seconds > self.total_seconds:
+            new_audio = new_audio[: round(self.total_seconds * 1000)]
+
+        if overlay:
+            self.audio = self.audio.overlay(new_audio, loop=loop, gain_during_overlay=overlay_gain)
+        else:
+            self.audio = new_audio
+
+    def __add__(self, other: Video) -> Video:
         # TODO: Should it be class method? How to make it work with sum()?
         if self.fps != other.fps:
             raise ValueError("FPS of videos do not match!")
@@ -263,32 +234,53 @@ class Video:
                 "Resolutions of the images do not match: "
                 f"{self.frame_shape} not compatible with {other.frame_shape}."
             )
+        new_video = self.from_frames(np.r_["0,2", self.frames, other.frames], fps=self.fps)
+        new_video.audio = self.audio + other.audio
+        return new_video
+
+    def __str__(self) -> str:
+        return str(self.metadata)
+
+    def __getitem__(self, val: slice) -> Video:
+        if not isinstance(val, slice):
+            raise ValueError("Only slices are supported for video indexing!")
+
+        # Sub-slice video if given a slice
+        sliced = self.from_frames(self.frames[val], fps=self.fps)
+        # Handle slicing without value for audio
+        start = val.start if val.start else 0
+        stop = val.stop if val.stop else len(self.frames)
+        # Handle negative values for audio slices
+        if start < 0:
+            start = len(self.frames) + start
+        if stop < 0:
+            stop = len(self.frames) + stop
+        # Append audio to the slice
+        audio_start = round(start / self.fps) * 1000
+        audio_end = round(stop / self.fps) * 1000
+        sliced.audio = self.audio[audio_start:audio_end]
+        return sliced
 
-        return self.from_frames(np.r_["0,2", self.frames, other.frames], fps=self.fps)
+    @staticmethod
+    def _load_audio_from_path(path: str) -> AudioSegment | None:
+        try:
+            audio = AudioSegment.from_file(path)
+            return audio
+        except IndexError:
+            return None
 
     @staticmethod
-    def _load_video_from_path(path: str):
+    def _load_video_from_path(path: str) -> tuple[np.ndarray, float]:
         """Loads frames and fps information from video file.
 
         Args:
             path: Path to video file.
         """
         metadata = VideoMetadata.from_path(path)
-        ffmpeg_command = [
-            "ffmpeg",
-            "-i",
-            path,
-            "-f",
-            "rawvideo",
-            "-pix_fmt",
-            "rgb24",
-            "-loglevel",
-            "quiet",
-            "pipe:1",
-        ]
+        ffmpeg_command = f"ffmpeg -i {path} -f rawvideo -pix_fmt rgb24 -loglevel quiet pipe:1"
 
         # Run the ffmpeg command and capture the stdout
-        ffmpeg_process = subprocess.Popen(ffmpeg_command, stdout=subprocess.PIPE)
+        ffmpeg_process = subprocess.Popen(shlex.split(ffmpeg_command), stdout=subprocess.PIPE)
         ffmpeg_out, _ = ffmpeg_process.communicate()
 
         # Convert the raw video data to a NumPy array
@@ -309,7 +301,7 @@
     @property
     def total_seconds(self) -> float:
         """Returns total seconds of the video."""
-        return round(self.frames.shape[0] / self.fps, 1)
+        return round(self.frames.shape[0] / self.fps, 4)
 
     @property
     def metadata(self) -> VideoMetadata:
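Taken together, the reworked Video class supports a flow along these lines (a sketch only; file names are placeholders):

    from videopython.base.video import Video

    video = Video.from_path("input.mp4")      # a silent track is added when the file has no audio
    print(video)                              # e.g. "720x1280 @ 30.0fps, 12.5 seconds"
    trimmed = video[30:150]                   # slicing now carries the matching audio segment
    trimmed.add_audio_from_file("music.mp3")  # overlays by default; pass overlay=False to replace
    trimmed.save("trimmed.mp4")               # frames piped to ffmpeg, then the audio track is muxed in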
videopython/generation/__init__.py ADDED
@@ -0,0 +1,10 @@
+from .audio import TextToSpeech
+from .image import TextToImage
+from .video import ImageToVideo, TextToVideo
+
+__all__ = [
+    "ImageToVideo",
+    "TextToSpeech",
+    "TextToImage",
+    "TextToVideo",
+]
videopython/generation/audio.py ADDED
@@ -0,0 +1,30 @@
+import os
+from pathlib import Path
+from typing import Literal
+
+from openai import OpenAI
+from pydub import AudioSegment
+
+from videopython.utils.common import generate_random_name
+
+
+class TextToSpeech:
+    def __init__(self, openai_key: str | None = None, save_audio: bool = True):
+        self.client = OpenAI(api_key=openai_key)
+        self._save = save_audio
+
+    def generate_audio(
+        self,
+        text: str,
+        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"] = "alloy",
+    ) -> AudioSegment:
+        filename = generate_random_name(suffix=".mp3")
+        output_path = str((Path(os.getcwd()) / filename).resolve())
+        response = self.client.audio.speech.create(model="tts-1", voice=voice, input=text)
+        response.stream_to_file(output_path)
+        audio = AudioSegment.from_file(output_path)
+        if self._save:
+            print(f"Audio saved to {output_path}")
+        else:
+            os.remove(output_path)
+        return audio
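A hedged usage sketch for the new TextToSpeech wrapper; it assumes a valid OpenAI key is either passed as openai_key or picked up by the OpenAI client from the environment:

    from videopython.generation import TextToSpeech

    tts = TextToSpeech(save_audio=False)  # set save_audio=True to keep the generated .mp3
    narration = tts.generate_audio("Hello from videopython!", voice="nova")
    print(narration.duration_seconds)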
videopython/generation/image.py ADDED
@@ -0,0 +1,60 @@
+import io
+import os
+
+import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
+from PIL import Image
+from stability_sdk import client
+
+
+class TextToImage:
+    def __init__(
+        self,
+        stability_key: str | None = None,
+        engine: str = "stable-diffusion-xl-1024-v1-0",
+        verbose: bool = True,
+    ):
+        stability_key = stability_key or os.getenv("STABILITY_KEY")
+        if stability_key is None:
+            raise ValueError(
+                "API Key for stability is required. Please provide it as an argument"
+                " or set it as an environment variable `STABILITY_KEY`. "
+            )
+
+        self.client = client.StabilityInference(stability_key, verbose=verbose, engine=engine)
+
+    def generate_image(
+        self,
+        prompt: str,
+        width: int = 1024,
+        height: int = 1024,
+        steps: int = 30,
+        cfg_scale: float = 8.0,
+        seed: int = 1,
+    ) -> Image.Image:
+        answers = self.client.generate(
+            prompt=prompt,
+            seed=seed,
+            steps=steps,  # Amount of inference steps performed on image generation.
+            cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
+            # Setting this value higher increases the strength in which it tries to match your prompt.
+            # Defaults to 7.0 if not specified.
+            width=width,
+            height=height,
+            safety=False,
+            samples=1,
+            sampler=generation.SAMPLER_K_DPMPP_2M,  # Choose which sampler we want to denoise our generation with.
+            # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
+            # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
+        )
+        for resp in answers:
+            for artifact in resp.artifacts:
+                if artifact.finish_reason == generation.FILTER:
+                    raise RuntimeError(
+                        "Your request activated the API's safety filters and could not be processed."
+                        "Please modify the prompt and try again."
+                    )
+                if artifact.type == generation.ARTIFACT_IMAGE:
+                    img = Image.open(io.BytesIO(artifact.binary))
+                else:
+                    raise ValueError(f"Unknown artifact type: {artifact.type}")
+        return img
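A sketch of the class-based TextToImage API that supersedes the stability helper deleted later in this diff; it assumes STABILITY_KEY is set (or passed as stability_key) and the prompt is only an example:

    import numpy as np

    from videopython.base.video import Video
    from videopython.generation import TextToImage

    text_to_image = TextToImage()
    image = text_to_image.generate_image("A watercolor lighthouse at dusk")
    still = Video.from_image(np.array(image), fps=24.0, length_seconds=2.0)
    still.save("lighthouse.mp4")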
videopython/generation/video.py ADDED
@@ -0,0 +1,47 @@
+import numpy as np
+import torch
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from PIL.Image import Image
+
+from videopython.base.video import Video
+
+TEXT_TO_VIDEO_MODEL = "cerspense/zeroscope_v2_576w"
+IMAGE_TO_VIDEO_MODEL = "stabilityai/stable-video-diffusion-img2vid-xt"
+
+
+class TextToVideo:
+    def __init__(self, gpu_optimized: bool = True):
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16 if gpu_optimized else torch.float32
+        )
+        self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
+        if gpu_optimized:
+            self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(
+        self, prompt: str, num_steps: int = 25, height: int = 320, width: int = 576, num_frames: int = 24
+    ) -> Video:
+        video_frames = self.pipeline(
+            prompt,
+            num_inference_steps=num_steps,
+            height=height,
+            width=width,
+            num_frames=num_frames,
+        ).frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=24.0)
+
+
+class ImageToVideo:
+    def __init__(self):
+        if not torch.cuda.is_available():
+            raise ValueError("CUDA is not available, but ImageToVideo model requires CUDA.")
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            IMAGE_TO_VIDEO_MODEL, torch_dtype=torch.float16, variant="fp16"
+        ).to("cuda")
+        self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(self, image: Image, fps: int = 24) -> Video:
+        video_frames = self.pipeline(image=image, fps=fps, output_type="np").frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=float(fps))
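The generation classes return plain Video objects, so they compose with the base module; this is only a sketch and assumes the diffusers weights download on first use and that a CUDA GPU is available for reasonable runtimes:

    from videopython.generation import TextToVideo

    text_to_video = TextToVideo(gpu_optimized=True)
    clip = text_to_video.generate_video("A timelapse of clouds rolling over mountains")
    clip.save("clouds.mp4")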
videopython/utils/common.py CHANGED
@@ -1,13 +1,15 @@
 import time
 import uuid
+from pathlib import Path
+from typing import Callable
 
 
 def generate_random_name(suffix=".mp4"):
-    """Generates random video name."""
+    """Generates random name."""
     return f"{uuid.uuid4()}{suffix}"
 
 
-def timeit(func: callable):
+def timeit(func: Callable):
     """Decorator to measure execution time of a function."""
 
     def timed(*args, **kwargs):
@@ -18,3 +20,12 @@ def timeit(func: callable):
         return result
 
     return timed
+
+
+def check_path(path: str, dir_exists: bool = True, suffix: str | None = None) -> str:
+    fullpath = Path(path).resolve()
+    if dir_exists and not fullpath.parent.exists():
+        raise ValueError(f"Directory `{fullpath.parent}` does not exist!")
+    if suffix and suffix != fullpath.suffix:
+        raise ValueError(f"Required suffix `{suffix}` does not match the file suffix `{fullpath.suffix}`")
+    return str(fullpath)
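A small sketch of the new check_path helper that Video.save now relies on (the paths are placeholders):

    from videopython.utils.common import check_path, generate_random_name

    out = check_path("renders/final.mp4", dir_exists=True, suffix=".mp4")  # raises ValueError if `renders/` is missing or the suffix differs
    tmp = generate_random_name(suffix=".png")  # UUID-based name such as "d2a8....png"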
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: videopython
-Version: 0.1.1
+Version: 0.1.3
 Summary: Minimal video generation and processing library.
 Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
 License: Apache License
@@ -210,12 +210,12 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click >=8.1.7
 Requires-Dist: numpy >=1.25.2
-Requires-Dist: opencv-python >=4.7.0.68
+Requires-Dist: opencv-python >=4.9.0.80
 Requires-Dist: pytest >=7.4.0
-Requires-Dist: transformers >=4.35.0
-Requires-Dist: diffusers >=0.21.4
+Requires-Dist: transformers >=4.38.1
+Requires-Dist: diffusers >=0.26.3
 Requires-Dist: torch >=2.1.0
-Requires-Dist: stability-sdk >=0.8.4
+Requires-Dist: stability-sdk >=0.8.5
 Requires-Dist: openai ==1.3.5
 Requires-Dist: pydub >=0.25.1
 
@@ -0,0 +1,17 @@
+videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
+videopython/base/effects.py,sha256=DpA8V89Es7YWPEq72l_h_D7MG1QYf1iuslAl-QgzZx8,2153
+videopython/base/transforms.py,sha256=DQcG8tZ8nlGj3khlp3v4C0MISpRY2rZr-6B6GtPZykE,4251
+videopython/base/transitions.py,sha256=efuJdls2xJVpXV8RGaFd--ii8cLUPz6FdmhSvOjaiTM,2275
+videopython/base/video.py,sha256=40leF8bSjNIhP_L8loOh9ptlZNTZAZ95Dgv9FH4mSz4,10791
+videopython/generation/__init__.py,sha256=Qse024UgiS9OxXzbbInyZ-9cpfI4enR2Dcds4lLDpNA,201
+videopython/generation/audio.py,sha256=YPqUdAcB0mGCt0mgFrxzupX08xx0O_qwfVdjFGlAxaw,985
+videopython/generation/image.py,sha256=B-TlrNXFu18NnMi3KO5fjk0paTSmIsQk400iZb76K8w,2507
+videopython/generation/video.py,sha256=4P4DhHS-_eDColsXK6YefSdoQbU3Ce0n6fHuY5zewYI,1874
+videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/utils/common.py,sha256=F-30YoKUwWDI7HiJUWw0gRFUguhShSVaxT0aFfvpifg,936
+videopython-0.1.3.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
+videopython-0.1.3.dist-info/METADATA,sha256=xj8k5j3qPIVKgXbr4uTi6ad2BSs9j6-V6baonpQKoJI,14709
+videopython-0.1.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+videopython-0.1.3.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
+videopython-0.1.3.dist-info/RECORD,,
videopython/__init__.py DELETED
File without changes
videopython/utils/stability_generation.py DELETED
@@ -1,75 +0,0 @@
-import io
-import os
-from pathlib import Path
-
-import numpy as np
-import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
-from PIL import Image
-from stability_sdk import client
-
-from videopython.utils.common import generate_random_name
-
-API_KEY = os.getenv("STABILITY_KEY")
-if not API_KEY:
-    raise KeyError(
-        "Stability API key was not found in the environment! Please set in as `STABILITY_KEY` in your environment."
-    )
-
-
-def get_image_from_prompt(
-    prompt: str,
-    output_dir: str | None = None,
-    width: int = 1024,
-    height: int = 1024,
-    num_samples: int = 1,
-    steps: int = 30,
-    cfg_scale: float = 8.0,
-    engine: str = "stable-diffusion-xl-1024-v1-0",
-    verbose: bool = True,
-    seed: int = 1,
-) -> tuple[np.ndarray, str]:
-    """Generates image from prompt using the stability.ai API."""
-    # Generate image
-    stability_api = client.StabilityInference(
-        key=API_KEY,
-        verbose=verbose,
-        engine=engine,  # Set the engine to use for generation.
-        # Check out the following link for a list of available engines: https://platform.stability.ai/docs/features/api-parameters#engine
-    )
-    answers = stability_api.generate(
-        prompt=prompt,
-        seed=seed,
-        steps=steps,  # Amount of inference steps performed on image generation.
-        cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
-        # Setting this value higher increases the strength in which it tries to match your prompt.
-        # Defaults to 7.0 if not specified.
-        width=width,
-        height=height,
-        samples=num_samples,
-        sampler=generation.SAMPLER_K_DPMPP_2M  # Choose which sampler we want to denoise our generation with.
-        # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
-        # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
-    )
-    # Create output path
-    if output_dir:
-        output_dir = Path(output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        output_dir = Path(os.getcwd())
-    filename = output_dir / generate_random_name(suffix=".png")
-    # Parse API response
-    for resp in answers:
-        for artifact in resp.artifacts:
-            if artifact.finish_reason == generation.FILTER:
-                raise RuntimeError(
-                    "Your request activated the API's safety filters and could not be processed."
-                    "Please modify the prompt and try again."
-                )
-
-            if artifact.type == generation.ARTIFACT_IMAGE:
-                img = Image.open(io.BytesIO(artifact.binary))
-                img.save(filename)
-            else:
-                raise ValueError(f"Unknown artifact type: {artifact.type}")
-
-    return np.array(img), filename
videopython/utils/text_to_speech.py DELETED
@@ -1,24 +0,0 @@
-import os
-from pathlib import Path
-
-from openai import OpenAI
-
-from videopython.utils.common import generate_random_name
-
-
-def text_to_speech_openai(text: str, voice: str = "alloy", output_dir: Path | None = None) -> Path:
-    client = OpenAI()
-    filename = generate_random_name(suffix=".mp3")
-
-    if output_dir:
-        output_dir = Path(output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        output_dir = Path(os.getcwd())
-
-    save_path = output_dir / filename
-
-    response = client.audio.speech.create(model="tts-1", voice=voice, input=text)
-    response.stream_to_file(save_path)
-
-    return save_path
@@ -1,15 +0,0 @@
-videopython/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
-videopython/base/transforms.py,sha256=aXIqbp9sZkZI5PYRn0uDSxLoQxCdku1BAmzfQpnGW_w,2701
-videopython/base/transitions.py,sha256=VQXJ-sGL7lcr3Q6uhb66hLlqW9213UBUAAH6DqJa9xs,2159
-videopython/base/video.py,sha256=KxhQt_xJp9YhuTsbBdCdXgEMRZCckUyv0I2xwZYWSrk,10167
-videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/utils/common.py,sha256=lms--xc-5Jj4cVsD_W_FBw1n_8XnHGiCtJEOx4f5dV4,461
-videopython/utils/stability_generation.py,sha256=POAQLgrwhyl-tvPsZpRphe8du2azqotaLWBu70_IaH8,2928
-videopython/utils/text_to_speech.py,sha256=wSRd2JnDDubIu2-vqnN80hGdvf4EpS1XZ68S8uQei8w,640
-videopython-0.1.1.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
-videopython-0.1.1.dist-info/METADATA,sha256=VJpGeOrbHUkDh2wajI-K6hKYjjSAIHn5z9eB5SQGQqA,14709
-videopython-0.1.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-videopython-0.1.1.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
-videopython-0.1.1.dist-info/RECORD,,