videopython 0.1.2.tar.gz → 0.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of videopython might be problematic.

Files changed (32)
  1. {videopython-0.1.2 → videopython-0.1.3}/PKG-INFO +5 -5
  2. {videopython-0.1.2 → videopython-0.1.3}/pyproject.toml +12 -5
  3. videopython-0.1.3/src/videopython/base/effects.py +57 -0
  4. {videopython-0.1.2 → videopython-0.1.3}/src/videopython/base/transforms.py +41 -5
  5. {videopython-0.1.2 → videopython-0.1.3}/src/videopython/base/transitions.py +7 -5
  6. {videopython-0.1.2 → videopython-0.1.3}/src/videopython/base/video.py +32 -59
  7. videopython-0.1.3/src/videopython/generation/__init__.py +10 -0
  8. videopython-0.1.3/src/videopython/generation/audio.py +30 -0
  9. videopython-0.1.3/src/videopython/generation/image.py +60 -0
  10. videopython-0.1.3/src/videopython/generation/video.py +47 -0
  11. videopython-0.1.3/src/videopython/utils/common.py +31 -0
  12. {videopython-0.1.2 → videopython-0.1.3}/src/videopython.egg-info/PKG-INFO +5 -5
  13. {videopython-0.1.2 → videopython-0.1.3}/src/videopython.egg-info/SOURCES.txt +6 -3
  14. videopython-0.1.3/src/videopython.egg-info/requires.txt +10 -0
  15. videopython-0.1.3/tests/test_effects.py +24 -0
  16. videopython-0.1.2/src/videopython/generation/openai/text_to_speech.py +0 -31
  17. videopython-0.1.2/src/videopython/generation/stability/text_to_image.py +0 -77
  18. videopython-0.1.2/src/videopython/utils/__init__.py +0 -0
  19. videopython-0.1.2/src/videopython/utils/common.py +0 -20
  20. videopython-0.1.2/src/videopython.egg-info/requires.txt +0 -10
  21. {videopython-0.1.2 → videopython-0.1.3}/LICENSE +0 -0
  22. {videopython-0.1.2 → videopython-0.1.3}/README.md +0 -0
  23. {videopython-0.1.2 → videopython-0.1.3}/setup.cfg +0 -0
  24. {videopython-0.1.2/src/videopython → videopython-0.1.3/src/videopython/base}/__init__.py +0 -0
  25. {videopython-0.1.2 → videopython-0.1.3}/src/videopython/base/compose.py +0 -0
  26. {videopython-0.1.2/src/videopython/base → videopython-0.1.3/src/videopython/utils}/__init__.py +0 -0
  27. {videopython-0.1.2 → videopython-0.1.3}/src/videopython.egg-info/dependency_links.txt +0 -0
  28. {videopython-0.1.2 → videopython-0.1.3}/src/videopython.egg-info/top_level.txt +0 -0
  29. {videopython-0.1.2 → videopython-0.1.3}/tests/test_compose.py +0 -0
  30. {videopython-0.1.2 → videopython-0.1.3}/tests/test_transforms.py +0 -0
  31. {videopython-0.1.2 → videopython-0.1.3}/tests/test_transitions.py +0 -0
  32. {videopython-0.1.2 → videopython-0.1.3}/tests/test_video.py +0 -0

--- videopython-0.1.2/PKG-INFO
+++ videopython-0.1.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: videopython
-Version: 0.1.2
+Version: 0.1.3
 Summary: Minimal video generation and processing library.
 Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
 License: Apache License
@@ -210,12 +210,12 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click>=8.1.7
 Requires-Dist: numpy>=1.25.2
-Requires-Dist: opencv-python>=4.7.0.68
+Requires-Dist: opencv-python>=4.9.0.80
 Requires-Dist: pytest>=7.4.0
-Requires-Dist: transformers>=4.36.0
-Requires-Dist: diffusers>=0.21.4
+Requires-Dist: transformers>=4.38.1
+Requires-Dist: diffusers>=0.26.3
 Requires-Dist: torch>=2.1.0
-Requires-Dist: stability-sdk>=0.8.4
+Requires-Dist: stability-sdk>=0.8.5
 Requires-Dist: openai==1.3.5
 Requires-Dist: pydub>=0.25.1
 

--- videopython-0.1.2/pyproject.toml
+++ videopython-0.1.3/pyproject.toml
@@ -2,9 +2,16 @@
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["videopython.*"]
+
+[tool.setuptools.package-data]
+"videopython" = ["py.typed"]
+
 [project]
 name = "videopython"
-version = "0.1.2"
+version = "0.1.3"
 description = "Minimal video generation and processing library."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -28,12 +35,12 @@ classifiers = [
 dependencies = [
     "click>=8.1.7",
     "numpy>=1.25.2",
-    "opencv-python>=4.7.0.68",
+    "opencv-python>=4.9.0.80",
     "pytest>=7.4.0",
-    "transformers>=4.36.0",
-    "diffusers>=0.21.4",
+    "transformers>=4.38.1",
+    "diffusers>=0.26.3",
     "torch>=2.1.0",
-    "stability-sdk>=0.8.4",
+    "stability-sdk>=0.8.5",
     "openai==1.3.5",
     "pydub>=0.25.1"
 ]

--- /dev/null
+++ videopython-0.1.3/src/videopython/base/effects.py
@@ -0,0 +1,57 @@
+from abc import ABC, abstractmethod
+from typing import final
+
+import numpy as np
+from tqdm import tqdm
+
+from videopython.base.video import Video
+
+
+class Effect(ABC):
+    """Abstract class for effect on frames of video.
+
+    The effect must not change the number of frames and the shape of the frames.
+    """
+
+    @final
+    def apply(self, video: Video) -> Video:
+        original_shape = video.video_shape
+        video_with_effect = self._apply(video)
+        if not video_with_effect.video_shape == original_shape:
+            raise RuntimeError("The effect must not change the number of frames and the shape of the frames!")
+        return video_with_effect
+
+    @abstractmethod
+    def _apply(self, video: Video) -> Video:
+        pass
+
+
+class FullImageOverlay(Effect):
+    def __init__(self, overlay_image: np.ndarray, alpha: float | None = None):
+        if alpha is not None and not 0 <= alpha <= 1:
+            raise ValueError("Alpha must be in range [0, 1]!")
+        elif not (overlay_image.ndim == 3 and overlay_image.shape[-1] in [3, 4]):
+            raise ValueError("Only RGB and RGBA images are supported as an overlay!")
+        elif alpha is None:
+            alpha = 1.0
+
+        if overlay_image.shape[-1] == 3:
+            overlay_image = np.dstack([overlay_image, np.full(overlay_image.shape[:2], 255, dtype=np.uint8)])
+        overlay_image[:, :, 3] = overlay_image[:, :, 3] * alpha
+
+        self._overlay_alpha = (overlay_image[:, :, 3] / 255.0)[:, :, np.newaxis]
+        self._base_transparency = 1 - self._overlay_alpha
+
+        self.overlay = overlay_image[:, :, :3] * self._overlay_alpha
+
+    def _overlay(self, img: np.ndarray) -> np.ndarray:
+        return self.overlay + (img * self._base_transparency)
+
+    def _apply(self, video: Video) -> Video:
+        if not video.frame_shape == self.overlay.shape:
+            raise ValueError(
+                f"Mismatch of overlay shape `{self.overlay.shape}` with video shape: `{video.frame_shape}`!"
+            )
+        print("Overlaying video...")
+        video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
+        return video
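
For illustration only (this snippet is not part of the released files): a minimal sketch of driving the new FullImageOverlay effect, using a clip built from raw frames; the sizes and pixel values below are arbitrary.

    import numpy as np

    from videopython.base.effects import FullImageOverlay
    from videopython.base.video import Video

    # One second of black 480x640 footage at 24 FPS, as a stand-in for a real clip.
    video = Video.from_frames(np.zeros((24, 480, 640, 3), dtype=np.uint8), fps=24.0)

    # A white RGBA overlay matching the frame size, at roughly 50% opacity via its alpha channel.
    overlay = 255 * np.ones((*video.frame_shape[:2], 4), dtype=np.uint8)
    overlay[:, :, 3] = 127

    # apply() raises RuntimeError if an effect changes the frame count or frame shape.
    video = FullImageOverlay(overlay).apply(video)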

--- videopython-0.1.2/src/videopython/base/transforms.py
+++ videopython-0.1.3/src/videopython/base/transforms.py
@@ -3,6 +3,7 @@ from multiprocessing import Pool
 
 import cv2
 import numpy as np
+from tqdm import tqdm
 
 from videopython.base.video import Video
 
@@ -14,9 +15,6 @@ class Transformation(ABC):
     def apply(self, video: Video) -> Video:
         pass
 
-    def __call__(self, video: Video) -> Video:
-        return self.apply(video)
-
 
 class TransformationPipeline:
     def __init__(self, transformations: list[Transformation] | None):
@@ -58,7 +56,7 @@ class CutFrames(Transformation):
         self.end_frame = end_frame
 
     def apply(self, video: Video) -> Video:
-        video.frames = video.frames[self.start_frame : self.end_frame]
+        video = video[self.start_frame : self.end_frame]
         return video
 
 
@@ -68,7 +66,7 @@ class CutSeconds(Transformation):
         self.end_second = end_second
 
     def apply(self, video: Video) -> Video:
-        video.frames = video.frames[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
+        video = video[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
         return video
 
 
@@ -92,3 +90,41 @@ class Resize(Transformation):
         )
         video.frames = np.array(frames_copy)
         return video
+
+
+class ResampleFPS(Transformation):
+    def __init__(self, new_fps: int | float):
+        self.new_fps = float(new_fps)
+
+    def _downsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.round(np.linspace(0, len(video.frames) - 1, target_frame_count)).astype(int)
+        video.frames = video.frames[new_frame_indices]
+        video.fps = self.new_fps
+        return video
+
+    def _upsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.linspace(0, len(video.frames) - 1, target_frame_count)
+        new_frames = []
+        for i in tqdm(range(len(new_frame_indices) - 1)):
+            # Interpolate between the two nearest frames
+            ratio = new_frame_indices[i] % 1
+            new_frame = (1 - ratio) * video.frames[int(new_frame_indices[i])] + ratio * video.frames[
+                int(np.ceil(new_frame_indices[i]))
+            ]
+            new_frames.append(new_frame.astype(np.uint8))
+        video.frames = np.array(new_frames, dtype=np.uint8)
+        video.fps = self.new_fps
+        return video
+
+    def apply(self, video: Video) -> Video:
+        if video.fps == self.new_fps:
+            return video
+        elif video.fps > self.new_fps:
+            print(f"Downsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._downsample(video)
+        else:
+            print(f"Upsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._upsample(video)
+        return video
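
For illustration only (not part of the released files): a rough sketch of chaining the transformations above. CutSeconds is assumed to take the start and end seconds positionally, and transformations are now invoked through apply(), since __call__ was removed in this release.

    import numpy as np

    from videopython.base.transforms import CutSeconds, ResampleFPS
    from videopython.base.video import Video

    # Three seconds of black 24 FPS footage as a stand-in for a real clip.
    video = Video.from_frames(np.zeros((72, 480, 640, 3), dtype=np.uint8), fps=24.0)

    # Keep the first two seconds, then interpolate up to 30 FPS.
    video = CutSeconds(0, 2).apply(video)
    video = ResampleFPS(30).apply(video)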

--- videopython-0.1.2/src/videopython/base/transitions.py
+++ videopython-0.1.3/src/videopython/base/transitions.py
@@ -15,19 +15,19 @@ class Transition(ABC):
     """
 
     @final
-    def apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
+    def apply(self, videos: tuple[Video, Video]) -> Video:
         assert videos[0].metadata.can_be_merged_with(videos[1].metadata)
-        return self._apply(videos, **kwargs)
+        return self._apply(videos)
 
     @abstractmethod
-    def _apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         pass
 
 
 class InstantTransition(Transition):
     """Instant cut without any transition."""
 
-    def _apply(self, videos: list[Video] | tuple[Video]) -> Video:
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         return videos[0] + videos[1]
 
 
@@ -57,7 +57,7 @@ class FadeTransition(Transition):
         effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
         transition = self.fade(videos[0].frames[-effect_time_fps:], videos[1].frames[:effect_time_fps])
 
-        return Video.from_frames(
+        faded_videos = Video.from_frames(
             np.r_[
                 "0,2",
                 videos[0].frames[:-effect_time_fps],
@@ -66,3 +66,5 @@ class FadeTransition(Transition):
             ],
             fps=video_fps,
         )
+        faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
+        return faded_videos
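
For illustration only (not part of the released files): a sketch of fading between two compatible clips, assuming FadeTransition takes the fade duration in seconds; since 0.1.3 the audio tracks are cross-faded alongside the frames.

    import numpy as np

    from videopython.base.transitions import FadeTransition
    from videopython.base.video import Video

    # Two clips with identical resolution and FPS; from_frames() attaches silent audio to each.
    clip_a = Video.from_frames(np.zeros((48, 480, 640, 3), dtype=np.uint8), fps=24.0)
    clip_b = Video.from_frames(np.full((48, 480, 640, 3), 255, dtype=np.uint8), fps=24.0)

    # One-second cross-fade of both frames and audio.
    merged = FadeTransition(1.0).apply((clip_a, clip_b))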

--- videopython-0.1.2/src/videopython/base/video.py
+++ videopython-0.1.3/src/videopython/base/video.py
@@ -7,11 +7,9 @@ from pathlib import Path
 
 import cv2
 import numpy as np
-import torch
-from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
 from pydub import AudioSegment
 
-from videopython.utils.common import generate_random_name
+from videopython.utils.common import check_path, generate_random_name
 
 
 @dataclass
@@ -117,7 +115,7 @@ class Video:
         audio = cls._load_audio_from_path(path)
         if not audio:
             print(f"No audio found for `{path}`, adding silent track!")
-            audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
+            audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         new_vid.audio = audio
         return new_vid
 
@@ -126,7 +124,7 @@ class Video:
         new_vid = cls()
         new_vid.frames = frames
         new_vid.fps = fps
-        new_vid.audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         return new_vid
 
     @classmethod
@@ -136,37 +134,9 @@ class Video:
         image = np.expand_dims(image, axis=0)
         new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
         new_vid.fps = fps
-        new_vid.audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         return new_vid
 
-    @classmethod
-    def from_prompt(
-        cls,
-        prompt: str,
-        num_steps: int = 25,
-        height: int = 320,
-        width: int = 576,
-        num_frames: int = 24,
-        gpu_optimized: bool = False,
-    ) -> Video:
-        pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch_dtype)
-        if gpu_optimized:
-            pipe.enable_model_cpu_offload()
-            torch_dtype = torch.float16
-        else:
-            torch_dtype = torch.float32
-        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-        video_frames = np.asarray(
-            pipe(
-                prompt,
-                num_inference_steps=num_steps,
-                height=height,
-                width=width,
-                num_frames=num_frames,
-            ).frames
-        )
-        return Video.from_frames(video_frames, fps=24.0)
-
     def copy(self) -> Video:
         copied = Video().from_frames(self.frames.copy(), self.fps)
         copied.audio = self.audio
@@ -198,19 +168,10 @@ class Video:
         """
         if not self.is_loaded():
             raise RuntimeError(f"Video is not loaded, cannot save!")
-        # Check filename correctness or generate a new one if not given
-        if not filename:
-            filename = Path(generate_random_name()).resolve()
-            directory = filename.parent
-        elif not Path(filename).suffix == ".mp4":
-            raise ValueError("Only .mp4 save option is supported.")
-        else:
-            filename = Path(filename)
-            directory = filename.parent
-        if not directory.exists():
-            raise ValueError(f"Selected directory `{directory}` does not exist!")
 
-        filename, directory = str(filename), str(directory)
+        if filename is None:
+            filename = generate_random_name(suffix=".mp4")
+        filename = check_path(filename, dir_exists=True, suffix=".mp4")
 
         ffmpeg_video_command = (
             f"ffmpeg -loglevel error -y -framerate {self.fps} -f rawvideo -pix_fmt rgb24"
@@ -250,10 +211,14 @@ class Video:
 
     def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
         new_audio = self._load_audio_from_path(path)
-        if (duration_diff := self.total_seconds - new_audio.duration_seconds) > 0 and not loop:
+        if new_audio is None:
+            print(f"Audio file `{path}` not found, skipping!")
+            return
+
+        if (duration_diff := round(self.total_seconds - new_audio.duration_seconds)) > 0 and not loop:
             new_audio = new_audio + AudioSegment.silent(duration_diff * 1000)
         elif new_audio.duration_seconds > self.total_seconds:
-            new_audio = new_audio[: self.total_seconds * 1000]
+            new_audio = new_audio[: round(self.total_seconds * 1000)]
 
         if overlay:
             self.audio = self.audio.overlay(new_audio, loop=loop, gain_during_overlay=overlay_gain)
@@ -276,17 +241,25 @@ class Video:
     def __str__(self) -> str:
         return str(self.metadata)
 
-    def __getitem__(self, val: int | slice) -> Video | np.ndarray:
-        if isinstance(val, slice):
-            # Sub-slice video if given a slice
-            sliced = self.from_frames(self.frames[val], fps=self.fps)
-            audio_start = (val.start / self.fps) * 1000
-            audio_end = (val.stop / self.fps) * 1000
-            sliced.audio = self.audio[audio_start:audio_end]
-            return sliced
-        elif isinstance(val, int):
-            # Return single frame for integer indexing
-            return self.frames[val]
+    def __getitem__(self, val: slice) -> Video:
+        if not isinstance(val, slice):
+            raise ValueError("Only slices are supported for video indexing!")
+
+        # Sub-slice video if given a slice
+        sliced = self.from_frames(self.frames[val], fps=self.fps)
+        # Handle slicing without value for audio
+        start = val.start if val.start else 0
+        stop = val.stop if val.stop else len(self.frames)
+        # Handle negative values for audio slices
+        if start < 0:
+            start = len(self.frames) + start
+        if stop < 0:
+            stop = len(self.frames) + stop
+        # Append audio to the slice
+        audio_start = round(start / self.fps) * 1000
+        audio_end = round(stop / self.fps) * 1000
+        sliced.audio = self.audio[audio_start:audio_end]
+        return sliced
 
     @staticmethod
     def _load_audio_from_path(path: str) -> AudioSegment | None:
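
For illustration only (not part of the released files): a sketch of the 0.1.3 slicing behaviour, where integer indexing is gone and a slice returns a new Video with the matching audio segment attached.

    import numpy as np

    from videopython.base.video import Video

    # Ten seconds of black footage at 24 FPS with a silent audio track.
    video = Video.from_frames(np.zeros((240, 480, 640, 3), dtype=np.uint8), fps=24.0)

    first_five_seconds = video[: 24 * 5]  # frames plus the first 5000 ms of audio
    last_second = video[-24:]             # negative bounds are translated for the audio slice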

--- /dev/null
+++ videopython-0.1.3/src/videopython/generation/__init__.py
@@ -0,0 +1,10 @@
+from .audio import TextToSpeech
+from .image import TextToImage
+from .video import ImageToVideo, TextToVideo
+
+__all__ = [
+    "ImageToVideo",
+    "TextToSpeech",
+    "TextToImage",
+    "TextToVideo",
+]

--- /dev/null
+++ videopython-0.1.3/src/videopython/generation/audio.py
@@ -0,0 +1,30 @@
+import os
+from pathlib import Path
+from typing import Literal
+
+from openai import OpenAI
+from pydub import AudioSegment
+
+from videopython.utils.common import generate_random_name
+
+
+class TextToSpeech:
+    def __init__(self, openai_key: str | None = None, save_audio: bool = True):
+        self.client = OpenAI(api_key=openai_key)
+        self._save = save_audio
+
+    def generate_audio(
+        self,
+        text: str,
+        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"] = "alloy",
+    ) -> AudioSegment:
+        filename = generate_random_name(suffix=".mp3")
+        output_path = str((Path(os.getcwd()) / filename).resolve())
+        response = self.client.audio.speech.create(model="tts-1", voice=voice, input=text)
+        response.stream_to_file(output_path)
+        audio = AudioSegment.from_file(output_path)
+        if self._save:
+            print(f"Audio saved to {output_path}")
+        else:
+            os.remove(output_path)
+        return audio
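
For illustration only (not part of the released files): a sketch of the new TextToSpeech wrapper; it needs an OpenAI API key, passed as the openai_key argument or picked up from the environment by the OpenAI client.

    from videopython.generation import TextToSpeech

    tts = TextToSpeech(save_audio=False)  # with save_audio=False the temporary .mp3 is deleted
    narration = tts.generate_audio("Hello from videopython!", voice="nova")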

--- /dev/null
+++ videopython-0.1.3/src/videopython/generation/image.py
@@ -0,0 +1,60 @@
+import io
+import os
+
+import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
+from PIL import Image
+from stability_sdk import client
+
+
+class TextToImage:
+    def __init__(
+        self,
+        stability_key: str | None = None,
+        engine: str = "stable-diffusion-xl-1024-v1-0",
+        verbose: bool = True,
+    ):
+        stability_key = stability_key or os.getenv("STABILITY_KEY")
+        if stability_key is None:
+            raise ValueError(
+                "API Key for stability is required. Please provide it as an argument"
+                " or set it as an environment variable `STABILITY_KEY`. "
+            )
+
+        self.client = client.StabilityInference(stability_key, verbose=verbose, engine=engine)
+
+    def generate_image(
+        self,
+        prompt: str,
+        width: int = 1024,
+        height: int = 1024,
+        steps: int = 30,
+        cfg_scale: float = 8.0,
+        seed: int = 1,
+    ) -> Image.Image:
+        answers = self.client.generate(
+            prompt=prompt,
+            seed=seed,
+            steps=steps,  # Amount of inference steps performed on image generation.
+            cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
+            # Setting this value higher increases the strength in which it tries to match your prompt.
+            # Defaults to 7.0 if not specified.
+            width=width,
+            height=height,
+            safety=False,
+            samples=1,
+            sampler=generation.SAMPLER_K_DPMPP_2M,  # Choose which sampler we want to denoise our generation with.
+            # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
+            # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
+        )
+        for resp in answers:
+            for artifact in resp.artifacts:
+                if artifact.finish_reason == generation.FILTER:
+                    raise RuntimeError(
+                        "Your request activated the API's safety filters and could not be processed."
+                        "Please modify the prompt and try again."
+                    )
+                if artifact.type == generation.ARTIFACT_IMAGE:
+                    img = Image.open(io.BytesIO(artifact.binary))
+                else:
+                    raise ValueError(f"Unknown artifact type: {artifact.type}")
+        return img
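
For illustration only (not part of the released files): a sketch of the new TextToImage wrapper; the Stability key comes from the stability_key argument or the STABILITY_KEY environment variable.

    from videopython.generation import TextToImage

    text_to_image = TextToImage()
    image = text_to_image.generate_image("A watercolor lighthouse at dawn", width=1024, height=1024)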

--- /dev/null
+++ videopython-0.1.3/src/videopython/generation/video.py
@@ -0,0 +1,47 @@
+import numpy as np
+import torch
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from PIL.Image import Image
+
+from videopython.base.video import Video
+
+TEXT_TO_VIDEO_MODEL = "cerspense/zeroscope_v2_576w"
+IMAGE_TO_VIDEO_MODEL = "stabilityai/stable-video-diffusion-img2vid-xt"
+
+
+class TextToVideo:
+    def __init__(self, gpu_optimized: bool = True):
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16 if gpu_optimized else torch.float32
+        )
+        self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
+        if gpu_optimized:
+            self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(
+        self, prompt: str, num_steps: int = 25, height: int = 320, width: int = 576, num_frames: int = 24
+    ) -> Video:
+        video_frames = self.pipeline(
+            prompt,
+            num_inference_steps=num_steps,
+            height=height,
+            width=width,
+            num_frames=num_frames,
+        ).frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=24.0)
+
+
+class ImageToVideo:
+    def __init__(self):
+        if not torch.cuda.is_available():
+            raise ValueError("CUDA is not available, but ImageToVideo model requires CUDA.")
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            IMAGE_TO_VIDEO_MODEL, torch_dtype=torch.float16, variant="fp16"
+        ).to("cuda")
+        self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(self, image: Image, fps: int = 24) -> Video:
+        video_frames = self.pipeline(image=image, fps=fps, output_type="np").frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=float(fps))
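
For illustration only (not part of the released files): a sketch of the two generation entry points above; TextToVideo runs the zeroscope pipeline (a GPU is strongly advised), while ImageToVideo refuses to start without CUDA.

    from videopython.generation import ImageToVideo, TextToImage, TextToVideo

    # Text straight to a short clip.
    clip = TextToVideo(gpu_optimized=True).generate_video("A drone shot over a foggy forest")

    # Or generate a still first and animate it.
    image = TextToImage().generate_image("A foggy forest at sunrise")
    animated = ImageToVideo().generate_video(image, fps=24)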

--- /dev/null
+++ videopython-0.1.3/src/videopython/utils/common.py
@@ -0,0 +1,31 @@
+import time
+import uuid
+from pathlib import Path
+from typing import Callable
+
+
+def generate_random_name(suffix=".mp4"):
+    """Generates random name."""
+    return f"{uuid.uuid4()}{suffix}"
+
+
+def timeit(func: Callable):
+    """Decorator to measure execution time of a function."""
+
+    def timed(*args, **kwargs):
+        start = time.time()
+        result = func(*args, **kwargs)
+        end = time.time()
+        print(f"Execution time: {end - start:.3f} seconds.")
+        return result
+
+    return timed
+
+
+def check_path(path: str, dir_exists: bool = True, suffix: str | None = None) -> str:
+    fullpath = Path(path).resolve()
+    if dir_exists and not fullpath.parent.exists():
+        raise ValueError(f"Directory `{fullpath.parent}` does not exist!")
+    if suffix and suffix != fullpath.suffix:
+        raise ValueError(f"Required suffix `{suffix}` does not match the file suffix `{fullpath.suffix}`")
+    return str(fullpath)
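
For illustration only (not part of the released files): a sketch of the new helpers; check_path() resolves the path, verifies the parent directory exists and that the suffix matches, and is what the rewritten save path in video.py now relies on.

    from videopython.utils.common import check_path, generate_random_name, timeit

    output = check_path(generate_random_name(suffix=".mp4"), dir_exists=True, suffix=".mp4")

    @timeit  # prints "Execution time: ... seconds." after each call
    def render() -> None:
        ...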

--- videopython-0.1.2/src/videopython.egg-info/PKG-INFO
+++ videopython-0.1.3/src/videopython.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: videopython
-Version: 0.1.2
+Version: 0.1.3
 Summary: Minimal video generation and processing library.
 Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
 License: Apache License
@@ -210,12 +210,12 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click>=8.1.7
 Requires-Dist: numpy>=1.25.2
-Requires-Dist: opencv-python>=4.7.0.68
+Requires-Dist: opencv-python>=4.9.0.80
 Requires-Dist: pytest>=7.4.0
-Requires-Dist: transformers>=4.36.0
-Requires-Dist: diffusers>=0.21.4
+Requires-Dist: transformers>=4.38.1
+Requires-Dist: diffusers>=0.26.3
 Requires-Dist: torch>=2.1.0
-Requires-Dist: stability-sdk>=0.8.4
+Requires-Dist: stability-sdk>=0.8.5
 Requires-Dist: openai==1.3.5
 Requires-Dist: pydub>=0.25.1
 

--- videopython-0.1.2/src/videopython.egg-info/SOURCES.txt
+++ videopython-0.1.3/src/videopython.egg-info/SOURCES.txt
@@ -1,7 +1,6 @@
 LICENSE
 README.md
 pyproject.toml
-src/videopython/__init__.py
 src/videopython.egg-info/PKG-INFO
 src/videopython.egg-info/SOURCES.txt
 src/videopython.egg-info/dependency_links.txt
@@ -9,14 +8,18 @@ src/videopython.egg-info/requires.txt
 src/videopython.egg-info/top_level.txt
 src/videopython/base/__init__.py
 src/videopython/base/compose.py
+src/videopython/base/effects.py
 src/videopython/base/transforms.py
 src/videopython/base/transitions.py
 src/videopython/base/video.py
-src/videopython/generation/openai/text_to_speech.py
-src/videopython/generation/stability/text_to_image.py
+src/videopython/generation/__init__.py
+src/videopython/generation/audio.py
+src/videopython/generation/image.py
+src/videopython/generation/video.py
 src/videopython/utils/__init__.py
 src/videopython/utils/common.py
 tests/test_compose.py
+tests/test_effects.py
 tests/test_transforms.py
 tests/test_transitions.py
 tests/test_video.py

--- /dev/null
+++ videopython-0.1.3/src/videopython.egg-info/requires.txt
@@ -0,0 +1,10 @@
+click>=8.1.7
+numpy>=1.25.2
+opencv-python>=4.9.0.80
+pytest>=7.4.0
+transformers>=4.38.1
+diffusers>=0.26.3
+torch>=2.1.0
+stability-sdk>=0.8.5
+openai==1.3.5
+pydub>=0.25.1

--- /dev/null
+++ videopython-0.1.3/tests/test_effects.py
@@ -0,0 +1,24 @@
+import numpy as np
+
+from videopython.base.effects import FullImageOverlay
+
+
+def test_full_image_overlay_rgba(black_frames_video):
+    overlay_shape = (*black_frames_video.frame_shape[:2], 4)  # RGBA
+    overlay = 255 * np.ones(shape=overlay_shape, dtype=np.uint8)
+    overlay[:, :, 3] = 127
+
+    original_shape = black_frames_video.video_shape
+    overlayed_video = FullImageOverlay(overlay).apply(black_frames_video)
+
+    assert (overlayed_video.frames.flatten() == 127).all()
+    assert overlayed_video.video_shape == original_shape
+
+
+def test_full_image_overlay_rgb(black_frames_video):
+    overlay = 255 * np.ones(shape=black_frames_video.frame_shape, dtype=np.uint8)
+    original_shape = black_frames_video.video_shape
+    overlayed_video = FullImageOverlay(overlay, alpha=0.5).apply(black_frames_video)
+
+    assert (overlayed_video.frames.flatten() == 127).all()
+    assert overlayed_video.video_shape == original_shape
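
The black_frames_video fixture used by these tests is not included in the diff; a hypothetical conftest.py fixture along these lines would satisfy them.

    # tests/conftest.py (hypothetical)
    import numpy as np
    import pytest

    from videopython.base.video import Video


    @pytest.fixture
    def black_frames_video() -> Video:
        # One second of black 480x640 RGB frames at 24 FPS.
        return Video.from_frames(np.zeros((24, 480, 640, 3), dtype=np.uint8), fps=24.0)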

--- videopython-0.1.2/src/videopython/generation/openai/text_to_speech.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-from pathlib import Path
-
-from openai import OpenAI
-from pydub import AudioSegment
-
-from videopython.utils.common import generate_random_name
-
-
-def text_to_speech_openai(
-    text: str, voice: str = "alloy", save: bool = True, output_dir: str | None = None
-) -> str | AudioSegment:
-    client = OpenAI()
-
-    filename = generate_random_name(suffix=".mp3")
-    if output_dir:
-        output_dir = Path(output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        output_dir = Path(os.getcwd())
-    save_path = output_dir / filename
-
-    response = client.audio.speech.create(model="tts-1", voice=voice, input=text)
-    response.stream_to_file(save_path)
-
-    if save:
-        return str(save_path.resolve())
-    else:
-        audio = AudioSegment.from_mp3(str(save_path))
-        save_path.unlink()
-        return audio

--- videopython-0.1.2/src/videopython/generation/stability/text_to_image.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import io
-import os
-from pathlib import Path
-
-import numpy as np
-import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
-from PIL import Image
-from stability_sdk import client
-
-from videopython.utils.common import generate_random_name
-
-API_KEY = os.getenv("STABILITY_KEY")
-if not API_KEY:
-    raise KeyError(
-        "Stability API key was not found in the environment! Please set in as `STABILITY_KEY` in your environment."
-    )
-
-
-def text_to_image(
-    prompt: str,
-    save: bool = True,
-    output_dir: str | None = None,
-    width: int = 1024,
-    height: int = 1024,
-    num_samples: int = 1,
-    steps: int = 30,
-    cfg_scale: float = 8.0,
-    engine: str = "stable-diffusion-xl-1024-v1-0",
-    verbose: bool = True,
-    seed: int = 1,
-) -> np.ndarray | str:
-    """Generates image from prompt using the stability.ai API."""
-    # Generate image
-    stability_api = client.StabilityInference(
-        key=API_KEY,
-        verbose=verbose,
-        engine=engine,  # Set the engine to use for generation.
-        # Check out the following link for a list of available engines: https://platform.stability.ai/docs/features/api-parameters#engine
-    )
-    answers = stability_api.generate(
-        prompt=prompt,
-        seed=seed,
-        steps=steps,  # Amount of inference steps performed on image generation.
-        cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
-        # Setting this value higher increases the strength in which it tries to match your prompt.
-        # Defaults to 7.0 if not specified.
-        width=width,
-        height=height,
-        samples=num_samples,
-        sampler=generation.SAMPLER_K_DPMPP_2M  # Choose which sampler we want to denoise our generation with.
-        # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
-        # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
-    )
-    # Parse API response
-    for resp in answers:
-        for artifact in resp.artifacts:
-            if artifact.finish_reason == generation.FILTER:
-                raise RuntimeError(
-                    "Your request activated the API's safety filters and could not be processed."
-                    "Please modify the prompt and try again."
-                )
-            if artifact.type == generation.ARTIFACT_IMAGE:
-                img = Image.open(io.BytesIO(artifact.binary))
-            else:
-                raise ValueError(f"Unknown artifact type: {artifact.type}")
-
-    if save:
-        if output_dir:
-            output_dir = Path(output_dir)
-            output_dir.mkdir(parents=True, exist_ok=True)
-        else:
-            output_dir = Path(os.getcwd())
-        filename = output_dir / generate_random_name(suffix=".png")
-        img.save(filename)
-        return str(filename.resolve())
-    else:
-        return np.array(img)

--- videopython-0.1.2/src/videopython/utils/common.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import time
-import uuid
-
-
-def generate_random_name(suffix=".mp4"):
-    """Generates random name."""
-    return f"{uuid.uuid4()}{suffix}"
-
-
-def timeit(func: callable):
-    """Decorator to measure execution time of a function."""
-
-    def timed(*args, **kwargs):
-        start = time.time()
-        result = func(*args, **kwargs)
-        end = time.time()
-        print(f"Execution time: {end - start:.3f} seconds.")
-        return result
-
-    return timed

--- videopython-0.1.2/src/videopython.egg-info/requires.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-click>=8.1.7
-numpy>=1.25.2
-opencv-python>=4.7.0.68
-pytest>=7.4.0
-transformers>=4.36.0
-diffusers>=0.21.4
-torch>=2.1.0
-stability-sdk>=0.8.4
-openai==1.3.5
-pydub>=0.25.1