videopython 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of videopython might be problematic.

videopython/base/effects.py ADDED
@@ -0,0 +1,57 @@
+from abc import ABC, abstractmethod
+from typing import final
+
+import numpy as np
+from tqdm import tqdm
+
+from videopython.base.video import Video
+
+
+class Effect(ABC):
+    """Abstract class for effect on frames of video.
+
+    The effect must not change the number of frames and the shape of the frames.
+    """
+
+    @final
+    def apply(self, video: Video) -> Video:
+        original_shape = video.video_shape
+        video_with_effect = self._apply(video)
+        if not video_with_effect.video_shape == original_shape:
+            raise RuntimeError("The effect must not change the number of frames and the shape of the frames!")
+        return video_with_effect
+
+    @abstractmethod
+    def _apply(self, video: Video) -> Video:
+        pass
+
+
+class FullImageOverlay(Effect):
+    def __init__(self, overlay_image: np.ndarray, alpha: float | None = None):
+        if alpha is not None and not 0 <= alpha <= 1:
+            raise ValueError("Alpha must be in range [0, 1]!")
+        elif not (overlay_image.ndim == 3 and overlay_image.shape[-1] in [3, 4]):
+            raise ValueError("Only RGB and RGBA images are supported as an overlay!")
+        elif alpha is None:
+            alpha = 1.0
+
+        if overlay_image.shape[-1] == 3:
+            overlay_image = np.dstack([overlay_image, np.full(overlay_image.shape[:2], 255, dtype=np.uint8)])
+        overlay_image[:, :, 3] = overlay_image[:, :, 3] * alpha
+
+        self._overlay_alpha = (overlay_image[:, :, 3] / 255.0)[:, :, np.newaxis]
+        self._base_transparency = 1 - self._overlay_alpha
+
+        self.overlay = overlay_image[:, :, :3] * self._overlay_alpha
+
+    def _overlay(self, img: np.ndarray) -> np.ndarray:
+        return self.overlay + (img * self._base_transparency)
+
+    def _apply(self, video: Video) -> Video:
+        if not video.frame_shape == self.overlay.shape:
+            raise ValueError(
+                f"Mismatch of overlay shape `{self.overlay.shape}` with video shape: `{video.frame_shape}`!"
+            )
+        print("Overlaying video...")
+        video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
+        return video
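A minimal usage sketch for the new FullImageOverlay effect (illustration only, not part of the diff; the file names are placeholders):

    import numpy as np

    from videopython.base.effects import FullImageOverlay
    from videopython.base.video import Video

    video = Video.from_path("input.mp4")  # hypothetical input file
    # Build a frame-sized RGB overlay; an RGBA image with its own alpha channel also works.
    overlay = np.zeros((video.metadata.height, video.metadata.width, 3), dtype=np.uint8)
    overlay[..., 2] = 255  # solid blue layer, blended at 30% opacity below
    video = FullImageOverlay(overlay_image=overlay, alpha=0.3).apply(video)
    video.save("overlayed.mp4")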
videopython/base/transforms.py CHANGED
@@ -3,6 +3,7 @@ from multiprocessing import Pool
 
 import cv2
 import numpy as np
+from tqdm import tqdm
 
 from videopython.base.video import Video
 
@@ -14,9 +15,6 @@ class Transformation(ABC):
     def apply(self, video: Video) -> Video:
         pass
 
-    def __call__(self, video: Video) -> Video:
-        return self.apply(video)
-
 
 class TransformationPipeline:
     def __init__(self, transformations: list[Transformation] | None):
@@ -58,7 +56,7 @@ class CutFrames(Transformation):
         self.end_frame = end_frame
 
     def apply(self, video: Video) -> Video:
-        video.frames = video.frames[self.start_frame : self.end_frame]
+        video = video[self.start_frame : self.end_frame]
         return video
 
 
@@ -68,7 +66,7 @@ class CutSeconds(Transformation):
         self.end_second = end_second
 
     def apply(self, video: Video) -> Video:
-        video.frames = video.frames[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
+        video = video[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
         return video
 
 
@@ -92,3 +90,41 @@ class Resize(Transformation):
         )
         video.frames = np.array(frames_copy)
         return video
+
+
+class ResampleFPS(Transformation):
+    def __init__(self, new_fps: int | float):
+        self.new_fps = float(new_fps)
+
+    def _downsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.round(np.linspace(0, len(video.frames) - 1, target_frame_count)).astype(int)
+        video.frames = video.frames[new_frame_indices]
+        video.fps = self.new_fps
+        return video
+
+    def _upsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.linspace(0, len(video.frames) - 1, target_frame_count)
+        new_frames = []
+        for i in tqdm(range(len(new_frame_indices) - 1)):
+            # Interpolate between the two nearest frames
+            ratio = new_frame_indices[i] % 1
+            new_frame = (1 - ratio) * video.frames[int(new_frame_indices[i])] + ratio * video.frames[
+                int(np.ceil(new_frame_indices[i]))
+            ]
+            new_frames.append(new_frame.astype(np.uint8))
+        video.frames = np.array(new_frames, dtype=np.uint8)
+        video.fps = self.new_fps
+        return video
+
+    def apply(self, video: Video) -> Video:
+        if video.fps == self.new_fps:
+            return video
+        elif video.fps > self.new_fps:
+            print(f"Downsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._downsample(video)
+        else:
+            print(f"Upsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._upsample(video)
+        return video
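An illustrative sketch of the new ResampleFPS transformation combined with the slice-based cutting that CutFrames and CutSeconds now delegate to (paths are placeholders, not part of the diff):

    from videopython.base.transforms import ResampleFPS
    from videopython.base.video import Video

    video = Video.from_path("input.mp4")        # hypothetical input file
    clip = video[: round(5 * video.fps)]        # roughly the first 5 seconds, audio included
    clip = ResampleFPS(new_fps=30).apply(clip)  # drops or interpolates frames as needed
    clip.save("resampled_30fps.mp4")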
videopython/base/transitions.py CHANGED
@@ -15,19 +15,19 @@ class Transition(ABC):
     """
 
     @final
-    def apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
+    def apply(self, videos: tuple[Video, Video]) -> Video:
         assert videos[0].metadata.can_be_merged_with(videos[1].metadata)
-        return self._apply(videos, **kwargs)
+        return self._apply(videos)
 
     @abstractmethod
-    def _apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         pass
 
 
 class InstantTransition(Transition):
     """Instant cut without any transition."""
 
-    def _apply(self, videos: list[Video] | tuple[Video]) -> Video:
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         return videos[0] + videos[1]
 
 
@@ -57,7 +57,7 @@ class FadeTransition(Transition):
         effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
         transition = self.fade(videos[0].frames[-effect_time_fps:], videos[1].frames[:effect_time_fps])
 
-        return Video.from_frames(
+        faded_videos = Video.from_frames(
             np.r_[
                 "0,2",
                 videos[0].frames[:-effect_time_fps],
@@ -66,3 +66,5 @@ class FadeTransition(Transition):
             ],
             fps=video_fps,
         )
+        faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
+        return faded_videos
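A sketch of how the narrowed Transition API might be used; it assumes FadeTransition is constructed with the fade length in seconds, and the file names are placeholders:

    from videopython.base.transitions import FadeTransition
    from videopython.base.video import Video

    first = Video.from_path("scene_1.mp4")
    second = Video.from_path("scene_2.mp4")  # must match the first clip's fps and resolution
    # The audio tracks are now crossfaded over the same window as the visual fade.
    joined = FadeTransition(effect_time_seconds=1.0).apply((first, second))
    joined.save("joined.mp4")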
videopython/base/video.py CHANGED
@@ -1,17 +1,15 @@
 from __future__ import annotations
 
+import shlex
 import subprocess
-import tempfile
 from dataclasses import dataclass
 from pathlib import Path
 
 import cv2
 import numpy as np
-import torch
-from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
 from pydub import AudioSegment
 
-from videopython.utils.common import generate_random_name
+from videopython.utils.common import check_path, generate_random_name
 
 
 @dataclass
@@ -20,10 +18,9 @@ class VideoMetadata:
 
     height: int
     width: int
-    fps: int
+    fps: float
     frame_count: int
     total_seconds: float
-    with_audio: bool = False
 
     def __str__(self):
         return f"{self.height}x{self.width} @ {self.fps}fps, {self.total_seconds} seconds"
@@ -40,7 +37,7 @@
         return np.array((self.frame_count, self.height, self.width, 3))
 
     @classmethod
-    def from_path(cls, video_path: str):
+    def from_path(cls, video_path: str) -> VideoMetadata:
         """Creates VideoMetadata object from video file.
 
         Args:
@@ -48,7 +45,7 @@
         """
         video = cv2.VideoCapture(video_path)
         frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps = round(video.get(cv2.CAP_PROP_FPS))
+        fps = round(video.get(cv2.CAP_PROP_FPS), 2)
         height = round(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
         width = round(video.get(cv2.CAP_PROP_FRAME_WIDTH))
         total_seconds = round(frame_count / fps, 2)
@@ -62,7 +59,7 @@
         )
 
     @classmethod
-    def from_video(cls, video: Video):
+    def from_video(cls, video: Video) -> VideoMetadata:
         """Creates VideoMetadata object from frames.
 
         Args:
@@ -73,15 +70,12 @@
         frame_count, height, width, _ = video.frames.shape
         total_seconds = round(frame_count / video.fps, 2)
 
-        with_audio = bool(video.audio)
-
         return cls(
             height=height,
             width=width,
             fps=video.fps,
             frame_count=frame_count,
             total_seconds=total_seconds,
-            with_audio=with_audio,
         )
 
     def can_be_merged_with(self, other_format: VideoMetadata) -> bool:
@@ -115,146 +109,123 @@ class Video:
         self.audio = None
 
     @classmethod
-    def from_path(cls, path):
+    def from_path(cls, path: str) -> Video:
         new_vid = cls()
         new_vid.frames, new_vid.fps = cls._load_video_from_path(path)
+        audio = cls._load_audio_from_path(path)
+        if not audio:
+            print(f"No audio found for `{path}`, adding silent track!")
+            audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
+        new_vid.audio = audio
         return new_vid
 
     @classmethod
-    def from_frames(cls, frames, fps):
+    def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
         new_vid = cls()
         new_vid.frames = frames
         new_vid.fps = fps
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         return new_vid
 
     @classmethod
-    def from_image(cls, image: np.ndarray, fps: int = 24, length_seconds: float = 1.0):
+    def from_image(cls, image: np.ndarray, fps: float = 24.0, length_seconds: float = 1.0) -> Video:
         new_vid = cls()
         if len(image.shape) == 3:
             image = np.expand_dims(image, axis=0)
-
         new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
         new_vid.fps = fps
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
        return new_vid
 
-    @classmethod
-    def from_prompt(
-        cls,
-        prompt: str,
-        num_steps: int = 25,
-        height: int = 320,
-        width: int = 576,
-        num_frames: int = 24,
-        gpu_optimized: bool = False,
-    ):
-        torch_dtype = torch.float16 if gpu_optimized else torch.float32
-        # TODO: Make it model independent
-        pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch_dtype)
-        if gpu_optimized:
-            pipe.enable_model_cpu_offload()
-        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-        video_frames = np.asarray(
-            pipe(
-                prompt,
-                num_inference_steps=num_steps,
-                height=height,
-                width=width,
-                num_frames=num_frames,
-            ).frames
-        )
-        return Video.from_frames(video_frames, fps=24)
-
-    def add_audio_from_file(self, audio_path: str):
-        self.audio = AudioSegment.from_file(audio_path)
-
-    def __getitem__(self, val):
-        if isinstance(val, slice):
-            return self.from_frames(self.frames[val], fps=self.fps)
-        elif isinstance(val, int):
-            return self.frames[val]
-
-    def copy(self):
-        return Video().from_frames(self.frames.copy(), self.fps)
+    def copy(self) -> Video:
+        copied = Video().from_frames(self.frames.copy(), self.fps)
+        copied.audio = self.audio
+        return copied
 
     def is_loaded(self) -> bool:
-        return self.fps and self.frames
+        return self.fps is not None and self.frames is not None and self.audio is not None
 
-    def split(self, frame_idx: int | None = None):
+    def split(self, frame_idx: int | None = None) -> tuple[Video, Video]:
         if frame_idx:
             assert 0 <= frame_idx <= len(self.frames)
         else:
             frame_idx = len(self.frames) // 2
 
-        return (
+        split_videos = (
             self.from_frames(self.frames[:frame_idx], self.fps),
             self.from_frames(self.frames[frame_idx:], self.fps),
         )
+        audio_midpoint = (frame_idx / self.fps) * 1000
+        split_videos[0].audio = self.audio[:audio_midpoint]
+        split_videos[1].audio = self.audio[audio_midpoint:]
+        return split_videos
 
-    def _prepare_new_canvas(self, output_path: str):
-        """Prepares a new `self._transformed_video` canvas for cut video."""
-        canvas = cv2.VideoWriter(
-            filename=output_path,
-            fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
-            fps=self.fps,
-            frameSize=(self.video_shape[2], self.video_shape[1]),
-        )
-        return canvas
-
-    def save(self, filename: str = None) -> str:
-        """Transforms the video and saves as `filename`.
+    def save(self, filename: str | None = None) -> str:
+        """Saves the video.
 
         Args:
-            filename: Name of the output video file.
+            filename: Name of the output video file. Generates random UUID name if not provided.
         """
-        # Check correctness
-        if not filename:
-            filename = Path(generate_random_name()).resolve()
-            directory = filename.parent
-        elif not Path(filename).suffix == ".mp4":
-            raise ValueError("Only .mp4 save option is supported.")
-        else:
-            filename = Path(filename)
-            directory = filename.parent
-            if not directory.exists():
-                raise ValueError(f"Selected directory `{directory}` does not exist!")
-
-        filename, directory = str(filename), str(directory)
-        # Save video video opencv
-        canvas = self._prepare_new_canvas(filename)
-        for frame in self.frames[:, :, :, ::-1]:
-            canvas.write(frame)
-        cv2.destroyAllWindows()
-        canvas.release()
-        # If Video has audio, overlaay audio using ffmpeg
-        if self.audio:
-            filename_with_audio = tempfile.NamedTemporaryFile(suffix=".mp4").name
-
-            if len(self.audio) > self.total_seconds * 1000:
-                self.audio = self.audio[: self.total_seconds * 1000]
-            else:
-                self.audio += AudioSegment.silent(duration=self.total_seconds * 1000 - len(self.audio))
-
-            raw_audio = self.audio.raw_data
-            channels = self.audio.channels
-            frame_rate = self.audio.frame_rate
-
-            ffmpeg_command = (
-                f"ffmpeg -loglevel error -y -i {filename} -f s16le -acodec pcm_s16le -ar {frame_rate} -ac "
-                f"{channels} -i pipe:0 -c:v copy -c:a aac -strict experimental {filename_with_audio}"
-            )
+        if not self.is_loaded():
+            raise RuntimeError(f"Video is not loaded, cannot save!")
 
-            try:
-                subprocess.run(ffmpeg_command, input=raw_audio, check=True, shell=True)
-                print("Video with audio saved successfully.")
-            except subprocess.CalledProcessError as e:
-                print(f"Error saving video with audio: {e}")
+        if filename is None:
+            filename = generate_random_name(suffix=".mp4")
+        filename = check_path(filename, dir_exists=True, suffix=".mp4")
 
+        ffmpeg_video_command = (
+            f"ffmpeg -loglevel error -y -framerate {self.fps} -f rawvideo -pix_fmt rgb24"
+            f" -s {self.metadata.width}x{self.metadata.height} "
+            f"-i pipe:0 -c:v libx264 -pix_fmt yuv420p {filename}"
+        )
+
+        ffmpeg_audio_command = (
+            f"ffmpeg -loglevel error -y -i {filename} -f s16le -acodec pcm_s16le "
+            f"-ar {self.audio.frame_rate} -ac {self.audio.channels} -i pipe:0 "
+            f"-c:v copy -c:a aac -strict experimental {filename}_temp.mp4"
+        )
+
+        try:
+            print("Saving frames to video...")
+            subprocess.run(
+                ffmpeg_video_command,
+                input=self.frames.tobytes(),
+                check=True,
+                shell=True,
+            )
+        except subprocess.CalledProcessError as e:
+            print("Error saving frames to video!")
+            raise e
+
+        try:
+            print("Adding audio track...")
+            subprocess.run(ffmpeg_audio_command, input=self.audio.raw_data, check=True, shell=True)
             Path(filename).unlink()
-            Path(filename_with_audio).rename(filename)
+            Path(filename + "_temp.mp4").rename(filename)
+        except subprocess.CalledProcessError as e:
+            print(f"Error adding audio track!")
+            raise e
 
+        print(f"Video saved into `{filename}`!")
         return filename
 
-    def __add__(self, other):
+    def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
+        new_audio = self._load_audio_from_path(path)
+        if new_audio is None:
+            print(f"Audio file `{path}` not found, skipping!")
+            return
+
+        if (duration_diff := round(self.total_seconds - new_audio.duration_seconds)) > 0 and not loop:
+            new_audio = new_audio + AudioSegment.silent(duration_diff * 1000)
+        elif new_audio.duration_seconds > self.total_seconds:
+            new_audio = new_audio[: round(self.total_seconds * 1000)]
+
+        if overlay:
+            self.audio = self.audio.overlay(new_audio, loop=loop, gain_during_overlay=overlay_gain)
+        else:
+            self.audio = new_audio
+
+    def __add__(self, other: Video) -> Video:
         # TODO: Should it be class method? How to make it work with sum()?
         if self.fps != other.fps:
             raise ValueError("FPS of videos do not match!")
@@ -263,32 +234,53 @@ class Video:
                 "Resolutions of the images do not match: "
                 f"{self.frame_shape} not compatible with {other.frame_shape}."
             )
+        new_video = self.from_frames(np.r_["0,2", self.frames, other.frames], fps=self.fps)
+        new_video.audio = self.audio + other.audio
+        return new_video
+
+    def __str__(self) -> str:
+        return str(self.metadata)
+
+    def __getitem__(self, val: slice) -> Video:
+        if not isinstance(val, slice):
+            raise ValueError("Only slices are supported for video indexing!")
+
+        # Sub-slice video if given a slice
+        sliced = self.from_frames(self.frames[val], fps=self.fps)
+        # Handle slicing without value for audio
+        start = val.start if val.start else 0
+        stop = val.stop if val.stop else len(self.frames)
+        # Handle negative values for audio slices
+        if start < 0:
+            start = len(self.frames) + start
+        if stop < 0:
+            stop = len(self.frames) + stop
+        # Append audio to the slice
+        audio_start = round(start / self.fps) * 1000
+        audio_end = round(stop / self.fps) * 1000
+        sliced.audio = self.audio[audio_start:audio_end]
+        return sliced
 
-        return self.from_frames(np.r_["0,2", self.frames, other.frames], fps=self.fps)
+    @staticmethod
+    def _load_audio_from_path(path: str) -> AudioSegment | None:
+        try:
+            audio = AudioSegment.from_file(path)
+            return audio
+        except IndexError:
+            return None
 
     @staticmethod
-    def _load_video_from_path(path: str):
+    def _load_video_from_path(path: str) -> tuple[np.ndarray, float]:
         """Loads frames and fps information from video file.
 
         Args:
             path: Path to video file.
         """
         metadata = VideoMetadata.from_path(path)
-        ffmpeg_command = [
-            "ffmpeg",
-            "-i",
-            path,
-            "-f",
-            "rawvideo",
-            "-pix_fmt",
-            "rgb24",
-            "-loglevel",
-            "quiet",
-            "pipe:1",
-        ]
+        ffmpeg_command = f"ffmpeg -i {path} -f rawvideo -pix_fmt rgb24 -loglevel quiet pipe:1"
 
         # Run the ffmpeg command and capture the stdout
-        ffmpeg_process = subprocess.Popen(ffmpeg_command, stdout=subprocess.PIPE)
+        ffmpeg_process = subprocess.Popen(shlex.split(ffmpeg_command), stdout=subprocess.PIPE)
         ffmpeg_out, _ = ffmpeg_process.communicate()
 
         # Convert the raw video data to a NumPy array
@@ -309,7 +301,7 @@
     @property
     def total_seconds(self) -> float:
         """Returns total seconds of the video."""
-        return round(self.frames.shape[0] / self.fps, 1)
+        return round(self.frames.shape[0] / self.fps, 4)
 
     @property
     def metadata(self) -> VideoMetadata:
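Taken together, the reworked Video class supports a flow along these lines (a sketch only; file names are placeholders):

    from videopython.base.video import Video

    video = Video.from_path("input.mp4")      # a silent track is added when the file has no audio
    print(video)                              # e.g. "720x1280 @ 30.0fps, 12.5 seconds"
    trimmed = video[30:150]                   # slicing now carries the matching audio segment
    trimmed.add_audio_from_file("music.mp3")  # overlays by default; pass overlay=False to replace
    trimmed.save("trimmed.mp4")               # frames piped to ffmpeg, then the audio track is muxed in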
videopython/generation/__init__.py ADDED
@@ -0,0 +1,10 @@
+from .audio import TextToSpeech
+from .image import TextToImage
+from .video import ImageToVideo, TextToVideo
+
+__all__ = [
+    "ImageToVideo",
+    "TextToSpeech",
+    "TextToImage",
+    "TextToVideo",
+]
videopython/generation/audio.py ADDED
@@ -0,0 +1,30 @@
+import os
+from pathlib import Path
+from typing import Literal
+
+from openai import OpenAI
+from pydub import AudioSegment
+
+from videopython.utils.common import generate_random_name
+
+
+class TextToSpeech:
+    def __init__(self, openai_key: str | None = None, save_audio: bool = True):
+        self.client = OpenAI(api_key=openai_key)
+        self._save = save_audio
+
+    def generate_audio(
+        self,
+        text: str,
+        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"] = "alloy",
+    ) -> AudioSegment:
+        filename = generate_random_name(suffix=".mp3")
+        output_path = str((Path(os.getcwd()) / filename).resolve())
+        response = self.client.audio.speech.create(model="tts-1", voice=voice, input=text)
+        response.stream_to_file(output_path)
+        audio = AudioSegment.from_file(output_path)
+        if self._save:
+            print(f"Audio saved to {output_path}")
+        else:
+            os.remove(output_path)
+        return audio
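A hedged usage sketch for the new TextToSpeech wrapper; it assumes a valid OpenAI key is either passed as openai_key or picked up by the OpenAI client from the environment:

    from videopython.generation import TextToSpeech

    tts = TextToSpeech(save_audio=False)  # set save_audio=True to keep the generated .mp3
    narration = tts.generate_audio("Hello from videopython!", voice="nova")
    print(narration.duration_seconds)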
videopython/generation/image.py ADDED
@@ -0,0 +1,60 @@
+import io
+import os
+
+import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
+from PIL import Image
+from stability_sdk import client
+
+
+class TextToImage:
+    def __init__(
+        self,
+        stability_key: str | None = None,
+        engine: str = "stable-diffusion-xl-1024-v1-0",
+        verbose: bool = True,
+    ):
+        stability_key = stability_key or os.getenv("STABILITY_KEY")
+        if stability_key is None:
+            raise ValueError(
+                "API Key for stability is required. Please provide it as an argument"
+                " or set it as an environment variable `STABILITY_KEY`. "
+            )
+
+        self.client = client.StabilityInference(stability_key, verbose=verbose, engine=engine)
+
+    def generate_image(
+        self,
+        prompt: str,
+        width: int = 1024,
+        height: int = 1024,
+        steps: int = 30,
+        cfg_scale: float = 8.0,
+        seed: int = 1,
+    ) -> Image.Image:
+        answers = self.client.generate(
+            prompt=prompt,
+            seed=seed,
+            steps=steps,  # Amount of inference steps performed on image generation.
+            cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
+            # Setting this value higher increases the strength in which it tries to match your prompt.
+            # Defaults to 7.0 if not specified.
+            width=width,
+            height=height,
+            safety=False,
+            samples=1,
+            sampler=generation.SAMPLER_K_DPMPP_2M,  # Choose which sampler we want to denoise our generation with.
+            # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
+            # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
+        )
+        for resp in answers:
+            for artifact in resp.artifacts:
+                if artifact.finish_reason == generation.FILTER:
+                    raise RuntimeError(
+                        "Your request activated the API's safety filters and could not be processed."
+                        "Please modify the prompt and try again."
+                    )
+                if artifact.type == generation.ARTIFACT_IMAGE:
+                    img = Image.open(io.BytesIO(artifact.binary))
+                else:
+                    raise ValueError(f"Unknown artifact type: {artifact.type}")
+        return img
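A sketch of the class-based TextToImage API that supersedes the stability helper deleted later in this diff; it assumes STABILITY_KEY is set (or passed as stability_key) and the prompt is only an example:

    import numpy as np

    from videopython.base.video import Video
    from videopython.generation import TextToImage

    text_to_image = TextToImage()
    image = text_to_image.generate_image("A watercolor lighthouse at dusk")
    still = Video.from_image(np.array(image), fps=24.0, length_seconds=2.0)
    still.save("lighthouse.mp4")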
videopython/generation/video.py ADDED
@@ -0,0 +1,47 @@
+import numpy as np
+import torch
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from PIL.Image import Image
+
+from videopython.base.video import Video
+
+TEXT_TO_VIDEO_MODEL = "cerspense/zeroscope_v2_576w"
+IMAGE_TO_VIDEO_MODEL = "stabilityai/stable-video-diffusion-img2vid-xt"
+
+
+class TextToVideo:
+    def __init__(self, gpu_optimized: bool = True):
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16 if gpu_optimized else torch.float32
+        )
+        self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
+        if gpu_optimized:
+            self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(
+        self, prompt: str, num_steps: int = 25, height: int = 320, width: int = 576, num_frames: int = 24
+    ) -> Video:
+        video_frames = self.pipeline(
+            prompt,
+            num_inference_steps=num_steps,
+            height=height,
+            width=width,
+            num_frames=num_frames,
+        ).frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=24.0)
+
+
+class ImageToVideo:
+    def __init__(self):
+        if not torch.cuda.is_available():
+            raise ValueError("CUDA is not available, but ImageToVideo model requires CUDA.")
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            IMAGE_TO_VIDEO_MODEL, torch_dtype=torch.float16, variant="fp16"
+        ).to("cuda")
+        self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(self, image: Image, fps: int = 24) -> Video:
+        video_frames = self.pipeline(image=image, fps=fps, output_type="np").frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=float(fps))
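The generation classes return plain Video objects, so they compose with the base module; this is only a sketch and assumes the diffusers weights download on first use and that a CUDA GPU is available for reasonable runtimes:

    from videopython.generation import TextToVideo

    text_to_video = TextToVideo(gpu_optimized=True)
    clip = text_to_video.generate_video("A timelapse of clouds rolling over mountains")
    clip.save("clouds.mp4")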
videopython/utils/common.py CHANGED
@@ -1,13 +1,15 @@
 import time
 import uuid
+from pathlib import Path
+from typing import Callable
 
 
 def generate_random_name(suffix=".mp4"):
-    """Generates random video name."""
+    """Generates random name."""
     return f"{uuid.uuid4()}{suffix}"
 
 
-def timeit(func: callable):
+def timeit(func: Callable):
     """Decorator to measure execution time of a function."""
 
     def timed(*args, **kwargs):
@@ -18,3 +20,12 @@ def timeit(func: callable):
         return result
 
     return timed
+
+
+def check_path(path: str, dir_exists: bool = True, suffix: str | None = None) -> str:
+    fullpath = Path(path).resolve()
+    if dir_exists and not fullpath.parent.exists():
+        raise ValueError(f"Directory `{fullpath.parent}` does not exist!")
+    if suffix and suffix != fullpath.suffix:
+        raise ValueError(f"Required suffix `{suffix}` does not match the file suffix `{fullpath.suffix}`")
+    return str(fullpath)
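A small sketch of the new check_path helper that Video.save now relies on (the paths are placeholders):

    from videopython.utils.common import check_path, generate_random_name

    out = check_path("renders/final.mp4", dir_exists=True, suffix=".mp4")  # raises ValueError if `renders/` is missing or the suffix differs
    tmp = generate_random_name(suffix=".png")  # UUID-based name such as "d2a8....png"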
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: videopython
-Version: 0.1.1
+Version: 0.1.3
 Summary: Minimal video generation and processing library.
 Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
 License: Apache License
@@ -210,12 +210,12 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click >=8.1.7
 Requires-Dist: numpy >=1.25.2
-Requires-Dist: opencv-python >=4.7.0.68
+Requires-Dist: opencv-python >=4.9.0.80
 Requires-Dist: pytest >=7.4.0
-Requires-Dist: transformers >=4.35.0
-Requires-Dist: diffusers >=0.21.4
+Requires-Dist: transformers >=4.38.1
+Requires-Dist: diffusers >=0.26.3
 Requires-Dist: torch >=2.1.0
-Requires-Dist: stability-sdk >=0.8.4
+Requires-Dist: stability-sdk >=0.8.5
 Requires-Dist: openai ==1.3.5
 Requires-Dist: pydub >=0.25.1
 
@@ -0,0 +1,17 @@
+videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
+videopython/base/effects.py,sha256=DpA8V89Es7YWPEq72l_h_D7MG1QYf1iuslAl-QgzZx8,2153
+videopython/base/transforms.py,sha256=DQcG8tZ8nlGj3khlp3v4C0MISpRY2rZr-6B6GtPZykE,4251
+videopython/base/transitions.py,sha256=efuJdls2xJVpXV8RGaFd--ii8cLUPz6FdmhSvOjaiTM,2275
+videopython/base/video.py,sha256=40leF8bSjNIhP_L8loOh9ptlZNTZAZ95Dgv9FH4mSz4,10791
+videopython/generation/__init__.py,sha256=Qse024UgiS9OxXzbbInyZ-9cpfI4enR2Dcds4lLDpNA,201
+videopython/generation/audio.py,sha256=YPqUdAcB0mGCt0mgFrxzupX08xx0O_qwfVdjFGlAxaw,985
+videopython/generation/image.py,sha256=B-TlrNXFu18NnMi3KO5fjk0paTSmIsQk400iZb76K8w,2507
+videopython/generation/video.py,sha256=4P4DhHS-_eDColsXK6YefSdoQbU3Ce0n6fHuY5zewYI,1874
+videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/utils/common.py,sha256=F-30YoKUwWDI7HiJUWw0gRFUguhShSVaxT0aFfvpifg,936
+videopython-0.1.3.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
+videopython-0.1.3.dist-info/METADATA,sha256=xj8k5j3qPIVKgXbr4uTi6ad2BSs9j6-V6baonpQKoJI,14709
+videopython-0.1.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+videopython-0.1.3.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
+videopython-0.1.3.dist-info/RECORD,,
videopython/__init__.py DELETED
File without changes
videopython/utils/stability_generation.py DELETED
@@ -1,75 +0,0 @@
-import io
-import os
-from pathlib import Path
-
-import numpy as np
-import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
-from PIL import Image
-from stability_sdk import client
-
-from videopython.utils.common import generate_random_name
-
-API_KEY = os.getenv("STABILITY_KEY")
-if not API_KEY:
-    raise KeyError(
-        "Stability API key was not found in the environment! Please set in as `STABILITY_KEY` in your environment."
-    )
-
-
-def get_image_from_prompt(
-    prompt: str,
-    output_dir: str | None = None,
-    width: int = 1024,
-    height: int = 1024,
-    num_samples: int = 1,
-    steps: int = 30,
-    cfg_scale: float = 8.0,
-    engine: str = "stable-diffusion-xl-1024-v1-0",
-    verbose: bool = True,
-    seed: int = 1,
-) -> tuple[np.ndarray, str]:
-    """Generates image from prompt using the stability.ai API."""
-    # Generate image
-    stability_api = client.StabilityInference(
-        key=API_KEY,
-        verbose=verbose,
-        engine=engine,  # Set the engine to use for generation.
-        # Check out the following link for a list of available engines: https://platform.stability.ai/docs/features/api-parameters#engine
-    )
-    answers = stability_api.generate(
-        prompt=prompt,
-        seed=seed,
-        steps=steps,  # Amount of inference steps performed on image generation.
-        cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
-        # Setting this value higher increases the strength in which it tries to match your prompt.
-        # Defaults to 7.0 if not specified.
-        width=width,
-        height=height,
-        samples=num_samples,
-        sampler=generation.SAMPLER_K_DPMPP_2M  # Choose which sampler we want to denoise our generation with.
-        # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
-        # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
-    )
-    # Create output path
-    if output_dir:
-        output_dir = Path(output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        output_dir = Path(os.getcwd())
-    filename = output_dir / generate_random_name(suffix=".png")
-    # Parse API response
-    for resp in answers:
-        for artifact in resp.artifacts:
-            if artifact.finish_reason == generation.FILTER:
-                raise RuntimeError(
-                    "Your request activated the API's safety filters and could not be processed."
-                    "Please modify the prompt and try again."
-                )
-
-            if artifact.type == generation.ARTIFACT_IMAGE:
-                img = Image.open(io.BytesIO(artifact.binary))
-                img.save(filename)
-            else:
-                raise ValueError(f"Unknown artifact type: {artifact.type}")
-
-    return np.array(img), filename
videopython/utils/text_to_speech.py DELETED
@@ -1,24 +0,0 @@
-import os
-from pathlib import Path
-
-from openai import OpenAI
-
-from videopython.utils.common import generate_random_name
-
-
-def text_to_speech_openai(text: str, voice: str = "alloy", output_dir: Path | None = None) -> Path:
-    client = OpenAI()
-    filename = generate_random_name(suffix=".mp3")
-
-    if output_dir:
-        output_dir = Path(output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        output_dir = Path(os.getcwd())
-
-    save_path = output_dir / filename
-
-    response = client.audio.speech.create(model="tts-1", voice=voice, input=text)
-    response.stream_to_file(save_path)
-
-    return save_path
@@ -1,15 +0,0 @@
-videopython/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
-videopython/base/transforms.py,sha256=aXIqbp9sZkZI5PYRn0uDSxLoQxCdku1BAmzfQpnGW_w,2701
-videopython/base/transitions.py,sha256=VQXJ-sGL7lcr3Q6uhb66hLlqW9213UBUAAH6DqJa9xs,2159
-videopython/base/video.py,sha256=KxhQt_xJp9YhuTsbBdCdXgEMRZCckUyv0I2xwZYWSrk,10167
-videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/utils/common.py,sha256=lms--xc-5Jj4cVsD_W_FBw1n_8XnHGiCtJEOx4f5dV4,461
-videopython/utils/stability_generation.py,sha256=POAQLgrwhyl-tvPsZpRphe8du2azqotaLWBu70_IaH8,2928
-videopython/utils/text_to_speech.py,sha256=wSRd2JnDDubIu2-vqnN80hGdvf4EpS1XZ68S8uQei8w,640
-videopython-0.1.1.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
-videopython-0.1.1.dist-info/METADATA,sha256=VJpGeOrbHUkDh2wajI-K6hKYjjSAIHn5z9eB5SQGQqA,14709
-videopython-0.1.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-videopython-0.1.1.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
-videopython-0.1.1.dist-info/RECORD,,