videopython 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.


videopython/base/effects.py ADDED
@@ -0,0 +1,183 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Literal, final
3
+
4
+ import cv2
5
+ import numpy as np
6
+ from PIL import Image
7
+ from tqdm import tqdm
8
+
9
+ from videopython.base.video import Video
10
+
11
+
12
+ class Effect(ABC):
13
+ """Abstract class for effect on frames of video.
14
+
15
+ The effect must not change the number of frames and the shape of the frames.
16
+ """
17
+
18
+ @final
19
+ def apply(self, video: Video, start: float | None = None, stop: float | None = None) -> Video:
20
+ original_shape = video.video_shape
21
+ start = start if start is not None else 0
22
+ stop = stop if stop is not None else video.total_seconds
23
+ # Check for start and stop correctness
24
+ if not 0 <= start <= video.total_seconds:
25
+ raise ValueError(f"Video is only {video.total_seconds} long, but passed start: {start}!")
26
+ elif not start <= stop <= video.total_seconds:
27
+ raise ValueError(f"Video is only {video.total_seconds} long, but passed stop: {stop}!")
28
+ # Apply effect on video slice
29
+ effect_start_frame = round(start * video.fps)
30
+ effect_end_frame = round(stop * video.fps)
31
+ video_with_effect = self._apply(video[effect_start_frame:effect_end_frame])
32
+ old_audio = video.audio
33
+ video = Video.from_frames(
34
+ np.r_[
35
+ "0,2",
36
+ video.frames[:effect_start_frame],
37
+ video_with_effect.frames,
38
+ video.frames[effect_end_frame:],
39
+ ],
40
+ fps=video.fps,
41
+ )
42
+ video.audio = old_audio
43
+ # Check if dimensions didn't change
44
+ if not video.video_shape == original_shape:
45
+ raise RuntimeError("The effect must not change the number of frames and the shape of the frames!")
46
+
47
+ return video
48
+
49
+ @abstractmethod
50
+ def _apply(self, video: Video) -> Video:
51
+ pass
52
+
53
+
54
+ class FullImageOverlay(Effect):
55
+ def __init__(self, overlay_image: np.ndarray, alpha: float | None = None, fade_time: float = 0.0):
56
+ if alpha is not None and not 0 <= alpha <= 1:
57
+ raise ValueError("Alpha must be in range [0, 1]!")
58
+ elif not (overlay_image.ndim == 3 and overlay_image.shape[-1] in [3, 4]):
59
+ raise ValueError("Only RGB and RGBA images are supported as an overlay!")
60
+ elif alpha is None:
61
+ alpha = 1.0
62
+
63
+ if overlay_image.shape[-1] == 3:
64
+ overlay_image = np.dstack([overlay_image, np.full(overlay_image.shape[:2], 255, dtype=np.uint8)])
65
+
66
+ self.alpha = alpha
67
+ self.overlay = overlay_image.astype(np.uint8)
68
+ self.fade_time = fade_time
69
+
70
+ def _overlay(self, img: np.ndarray, alpha: float = 1.0) -> np.ndarray:
71
+ img_pil = Image.fromarray(img)
72
+ overlay = self.overlay.copy()
73
+ overlay[:, :, 3] = overlay[:, :, 3] * (self.alpha * alpha)
74
+ overlay_pil = Image.fromarray(overlay)
75
+ img_pil.paste(overlay_pil, (0, 0), overlay_pil)
76
+ return np.array(img_pil)
77
+
78
+ def _apply(self, video: Video) -> Video:
79
+ if not video.frame_shape == self.overlay[:, :, :3].shape:
80
+ raise ValueError(
81
+ f"Mismatch of overlay shape `{self.overlay.shape}` with video shape: `{video.frame_shape}`!"
82
+ )
83
+ elif not (0 <= 2 * self.fade_time <= video.total_seconds):
84
+ raise ValueError(f"Video is only {video.total_seconds}s long, but fade time is {self.fade_time}s!")
85
+
86
+ print("Overlaying video...")
87
+ if self.fade_time == 0:
88
+ video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
89
+ else:
90
+ num_video_frames = len(video.frames)
91
+ num_fade_frames = round(self.fade_time * video.fps)
92
+ new_frames = []
93
+ for i, frame in enumerate(tqdm(video.frames)):
94
+ frames_dist_from_end = min(i, num_video_frames - i)
95
+ if frames_dist_from_end >= num_fade_frames:
96
+ fade_alpha = 1.0
97
+ else:
98
+ fade_alpha = frames_dist_from_end / num_fade_frames
99
+ new_frames.append(self._overlay(frame, fade_alpha))
100
+ video.frames = np.array(new_frames, dtype=np.uint8)
101
+ return video
102
+
103
+
104
+ class Blur(Effect):
105
+ def __init__(
106
+ self,
107
+ mode: Literal["constant", "ascending", "descending"],
108
+ iterations: int,
109
+ kernel_size: tuple[int, int] = (5, 5),
110
+ ):
111
+ if iterations < 1:
112
+ raise ValueError("Iterations must be at least 1!")
113
+ self.mode = mode
114
+ self.iterations = iterations
115
+ self.kernel_size = kernel_size
116
+
117
+ def _apply(self, video: Video) -> Video:
118
+ n_frames = len(video.frames)
119
+ new_frames = []
120
+ if self.mode == "constant":
121
+ for frame in video.frames:
122
+ blurred_frame = frame
123
+ for _ in range(self.iterations):
124
+ blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
125
+ new_frames.append(blurred_frame)
126
+ elif self.mode == "ascending":
127
+ for i, frame in tqdm(enumerate(video.frames)):
128
+ frame_iterations = max(1, round((i / n_frames) * self.iterations))
129
+ blurred_frame = frame
130
+ for _ in range(frame_iterations):
131
+ blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
132
+ new_frames.append(blurred_frame)
133
+ elif self.mode == "descending":
134
+ for i, frame in tqdm(enumerate(video.frames)):
135
+ frame_iterations = max(round(((n_frames - i) / n_frames) * self.iterations), 1)
136
+ blurred_frame = frame
137
+ for _ in range(frame_iterations):
138
+ blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
139
+ new_frames.append(blurred_frame)
140
+ else:
141
+ raise ValueError(f"Unknown mode: `{self.mode}`.")
142
+ video.frames = np.asarray(new_frames)
143
+ return video
144
+
145
+
146
+ class Zoom(Effect):
147
+ def __init__(self, zoom_factor: float, mode: Literal["in", "out"]):
148
+ if zoom_factor <= 1:
149
+ raise ValueError("Zoom factor must be greater than 1!")
150
+ self.zoom_factor = zoom_factor
151
+ self.mode = mode
152
+
153
+ def _apply(self, video: Video) -> Video:
154
+ n_frames = len(video.frames)
155
+ new_frames = []
156
+
157
+ width = video.metadata.width
158
+ height = video.metadata.height
159
+ crop_sizes_w, crop_sizes_h = np.linspace(width // self.zoom_factor, width, n_frames), np.linspace(
160
+ height // self.zoom_factor, height, n_frames
161
+ )
162
+
163
+ if self.mode == "in":
164
+ for frame, w, h in tqdm(zip(video.frames, reversed(crop_sizes_w), reversed(crop_sizes_h))):
165
+
166
+ x = width / 2 - w / 2
167
+ y = height / 2 - h / 2
168
+
169
+ cropped_frame = frame[round(y) : round(y + h), round(x) : round(x + w)]
170
+ zoomed_frame = cv2.resize(cropped_frame, (width, height))
171
+ new_frames.append(zoomed_frame)
172
+ elif self.mode == "out":
173
+ for frame, w, h in tqdm(zip(video.frames, crop_sizes_w, crop_sizes_h)):
174
+ x = width / 2 - w / 2
175
+ y = height / 2 - h / 2
176
+
177
+ cropped_frame = frame[round(y) : round(y + h), round(x) : round(x + w)]
178
+ zoomed_frame = cv2.resize(cropped_frame, (width, height))
179
+ new_frames.append(zoomed_frame)
180
+ else:
181
+ raise ValueError(f"Unknown mode: `{self.mode}`.")
182
+ video.frames = np.asarray(new_frames)
183
+ return video
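The new `videopython.base.effects` module drives `FullImageOverlay`, `Blur`, and `Zoom` through `Effect.apply(video, start, stop)`. A minimal usage sketch follows; the input path is a placeholder, the overlay is a synthetic black card, and the clip is assumed to be at least a couple of seconds long:

```python
import numpy as np

from videopython.base.effects import Blur, FullImageOverlay, Zoom
from videopython.base.video import Video

video = Video.from_path("my_video.mp4")  # placeholder clip

# Black full-frame card at 50% opacity, fading in and out over 0.5s.
overlay = np.zeros((*video.frame_shape[:2], 4), dtype=np.uint8)
overlay[..., 3] = 255  # opaque alpha channel; RGB stays black
video = FullImageOverlay(overlay, alpha=0.5, fade_time=0.5).apply(video)

# Blur only the first two seconds, then zoom into the whole clip.
video = Blur(mode="ascending", iterations=10).apply(video, start=0, stop=2)
video = Zoom(zoom_factor=1.5, mode="in").apply(video)
```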
videopython/base/transforms.py CHANGED
@@ -1,8 +1,11 @@
1
1
  from abc import ABC, abstractmethod
2
+ from enum import Enum
2
3
  from multiprocessing import Pool
4
+ from typing import Literal
3
5
 
4
6
  import cv2
5
7
  import numpy as np
8
+ from tqdm import tqdm
6
9
 
7
10
  from videopython.base.video import Video
8
11
 
@@ -14,9 +17,6 @@ class Transformation(ABC):
14
17
  def apply(self, video: Video) -> Video:
15
18
  pass
16
19
 
17
- def __call__(self, video: Video) -> Video:
18
- return self.apply(video)
19
-
20
20
 
21
21
  class TransformationPipeline:
22
22
  def __init__(self, transformations: list[Transformation] | None):
@@ -58,7 +58,7 @@ class CutFrames(Transformation):
58
58
  self.end_frame = end_frame
59
59
 
60
60
  def apply(self, video: Video) -> Video:
61
- video.frames = video.frames[self.start_frame : self.end_frame]
61
+ video = video[self.start_frame : self.end_frame]
62
62
  return video
63
63
 
64
64
 
@@ -68,14 +68,16 @@ class CutSeconds(Transformation):
68
68
  self.end_second = end_second
69
69
 
70
70
  def apply(self, video: Video) -> Video:
71
- video.frames = video.frames[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
71
+ video = video[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
72
72
  return video
73
73
 
74
74
 
75
75
  class Resize(Transformation):
76
- def __init__(self, new_width: int, new_height: int):
77
- self.new_width = new_width
78
- self.new_height = new_height
76
+ def __init__(self, width: int | None = None, height: int | None = None):
77
+ self.width = width
78
+ self.height = height
79
+ if width is None and height is None:
80
+ raise ValueError("You must provide either `width` or `height`!")
79
81
 
80
82
  def _resize_frame(self, frame: np.ndarray, new_width: int, new_height: int) -> np.ndarray:
81
83
  return cv2.resize(
@@ -85,10 +87,92 @@ class Resize(Transformation):
85
87
  )
86
88
 
87
89
  def apply(self, video: Video) -> Video:
90
+ if self.width and self.height:
91
+ new_height = self.height
92
+ new_width = self.width
93
+ elif self.height is None and self.width:
94
+ video_height = video.video_shape[1]
95
+ video_width = video.video_shape[2]
96
+ new_height = round(video_height * (self.width / video_width))
97
+ new_width = self.width
98
+ elif self.width is None and self.height:
99
+ video_height = video.video_shape[1]
100
+ video_width = video.video_shape[2]
101
+ new_width = round(video_width * (self.height / video_height))
102
+ new_height = self.height
103
+
104
+ print(f"Resizing video to: {new_width}x{new_height}!")
88
105
  with Pool() as pool:
89
106
  frames_copy = pool.starmap(
90
107
  self._resize_frame,
91
- [(frame, self.new_width, self.new_height) for frame in video.frames],
108
+ [(frame, new_width, new_height) for frame in video.frames],
92
109
  )
93
110
  video.frames = np.array(frames_copy)
94
111
  return video
112
+
113
+
114
+ class ResampleFPS(Transformation):
115
+ def __init__(self, new_fps: int | float):
116
+ self.new_fps = float(new_fps)
117
+
118
+ def _downsample(self, video: Video) -> Video:
119
+ target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
120
+ new_frame_indices = np.round(np.linspace(0, len(video.frames) - 1, target_frame_count)).astype(int)
121
+ video.frames = video.frames[new_frame_indices]
122
+ video.fps = self.new_fps
123
+ return video
124
+
125
+ def _upsample(self, video: Video) -> Video:
126
+ target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
127
+ new_frame_indices = np.linspace(0, len(video.frames) - 1, target_frame_count)
128
+ new_frames = []
129
+ for i in tqdm(range(len(new_frame_indices) - 1)):
130
+ # Interpolate between the two nearest frames
131
+ ratio = new_frame_indices[i] % 1
132
+ new_frame = (1 - ratio) * video.frames[int(new_frame_indices[i])] + ratio * video.frames[
133
+ int(np.ceil(new_frame_indices[i]))
134
+ ]
135
+ new_frames.append(new_frame.astype(np.uint8))
136
+ video.frames = np.array(new_frames, dtype=np.uint8)
137
+ video.fps = self.new_fps
138
+ return video
139
+
140
+ def apply(self, video: Video) -> Video:
141
+ if video.fps == self.new_fps:
142
+ return video
143
+ elif video.fps > self.new_fps:
144
+ print(f"Downsampling video from {video.fps} to {self.new_fps} FPS.")
145
+ video = self._downsample(video)
146
+ else:
147
+ print(f"Upsampling video from {video.fps} to {self.new_fps} FPS.")
148
+ video = self._upsample(video)
149
+ return video
150
+
151
+
152
+ class CropMode(Enum):
153
+ CENTER = "center"
154
+
155
+
156
+ class Crop(Transformation):
157
+
158
+ def __init__(self, width: int, height: int, mode: CropMode = CropMode.CENTER):
159
+ self.width = width
160
+ self.height = height
161
+ self.mode = mode
162
+
163
+ def apply(self, video: Video) -> Video:
164
+ if self.mode == CropMode.CENTER:
165
+ current_shape = video.frame_shape[:2]
166
+ center_height = current_shape[0] // 2
167
+ center_width = current_shape[1] // 2
168
+ width_offset = self.width // 2
169
+ height_offset = self.height // 2
170
+ video.frames = video.frames[
171
+ :,
172
+ center_height - height_offset : center_height + height_offset,
173
+ center_width - width_offset : center_width + width_offset,
174
+ :,
175
+ ]
176
+ else:
177
+ raise ValueError(f"Unknown mode: {self.mode}")
178
+ return video
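Together, the reworked transformations cover trimming, resizing from a single dimension (the other is derived from the aspect ratio), FPS resampling, and center cropping. A hedged sketch of chaining them, assuming a placeholder clip at `my_video.mp4`:

```python
from videopython.base.transforms import Crop, CutSeconds, Resize, ResampleFPS
from videopython.base.video import Video

video = Video.from_path("my_video.mp4")                      # placeholder clip
video = CutSeconds(start_second=1, end_second=4).apply(video)
video = Resize(width=720).apply(video)                       # height inferred from aspect ratio
video = ResampleFPS(new_fps=30).apply(video)                 # up- or downsamples as needed
video = Crop(width=640, height=360).apply(video)             # CropMode.CENTER is the default
```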
videopython/base/transitions.py CHANGED
@@ -4,6 +4,7 @@ from typing import final
4
4
 
5
5
  import numpy as np
6
6
 
7
+ from videopython.base.effects import Blur
7
8
  from videopython.base.video import Video
8
9
 
9
10
 
@@ -15,19 +16,19 @@ class Transition(ABC):
15
16
  """
16
17
 
17
18
  @final
18
- def apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
19
+ def apply(self, videos: tuple[Video, Video]) -> Video:
19
20
  assert videos[0].metadata.can_be_merged_with(videos[1].metadata)
20
- return self._apply(videos, **kwargs)
21
+ return self._apply(videos)
21
22
 
22
23
  @abstractmethod
23
- def _apply(self, videos: tuple[Video, Video], **kwargs) -> Video:
24
+ def _apply(self, videos: tuple[Video, Video]) -> Video:
24
25
  pass
25
26
 
26
27
 
27
28
  class InstantTransition(Transition):
28
29
  """Instant cut without any transition."""
29
30
 
30
- def _apply(self, videos: list[Video] | tuple[Video]) -> Video:
31
+ def _apply(self, videos: tuple[Video, Video]) -> Video:
31
32
  return videos[0] + videos[1]
32
33
 
33
34
 
@@ -57,7 +58,7 @@ class FadeTransition(Transition):
57
58
  effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
58
59
  transition = self.fade(videos[0].frames[-effect_time_fps:], videos[1].frames[:effect_time_fps])
59
60
 
60
- return Video.from_frames(
61
+ faded_videos = Video.from_frames(
61
62
  np.r_[
62
63
  "0,2",
63
64
  videos[0].frames[:-effect_time_fps],
@@ -66,3 +67,40 @@ class FadeTransition(Transition):
66
67
  ],
67
68
  fps=video_fps,
68
69
  )
70
+ faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
71
+ return faded_videos
72
+
73
+
74
+ class BlurTransition(Transition):
75
+ def __init__(
76
+ self, effect_time_seconds: float = 1.5, blur_iterations: int = 400, blur_kernel_size: tuple[int, int] = (11, 11)
77
+ ):
78
+ self.effect_time_seconds = effect_time_seconds
79
+ self.blur_iterations = blur_iterations
80
+ self.blur_kernel_size = blur_kernel_size
81
+
82
+ def _apply(self, videos: tuple[Video, Video]) -> Video:
83
+ video_fps = videos[0].fps
84
+ for video in videos:
85
+ if video.total_seconds < self.effect_time_seconds:
86
+ raise RuntimeError("Not enough space to make transition!")
87
+
88
+ effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
89
+
90
+ ascending_blur = Blur("ascending", self.blur_iterations, self.blur_kernel_size)
91
+ descending_blur = Blur("descending", self.blur_iterations, self.blur_kernel_size)
92
+ transition = ascending_blur.apply(videos[0][-effect_time_fps:]) + descending_blur.apply(
93
+ videos[1][:effect_time_fps]
94
+ )
95
+
96
+ blurred_videos = Video.from_frames(
97
+ np.r_[
98
+ "0,2",
99
+ videos[0].frames[:-effect_time_fps],
100
+ transition.frames,
101
+ videos[1].frames[effect_time_fps:],
102
+ ],
103
+ fps=video_fps,
104
+ )
105
+ blurred_videos.audio = videos[0].audio.append(videos[1].audio)
106
+ return blurred_videos
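Both transitions consume a `(Video, Video)` tuple whose metadata must be mergeable, and both now stitch the audio tracks together (with a crossfade in `FadeTransition`). A sketch with placeholder clips that are assumed to share resolution and FPS:

```python
from videopython.base.transitions import BlurTransition, FadeTransition
from videopython.base.video import Video

clip_a = Video.from_path("clip_a.mp4")   # placeholder clips with matching metadata
clip_b = Video.from_path("clip_b.mp4")

faded = FadeTransition(0.5).apply((clip_a, clip_b))                        # 0.5s crossfade
blurred = BlurTransition(effect_time_seconds=1.0).apply((clip_a, clip_b))  # blur in, blur out
```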
videopython/base/video.py CHANGED
@@ -7,11 +7,9 @@ from pathlib import Path
7
7
 
8
8
  import cv2
9
9
  import numpy as np
10
- import torch
11
- from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
12
10
  from pydub import AudioSegment
13
11
 
14
- from videopython.utils.common import generate_random_name
12
+ from videopython.utils.common import check_path, generate_random_name
15
13
 
16
14
 
17
15
  @dataclass
@@ -25,7 +23,7 @@ class VideoMetadata:
25
23
  total_seconds: float
26
24
 
27
25
  def __str__(self):
28
- return f"{self.height}x{self.width} @ {self.fps}fps, {self.total_seconds} seconds"
26
+ return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"
29
27
 
30
28
  def __repr__(self) -> str:
31
29
  return self.__str__()
@@ -117,16 +115,22 @@ class Video:
117
115
  audio = cls._load_audio_from_path(path)
118
116
  if not audio:
119
117
  print(f"No audio found for `{path}`, adding silent track!")
120
- audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
118
+ audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
121
119
  new_vid.audio = audio
122
120
  return new_vid
123
121
 
124
122
  @classmethod
125
123
  def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
126
124
  new_vid = cls()
125
+ if frames.ndim != 4:
126
+ raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
127
+ elif frames.shape[-1] == 4:
128
+ frames = frames[:, :, :, :3]
129
+ elif frames.shape[-1] != 3:
130
+ raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
127
131
  new_vid.frames = frames
128
132
  new_vid.fps = fps
129
- new_vid.audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
133
+ new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
130
134
  return new_vid
131
135
 
132
136
  @classmethod
@@ -136,37 +140,9 @@ class Video:
136
140
  image = np.expand_dims(image, axis=0)
137
141
  new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
138
142
  new_vid.fps = fps
139
- new_vid.audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
143
+ new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
140
144
  return new_vid
141
145
 
142
- @classmethod
143
- def from_prompt(
144
- cls,
145
- prompt: str,
146
- num_steps: int = 25,
147
- height: int = 320,
148
- width: int = 576,
149
- num_frames: int = 24,
150
- gpu_optimized: bool = False,
151
- ) -> Video:
152
- pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch_dtype)
153
- if gpu_optimized:
154
- pipe.enable_model_cpu_offload()
155
- torch_dtype = torch.float16
156
- else:
157
- torch_dtype = torch.float32
158
- pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
159
- video_frames = np.asarray(
160
- pipe(
161
- prompt,
162
- num_inference_steps=num_steps,
163
- height=height,
164
- width=width,
165
- num_frames=num_frames,
166
- ).frames
167
- )
168
- return Video.from_frames(video_frames, fps=24.0)
169
-
170
146
  def copy(self) -> Video:
171
147
  copied = Video().from_frames(self.frames.copy(), self.fps)
172
148
  copied.audio = self.audio
@@ -198,19 +174,10 @@ class Video:
198
174
  """
199
175
  if not self.is_loaded():
200
176
  raise RuntimeError(f"Video is not loaded, cannot save!")
201
- # Check filename correctness or generate a new one if not given
202
- if not filename:
203
- filename = Path(generate_random_name()).resolve()
204
- directory = filename.parent
205
- elif not Path(filename).suffix == ".mp4":
206
- raise ValueError("Only .mp4 save option is supported.")
207
- else:
208
- filename = Path(filename)
209
- directory = filename.parent
210
- if not directory.exists():
211
- raise ValueError(f"Selected directory `{directory}` does not exist!")
212
177
 
213
- filename, directory = str(filename), str(directory)
178
+ if filename is None:
179
+ filename = generate_random_name(suffix=".mp4")
180
+ filename = check_path(filename, dir_exists=True, suffix=".mp4")
214
181
 
215
182
  ffmpeg_video_command = (
216
183
  f"ffmpeg -loglevel error -y -framerate {self.fps} -f rawvideo -pix_fmt rgb24"
@@ -250,10 +217,14 @@ class Video:
250
217
 
251
218
  def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
252
219
  new_audio = self._load_audio_from_path(path)
253
- if (duration_diff := self.total_seconds - new_audio.duration_seconds) > 0 and not loop:
220
+ if new_audio is None:
221
+ print(f"Audio file `{path}` not found, skipping!")
222
+ return
223
+
224
+ if (duration_diff := round(self.total_seconds - new_audio.duration_seconds)) > 0 and not loop:
254
225
  new_audio = new_audio + AudioSegment.silent(duration_diff * 1000)
255
226
  elif new_audio.duration_seconds > self.total_seconds:
256
- new_audio = new_audio[: self.total_seconds * 1000]
227
+ new_audio = new_audio[: round(self.total_seconds * 1000)]
257
228
 
258
229
  if overlay:
259
230
  self.audio = self.audio.overlay(new_audio, loop=loop, gain_during_overlay=overlay_gain)
@@ -276,17 +247,25 @@ class Video:
276
247
  def __str__(self) -> str:
277
248
  return str(self.metadata)
278
249
 
279
- def __getitem__(self, val: int | slice) -> Video | np.ndarray:
280
- if isinstance(val, slice):
281
- # Sub-slice video if given a slice
282
- sliced = self.from_frames(self.frames[val], fps=self.fps)
283
- audio_start = (val.start / self.fps) * 1000
284
- audio_end = (val.stop / self.fps) * 1000
285
- sliced.audio = self.audio[audio_start:audio_end]
286
- return sliced
287
- elif isinstance(val, int):
288
- # Return single frame for integer indexing
289
- return self.frames[val]
250
+ def __getitem__(self, val: slice) -> Video:
251
+ if not isinstance(val, slice):
252
+ raise ValueError("Only slices are supported for video indexing!")
253
+
254
+ # Sub-slice video if given a slice
255
+ sliced = self.from_frames(self.frames[val], fps=self.fps)
256
+ # Handle slicing without value for audio
257
+ start = val.start if val.start else 0
258
+ stop = val.stop if val.stop else len(self.frames)
259
+ # Handle negative values for audio slices
260
+ if start < 0:
261
+ start = len(self.frames) + start
262
+ if stop < 0:
263
+ stop = len(self.frames) + stop
264
+ # Append audio to the slice
265
+ audio_start = round(start / self.fps) * 1000
266
+ audio_end = round(stop / self.fps) * 1000
267
+ sliced.audio = self.audio[audio_start:audio_end]
268
+ return sliced
290
269
 
291
270
  @staticmethod
292
271
  def _load_audio_from_path(path: str) -> AudioSegment | None:
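The `Video` changes tighten indexing to slices only (audio is sliced to match), trim RGBA frames to RGB in `from_frames`, and route `save()` paths through `check_path`. A minimal sketch, with placeholder file names; the `save()` return value mirrors the README usage below:

```python
from videopython.base.video import Video

video = Video.from_path("my_video.mp4")              # placeholder clip

clip = video[24:96]                                   # integer indexing now raises; slices keep audio in sync
clip.add_audio_from_file("music.mp3", overlay=True)   # a missing file is now skipped with a message
print(clip.metadata)

saved_path = clip.save()                              # random *.mp4 name, validated via check_path
print(saved_path)
```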
videopython/generation/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ from .audio import TextToSpeech
2
+ from .image import TextToImage
3
+ from .video import ImageToVideo, TextToVideo
4
+
5
+ __all__ = [
6
+ "ImageToVideo",
7
+ "TextToSpeech",
8
+ "TextToImage",
9
+ "TextToVideo",
10
+ ]
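The new package root re-exports the four pipelines, so downstream code can import them directly from `videopython.generation`. An import-only smoke check (the diffusion-based pipelines need CUDA to instantiate, so nothing is constructed here):

```python
from videopython.generation import ImageToVideo, TextToImage, TextToSpeech, TextToVideo

# Verify the public API surface without touching any GPU resources.
print([cls.__name__ for cls in (ImageToVideo, TextToImage, TextToSpeech, TextToVideo)])
```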
videopython/generation/audio.py ADDED
@@ -0,0 +1,22 @@
1
+ import numpy as np
2
+ import torch
3
+ from pydub import AudioSegment
4
+ from transformers import AutoTokenizer, VitsModel
5
+
6
+ TEXT_TO_SPEECH_MODEL = "facebook/mms-tts-eng"
7
+
8
+
9
+ class TextToSpeech:
10
+ def __init__(self):
11
+ self.pipeline = VitsModel.from_pretrained(TEXT_TO_SPEECH_MODEL)
12
+ self.tokenizer = AutoTokenizer.from_pretrained(TEXT_TO_SPEECH_MODEL)
13
+
14
+ def generate_audio(self, text: str) -> AudioSegment:
15
+ tokenized = self.tokenizer(text, return_tensors="pt")
16
+
17
+ with torch.no_grad():
18
+ output = self.pipeline(**tokenized).waveform
19
+
20
+ output = (output.T.float().numpy() * (2**31 - 1)).astype(np.int32)
21
+ audio = AudioSegment(data=output, frame_rate=self.pipeline.config.sampling_rate, sample_width=4, channels=1)
22
+ return audio
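`TextToSpeech` wraps the `facebook/mms-tts-eng` VITS checkpoint and returns a pydub `AudioSegment`, so the result can be attached to a `Video` like any other audio. A short sketch; the model weights are downloaded on first use:

```python
from videopython.generation import TextToSpeech

speech = TextToSpeech().generate_audio("Welcome to videopython.")
print(f"Generated {speech.duration_seconds:.1f}s of narration")  # pydub AudioSegment
```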
videopython/generation/image.py ADDED
@@ -0,0 +1,22 @@
1
+ import io
2
+ import os
3
+
4
+ import torch
5
+ from diffusers import DiffusionPipeline
6
+ from PIL import Image
7
+
8
+ TEXT_TO_IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
9
+
10
+
11
+ class TextToImage:
12
+ def __init__(self):
13
+ if not torch.cuda.is_available():
14
+ raise ValueError("CUDA is not available, but TextToVideo model requires CUDA.")
15
+ self.pipeline = DiffusionPipeline.from_pretrained(
16
+ TEXT_TO_IMAGE_MODEL, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
17
+ )
18
+ self.pipeline.to("cuda")
19
+
20
+ def generate_image(self, prompt: str) -> Image.Image:
21
+ image = self.pipeline(prompt=prompt).images[0]
22
+ return image
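`TextToImage` loads SDXL in fp16 and requires a CUDA device (the constructor raises otherwise). A sketch; the prompt mirrors the README example and the output file name is a placeholder:

```python
from videopython.generation import TextToImage

image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
image.save("retriever.png")  # a PIL.Image.Image, so the usual PIL API applies
```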
videopython/generation/video.py ADDED
@@ -0,0 +1,45 @@
1
+ import numpy as np
2
+ import torch
3
+ from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
4
+ from PIL.Image import Image
5
+
6
+ from videopython.base.video import Video
7
+
8
+ TEXT_TO_VIDEO_MODEL = "cerspense/zeroscope_v2_576w"
9
+ IMAGE_TO_VIDEO_MODEL = "stabilityai/stable-video-diffusion-img2vid-xt"
10
+
11
+
12
+ class TextToVideo:
13
+ def __init__(self):
14
+ if not torch.cuda.is_available():
15
+ raise ValueError("CUDA is not available, but TextToVideo model requires CUDA.")
16
+ self.pipeline = DiffusionPipeline.from_pretrained(TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16)
17
+ self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
18
+ self.pipeline.to("cuda")
19
+
20
+ def generate_video(
21
+ self, prompt: str, num_steps: int = 25, height: int = 320, width: int = 576, num_frames: int = 24
22
+ ) -> Video:
23
+ video_frames = self.pipeline(
24
+ prompt,
25
+ num_inference_steps=num_steps,
26
+ height=height,
27
+ width=width,
28
+ num_frames=num_frames,
29
+ ).frames[0]
30
+ video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
31
+ return Video.from_frames(video_frames, fps=24.0)
32
+
33
+
34
+ class ImageToVideo:
35
+ def __init__(self):
36
+ if not torch.cuda.is_available():
37
+ raise ValueError("CUDA is not available, but ImageToVideo model requires CUDA.")
38
+ self.pipeline = DiffusionPipeline.from_pretrained(
39
+ IMAGE_TO_VIDEO_MODEL, torch_dtype=torch.float16, variant="fp16"
40
+ ).to("cuda")
41
+
42
+ def generate_video(self, image: Image, fps: int = 24) -> Video:
43
+ video_frames = self.pipeline(image=image, fps=fps, output_type="np").frames[0]
44
+ video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
45
+ return Video.from_frames(video_frames, fps=float(fps))
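These two pipelines replace the old `Video.from_prompt` classmethod; both return a `Video` built through `Video.from_frames`. A sketch, assuming a CUDA machine and illustrative prompts:

```python
from videopython.generation import ImageToVideo, TextToImage, TextToVideo

# Text-to-video with the zeroscope pipeline.
video = TextToVideo().generate_video("Dogs playing in the snow", num_frames=24)
print(video.metadata)

# Or animate a generated still image with stable-video-diffusion.
image = TextToImage().generate_image(prompt="A misty mountain lake at dawn")
animated = ImageToVideo().generate_video(image=image, fps=24)
```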
videopython/utils/common.py CHANGED
@@ -1,5 +1,7 @@
1
1
  import time
2
2
  import uuid
3
+ from pathlib import Path
4
+ from typing import Callable
3
5
 
4
6
 
5
7
  def generate_random_name(suffix=".mp4"):
@@ -7,7 +9,7 @@ def generate_random_name(suffix=".mp4"):
7
9
  return f"{uuid.uuid4()}{suffix}"
8
10
 
9
11
 
10
- def timeit(func: callable):
12
+ def timeit(func: Callable):
11
13
  """Decorator to measure execution time of a function."""
12
14
 
13
15
  def timed(*args, **kwargs):
@@ -18,3 +20,12 @@ def timeit(func: callable):
18
20
  return result
19
21
 
20
22
  return timed
23
+
24
+
25
+ def check_path(path: str, dir_exists: bool = True, suffix: str | None = None) -> str:
26
+ fullpath = Path(path).resolve()
27
+ if dir_exists and not fullpath.parent.exists():
28
+ raise ValueError(f"Directory `{fullpath.parent}` does not exist!")
29
+ if suffix and suffix != fullpath.suffix:
30
+ raise ValueError(f"Required suffix `{suffix}` does not match the file suffix `{fullpath.suffix}`")
31
+ return str(fullpath)
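`check_path` resolves a path, optionally requires its parent directory to exist, and enforces a suffix; `Video.save` now delegates its filename validation to it. A quick sketch:

```python
from videopython.utils.common import check_path, generate_random_name

name = generate_random_name(suffix=".mp4")         # e.g. "<uuid4>.mp4" in the working directory
print(check_path(name, dir_exists=True, suffix=".mp4"))

# check_path("clip.avi", suffix=".mp4") would raise ValueError for the suffix mismatch.
```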
videopython/utils/image.py ADDED
@@ -0,0 +1,275 @@
1
+ from typing import Literal
2
+
3
+ import cv2
4
+ import numpy as np
5
+ from PIL import Image, ImageDraw, ImageFont
6
+
7
+ from videopython.base.video import Video
8
+ from videopython.exceptions import OutOfBoundsError
9
+
10
+
11
+ class ImageText:
12
+ def __init__(
13
+ self,
14
+ image_size: tuple[int, int] = (1080, 1920), # (width, height)
15
+ mode: str = "RGBA",
16
+ background: tuple[int, int, int, int] = (0, 0, 0, 0), # Transparent background
17
+ ):
18
+ self.image_size = image_size
19
+ self.image = Image.new(mode, image_size, color=background)
20
+ self._draw = ImageDraw.Draw(self.image)
21
+
22
+ @property
23
+ def img_array(self) -> np.ndarray:
24
+ return np.array(self.image)
25
+
26
+ def save(self, filename: str) -> None:
27
+ self.image.save(filename)
28
+
29
+ def _fit_font_width(self, text: str, font: str, max_width: int) -> int:
30
+ """Find the maximum font size where the text width is less than or equal to max_width."""
31
+ font_size = 1
32
+ text_width = self.get_text_size(font, font_size, text)[0]
33
+ while text_width < max_width:
34
+ font_size += 1
35
+ text_width = self.get_text_size(font, font_size, text)[0]
36
+ max_font_size = font_size - 1
37
+ if max_font_size < 1:
38
+ raise ValueError(f"Max height {max_width} is too small for any font size!")
39
+ return max_font_size
40
+
41
+ def _fit_font_height(self, text: str, font: str, max_height: int) -> int:
42
+ """Find the maximum font size where the text height is less than or equal to max_height."""
43
+ font_size = 1
44
+ text_height = self.get_text_size(font, font_size, text)[1]
45
+ while text_height < max_height:
46
+ font_size += 1
47
+ text_height = self.get_text_size(font, font_size, text)[1]
48
+ max_font_size = font_size - 1
49
+ if max_font_size < 1:
50
+ raise ValueError(f"Max height {max_height} is too small for any font size!")
51
+ return max_font_size
52
+
53
+ def _get_font_size(
54
+ self,
55
+ text: str,
56
+ font: str,
57
+ max_width: int | None = None,
58
+ max_height: int | None = None,
59
+ ) -> int:
60
+ """Get maximum font size for `text` to fill in the `max_width` and `max_height`."""
61
+ if max_width is None and max_height is None:
62
+ raise ValueError("You need to pass max_width or max_height")
63
+ if max_width is not None:
64
+ width_font_size = self._fit_font_width(text, font, max_width)
65
+ if max_height is not None:
66
+ height_font_size = self._fit_font_height(text, font, max_height)
67
+ return min([size for size in [width_font_size, height_font_size] if size is not None])
68
+
69
+ def write_text(
70
+ self,
71
+ text: str,
72
+ font_filename: str,
73
+ xy: tuple[int, int],
74
+ font_size: int | None = 11,
75
+ color: tuple[int, int, int] = (0, 0, 0),
76
+ max_width: int | None = None,
77
+ max_height: int | None = None,
78
+ ) -> tuple[int, int]:
79
+ x, y = xy
80
+ if font_size is None and (max_width is None or max_height is None):
81
+ raise ValueError(f"Must set either `font_size`, or both `max_width` and `max_height`!")
82
+ elif font_size is None:
83
+ font_size = self._get_font_size(text, font_filename, max_width, max_height)
84
+ text_size = self.get_text_size(font_filename, font_size, text)
85
+ if (text_size[0] + x > self.image_size[0]) or (text_size[1] + y > self.image_size[1]):
86
+ raise OutOfBoundsError(f"Font size `{font_size}` is too big, text won't fit!")
87
+ font = ImageFont.truetype(font_filename, font_size)
88
+ self._draw.text((x, y), text, font=font, fill=color)
89
+ return text_size
90
+
91
+ def get_text_size(self, font_filename: str, font_size: int, text: str) -> tuple[int, int]:
92
+ """Return bounding box size of the rendered `text` with `font_filename` and `font_size`."""
93
+ font = ImageFont.truetype(font_filename, font_size)
94
+ return font.getbbox(text)[2:]
95
+
96
+ def _split_lines_by_width(
97
+ self,
98
+ text: str,
99
+ font_filename: str,
100
+ font_size: int,
101
+ box_width: int,
102
+ ) -> list[str]:
103
+ """Split the `text` into lines of maximum `box_width`."""
104
+ words = text.split()
105
+ split_lines: list[list[str]] = []
106
+ current_line: list[str] = []
107
+ for word in words:
108
+ new_line = " ".join(current_line + [word])
109
+ size = self.get_text_size(font_filename, font_size, new_line)
110
+ if size[0] <= box_width:
111
+ current_line.append(word)
112
+ else:
113
+ split_lines.append(current_line)
114
+ current_line = [word]
115
+ if current_line:
116
+ split_lines.append(current_line)
117
+ lines = [" ".join(line) for line in split_lines]
118
+ return lines
119
+
120
+ def write_text_box(
121
+ self,
122
+ text: str,
123
+ font_filename: str,
124
+ xy: tuple[int, int],
125
+ box_width: int,
126
+ font_size: int = 11,
127
+ text_color: tuple[int, int, int] = (0, 0, 0),
128
+ background_color: None | tuple[int, int, int, int] = None,
129
+ background_padding: int = 0,
130
+ place: Literal["left", "right", "center"] = "left",
131
+ ) -> tuple[int, int]:
132
+ """Write text in box described by upper-left corner and maxium width of the box.
133
+
134
+ Args:
135
+ text: Text to be written inside the box.
136
+ font_filename: Path to the font file.
137
+ xy: X and Y coordinates describing upper-left of the box containing the text.
138
+ box_width: Pixel width of the box containing the text.
139
+ font_size: Font size.
140
+ text_color: RGB color of the text.
141
+ background_color: If set, adds background color to the text box. Expects RGBA values.
142
+ background_padding: Number of padding pixels to add when adding text background color.
143
+ place: Strategy for justifying the text inside the container box. Defaults to "left".
144
+
145
+ Returns:
146
+ Lower-left corner of the written text box.
147
+ """
148
+ x, y = xy
149
+ lines = self._split_lines_by_width(text, font_filename, font_size, box_width)
150
+ # Run checks to see if the text will fit
151
+ if x + box_width > self.image_size[0]:
152
+ raise OutOfBoundsError(f"Box width {box_width} is too big for the image width {self.image_size[0]}!")
153
+ lines_height = sum([self.get_text_size(font_filename, font_size, line)[1] for line in lines])
154
+ if y + lines_height > self.image_size[1]:
155
+ available_space = self.image_size[1] - y
156
+ raise OutOfBoundsError(f"Text height {lines_height} is too big for the available space {available_space}!")
157
+ # Write lines
158
+ current_text_height = y
159
+ for line in lines:
160
+ line_size = self.get_text_size(font_filename, font_size, line)
161
+ # Write line text into the image
162
+ if place == "left":
163
+ self.write_text(
164
+ text=line,
165
+ font_filename=font_filename,
166
+ xy=(x, current_text_height),
167
+ font_size=font_size,
168
+ color=text_color,
169
+ )
170
+ elif place == "right":
171
+ x_left = x + box_width - line_size[0]
172
+ self.write_text(
173
+ text=line,
174
+ font_filename=font_filename,
175
+ xy=(x_left, current_text_height),
176
+ font_size=font_size,
177
+ color=text_color,
178
+ )
179
+ elif place == "center":
180
+ x_left = int(x + ((box_width - line_size[0]) / 2))
181
+ self.write_text(
182
+ text=line,
183
+ font_filename=font_filename,
184
+ xy=(x_left, current_text_height),
185
+ font_size=font_size,
186
+ color=text_color,
187
+ )
188
+ else:
189
+ raise ValueError(f"Place {place} is not supported. Use one of: `left`, `right` or `center`!")
190
+ # Increment text height
191
+ current_text_height += line_size[1]
192
+ # Add background color for the text if set
193
+ if background_color is not None:
194
+ if len(background_color) != 4:
195
+ raise ValueError(f"Text background color {background_color} must be RGBA!")
196
+ img = self.img_array
197
+ # Find bounding rectangle for written text
198
+ box_slice = img[y:current_text_height, x : x + box_width]
199
+ text_mask = np.any(box_slice != 0, axis=2).astype(np.uint8)
200
+ xmin, xmax, ymin, ymax = self._find_smallest_bounding_rect(text_mask)
201
+ # Get global bounding box position
202
+ xmin += x - background_padding
203
+ xmax += x + background_padding
204
+ ymin += y - background_padding
205
+ ymax += y + background_padding
206
+ # Make sure we are inside image, cut to image if not
207
+ xmin = max(0, xmin)
208
+ ymin = max(0, ymin)
209
+ xmax = min(xmax, self.image_size[0])
210
+ ymax = min(ymax, self.image_size[1])
211
+ # Slice the bounding box and find text mask
212
+ bbox_slice = img[ymin:ymax, xmin:xmax]
213
+ bbox_text_mask = np.any(bbox_slice != 0, axis=2).astype(np.uint8)
214
+ # Add background color outside of text
215
+ bbox_slice[~bbox_text_mask.astype(bool)] = background_color
216
+ # Blur nicely with semi-transparent pixels from the font
217
+ text_slice = bbox_slice[bbox_text_mask.astype(bool)]
218
+ text_background = text_slice[:, :3] * (np.expand_dims(text_slice[:, -1], axis=1) / 255)
219
+ color_background = (1 - (np.expand_dims(text_slice[:, -1], axis=1) / 255)) * background_color
220
+ faded_background = text_background[:, :3] + color_background[:, :3]
221
+ text_slice[:, :3] = faded_background
222
+ text_slice[:, -1] = 255
223
+ bbox_slice[bbox_text_mask.astype(bool)] = text_slice
224
+ # Set image with the background color
225
+ self.image = Image.fromarray(img)
226
+ return (x, current_text_height)
227
+
228
+ def _find_smallest_bounding_rect(self, mask: np.ndarray) -> tuple[int, int, int, int]:
229
+ """Find the smallest bounding rectangle for the mask."""
230
+ rows = np.any(mask, axis=1)
231
+ cols = np.any(mask, axis=0)
232
+ ymin, ymax = np.where(rows)[0][[0, -1]]
233
+ xmin, xmax = np.where(cols)[0][[0, -1]]
234
+ return xmin, xmax, ymin, ymax
235
+
236
+
237
+ class SlideOverImage:
238
+ def __init__(
239
+ self,
240
+ direction: Literal["left", "right"],
241
+ video_shape: tuple[int, int] = (1080, 1920),
242
+ fps: float = 24.0,
243
+ length_seconds: float = 1.0,
244
+ ) -> None:
245
+ self.direction = direction
246
+ self.video_width, self.video_height = video_shape
247
+ self.fps = fps
248
+ self.length_seconds = length_seconds
249
+
250
+ def slide(self, image: np.ndarray) -> Video:
251
+ image = self._resize(image)
252
+ max_offset = image.shape[1] - self.video_width
253
+ frame_count = round(self.fps * self.length_seconds)
254
+
255
+ deltas = np.linspace(0, max_offset, frame_count)
256
+ frames = []
257
+
258
+ for delta in deltas:
259
+ if self.direction == "right":
260
+ frame = image[:, round(delta) : round(delta) + self.video_width]
261
+ elif self.direction == "left":
262
+ frame = image[:, image.shape[1] - round(delta) - self.video_width : image.shape[1] - round(delta)]
263
+ frames.append(frame)
264
+
265
+ return Video.from_frames(frames=np.stack(frames, axis=0), fps=self.fps)
266
+
267
+ def _resize(self, image: np.ndarray) -> np.ndarray:
268
+ resize_factor = image.shape[0] / self.video_height
269
+ resize_dims = (round(image.shape[1] / resize_factor), round(image.shape[0] / resize_factor)) # width, height
270
+ image = cv2.resize(image, resize_dims)
271
+ if self.video_height > image.shape[0] or self.video_width > image.shape[1]:
272
+ raise ValueError(
273
+ f"Image `{image.shape}` is too small for the video frame `({self.video_width}, {self.video_height})`!"
274
+ )
275
+ return image
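`ImageText` renders wrapped text onto an RGBA canvas and `SlideOverImage` pans a wide image behind a fixed-size frame. A sketch; the font path is a placeholder and the slide source is a synthetic wide image rather than a real photo:

```python
import numpy as np

from videopython.utils.image import ImageText, SlideOverImage

# Render a centred caption onto a transparent 1080x1920 canvas.
canvas = ImageText(image_size=(1080, 1920))
canvas.write_text_box(
    "Hello from videopython",
    font_filename="/path/to/font.ttf",   # placeholder TrueType font
    xy=(100, 200),
    box_width=880,
    font_size=64,
    text_color=(255, 255, 255),
    place="center",
)
canvas.save("caption.png")

# Pan across a synthetic 3000px-wide backdrop over one second at the default 24 FPS.
wide_image = np.zeros((1920, 3000, 3), dtype=np.uint8)
slideshow = SlideOverImage(direction="right", video_shape=(1080, 1920)).slide(wide_image)
print(slideshow.metadata)
```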
videopython-0.1.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: videopython
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Minimal video generation and processing library.
5
5
  Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
6
6
  License: Apache License
@@ -199,7 +199,7 @@ License: Apache License
199
199
  Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
200
200
  Project-URL: Bug Reports, https://github.com/bartwojtowicz/videopython/issues
201
201
  Project-URL: Source, https://github.com/bartwojtowicz/videopython/
202
- Keywords: videopython,video,movie,opencv,generation,editing
202
+ Keywords: python,videopython,video,movie,opencv,generation,editing
203
203
  Classifier: License :: OSI Approved :: Apache Software License
204
204
  Classifier: Programming Language :: Python :: 3
205
205
  Classifier: Programming Language :: Python :: 3.10
@@ -210,14 +210,23 @@ Description-Content-Type: text/markdown
210
210
  License-File: LICENSE
211
211
  Requires-Dist: click >=8.1.7
212
212
  Requires-Dist: numpy >=1.25.2
213
- Requires-Dist: opencv-python >=4.7.0.68
214
- Requires-Dist: pytest >=7.4.0
215
- Requires-Dist: transformers >=4.36.0
216
- Requires-Dist: diffusers >=0.21.4
217
- Requires-Dist: torch >=2.1.0
218
- Requires-Dist: stability-sdk >=0.8.4
219
- Requires-Dist: openai ==1.3.5
213
+ Requires-Dist: opencv-python >=4.9.0.80
214
+ Requires-Dist: pillow >=10.3.0
220
215
  Requires-Dist: pydub >=0.25.1
216
+ Requires-Dist: tqdm >=4.66.3
217
+ Provides-Extra: dev
218
+ Requires-Dist: black ==24.3.0 ; extra == 'dev'
219
+ Requires-Dist: isort ==5.12.0 ; extra == 'dev'
220
+ Requires-Dist: mypy ==1.8.0 ; extra == 'dev'
221
+ Requires-Dist: pytest ==7.4.0 ; extra == 'dev'
222
+ Requires-Dist: types-Pillow ==10.2.0.20240213 ; extra == 'dev'
223
+ Requires-Dist: types-tqdm ==4.66.0.20240106 ; extra == 'dev'
224
+ Requires-Dist: pydub-stubs ==0.25.1.1 ; extra == 'dev'
225
+ Provides-Extra: generation
226
+ Requires-Dist: accelerate >=0.29.2 ; extra == 'generation'
227
+ Requires-Dist: diffusers >=0.26.3 ; extra == 'generation'
228
+ Requires-Dist: torch >=2.1.0 ; extra == 'generation'
229
+ Requires-Dist: transformers >=4.38.1 ; extra == 'generation'
221
230
 
222
231
  # About
223
232
 
@@ -235,41 +244,40 @@ sudo apt-get install ffmpeg
235
244
 
236
245
  ### Install with pip
237
246
  ```bash
238
- pip install videopython
247
+ pip install videopython[generation]
239
248
  ```
249
+ > You can install without `[generation]` dependencies for basic video handling and processing.
250
+ > The functionality found in `videopython.generation` won't work.
240
251
 
241
252
  ## Basic Usage
253
+ > Using Nvidia A40 or better is recommended for the `videopython.generation` module.
242
254
 
243
255
  ```python
244
- from videopython.base.video import Video
245
- from videopython.base.transitions import FadeTransition
256
+ # Generate image and animate it
257
+ from videopython.generation import ImageToVideo
258
+ from videopython.generation import TextToImage
246
259
 
247
- # Load video
248
- video = Video.from_path("tests/test_data/fast_benchmark.mp4")
249
- print(video.metadata)
250
- print(video.frames.shape) # Video is based on numpy representation of frames
260
+ image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
261
+ video = ImageToVideo().generate_video(image=image, fps=24)
251
262
 
252
- # Generate videos
253
- video1 = Video.from_prompt("Dogs playing in the snow.")
254
- video2 = Video.from_prompt("Dogs going back home.")
263
+ # Video generation directly from prompt
264
+ from videopython.generation import TextToVideo
265
+ video_gen = TextToVideo()
266
+ video = video_gen.generate_video("Dogs playing in the snow")
267
+ for _ in range(10):
268
+ video += video_gen.generate_video("Dogs playing in the snow")
255
269
 
256
- # Add videos
257
- combined_video = video1 + video2
258
- print(combined_video.metadata)
270
+ # Cut the first 2 seconds
271
+ from videopython.base.transforms import CutSeconds
272
+ transformed_video = CutSeconds(start_second=0, end_second=2).apply(video.copy())
259
273
 
260
- # Apply fade transition between videos
261
- fade = FadeTransition(0.5) # 0.5s effect time
262
- faded_video = fade.apply(videos=(video1, video2))
263
- print(faded_video.metadata)
274
+ # Upsample to 30 FPS
275
+ from videopython.base.transforms import ResampleFPS
276
+ transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)
264
277
 
265
- # Add audio from file
266
- faded_video.add_audio_from_file("tests/test_data/test_audio.mp3")
278
+ # Resize to 1000x1000
279
+ from videopython.base.transforms import Resize
280
+ transformed_video = Resize(width=1000, height=1000).apply(transformed_video)
267
281
 
268
- # Save to a file
269
- faded_video.save("my_video.mp4")
270
- ```
271
-
272
- ### Running Unit Tests
273
- ```bash
274
- PYTHONPATH=./src/ pytest
282
+ filepath = transformed_video.save()
275
283
  ```
videopython-0.1.4.dist-info/RECORD ADDED
@@ -0,0 +1,18 @@
1
+ videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
3
+ videopython/base/effects.py,sha256=ZFUWrgVWTn4uWpxPfTQSQQKEZN5ns4btMofOZNHCeQc,7540
4
+ videopython/base/transforms.py,sha256=yDtM1uZOacB0OLPQnSGHl5upoNNeN0dPWXe0hJPeV-I,6004
5
+ videopython/base/transitions.py,sha256=zYsxIgiVfN9P-CoGWUWRYFBr_0inX1sAJ02gyIEQ678,3694
6
+ videopython/base/video.py,sha256=kG-juKN-da5NzV89YxZl5JkyMTJFkgPceh4yuAUnsQs,11099
7
+ videopython/generation/__init__.py,sha256=Qse024UgiS9OxXzbbInyZ-9cpfI4enR2Dcds4lLDpNA,201
8
+ videopython/generation/audio.py,sha256=BTc-3vJ5e6D0lt2OPo2hfOcUqhNXIcvRLNoo2oQ470M,777
9
+ videopython/generation/image.py,sha256=i8zJm0WXn_Pykby9Urw1kzDcla6ArYhRgG-ueRdoAJ0,675
10
+ videopython/generation/video.py,sha256=WMFKKUSfIkQmxL6xhUb-MeAiHU6uOF_oFpmf69H8V8g,1827
11
+ videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ videopython/utils/common.py,sha256=F-30YoKUwWDI7HiJUWw0gRFUguhShSVaxT0aFfvpifg,936
13
+ videopython/utils/image.py,sha256=CaZ-XAbnIemzvYDNpZACLmIVKt9Zw20STvOptCrUXiw,12079
14
+ videopython-0.1.4.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
15
+ videopython-0.1.4.dist-info/METADATA,sha256=bKgx5rLr4t95KWNDw65QjuoUdLr2zbaajd76whhCQF0,15658
16
+ videopython-0.1.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
17
+ videopython-0.1.4.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
18
+ videopython-0.1.4.dist-info/RECORD,,
videopython-0.1.4.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.42.0)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
videopython/__init__.py DELETED
File without changes
videopython/generation/openai/text_to_speech.py DELETED
@@ -1,31 +0,0 @@
1
- import os
2
- from pathlib import Path
3
-
4
- from openai import OpenAI
5
- from pydub import AudioSegment
6
-
7
- from videopython.utils.common import generate_random_name
8
-
9
-
10
- def text_to_speech_openai(
11
- text: str, voice: str = "alloy", save: bool = True, output_dir: str | None = None
12
- ) -> str | AudioSegment:
13
- client = OpenAI()
14
-
15
- filename = generate_random_name(suffix=".mp3")
16
- if output_dir:
17
- output_dir = Path(output_dir)
18
- output_dir.mkdir(parents=True, exist_ok=True)
19
- else:
20
- output_dir = Path(os.getcwd())
21
- save_path = output_dir / filename
22
-
23
- response = client.audio.speech.create(model="tts-1", voice=voice, input=text)
24
- response.stream_to_file(save_path)
25
-
26
- if save:
27
- return str(save_path.resolve())
28
- else:
29
- audio = AudioSegment.from_mp3(str(save_path))
30
- save_path.unlink()
31
- return audio
videopython/generation/stability/text_to_image.py DELETED
@@ -1,77 +0,0 @@
1
- import io
2
- import os
3
- from pathlib import Path
4
-
5
- import numpy as np
6
- import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
7
- from PIL import Image
8
- from stability_sdk import client
9
-
10
- from videopython.utils.common import generate_random_name
11
-
12
- API_KEY = os.getenv("STABILITY_KEY")
13
- if not API_KEY:
14
- raise KeyError(
15
- "Stability API key was not found in the environment! Please set in as `STABILITY_KEY` in your environment."
16
- )
17
-
18
-
19
- def text_to_image(
20
- prompt: str,
21
- save: bool = True,
22
- output_dir: str | None = None,
23
- width: int = 1024,
24
- height: int = 1024,
25
- num_samples: int = 1,
26
- steps: int = 30,
27
- cfg_scale: float = 8.0,
28
- engine: str = "stable-diffusion-xl-1024-v1-0",
29
- verbose: bool = True,
30
- seed: int = 1,
31
- ) -> np.ndarray | str:
32
- """Generates image from prompt using the stability.ai API."""
33
- # Generate image
34
- stability_api = client.StabilityInference(
35
- key=API_KEY,
36
- verbose=verbose,
37
- engine=engine, # Set the engine to use for generation.
38
- # Check out the following link for a list of available engines: https://platform.stability.ai/docs/features/api-parameters#engine
39
- )
40
- answers = stability_api.generate(
41
- prompt=prompt,
42
- seed=seed,
43
- steps=steps, # Amount of inference steps performed on image generation.
44
- cfg_scale=cfg_scale, # Influences how strongly your generation is guided to match your prompt.
45
- # Setting this value higher increases the strength in which it tries to match your prompt.
46
- # Defaults to 7.0 if not specified.
47
- width=width,
48
- height=height,
49
- samples=num_samples,
50
- sampler=generation.SAMPLER_K_DPMPP_2M # Choose which sampler we want to denoise our generation with.
51
- # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
52
- # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
53
- )
54
- # Parse API response
55
- for resp in answers:
56
- for artifact in resp.artifacts:
57
- if artifact.finish_reason == generation.FILTER:
58
- raise RuntimeError(
59
- "Your request activated the API's safety filters and could not be processed."
60
- "Please modify the prompt and try again."
61
- )
62
- if artifact.type == generation.ARTIFACT_IMAGE:
63
- img = Image.open(io.BytesIO(artifact.binary))
64
- else:
65
- raise ValueError(f"Unknown artifact type: {artifact.type}")
66
-
67
- if save:
68
- if output_dir:
69
- output_dir = Path(output_dir)
70
- output_dir.mkdir(parents=True, exist_ok=True)
71
- else:
72
- output_dir = Path(os.getcwd())
73
- filename = output_dir / generate_random_name(suffix=".png")
74
- img.save(filename)
75
- return str(filename.resolve())
76
- else:
77
- return np.array(img)
videopython-0.1.2.dist-info/RECORD DELETED
@@ -1,15 +0,0 @@
1
- videopython/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
4
- videopython/base/transforms.py,sha256=aXIqbp9sZkZI5PYRn0uDSxLoQxCdku1BAmzfQpnGW_w,2701
5
- videopython/base/transitions.py,sha256=VQXJ-sGL7lcr3Q6uhb66hLlqW9213UBUAAH6DqJa9xs,2159
6
- videopython/base/video.py,sha256=Pn2vRRLicNRhju1qkK-QUkvtgcTNEUzaQPA8cZqqpwQ,11767
7
- videopython/generation/openai/text_to_speech.py,sha256=d5Sli8kAiIAW_ugyLAxS4yQ7jcW4_NAI6hqy6QdpAU8,852
8
- videopython/generation/stability/text_to_image.py,sha256=K6_xWVAUlFLUd-Hj6T-l3QgfxZfV_kx8KYIDpqiiNSo,2983
9
- videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- videopython/utils/common.py,sha256=-7YPlggyteVg_QbylDgBRm7yuWwuWvnlOIEVyuCzZhw,455
11
- videopython-0.1.2.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
12
- videopython-0.1.2.dist-info/METADATA,sha256=YktibiiRKPOCZpRp0Yna8tYyp5oWuPRYiU7E_oUtz44,14709
13
- videopython-0.1.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
14
- videopython-0.1.2.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
15
- videopython-0.1.2.dist-info/RECORD,,