videopython 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- videopython/base/effects.py +57 -0
- videopython/base/transforms.py +41 -5
- videopython/base/transitions.py +7 -5
- videopython/base/video.py +32 -59
- videopython/generation/__init__.py +10 -0
- videopython/generation/audio.py +30 -0
- videopython/generation/image.py +60 -0
- videopython/generation/video.py +47 -0
- videopython/utils/common.py +12 -1
- {videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/METADATA +5 -5
- videopython-0.1.3.dist-info/RECORD +17 -0
- videopython/__init__.py +0 -0
- videopython/generation/openai/text_to_speech.py +0 -31
- videopython/generation/stability/text_to_image.py +0 -77
- videopython-0.1.2.dist-info/RECORD +0 -15
- {videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/LICENSE +0 -0
- {videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/WHEEL +0 -0
- {videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/top_level.txt +0 -0
videopython/base/effects.py
ADDED
@@ -0,0 +1,57 @@
+from abc import ABC, abstractmethod
+from typing import final
+
+import numpy as np
+from tqdm import tqdm
+
+from videopython.base.video import Video
+
+
+class Effect(ABC):
+    """Abstract class for effect on frames of video.
+
+    The effect must not change the number of frames and the shape of the frames.
+    """
+
+    @final
+    def apply(self, video: Video) -> Video:
+        original_shape = video.video_shape
+        video_with_effect = self._apply(video)
+        if not video_with_effect.video_shape == original_shape:
+            raise RuntimeError("The effect must not change the number of frames and the shape of the frames!")
+        return video_with_effect
+
+    @abstractmethod
+    def _apply(self, video: Video) -> Video:
+        pass
+
+
+class FullImageOverlay(Effect):
+    def __init__(self, overlay_image: np.ndarray, alpha: float | None = None):
+        if alpha is not None and not 0 <= alpha <= 1:
+            raise ValueError("Alpha must be in range [0, 1]!")
+        elif not (overlay_image.ndim == 3 and overlay_image.shape[-1] in [3, 4]):
+            raise ValueError("Only RGB and RGBA images are supported as an overlay!")
+        elif alpha is None:
+            alpha = 1.0
+
+        if overlay_image.shape[-1] == 3:
+            overlay_image = np.dstack([overlay_image, np.full(overlay_image.shape[:2], 255, dtype=np.uint8)])
+        overlay_image[:, :, 3] = overlay_image[:, :, 3] * alpha
+
+        self._overlay_alpha = (overlay_image[:, :, 3] / 255.0)[:, :, np.newaxis]
+        self._base_transparency = 1 - self._overlay_alpha
+
+        self.overlay = overlay_image[:, :, :3] * self._overlay_alpha
+
+    def _overlay(self, img: np.ndarray) -> np.ndarray:
+        return self.overlay + (img * self._base_transparency)
+
+    def _apply(self, video: Video) -> Video:
+        if not video.frame_shape == self.overlay.shape:
+            raise ValueError(
+                f"Mismatch of overlay shape `{self.overlay.shape}` with video shape: `{video.frame_shape}`!"
+            )
+        print("Overlaying video...")
+        video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
+        return video
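A minimal usage sketch of the new FullImageOverlay effect; `Video.from_path` and `Video.save` are assumed names from the rest of the package and are not shown in this hunk:

import numpy as np

from videopython.base.effects import FullImageOverlay
from videopython.base.video import Video

video = Video.from_path("input.mp4")  # assumed loader name
height, width = video.frames.shape[1:3]

# Semi-transparent white overlay matching the frame size; alpha is applied by the effect.
overlay = np.full((height, width, 3), 255, dtype=np.uint8)
watermarked = FullImageOverlay(overlay, alpha=0.3).apply(video)
watermarked.save("overlayed.mp4")  # assumed save signature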
videopython/base/transforms.py
CHANGED
@@ -3,6 +3,7 @@ from multiprocessing import Pool
 
 import cv2
 import numpy as np
+from tqdm import tqdm
 
 from videopython.base.video import Video
 
@@ -14,9 +15,6 @@ class Transformation(ABC):
     def apply(self, video: Video) -> Video:
         pass
 
-    def __call__(self, video: Video) -> Video:
-        return self.apply(video)
-
 
 class TransformationPipeline:
     def __init__(self, transformations: list[Transformation] | None):
@@ -58,7 +56,7 @@ class CutFrames(Transformation):
         self.end_frame = end_frame
 
     def apply(self, video: Video) -> Video:
-        video
+        video = video[self.start_frame : self.end_frame]
         return video
 
 
@@ -68,7 +66,7 @@ class CutSeconds(Transformation):
         self.end_second = end_second
 
     def apply(self, video: Video) -> Video:
-        video
+        video = video[round(self.start_second * video.fps) : round(self.end_second * video.fps)]
         return video
 
 
@@ -92,3 +90,41 @@ class Resize(Transformation):
         )
         video.frames = np.array(frames_copy)
         return video
+
+
+class ResampleFPS(Transformation):
+    def __init__(self, new_fps: int | float):
+        self.new_fps = float(new_fps)
+
+    def _downsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.round(np.linspace(0, len(video.frames) - 1, target_frame_count)).astype(int)
+        video.frames = video.frames[new_frame_indices]
+        video.fps = self.new_fps
+        return video
+
+    def _upsample(self, video: Video) -> Video:
+        target_frame_count = int(len(video.frames) * (self.new_fps / video.fps))
+        new_frame_indices = np.linspace(0, len(video.frames) - 1, target_frame_count)
+        new_frames = []
+        for i in tqdm(range(len(new_frame_indices) - 1)):
+            # Interpolate between the two nearest frames
+            ratio = new_frame_indices[i] % 1
+            new_frame = (1 - ratio) * video.frames[int(new_frame_indices[i])] + ratio * video.frames[
+                int(np.ceil(new_frame_indices[i]))
+            ]
+            new_frames.append(new_frame.astype(np.uint8))
+        video.frames = np.array(new_frames, dtype=np.uint8)
+        video.fps = self.new_fps
+        return video
+
+    def apply(self, video: Video) -> Video:
+        if video.fps == self.new_fps:
+            return video
+        elif video.fps > self.new_fps:
+            print(f"Downsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._downsample(video)
+        else:
+            print(f"Upsampling video from {video.fps} to {self.new_fps} FPS.")
+            video = self._upsample(video)
+        return video
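A short usage sketch for the reworked transformations. The `__call__` shortcut was removed, so transformations are invoked through `apply`; `Video.from_path` and the (start, end) argument order are assumed, not shown in this hunk:

from videopython.base.transforms import CutSeconds, ResampleFPS
from videopython.base.video import Video

video = Video.from_path("input.mp4")      # assumed loader name
clip = CutSeconds(1.0, 4.0).apply(video)  # keep seconds 1 through 4 (assumed (start, end) order)
clip = ResampleFPS(30).apply(clip)        # down- or upsample to 30 FPS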
videopython/base/transitions.py
CHANGED
@@ -15,19 +15,19 @@ class Transition(ABC):
     """
 
     @final
-    def apply(self, videos: tuple[Video, Video]
+    def apply(self, videos: tuple[Video, Video]) -> Video:
         assert videos[0].metadata.can_be_merged_with(videos[1].metadata)
-        return self._apply(videos
+        return self._apply(videos)
 
     @abstractmethod
-    def _apply(self, videos: tuple[Video, Video]
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         pass
 
 
 class InstantTransition(Transition):
     """Instant cut without any transition."""
 
-    def _apply(self, videos:
+    def _apply(self, videos: tuple[Video, Video]) -> Video:
         return videos[0] + videos[1]
 
 
@@ -57,7 +57,7 @@ class FadeTransition(Transition):
         effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
         transition = self.fade(videos[0].frames[-effect_time_fps:], videos[1].frames[:effect_time_fps])
 
-
+        faded_videos = Video.from_frames(
             np.r_[
                 "0,2",
                 videos[0].frames[:-effect_time_fps],
@@ -66,3 +66,5 @@ class FadeTransition(Transition):
             ],
             fps=video_fps,
         )
+        faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
+        return faded_videos
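A minimal sketch of the repaired transition API. Both clips must have compatible metadata (same fps and resolution); the constructor argument name is assumed from the `effect_time_seconds` attribute used above, and `Video.from_path` is an assumed loader name:

from videopython.base.transitions import FadeTransition
from videopython.base.video import Video

first = Video.from_path("intro.mp4")   # assumed loader name
second = Video.from_path("scene.mp4")  # must share fps/resolution with `first`
merged = FadeTransition(effect_time_seconds=0.5).apply((first, second))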
videopython/base/video.py
CHANGED
@@ -7,11 +7,9 @@ from pathlib import Path
 
 import cv2
 import numpy as np
-import torch
-from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
 from pydub import AudioSegment
 
-from videopython.utils.common import generate_random_name
+from videopython.utils.common import check_path, generate_random_name
 
 
 @dataclass
@@ -117,7 +115,7 @@ class Video:
         audio = cls._load_audio_from_path(path)
         if not audio:
             print(f"No audio found for `{path}`, adding silent track!")
-            audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
+            audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         new_vid.audio = audio
         return new_vid
 
@@ -126,7 +124,7 @@ class Video:
         new_vid = cls()
         new_vid.frames = frames
         new_vid.fps = fps
-        new_vid.audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         return new_vid
 
     @classmethod
@@ -136,37 +134,9 @@ class Video:
         image = np.expand_dims(image, axis=0)
         new_vid.frames = np.repeat(image, round(length_seconds * fps), axis=0)
         new_vid.fps = fps
-        new_vid.audio = AudioSegment.silent(duration=new_vid.total_seconds * 1000)
+        new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
         return new_vid
 
-    @classmethod
-    def from_prompt(
-        cls,
-        prompt: str,
-        num_steps: int = 25,
-        height: int = 320,
-        width: int = 576,
-        num_frames: int = 24,
-        gpu_optimized: bool = False,
-    ) -> Video:
-        pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch_dtype)
-        if gpu_optimized:
-            pipe.enable_model_cpu_offload()
-            torch_dtype = torch.float16
-        else:
-            torch_dtype = torch.float32
-        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-        video_frames = np.asarray(
-            pipe(
-                prompt,
-                num_inference_steps=num_steps,
-                height=height,
-                width=width,
-                num_frames=num_frames,
-            ).frames
-        )
-        return Video.from_frames(video_frames, fps=24.0)
-
     def copy(self) -> Video:
         copied = Video().from_frames(self.frames.copy(), self.fps)
         copied.audio = self.audio
@@ -198,19 +168,10 @@ class Video:
         """
         if not self.is_loaded():
             raise RuntimeError(f"Video is not loaded, cannot save!")
-        # Check filename correctness or generate a new one if not given
-        if not filename:
-            filename = Path(generate_random_name()).resolve()
-            directory = filename.parent
-        elif not Path(filename).suffix == ".mp4":
-            raise ValueError("Only .mp4 save option is supported.")
-        else:
-            filename = Path(filename)
-            directory = filename.parent
-            if not directory.exists():
-                raise ValueError(f"Selected directory `{directory}` does not exist!")
 
-        filename
+        if filename is None:
+            filename = generate_random_name(suffix=".mp4")
+        filename = check_path(filename, dir_exists=True, suffix=".mp4")
 
         ffmpeg_video_command = (
             f"ffmpeg -loglevel error -y -framerate {self.fps} -f rawvideo -pix_fmt rgb24"
@@ -250,10 +211,14 @@ class Video:
 
     def add_audio_from_file(self, path: str, overlay: bool = True, overlay_gain: int = 0, loop: bool = False) -> None:
         new_audio = self._load_audio_from_path(path)
-        if
+        if new_audio is None:
+            print(f"Audio file `{path}` not found, skipping!")
+            return
+
+        if (duration_diff := round(self.total_seconds - new_audio.duration_seconds)) > 0 and not loop:
             new_audio = new_audio + AudioSegment.silent(duration_diff * 1000)
         elif new_audio.duration_seconds > self.total_seconds:
-            new_audio = new_audio[: self.total_seconds * 1000]
+            new_audio = new_audio[: round(self.total_seconds * 1000)]
 
         if overlay:
             self.audio = self.audio.overlay(new_audio, loop=loop, gain_during_overlay=overlay_gain)
@@ -276,17 +241,25 @@ class Video:
     def __str__(self) -> str:
         return str(self.metadata)
 
-    def __getitem__(self, val:
-        if isinstance(val, slice):
-
-
-
-
-
-
-
-
-
+    def __getitem__(self, val: slice) -> Video:
+        if not isinstance(val, slice):
+            raise ValueError("Only slices are supported for video indexing!")
+
+        # Sub-slice video if given a slice
+        sliced = self.from_frames(self.frames[val], fps=self.fps)
+        # Handle slicing without value for audio
+        start = val.start if val.start else 0
+        stop = val.stop if val.stop else len(self.frames)
+        # Handle negative values for audio slices
+        if start < 0:
+            start = len(self.frames) + start
+        if stop < 0:
+            stop = len(self.frames) + stop
+        # Append audio to the slice
+        audio_start = round(start / self.fps) * 1000
+        audio_end = round(stop / self.fps) * 1000
+        sliced.audio = self.audio[audio_start:audio_end]
+        return sliced
 
     @staticmethod
     def _load_audio_from_path(path: str) -> AudioSegment | None:
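The new `__getitem__` makes frame slicing also trim the audio track to the same span; a small sketch (`Video.from_path` is an assumed loader name):

from videopython.base.video import Video

video = Video.from_path("input.mp4")  # assumed loader name

# Slicing returns a new Video whose AudioSegment covers the same time window.
first_two_seconds = video[: round(2 * video.fps)]
without_last_second = video[: -round(video.fps)]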
videopython/generation/audio.py
ADDED
@@ -0,0 +1,30 @@
+import os
+from pathlib import Path
+from typing import Literal
+
+from openai import OpenAI
+from pydub import AudioSegment
+
+from videopython.utils.common import generate_random_name
+
+
+class TextToSpeech:
+    def __init__(self, openai_key: str | None = None, save_audio: bool = True):
+        self.client = OpenAI(api_key=openai_key)
+        self._save = save_audio
+
+    def generate_audio(
+        self,
+        text: str,
+        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"] = "alloy",
+    ) -> AudioSegment:
+        filename = generate_random_name(suffix=".mp3")
+        output_path = str((Path(os.getcwd()) / filename).resolve())
+        response = self.client.audio.speech.create(model="tts-1", voice=voice, input=text)
+        response.stream_to_file(output_path)
+        audio = AudioSegment.from_file(output_path)
+        if self._save:
+            print(f"Audio saved to {output_path}")
+        else:
+            os.remove(output_path)
+        return audio
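A usage sketch for the new TextToSpeech wrapper; when no key is passed, the OpenAI client falls back to the OPENAI_API_KEY environment variable, and the result is a pydub AudioSegment:

from videopython.generation.audio import TextToSpeech

tts = TextToSpeech(save_audio=False)  # no file kept on disk; key read from OPENAI_API_KEY
speech = tts.generate_audio("Hello from videopython!", voice="nova")
print(round(speech.duration_seconds, 2))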
videopython/generation/image.py
ADDED
@@ -0,0 +1,60 @@
+import io
+import os
+
+import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
+from PIL import Image
+from stability_sdk import client
+
+
+class TextToImage:
+    def __init__(
+        self,
+        stability_key: str | None = None,
+        engine: str = "stable-diffusion-xl-1024-v1-0",
+        verbose: bool = True,
+    ):
+        stability_key = stability_key or os.getenv("STABILITY_KEY")
+        if stability_key is None:
+            raise ValueError(
+                "API Key for stability is required. Please provide it as an argument"
+                " or set it as an environment variable `STABILITY_KEY`. "
+            )
+
+        self.client = client.StabilityInference(stability_key, verbose=verbose, engine=engine)
+
+    def generate_image(
+        self,
+        prompt: str,
+        width: int = 1024,
+        height: int = 1024,
+        steps: int = 30,
+        cfg_scale: float = 8.0,
+        seed: int = 1,
+    ) -> Image.Image:
+        answers = self.client.generate(
+            prompt=prompt,
+            seed=seed,
+            steps=steps,  # Amount of inference steps performed on image generation.
+            cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
+            # Setting this value higher increases the strength in which it tries to match your prompt.
+            # Defaults to 7.0 if not specified.
+            width=width,
+            height=height,
+            safety=False,
+            samples=1,
+            sampler=generation.SAMPLER_K_DPMPP_2M,  # Choose which sampler we want to denoise our generation with.
+            # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
+            # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
+        )
+        for resp in answers:
+            for artifact in resp.artifacts:
+                if artifact.finish_reason == generation.FILTER:
+                    raise RuntimeError(
+                        "Your request activated the API's safety filters and could not be processed."
+                        "Please modify the prompt and try again."
+                    )
+                if artifact.type == generation.ARTIFACT_IMAGE:
+                    img = Image.open(io.BytesIO(artifact.binary))
+                else:
+                    raise ValueError(f"Unknown artifact type: {artifact.type}")
+        return img
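A sketch of the class-based TextToImage, which replaces the old module-level `text_to_image` function deleted further below. It requires a Stability API key, passed in or exported as STABILITY_KEY:

from videopython.generation.image import TextToImage

text2image = TextToImage()  # raises ValueError if no key is given and STABILITY_KEY is unset
picture = text2image.generate_image("A watercolor lighthouse at dusk", width=1024, height=1024)
picture.save("lighthouse.png")  # returns a PIL.Image.Image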
videopython/generation/video.py
ADDED
@@ -0,0 +1,47 @@
+import numpy as np
+import torch
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from PIL.Image import Image
+
+from videopython.base.video import Video
+
+TEXT_TO_VIDEO_MODEL = "cerspense/zeroscope_v2_576w"
+IMAGE_TO_VIDEO_MODEL = "stabilityai/stable-video-diffusion-img2vid-xt"
+
+
+class TextToVideo:
+    def __init__(self, gpu_optimized: bool = True):
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16 if gpu_optimized else torch.float32
+        )
+        self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
+        if gpu_optimized:
+            self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(
+        self, prompt: str, num_steps: int = 25, height: int = 320, width: int = 576, num_frames: int = 24
+    ) -> Video:
+        video_frames = self.pipeline(
+            prompt,
+            num_inference_steps=num_steps,
+            height=height,
+            width=width,
+            num_frames=num_frames,
+        ).frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=24.0)
+
+
+class ImageToVideo:
+    def __init__(self):
+        if not torch.cuda.is_available():
+            raise ValueError("CUDA is not available, but ImageToVideo model requires CUDA.")
+        self.pipeline = DiffusionPipeline.from_pretrained(
+            IMAGE_TO_VIDEO_MODEL, torch_dtype=torch.float16, variant="fp16"
+        ).to("cuda")
+        self.pipeline.enable_model_cpu_offload()
+
+    def generate_video(self, image: Image, fps: int = 24) -> Video:
+        video_frames = self.pipeline(image=image, fps=fps, output_type="np").frames[0]
+        video_frames = np.asarray(255 * video_frames, dtype=np.uint8)
+        return Video.from_frames(video_frames, fps=float(fps))
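These generators replace the removed `Video.from_prompt` classmethod. A sketch of the text-to-video path; model weights are downloaded on first use, a GPU is strongly recommended, and `save` is an assumed method name from videopython.base.video:

from videopython.generation.video import TextToVideo

text2video = TextToVideo(gpu_optimized=True)
clip = text2video.generate_video("A drone shot over a snowy forest", num_frames=24)
clip.save("generated.mp4")  # assumed Video.save signature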
videopython/utils/common.py
CHANGED
@@ -1,5 +1,7 @@
 import time
 import uuid
+from pathlib import Path
+from typing import Callable
 
 
 def generate_random_name(suffix=".mp4"):
@@ -7,7 +9,7 @@ def generate_random_name(suffix=".mp4"):
     return f"{uuid.uuid4()}{suffix}"
 
 
-def timeit(func:
+def timeit(func: Callable):
     """Decorator to measure execution time of a function."""
 
     def timed(*args, **kwargs):
@@ -18,3 +20,12 @@ def timeit(func: callable):
         return result
 
     return timed
+
+
+def check_path(path: str, dir_exists: bool = True, suffix: str | None = None) -> str:
+    fullpath = Path(path).resolve()
+    if dir_exists and not fullpath.parent.exists():
+        raise ValueError(f"Directory `{fullpath.parent}` does not exist!")
+    if suffix and suffix != fullpath.suffix:
+        raise ValueError(f"Required suffix `{suffix}` does not match the file suffix `{fullpath.suffix}`")
+    return str(fullpath)
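The new check_path helper centralizes the filename validation that Video.save used to do inline; a quick sketch:

from videopython.utils.common import check_path, generate_random_name

# Returns the resolved absolute path, or raises ValueError when the parent
# directory is missing or the suffix does not match.
target = check_path(generate_random_name(suffix=".mp4"), dir_exists=True, suffix=".mp4")
print(target)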
{videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: videopython
-Version: 0.1.2
+Version: 0.1.3
 Summary: Minimal video generation and processing library.
 Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
 License: Apache License
@@ -210,12 +210,12 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click >=8.1.7
 Requires-Dist: numpy >=1.25.2
-Requires-Dist: opencv-python >=4.
+Requires-Dist: opencv-python >=4.9.0.80
 Requires-Dist: pytest >=7.4.0
-Requires-Dist: transformers >=4.
-Requires-Dist: diffusers >=0.
+Requires-Dist: transformers >=4.38.1
+Requires-Dist: diffusers >=0.26.3
 Requires-Dist: torch >=2.1.0
-Requires-Dist: stability-sdk >=0.8.
+Requires-Dist: stability-sdk >=0.8.5
 Requires-Dist: openai ==1.3.5
 Requires-Dist: pydub >=0.25.1
 
videopython-0.1.3.dist-info/RECORD
ADDED
@@ -0,0 +1,17 @@
+videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
+videopython/base/effects.py,sha256=DpA8V89Es7YWPEq72l_h_D7MG1QYf1iuslAl-QgzZx8,2153
+videopython/base/transforms.py,sha256=DQcG8tZ8nlGj3khlp3v4C0MISpRY2rZr-6B6GtPZykE,4251
+videopython/base/transitions.py,sha256=efuJdls2xJVpXV8RGaFd--ii8cLUPz6FdmhSvOjaiTM,2275
+videopython/base/video.py,sha256=40leF8bSjNIhP_L8loOh9ptlZNTZAZ95Dgv9FH4mSz4,10791
+videopython/generation/__init__.py,sha256=Qse024UgiS9OxXzbbInyZ-9cpfI4enR2Dcds4lLDpNA,201
+videopython/generation/audio.py,sha256=YPqUdAcB0mGCt0mgFrxzupX08xx0O_qwfVdjFGlAxaw,985
+videopython/generation/image.py,sha256=B-TlrNXFu18NnMi3KO5fjk0paTSmIsQk400iZb76K8w,2507
+videopython/generation/video.py,sha256=4P4DhHS-_eDColsXK6YefSdoQbU3Ce0n6fHuY5zewYI,1874
+videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/utils/common.py,sha256=F-30YoKUwWDI7HiJUWw0gRFUguhShSVaxT0aFfvpifg,936
+videopython-0.1.3.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
+videopython-0.1.3.dist-info/METADATA,sha256=xj8k5j3qPIVKgXbr4uTi6ad2BSs9j6-V6baonpQKoJI,14709
+videopython-0.1.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+videopython-0.1.3.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
+videopython-0.1.3.dist-info/RECORD,,
videopython/__init__.py
DELETED
File without changes
videopython/generation/openai/text_to_speech.py
DELETED
@@ -1,31 +0,0 @@
-import os
-from pathlib import Path
-
-from openai import OpenAI
-from pydub import AudioSegment
-
-from videopython.utils.common import generate_random_name
-
-
-def text_to_speech_openai(
-    text: str, voice: str = "alloy", save: bool = True, output_dir: str | None = None
-) -> str | AudioSegment:
-    client = OpenAI()
-
-    filename = generate_random_name(suffix=".mp3")
-    if output_dir:
-        output_dir = Path(output_dir)
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        output_dir = Path(os.getcwd())
-    save_path = output_dir / filename
-
-    response = client.audio.speech.create(model="tts-1", voice=voice, input=text)
-    response.stream_to_file(save_path)
-
-    if save:
-        return str(save_path.resolve())
-    else:
-        audio = AudioSegment.from_mp3(str(save_path))
-        save_path.unlink()
-        return audio
videopython/generation/stability/text_to_image.py
DELETED
@@ -1,77 +0,0 @@
-import io
-import os
-from pathlib import Path
-
-import numpy as np
-import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
-from PIL import Image
-from stability_sdk import client
-
-from videopython.utils.common import generate_random_name
-
-API_KEY = os.getenv("STABILITY_KEY")
-if not API_KEY:
-    raise KeyError(
-        "Stability API key was not found in the environment! Please set in as `STABILITY_KEY` in your environment."
-    )
-
-
-def text_to_image(
-    prompt: str,
-    save: bool = True,
-    output_dir: str | None = None,
-    width: int = 1024,
-    height: int = 1024,
-    num_samples: int = 1,
-    steps: int = 30,
-    cfg_scale: float = 8.0,
-    engine: str = "stable-diffusion-xl-1024-v1-0",
-    verbose: bool = True,
-    seed: int = 1,
-) -> np.ndarray | str:
-    """Generates image from prompt using the stability.ai API."""
-    # Generate image
-    stability_api = client.StabilityInference(
-        key=API_KEY,
-        verbose=verbose,
-        engine=engine,  # Set the engine to use for generation.
-        # Check out the following link for a list of available engines: https://platform.stability.ai/docs/features/api-parameters#engine
-    )
-    answers = stability_api.generate(
-        prompt=prompt,
-        seed=seed,
-        steps=steps,  # Amount of inference steps performed on image generation.
-        cfg_scale=cfg_scale,  # Influences how strongly your generation is guided to match your prompt.
-        # Setting this value higher increases the strength in which it tries to match your prompt.
-        # Defaults to 7.0 if not specified.
-        width=width,
-        height=height,
-        samples=num_samples,
-        sampler=generation.SAMPLER_K_DPMPP_2M  # Choose which sampler we want to denoise our generation with.
-        # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
-        # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
-    )
-    # Parse API response
-    for resp in answers:
-        for artifact in resp.artifacts:
-            if artifact.finish_reason == generation.FILTER:
-                raise RuntimeError(
-                    "Your request activated the API's safety filters and could not be processed."
-                    "Please modify the prompt and try again."
-                )
-            if artifact.type == generation.ARTIFACT_IMAGE:
-                img = Image.open(io.BytesIO(artifact.binary))
-            else:
-                raise ValueError(f"Unknown artifact type: {artifact.type}")
-
-    if save:
-        if output_dir:
-            output_dir = Path(output_dir)
-            output_dir.mkdir(parents=True, exist_ok=True)
-        else:
-            output_dir = Path(os.getcwd())
-        filename = output_dir / generate_random_name(suffix=".png")
-        img.save(filename)
-        return str(filename.resolve())
-    else:
-        return np.array(img)
videopython-0.1.2.dist-info/RECORD
DELETED
@@ -1,15 +0,0 @@
-videopython/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
-videopython/base/transforms.py,sha256=aXIqbp9sZkZI5PYRn0uDSxLoQxCdku1BAmzfQpnGW_w,2701
-videopython/base/transitions.py,sha256=VQXJ-sGL7lcr3Q6uhb66hLlqW9213UBUAAH6DqJa9xs,2159
-videopython/base/video.py,sha256=Pn2vRRLicNRhju1qkK-QUkvtgcTNEUzaQPA8cZqqpwQ,11767
-videopython/generation/openai/text_to_speech.py,sha256=d5Sli8kAiIAW_ugyLAxS4yQ7jcW4_NAI6hqy6QdpAU8,852
-videopython/generation/stability/text_to_image.py,sha256=K6_xWVAUlFLUd-Hj6T-l3QgfxZfV_kx8KYIDpqiiNSo,2983
-videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/utils/common.py,sha256=-7YPlggyteVg_QbylDgBRm7yuWwuWvnlOIEVyuCzZhw,455
-videopython-0.1.2.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
-videopython-0.1.2.dist-info/METADATA,sha256=YktibiiRKPOCZpRp0Yna8tYyp5oWuPRYiU7E_oUtz44,14709
-videopython-0.1.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-videopython-0.1.2.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
-videopython-0.1.2.dist-info/RECORD,,
{videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/LICENSE
File without changes

{videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/WHEEL
File without changes

{videopython-0.1.2.dist-info → videopython-0.1.3.dist-info}/top_level.txt
File without changes