videopython 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (38)
  1. {videopython-0.1.3 → videopython-0.1.4}/PKG-INFO +42 -34
  2. videopython-0.1.4/README.md +53 -0
  3. {videopython-0.1.3 → videopython-0.1.4}/pyproject.toml +8 -16
  4. videopython-0.1.4/requirements-dev.txt +7 -0
  5. videopython-0.1.4/requirements-generation.txt +4 -0
  6. videopython-0.1.4/requirements.txt +6 -0
  7. videopython-0.1.4/src/videopython/base/effects.py +183 -0
  8. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/base/transforms.py +52 -4
  9. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/base/transitions.py +36 -0
  10. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/base/video.py +7 -1
  11. videopython-0.1.4/src/videopython/generation/audio.py +22 -0
  12. videopython-0.1.4/src/videopython/generation/image.py +22 -0
  13. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/generation/video.py +5 -7
  14. videopython-0.1.4/src/videopython/utils/image.py +275 -0
  15. {videopython-0.1.3 → videopython-0.1.4}/src/videopython.egg-info/PKG-INFO +42 -34
  16. {videopython-0.1.3 → videopython-0.1.4}/src/videopython.egg-info/SOURCES.txt +5 -0
  17. videopython-0.1.4/src/videopython.egg-info/requires.txt +21 -0
  18. videopython-0.1.4/tests/test_effects.py +71 -0
  19. {videopython-0.1.3 → videopython-0.1.4}/tests/test_transforms.py +1 -1
  20. {videopython-0.1.3 → videopython-0.1.4}/tests/test_transitions.py +11 -1
  21. videopython-0.1.4/tests/test_utils.py +11 -0
  22. videopython-0.1.3/README.md +0 -54
  23. videopython-0.1.3/src/videopython/base/effects.py +0 -57
  24. videopython-0.1.3/src/videopython/generation/audio.py +0 -30
  25. videopython-0.1.3/src/videopython/generation/image.py +0 -60
  26. videopython-0.1.3/src/videopython.egg-info/requires.txt +0 -10
  27. videopython-0.1.3/tests/test_effects.py +0 -24
  28. {videopython-0.1.3 → videopython-0.1.4}/LICENSE +0 -0
  29. {videopython-0.1.3 → videopython-0.1.4}/setup.cfg +0 -0
  30. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/base/__init__.py +0 -0
  31. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/base/compose.py +0 -0
  32. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/generation/__init__.py +0 -0
  33. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/utils/__init__.py +0 -0
  34. {videopython-0.1.3 → videopython-0.1.4}/src/videopython/utils/common.py +0 -0
  35. {videopython-0.1.3 → videopython-0.1.4}/src/videopython.egg-info/dependency_links.txt +0 -0
  36. {videopython-0.1.3 → videopython-0.1.4}/src/videopython.egg-info/top_level.txt +0 -0
  37. {videopython-0.1.3 → videopython-0.1.4}/tests/test_compose.py +0 -0
  38. {videopython-0.1.3 → videopython-0.1.4}/tests/test_video.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: videopython
- Version: 0.1.3
+ Version: 0.1.4
  Summary: Minimal video generation and processing library.
  Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
  License: Apache License
@@ -199,7 +199,7 @@ License: Apache License
  Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
  Project-URL: Bug Reports, https://github.com/bartwojtowicz/videopython/issues
  Project-URL: Source, https://github.com/bartwojtowicz/videopython/
- Keywords: videopython,video,movie,opencv,generation,editing
+ Keywords: python,videopython,video,movie,opencv,generation,editing
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.10
@@ -211,13 +211,22 @@ License-File: LICENSE
  Requires-Dist: click>=8.1.7
  Requires-Dist: numpy>=1.25.2
  Requires-Dist: opencv-python>=4.9.0.80
- Requires-Dist: pytest>=7.4.0
- Requires-Dist: transformers>=4.38.1
- Requires-Dist: diffusers>=0.26.3
- Requires-Dist: torch>=2.1.0
- Requires-Dist: stability-sdk>=0.8.5
- Requires-Dist: openai==1.3.5
+ Requires-Dist: pillow>=10.3.0
  Requires-Dist: pydub>=0.25.1
+ Requires-Dist: tqdm>=4.66.3
+ Provides-Extra: dev
+ Requires-Dist: black==24.3.0; extra == "dev"
+ Requires-Dist: isort==5.12.0; extra == "dev"
+ Requires-Dist: mypy==1.8.0; extra == "dev"
+ Requires-Dist: pytest==7.4.0; extra == "dev"
+ Requires-Dist: types-Pillow==10.2.0.20240213; extra == "dev"
+ Requires-Dist: types-tqdm==4.66.0.20240106; extra == "dev"
+ Requires-Dist: pydub-stubs==0.25.1.1; extra == "dev"
+ Provides-Extra: generation
+ Requires-Dist: accelerate>=0.29.2; extra == "generation"
+ Requires-Dist: diffusers>=0.26.3; extra == "generation"
+ Requires-Dist: torch>=2.1.0; extra == "generation"
+ Requires-Dist: transformers>=4.38.1; extra == "generation"

  # About

@@ -235,41 +244,40 @@ sudo apt-get install ffmpeg

  ### Install with pip
  ```bash
- pip install videopython
+ pip install videopython[generation]
  ```
+ > You can install without `[generation]` dependencies for basic video handling and processing.
+ > The funcionalities found in `videopython.generation` won't work.

  ## Basic Usage
+ > Using Nvidia A40 or better is recommended for the `videopython.generation` module.

  ```python
- from videopython.base.video import Video
- from videopython.base.transitions import FadeTransition
+ # Generate image and animate it
+ from videopython.generation import ImageToVideo
+ from videopython.generation import TextToImage

- # Load video
- video = Video.from_path("tests/test_data/fast_benchmark.mp4")
- print(video.metadata)
- print(video.frames.shape) # Video is based on numpy representation of frames
+ image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
+ video = ImageToVideo().generate_video(image=image, fps=24)

- # Generate videos
- video1 = Video.from_prompt("Dogs playing in the snow.")
- video2 = Video.from_prompt("Dogs going back home.")
+ # Video generation directly from prompt
+ from videopython.generation import TextToVideo
+ video_gen = TextToVideo()
+ video = video_gen.generate_video("Dogs playing in the snow")
+ for _ in range(10):
+     video += video_gen.generate_video("Dogs playing in the snow")

- # Add videos
- combined_video = video1 + video2
- print(combined_video.metadata)
+ # Cut the first 2 seconds
+ from videopython.base.transforms import CutSeconds
+ transformed_video = CutSeconds(start_second=0, end_second=2).apply(video.copy())

- # Apply fade transition between videos
- fade = FadeTransition(0.5) # 0.5s effect time
- faded_video = fade.apply(videos=(video1, video2))
- print(faded_video.metadata)
+ # Upsample to 30 FPS
+ from videopython.base.transforms import ResampleFPS
+ transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)

- # Add audio from file
- faded_video.add_audio_from_file("tests/test_data/test_audio.mp3")
+ # Resize to 1000x1000
+ from videopython.base.transforms import Resize
+ transformed_video = Resize(width=1000, height=1000).apply(transformed_video)

- # Save to a file
- faded_video.save("my_video.mp4")
- ```
-
- ### Running Unit Tests
- ```bash
- PYTHONPATH=./src/ pytest
+ filepath = transformed_video.save()
  ```
@@ -0,0 +1,53 @@
+ # About
+
+ Minimal video generation and processing library.
+
+ ## Setup
+
+ ### Install ffmpeg
+ ```bash
+ # Install with brew for MacOS:
+ brew install ffmpeg
+ # Install with apt-get for Ubuntu:
+ sudo apt-get install ffmpeg
+ ```
+
+ ### Install with pip
+ ```bash
+ pip install videopython[generation]
+ ```
+ > You can install without `[generation]` dependencies for basic video handling and processing.
+ > The funcionalities found in `videopython.generation` won't work.
+
+ ## Basic Usage
+ > Using Nvidia A40 or better is recommended for the `videopython.generation` module.
+
+ ```python
+ # Generate image and animate it
+ from videopython.generation import ImageToVideo
+ from videopython.generation import TextToImage
+
+ image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
+ video = ImageToVideo().generate_video(image=image, fps=24)
+
+ # Video generation directly from prompt
+ from videopython.generation import TextToVideo
+ video_gen = TextToVideo()
+ video = video_gen.generate_video("Dogs playing in the snow")
+ for _ in range(10):
+     video += video_gen.generate_video("Dogs playing in the snow")
+
+ # Cut the first 2 seconds
+ from videopython.base.transforms import CutSeconds
+ transformed_video = CutSeconds(start_second=0, end_second=2).apply(video.copy())
+
+ # Upsample to 30 FPS
+ from videopython.base.transforms import ResampleFPS
+ transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)
+
+ # Resize to 1000x1000
+ from videopython.base.transforms import Resize
+ transformed_video = Resize(width=1000, height=1000).apply(transformed_video)
+
+ filepath = transformed_video.save()
+ ```
@@ -1,5 +1,5 @@
  [build-system]
- requires = ["setuptools>=61.0"]
+ requires = ["setuptools>=66.1"]
  build-backend = "setuptools.build_meta"

  [tool.setuptools.packages.find]
@@ -11,12 +11,13 @@ include = ["videopython.*"]

  [project]
  name = "videopython"
- version = "0.1.3"
+ version = "0.1.4"
  description = "Minimal video generation and processing library."
  readme = "README.md"
  requires-python = ">=3.10"
  license = {file = "LICENSE"}
- keywords = ["videopython", "video", "movie", "opencv", "generation", "editing"]
+ keywords = ["python", "videopython", "video", "movie", "opencv", "generation", "editing"]
+ dynamic = ["dependencies", "optional-dependencies"]

  authors = [
      {name = "Bartosz Wójtowicz", email = "bartoszwojtowicz@outlook.com" },
@@ -32,20 +33,11 @@ classifiers = [
      "Operating System :: OS Independent",
  ]

- dependencies = [
-     "click>=8.1.7",
-     "numpy>=1.25.2",
-     "opencv-python>=4.9.0.80",
-     "pytest>=7.4.0",
-     "transformers>=4.38.1",
-     "diffusers>=0.26.3",
-     "torch>=2.1.0",
-     "stability-sdk>=0.8.5",
-     "openai==1.3.5",
-     "pydub>=0.25.1"
- ]
+ [tool.setuptools.dynamic]
+ dependencies = {file = ["requirements.txt"]}
+ optional-dependencies = { dev = {file = ["requirements-dev.txt"]}, generation = {file = ["requirements-generation.txt"]} }

- [project.urls] # Optional
+ [project.urls]
  "Homepage" = "https://github.com/bartwojtowicz/videopython/"
  "Bug Reports" = "https://github.com/bartwojtowicz/videopython/issues"
  "Source" = "https://github.com/bartwojtowicz/videopython/"
@@ -0,0 +1,7 @@
+ black==24.3.0
+ isort==5.12.0
+ mypy==1.8.0
+ pytest==7.4.0
+ types-Pillow==10.2.0.20240213
+ types-tqdm==4.66.0.20240106
+ pydub-stubs==0.25.1.1
@@ -0,0 +1,4 @@
+ accelerate>=0.29.2
+ diffusers>=0.26.3
+ torch>=2.1.0
+ transformers>=4.38.1
@@ -0,0 +1,6 @@
+ click>=8.1.7
+ numpy>=1.25.2
+ opencv-python>=4.9.0.80
+ pillow>=10.3.0
+ pydub>=0.25.1
+ tqdm>=4.66.3
@@ -0,0 +1,183 @@
+ from abc import ABC, abstractmethod
+ from typing import Literal, final
+
+ import cv2
+ import numpy as np
+ from PIL import Image
+ from tqdm import tqdm
+
+ from videopython.base.video import Video
+
+
+ class Effect(ABC):
+     """Abstract class for effect on frames of video.
+
+     The effect must not change the number of frames and the shape of the frames.
+     """
+
+     @final
+     def apply(self, video: Video, start: float | None = None, stop: float | None = None) -> Video:
+         original_shape = video.video_shape
+         start = start if start is not None else 0
+         stop = stop if stop is not None else video.total_seconds
+         # Check for start and stop correctness
+         if not 0 <= start <= video.total_seconds:
+             raise ValueError(f"Video is only {video.total_seconds} long, but passed start: {start}!")
+         elif not start <= stop <= video.total_seconds:
+             raise ValueError(f"Video is only {video.total_seconds} long, but passed stop: {stop}!")
+         # Apply effect on video slice
+         effect_start_frame = round(start * video.fps)
+         effect_end_frame = round(stop * video.fps)
+         video_with_effect = self._apply(video[effect_start_frame:effect_end_frame])
+         old_audio = video.audio
+         video = Video.from_frames(
+             np.r_[
+                 "0,2",
+                 video.frames[:effect_start_frame],
+                 video_with_effect.frames,
+                 video.frames[effect_end_frame:],
+             ],
+             fps=video.fps,
+         )
+         video.audio = old_audio
+         # Check if dimensions didn't change
+         if not video.video_shape == original_shape:
+             raise RuntimeError("The effect must not change the number of frames and the shape of the frames!")
+
+         return video
+
+     @abstractmethod
+     def _apply(self, video: Video) -> Video:
+         pass
+
+
+ class FullImageOverlay(Effect):
+     def __init__(self, overlay_image: np.ndarray, alpha: float | None = None, fade_time: float = 0.0):
+         if alpha is not None and not 0 <= alpha <= 1:
+             raise ValueError("Alpha must be in range [0, 1]!")
+         elif not (overlay_image.ndim == 3 and overlay_image.shape[-1] in [3, 4]):
+             raise ValueError("Only RGB and RGBA images are supported as an overlay!")
+         elif alpha is None:
+             alpha = 1.0
+
+         if overlay_image.shape[-1] == 3:
+             overlay_image = np.dstack([overlay_image, np.full(overlay_image.shape[:2], 255, dtype=np.uint8)])
+
+         self.alpha = alpha
+         self.overlay = overlay_image.astype(np.uint8)
+         self.fade_time = fade_time
+
+     def _overlay(self, img: np.ndarray, alpha: float = 1.0) -> np.ndarray:
+         img_pil = Image.fromarray(img)
+         overlay = self.overlay.copy()
+         overlay[:, :, 3] = overlay[:, :, 3] * (self.alpha * alpha)
+         overlay_pil = Image.fromarray(overlay)
+         img_pil.paste(overlay_pil, (0, 0), overlay_pil)
+         return np.array(img_pil)
+
+     def _apply(self, video: Video) -> Video:
+         if not video.frame_shape == self.overlay[:, :, :3].shape:
+             raise ValueError(
+                 f"Mismatch of overlay shape `{self.overlay.shape}` with video shape: `{video.frame_shape}`!"
+             )
+         elif not (0 <= 2 * self.fade_time <= video.total_seconds):
+             raise ValueError(f"Video is only {video.total_seconds}s long, but fade time is {self.fade_time}s!")
+
+         print("Overlaying video...")
+         if self.fade_time == 0:
+             video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
+         else:
+             num_video_frames = len(video.frames)
+             num_fade_frames = round(self.fade_time * video.fps)
+             new_frames = []
+             for i, frame in enumerate(tqdm(video.frames)):
+                 frames_dist_from_end = min(i, num_video_frames - i)
+                 if frames_dist_from_end >= num_fade_frames:
+                     fade_alpha = 1.0
+                 else:
+                     fade_alpha = frames_dist_from_end / num_fade_frames
+                 new_frames.append(self._overlay(frame, fade_alpha))
+             video.frames = np.array(new_frames, dtype=np.uint8)
+         return video
+
+
+ class Blur(Effect):
+     def __init__(
+         self,
+         mode: Literal["constant", "ascending", "descending"],
+         iterations: int,
+         kernel_size: tuple[int, int] = (5, 5),
+     ):
+         if iterations < 1:
+             raise ValueError("Iterations must be at least 1!")
+         self.mode = mode
+         self.iterations = iterations
+         self.kernel_size = kernel_size
+
+     def _apply(self, video: Video) -> Video:
+         n_frames = len(video.frames)
+         new_frames = []
+         if self.mode == "constant":
+             for frame in video.frames:
+                 blurred_frame = frame
+                 for _ in range(self.iterations):
+                     blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
+                 new_frames.append(blurred_frame)
+         elif self.mode == "ascending":
+             for i, frame in tqdm(enumerate(video.frames)):
+                 frame_iterations = max(1, round((i / n_frames) * self.iterations))
+                 blurred_frame = frame
+                 for _ in range(frame_iterations):
+                     blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
+                 new_frames.append(blurred_frame)
+         elif self.mode == "descending":
+             for i, frame in tqdm(enumerate(video.frames)):
+                 frame_iterations = max(round(((n_frames - i) / n_frames) * self.iterations), 1)
+                 blurred_frame = frame
+                 for _ in range(frame_iterations):
+                     blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
+                 new_frames.append(blurred_frame)
+         else:
+             raise ValueError(f"Unknown mode: `{self.mode}`.")
+         video.frames = np.asarray(new_frames)
+         return video
+
+
+ class Zoom(Effect):
+     def __init__(self, zoom_factor: float, mode: Literal["in", "out"]):
+         if zoom_factor <= 1:
+             raise ValueError("Zoom factor must be greater than 1!")
+         self.zoom_factor = zoom_factor
+         self.mode = mode
+
+     def _apply(self, video: Video) -> Video:
+         n_frames = len(video.frames)
+         new_frames = []
+
+         width = video.metadata.width
+         height = video.metadata.height
+         crop_sizes_w, crop_sizes_h = np.linspace(width // self.zoom_factor, width, n_frames), np.linspace(
+             height // self.zoom_factor, height, n_frames
+         )
+
+         if self.mode == "in":
+             for frame, w, h in tqdm(zip(video.frames, reversed(crop_sizes_w), reversed(crop_sizes_h))):
+                 x = width / 2 - w / 2
+                 y = height / 2 - h / 2
+
+                 cropped_frame = frame[round(y) : round(y + h), round(x) : round(x + w)]
+                 zoomed_frame = cv2.resize(cropped_frame, (width, height))
+                 new_frames.append(zoomed_frame)
+         elif self.mode == "out":
+             for frame, w, h in tqdm(zip(video.frames, crop_sizes_w, crop_sizes_h)):
+                 x = width / 2 - w / 2
+                 y = height / 2 - h / 2
+
+                 cropped_frame = frame[round(y) : round(y + h), round(x) : round(x + w)]
+                 zoomed_frame = cv2.resize(cropped_frame, (width, height))
+                 new_frames.append(zoomed_frame)
+         else:
+             raise ValueError(f"Unknown mode: `{self.mode}`.")
+         video.frames = np.asarray(new_frames)
+         return video
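The new `Blur` and `Zoom` effects follow the `Effect.apply(video, start, stop)` contract defined above. A minimal usage sketch, assuming `Video.from_path` behaves as in 0.1.3; the file path and parameter values are illustrative only:

```python
from videopython.base.video import Video
from videopython.base.effects import Blur, Zoom

video = Video.from_path("tests/test_data/fast_benchmark.mp4")

# Blur that ramps up over the first second of the clip only.
blurred = Blur(mode="ascending", iterations=50, kernel_size=(5, 5)).apply(video.copy(), start=0, stop=1.0)

# Zoom in across the whole clip; zoom_factor must be greater than 1.
zoomed = Zoom(zoom_factor=1.5, mode="in").apply(video.copy())
```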
@@ -1,5 +1,7 @@
  from abc import ABC, abstractmethod
+ from enum import Enum
  from multiprocessing import Pool
+ from typing import Literal

  import cv2
  import numpy as np
@@ -71,9 +73,11 @@ class CutSeconds(Transformation):


  class Resize(Transformation):
-     def __init__(self, new_width: int, new_height: int):
-         self.new_width = new_width
-         self.new_height = new_height
+     def __init__(self, width: int | None = None, height: int | None = None):
+         self.width = width
+         self.height = height
+         if width is None and height is None:
+             raise ValueError("You must provide either `width` or `height`!")

      def _resize_frame(self, frame: np.ndarray, new_width: int, new_height: int) -> np.ndarray:
          return cv2.resize(
@@ -83,10 +87,25 @@ class Resize(Transformation):
          )

      def apply(self, video: Video) -> Video:
+         if self.width and self.height:
+             new_height = self.height
+             new_width = self.width
+         elif self.height is None and self.width:
+             video_height = video.video_shape[1]
+             video_width = video.video_shape[2]
+             new_height = round(video_height * (self.width / video_width))
+             new_width = self.width
+         elif self.width is None and self.height:
+             video_height = video.video_shape[1]
+             video_width = video.video_shape[2]
+             new_width = round(video_width * (self.height / video_height))
+             new_height = self.height
+
+         print(f"Resizing video to: {new_width}x{new_height}!")
          with Pool() as pool:
              frames_copy = pool.starmap(
                  self._resize_frame,
-                 [(frame, self.new_width, self.new_height) for frame in video.frames],
+                 [(frame, new_width, new_height) for frame in video.frames],
              )
          video.frames = np.array(frames_copy)
          return video
@@ -128,3 +147,32 @@ class ResampleFPS(Transformation):
          print(f"Upsampling video from {video.fps} to {self.new_fps} FPS.")
          video = self._upsample(video)
          return video
+
+
+ class CropMode(Enum):
+     CENTER = "center"
+
+
+ class Crop(Transformation):
+     def __init__(self, width: int, height: int, mode: CropMode = CropMode.CENTER):
+         self.width = width
+         self.height = height
+         self.mode = mode
+
+     def apply(self, video: Video) -> Video:
+         if self.mode == CropMode.CENTER:
+             current_shape = video.frame_shape[:2]
+             center_height = current_shape[0] // 2
+             center_width = current_shape[1] // 2
+             width_offset = self.width // 2
+             height_offset = self.height // 2
+             video.frames = video.frames[
+                 :,
+                 center_height - height_offset : center_height + height_offset,
+                 center_width - width_offset : center_width + width_offset,
+                 :,
+             ]
+         else:
+             raise ValueError(f"Unknown mode: {self.mode}")
+         return video
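With the new keyword-style constructor, `Resize` can infer the missing dimension from the video's aspect ratio, and `Crop` center-crops to a fixed size. A short sketch of how the new transforms compose; the argument values are illustrative:

```python
from videopython.base.transforms import Resize, Crop, CropMode

# Resize by width only; the height is derived to keep the aspect ratio.
resized = Resize(width=1280).apply(video.copy())

# Center-crop to a square; CropMode.CENTER is the only mode defined so far.
cropped = Crop(width=720, height=720, mode=CropMode.CENTER).apply(resized)
```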
@@ -4,6 +4,7 @@ from typing import final

  import numpy as np

+ from videopython.base.effects import Blur
  from videopython.base.video import Video


@@ -68,3 +69,38 @@ class FadeTransition(Transition):
          )
          faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
          return faded_videos
+
+
+ class BlurTransition(Transition):
+     def __init__(
+         self, effect_time_seconds: float = 1.5, blur_iterations: int = 400, blur_kernel_size: tuple[int, int] = (11, 11)
+     ):
+         self.effect_time_seconds = effect_time_seconds
+         self.blur_iterations = blur_iterations
+         self.blur_kernel_size = blur_kernel_size
+
+     def _apply(self, videos: tuple[Video, Video]) -> Video:
+         video_fps = videos[0].fps
+         for video in videos:
+             if video.total_seconds < self.effect_time_seconds:
+                 raise RuntimeError("Not enough space to make transition!")
+
+         effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
+
+         ascending_blur = Blur("ascending", self.blur_iterations, self.blur_kernel_size)
+         descending_blur = Blur("descending", self.blur_iterations, self.blur_kernel_size)
+         transition = ascending_blur.apply(videos[0][-effect_time_fps:]) + descending_blur.apply(
+             videos[1][:effect_time_fps]
+         )
+
+         blurred_videos = Video.from_frames(
+             np.r_[
+                 "0,2",
+                 videos[0].frames[:-effect_time_fps],
+                 transition.frames,
+                 videos[1].frames[effect_time_fps:],
+             ],
+             fps=video_fps,
+         )
+         blurred_videos.audio = videos[0].audio.append(videos[1].audio)
+         return blurred_videos
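`BlurTransition` mirrors `FadeTransition`: it blurs the tail of the first clip and the head of the second, then concatenates. A hedged sketch, assuming the public `Transition.apply(videos=...)` interface from 0.1.3 is unchanged and that `video1` and `video2` share the same frame shape and FPS:

```python
from videopython.base.transitions import BlurTransition

# Blur out of video1 and back into video2 over one second.
transition = BlurTransition(effect_time_seconds=1.0, blur_iterations=200, blur_kernel_size=(11, 11))
combined = transition.apply(videos=(video1, video2))
```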
@@ -23,7 +23,7 @@ class VideoMetadata:
      total_seconds: float

      def __str__(self):
-         return f"{self.height}x{self.width} @ {self.fps}fps, {self.total_seconds} seconds"
+         return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"

      def __repr__(self) -> str:
          return self.__str__()
@@ -122,6 +122,12 @@ class Video:
      @classmethod
      def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
          new_vid = cls()
+         if frames.ndim != 4:
+             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
+         elif frames.shape[-1] == 4:
+             frames = frames[:, :, :, :3]
+         elif frames.shape[-1] != 3:
+             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
          new_vid.frames = frames
          new_vid.fps = fps
          new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
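`Video.from_frames` now validates its input: frames must form a 4-D array, and an RGBA channel dimension is reduced to RGB. A small sketch with illustrative dimensions:

```python
import numpy as np
from videopython.base.video import Video

# RGBA input is accepted; the alpha channel is dropped on construction.
rgba_frames = np.zeros((24, 480, 640, 4), dtype=np.uint8)
video = Video.from_frames(rgba_frames, fps=24)

# Anything other than 3 or 4 channels, or a non-4-D array, raises ValueError.
```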
@@ -0,0 +1,22 @@
+ import numpy as np
+ import torch
+ from pydub import AudioSegment
+ from transformers import AutoTokenizer, VitsModel
+
+ TEXT_TO_SPEECH_MODEL = "facebook/mms-tts-eng"
+
+
+ class TextToSpeech:
+     def __init__(self):
+         self.pipeline = VitsModel.from_pretrained(TEXT_TO_SPEECH_MODEL)
+         self.tokenizer = AutoTokenizer.from_pretrained(TEXT_TO_SPEECH_MODEL)
+
+     def generate_audio(self, text: str) -> AudioSegment:
+         tokenized = self.tokenizer(text, return_tensors="pt")
+
+         with torch.no_grad():
+             output = self.pipeline(**tokenized).waveform
+
+         output = (output.T.float().numpy() * (2**31 - 1)).astype(np.int32)
+         audio = AudioSegment(data=output, frame_rate=self.pipeline.config.sampling_rate, sample_width=4, channels=1)
+         return audio
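`TextToSpeech` wraps the `facebook/mms-tts-eng` VITS model and returns a pydub `AudioSegment`. A hedged usage sketch; the import path and the direct assignment to `video.audio` are assumptions based on the diff, not shown in the package README:

```python
from videopython.generation.audio import TextToSpeech  # assumed module path

speech = TextToSpeech().generate_audio("Dogs playing in the snow")
speech.export("narration.mp3", format="mp3")  # standard pydub export

# Assumption: Video.audio is a plain AudioSegment, so a voice track can be attached directly.
video.audio = speech
```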
@@ -0,0 +1,22 @@
+ import io
+ import os
+
+ import torch
+ from diffusers import DiffusionPipeline
+ from PIL import Image
+
+ TEXT_TO_IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
+
+
+ class TextToImage:
+     def __init__(self):
+         if not torch.cuda.is_available():
+             raise ValueError("CUDA is not available, but TextToVideo model requires CUDA.")
+         self.pipeline = DiffusionPipeline.from_pretrained(
+             TEXT_TO_IMAGE_MODEL, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+         )
+         self.pipeline.to("cuda")
+
+     def generate_image(self, prompt: str) -> Image.Image:
+         image = self.pipeline(prompt=prompt).images[0]
+         return image
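`TextToImage` returns a PIL image, which can feed `ImageToVideo` (as in the README) or, converted to a NumPy array, the new `FullImageOverlay` effect. A hedged sketch; the prompt and the resize step are illustrative, and the overlay must match the video frame size exactly:

```python
import numpy as np
from videopython.generation import TextToImage
from videopython.base.effects import FullImageOverlay

overlay = TextToImage().generate_image(prompt="Semi-transparent watermark")
# frame_shape is (height, width, 3); PIL's resize takes (width, height).
overlay = overlay.resize((video.frame_shape[1], video.frame_shape[0]))
watermarked = FullImageOverlay(overlay_image=np.array(overlay), alpha=0.4).apply(video)
```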
@@ -10,13 +10,12 @@ IMAGE_TO_VIDEO_MODEL = "stabilityai/stable-video-diffusion-img2vid-xt"


  class TextToVideo:
-     def __init__(self, gpu_optimized: bool = True):
-         self.pipeline = DiffusionPipeline.from_pretrained(
-             TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16 if gpu_optimized else torch.float32
-         )
+     def __init__(self):
+         if not torch.cuda.is_available():
+             raise ValueError("CUDA is not available, but TextToVideo model requires CUDA.")
+         self.pipeline = DiffusionPipeline.from_pretrained(TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16)
          self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
-         if gpu_optimized:
-             self.pipeline.enable_model_cpu_offload()
+         self.pipeline.to("cuda")

      def generate_video(
          self, prompt: str, num_steps: int = 25, height: int = 320, width: int = 576, num_frames: int = 24
@@ -39,7 +38,6 @@ class ImageToVideo:
          self.pipeline = DiffusionPipeline.from_pretrained(
              IMAGE_TO_VIDEO_MODEL, torch_dtype=torch.float16, variant="fp16"
          ).to("cuda")
-         self.pipeline.enable_model_cpu_offload()

      def generate_video(self, image: Image, fps: int = 24) -> Video:
          video_frames = self.pipeline(image=image, fps=fps, output_type="np").frames[0]