videopython 0.1.3__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

videopython/base/effects.py CHANGED
@@ -1,7 +1,9 @@
  from abc import ABC, abstractmethod
- from typing import final
+ from typing import Literal, final

+ import cv2
  import numpy as np
+ from PIL import Image
  from tqdm import tqdm

  from videopython.base.video import Video
@@ -14,12 +16,35 @@ class Effect(ABC):
      """

      @final
-     def apply(self, video: Video) -> Video:
+     def apply(self, video: Video, start: float | None = None, stop: float | None = None) -> Video:
          original_shape = video.video_shape
-         video_with_effect = self._apply(video)
-         if not video_with_effect.video_shape == original_shape:
+         start = start if start is not None else 0
+         stop = stop if stop is not None else video.total_seconds
+         # Check for start and stop correctness
+         if not 0 <= start <= video.total_seconds:
+             raise ValueError(f"Video is only {video.total_seconds} long, but passed start: {start}!")
+         elif not start <= stop <= video.total_seconds:
+             raise ValueError(f"Video is only {video.total_seconds} long, but passed stop: {stop}!")
+         # Apply effect on video slice
+         effect_start_frame = round(start * video.fps)
+         effect_end_frame = round(stop * video.fps)
+         video_with_effect = self._apply(video[effect_start_frame:effect_end_frame])
+         old_audio = video.audio
+         video = Video.from_frames(
+             np.r_[
+                 "0,2",
+                 video.frames[:effect_start_frame],
+                 video_with_effect.frames,
+                 video.frames[effect_end_frame:],
+             ],
+             fps=video.fps,
+         )
+         video.audio = old_audio
+         # Check if dimensions didn't change
+         if not video.video_shape == original_shape:
              raise RuntimeError("The effect must not change the number of frames and the shape of the frames!")
-         return video_with_effect
+
+         return video

      @abstractmethod
      def _apply(self, video: Video) -> Video:
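The reworked `apply` lets any effect be limited to a `[start, stop]` window in seconds while the remaining frames and the original audio track are preserved. A minimal sketch of the call pattern, assuming a local `clip.mp4` exists and that `Video.from_path` loads files as described in the 0.1.3 README:

```python
import numpy as np

from videopython.base.effects import FullImageOverlay
from videopython.base.video import Video

video = Video.from_path("clip.mp4")  # hypothetical input file
# Build a semi-transparent full-frame RGBA overlay matching the video frames.
height, width = video.frame_shape[:2]
overlay = np.zeros((height, width, 4), dtype=np.uint8)
# Only seconds 1.0 to 3.0 are modified; frames outside the window stay untouched.
video = FullImageOverlay(overlay, alpha=0.5).apply(video, start=1.0, stop=3.0)
```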
@@ -27,7 +52,7 @@ class Effect(ABC):


  class FullImageOverlay(Effect):
-     def __init__(self, overlay_image: np.ndarray, alpha: float | None = None):
+     def __init__(self, overlay_image: np.ndarray, alpha: float | None = None, fade_time: float = 0.0):
          if alpha is not None and not 0 <= alpha <= 1:
              raise ValueError("Alpha must be in range [0, 1]!")
          elif not (overlay_image.ndim == 3 and overlay_image.shape[-1] in [3, 4]):
@@ -37,21 +62,122 @@ class FullImageOverlay(Effect):

          if overlay_image.shape[-1] == 3:
              overlay_image = np.dstack([overlay_image, np.full(overlay_image.shape[:2], 255, dtype=np.uint8)])
-         overlay_image[:, :, 3] = overlay_image[:, :, 3] * alpha
-
-         self._overlay_alpha = (overlay_image[:, :, 3] / 255.0)[:, :, np.newaxis]
-         self._base_transparency = 1 - self._overlay_alpha

-         self.overlay = overlay_image[:, :, :3] * self._overlay_alpha
+         self.alpha = alpha
+         self.overlay = overlay_image.astype(np.uint8)
+         self.fade_time = fade_time

-     def _overlay(self, img: np.ndarray) -> np.ndarray:
-         return self.overlay + (img * self._base_transparency)
+     def _overlay(self, img: np.ndarray, alpha: float = 1.0) -> np.ndarray:
+         img_pil = Image.fromarray(img)
+         overlay = self.overlay.copy()
+         overlay[:, :, 3] = overlay[:, :, 3] * (self.alpha * alpha)
+         overlay_pil = Image.fromarray(overlay)
+         img_pil.paste(overlay_pil, (0, 0), overlay_pil)
+         return np.array(img_pil)

      def _apply(self, video: Video) -> Video:
-         if not video.frame_shape == self.overlay.shape:
+         if not video.frame_shape == self.overlay[:, :, :3].shape:
              raise ValueError(
                  f"Mismatch of overlay shape `{self.overlay.shape}` with video shape: `{video.frame_shape}`!"
              )
+         elif not (0 <= 2 * self.fade_time <= video.total_seconds):
+             raise ValueError(f"Video is only {video.total_seconds}s long, but fade time is {self.fade_time}s!")
+
          print("Overlaying video...")
-         video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
+         if self.fade_time == 0:
+             video.frames = np.array([self._overlay(frame) for frame in tqdm(video.frames)], dtype=np.uint8)
+         else:
+             num_video_frames = len(video.frames)
+             num_fade_frames = round(self.fade_time * video.fps)
+             new_frames = []
+             for i, frame in enumerate(tqdm(video.frames)):
+                 frames_dist_from_end = min(i, num_video_frames - i)
+                 if frames_dist_from_end >= num_fade_frames:
+                     fade_alpha = 1.0
+                 else:
+                     fade_alpha = frames_dist_from_end / num_fade_frames
+                 new_frames.append(self._overlay(frame, fade_alpha))
+             video.frames = np.array(new_frames, dtype=np.uint8)
+         return video
+
+
+ class Blur(Effect):
+     def __init__(
+         self,
+         mode: Literal["constant", "ascending", "descending"],
+         iterations: int,
+         kernel_size: tuple[int, int] = (5, 5),
+     ):
+         if iterations < 1:
+             raise ValueError("Iterations must be at least 1!")
+         self.mode = mode
+         self.iterations = iterations
+         self.kernel_size = kernel_size
+
+     def _apply(self, video: Video) -> Video:
+         n_frames = len(video.frames)
+         new_frames = []
+         if self.mode == "constant":
+             for frame in video.frames:
+                 blurred_frame = frame
+                 for _ in range(self.iterations):
+                     blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
+                 new_frames.append(blurred_frame)
+         elif self.mode == "ascending":
+             for i, frame in tqdm(enumerate(video.frames)):
+                 frame_iterations = max(1, round((i / n_frames) * self.iterations))
+                 blurred_frame = frame
+                 for _ in range(frame_iterations):
+                     blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
+                 new_frames.append(blurred_frame)
+         elif self.mode == "descending":
+             for i, frame in tqdm(enumerate(video.frames)):
+                 frame_iterations = max(round(((n_frames - i) / n_frames) * self.iterations), 1)
+                 blurred_frame = frame
+                 for _ in range(frame_iterations):
+                     blurred_frame = cv2.GaussianBlur(blurred_frame, self.kernel_size, 0)
+                 new_frames.append(blurred_frame)
+         else:
+             raise ValueError(f"Unknown mode: `{self.mode}`.")
+         video.frames = np.asarray(new_frames)
+         return video
+
+
+ class Zoom(Effect):
+     def __init__(self, zoom_factor: float, mode: Literal["in", "out"]):
+         if zoom_factor <= 1:
+             raise ValueError("Zoom factor must be greater than 1!")
+         self.zoom_factor = zoom_factor
+         self.mode = mode
+
+     def _apply(self, video: Video) -> Video:
+         n_frames = len(video.frames)
+         new_frames = []
+
+         width = video.metadata.width
+         height = video.metadata.height
+         crop_sizes_w, crop_sizes_h = np.linspace(width // self.zoom_factor, width, n_frames), np.linspace(
+             height // self.zoom_factor, height, n_frames
+         )
+
+         if self.mode == "in":
+             for frame, w, h in tqdm(zip(video.frames, reversed(crop_sizes_w), reversed(crop_sizes_h))):
+
+                 x = width / 2 - w / 2
+                 y = height / 2 - h / 2
+
+                 cropped_frame = frame[round(y) : round(y + h), round(x) : round(x + w)]
+                 zoomed_frame = cv2.resize(cropped_frame, (width, height))
+                 new_frames.append(zoomed_frame)
+         elif self.mode == "out":
+             for frame, w, h in tqdm(zip(video.frames, crop_sizes_w, crop_sizes_h)):
+                 x = width / 2 - w / 2
+                 y = height / 2 - h / 2
+
+                 cropped_frame = frame[round(y) : round(y + h), round(x) : round(x + w)]
+                 zoomed_frame = cv2.resize(cropped_frame, (width, height))
+                 new_frames.append(zoomed_frame)
+         else:
+             raise ValueError(f"Unknown mode: `{self.mode}`.")
+         video.frames = np.asarray(new_frames)
          return video
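For reference, a short sketch of how the new `Blur` and `Zoom` effects plug into the `Effect.apply` API shown above; the input path is illustrative:

```python
from videopython.base.effects import Blur, Zoom
from videopython.base.video import Video

video = Video.from_path("clip.mp4")  # hypothetical input file
# Constant Gaussian blur over the whole clip.
video = Blur(mode="constant", iterations=3, kernel_size=(5, 5)).apply(video)
# Gradual zoom-in towards the frame center across all frames.
video = Zoom(zoom_factor=1.5, mode="in").apply(video)
```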
videopython/base/exceptions.py ADDED
@@ -0,0 +1,2 @@
+ class OutOfBoundsError(Exception):
+     pass
videopython/base/transforms.py CHANGED
@@ -1,5 +1,7 @@
  from abc import ABC, abstractmethod
+ from enum import Enum
  from multiprocessing import Pool
+ from typing import Literal

  import cv2
  import numpy as np
@@ -71,9 +73,11 @@ class CutSeconds(Transformation):


  class Resize(Transformation):
-     def __init__(self, new_width: int, new_height: int):
-         self.new_width = new_width
-         self.new_height = new_height
+     def __init__(self, width: int | None = None, height: int | None = None):
+         self.width = width
+         self.height = height
+         if width is None and height is None:
+             raise ValueError("You must provide either `width` or `height`!")

      def _resize_frame(self, frame: np.ndarray, new_width: int, new_height: int) -> np.ndarray:
          return cv2.resize(
@@ -83,10 +87,25 @@ class Resize(Transformation):
          )

      def apply(self, video: Video) -> Video:
+         if self.width and self.height:
+             new_height = self.height
+             new_width = self.width
+         elif self.height is None and self.width:
+             video_height = video.video_shape[1]
+             video_width = video.video_shape[2]
+             new_height = round(video_height * (self.width / video_width))
+             new_width = self.width
+         elif self.width is None and self.height:
+             video_height = video.video_shape[1]
+             video_width = video.video_shape[2]
+             new_width = round(video_width * (self.height / video_height))
+             new_height = self.height
+
+         print(f"Resizing video to: {new_width}x{new_height}!")
          with Pool() as pool:
              frames_copy = pool.starmap(
                  self._resize_frame,
-                 [(frame, self.new_width, self.new_height) for frame in video.frames],
+                 [(frame, new_width, new_height) for frame in video.frames],
              )
          video.frames = np.array(frames_copy)
          return video
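With both arguments now optional, passing only `width` (or only `height`) scales the other dimension to keep the aspect ratio, while passing both forces an exact size. A sketch, with an illustrative input path:

```python
from videopython.base.transforms import Resize
from videopython.base.video import Video

video = Video.from_path("clip.mp4")  # hypothetical input file
# Height is derived from the source aspect ratio when only width is given.
video = Resize(width=720).apply(video)
```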
@@ -128,3 +147,32 @@ class ResampleFPS(Transformation):
              print(f"Upsampling video from {video.fps} to {self.new_fps} FPS.")
              video = self._upsample(video)
          return video
+
+
+ class CropMode(Enum):
+     CENTER = "center"
+
+
+ class Crop(Transformation):
+
+     def __init__(self, width: int, height: int, mode: CropMode = CropMode.CENTER):
+         self.width = width
+         self.height = height
+         self.mode = mode
+
+     def apply(self, video: Video) -> Video:
+         if self.mode == CropMode.CENTER:
+             current_shape = video.frame_shape[:2]
+             center_height = current_shape[0] // 2
+             center_width = current_shape[1] // 2
+             width_offset = self.width // 2
+             height_offset = self.height // 2
+             video.frames = video.frames[
+                 :,
+                 center_height - height_offset : center_height + height_offset,
+                 center_width - width_offset : center_width + width_offset,
+                 :,
+             ]
+         else:
+             raise ValueError(f"Unknown mode: {self.mode}")
+         return video
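The new `Crop` transformation currently supports center cropping only. A sketch, assuming the source frames are at least 1080x1080:

```python
from videopython.base.transforms import Crop, CropMode
from videopython.base.video import Video

video = Video.from_path("clip.mp4")  # hypothetical input file
# Keep a centered 1080x1080 window from every frame.
video = Crop(width=1080, height=1080, mode=CropMode.CENTER).apply(video)
```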
videopython/base/transitions.py CHANGED
@@ -4,6 +4,7 @@ from typing import final

  import numpy as np

+ from videopython.base.effects import Blur
  from videopython.base.video import Video


@@ -68,3 +69,38 @@ class FadeTransition(Transition):
          )
          faded_videos.audio = videos[0].audio.append(videos[1].audio, crossfade=(effect_time_fps / video_fps) * 1000)
          return faded_videos
+
+
+ class BlurTransition(Transition):
+     def __init__(
+         self, effect_time_seconds: float = 1.5, blur_iterations: int = 400, blur_kernel_size: tuple[int, int] = (11, 11)
+     ):
+         self.effect_time_seconds = effect_time_seconds
+         self.blur_iterations = blur_iterations
+         self.blur_kernel_size = blur_kernel_size
+
+     def _apply(self, videos: tuple[Video, Video]) -> Video:
+         video_fps = videos[0].fps
+         for video in videos:
+             if video.total_seconds < self.effect_time_seconds:
+                 raise RuntimeError("Not enough space to make transition!")
+
+         effect_time_fps = math.floor(self.effect_time_seconds * video_fps)
+
+         ascending_blur = Blur("ascending", self.blur_iterations, self.blur_kernel_size)
+         descending_blur = Blur("descending", self.blur_iterations, self.blur_kernel_size)
+         transition = ascending_blur.apply(videos[0][-effect_time_fps:]) + descending_blur.apply(
+             videos[1][:effect_time_fps]
+         )
+
+         blurred_videos = Video.from_frames(
+             np.r_[
+                 "0,2",
+                 videos[0].frames[:-effect_time_fps],
+                 transition.frames,
+                 videos[1].frames[effect_time_fps:],
+             ],
+             fps=video_fps,
+         )
+         blurred_videos.audio = videos[0].audio.append(videos[1].audio)
+         return blurred_videos
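`BlurTransition` mirrors `FadeTransition`: it blurs the tail of the first clip with ascending strength, blurs the head of the second with descending strength, and concatenates the result. A sketch, assuming `Transition` exposes the same public `apply(videos=...)` entry point the 0.1.3 README shows for `FadeTransition`, and that both clips share fps and resolution:

```python
from videopython.base.transitions import BlurTransition
from videopython.base.video import Video

clip_a = Video.from_path("clip_a.mp4")  # hypothetical input files
clip_b = Video.from_path("clip_b.mp4")
# One-second blurred hand-off between the two clips.
merged = BlurTransition(effect_time_seconds=1.0).apply(videos=(clip_a, clip_b))
```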
videopython/base/video.py CHANGED
@@ -23,7 +23,7 @@ class VideoMetadata:
      total_seconds: float

      def __str__(self):
-         return f"{self.height}x{self.width} @ {self.fps}fps, {self.total_seconds} seconds"
+         return f"{self.width}x{self.height} @ {self.fps}fps, {self.total_seconds} seconds"

      def __repr__(self) -> str:
          return self.__str__()
@@ -122,6 +122,12 @@ class Video:
      @classmethod
      def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
          new_vid = cls()
+         if frames.ndim != 4:
+             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
+         elif frames.shape[-1] == 4:
+             frames = frames[:, :, :, :3]
+         elif frames.shape[-1] != 3:
+             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
          new_vid.frames = frames
          new_vid.fps = fps
          new_vid.audio = AudioSegment.silent(duration=round(new_vid.total_seconds * 1000))
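`from_frames` now insists on a 4-D `(frames, height, width, channels)` array and silently drops a fourth (alpha) channel, so RGBA frames produced by the overlay and text helpers can be passed in directly. A sketch with synthetic frames:

```python
import numpy as np

from videopython.base.video import Video

# 48 black RGBA frames at 640x360; the alpha channel is discarded by from_frames.
frames = np.zeros((48, 360, 640, 4), dtype=np.uint8)
video = Video.from_frames(frames, fps=24.0)
print(video.video_shape)
```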
videopython/generation/audio.py CHANGED
@@ -1,30 +1,22 @@
- import os
- from pathlib import Path
- from typing import Literal
-
- from openai import OpenAI
+ import numpy as np
+ import torch
  from pydub import AudioSegment
+ from transformers import AutoTokenizer, VitsModel

- from videopython.utils.common import generate_random_name
+ TEXT_TO_SPEECH_MODEL = "facebook/mms-tts-eng"


  class TextToSpeech:
-     def __init__(self, openai_key: str | None = None, save_audio: bool = True):
-         self.client = OpenAI(api_key=openai_key)
-         self._save = save_audio
+     def __init__(self):
+         self.pipeline = VitsModel.from_pretrained(TEXT_TO_SPEECH_MODEL)
+         self.tokenizer = AutoTokenizer.from_pretrained(TEXT_TO_SPEECH_MODEL)
+
+     def generate_audio(self, text: str) -> AudioSegment:
+         tokenized = self.tokenizer(text, return_tensors="pt")
+
+         with torch.no_grad():
+             output = self.pipeline(**tokenized).waveform

-     def generate_audio(
-         self,
-         text: str,
-         voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"] = "alloy",
-     ) -> AudioSegment:
-         filename = generate_random_name(suffix=".mp3")
-         output_path = str((Path(os.getcwd()) / filename).resolve())
-         response = self.client.audio.speech.create(model="tts-1", voice=voice, input=text)
-         response.stream_to_file(output_path)
-         audio = AudioSegment.from_file(output_path)
-         if self._save:
-             print(f"Audio saved to {output_path}")
-         else:
-             os.remove(output_path)
+         output = (output.T.float().numpy() * (2**31 - 1)).astype(np.int32)
+         audio = AudioSegment(data=output, frame_rate=self.pipeline.config.sampling_rate, sample_width=4, channels=1)
          return audio
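Text-to-speech now runs a local VITS model (`facebook/mms-tts-eng`) through `transformers` instead of calling the OpenAI API, so it needs the `generation` extra but no API key. A sketch; the output filename is illustrative:

```python
from videopython.generation.audio import TextToSpeech

tts = TextToSpeech()  # downloads facebook/mms-tts-eng on first use
speech = tts.generate_audio("Hello from videopython!")
speech.export("speech.wav", format="wav")  # pydub AudioSegment
```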
videopython/generation/image.py CHANGED
@@ -1,60 +1,22 @@
  import io
  import os

- import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
+ import torch
+ from diffusers import DiffusionPipeline
  from PIL import Image
- from stability_sdk import client

+ TEXT_TO_IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"

- class TextToImage:
-     def __init__(
-         self,
-         stability_key: str | None = None,
-         engine: str = "stable-diffusion-xl-1024-v1-0",
-         verbose: bool = True,
-     ):
-         stability_key = stability_key or os.getenv("STABILITY_KEY")
-         if stability_key is None:
-             raise ValueError(
-                 "API Key for stability is required. Please provide it as an argument"
-                 " or set it as an environment variable `STABILITY_KEY`. "
-             )
-
-         self.client = client.StabilityInference(stability_key, verbose=verbose, engine=engine)

-     def generate_image(
-         self,
-         prompt: str,
-         width: int = 1024,
-         height: int = 1024,
-         steps: int = 30,
-         cfg_scale: float = 8.0,
-         seed: int = 1,
-     ) -> Image.Image:
-         answers = self.client.generate(
-             prompt=prompt,
-             seed=seed,
-             steps=steps, # Amount of inference steps performed on image generation.
-             cfg_scale=cfg_scale, # Influences how strongly your generation is guided to match your prompt.
-             # Setting this value higher increases the strength in which it tries to match your prompt.
-             # Defaults to 7.0 if not specified.
-             width=width,
-             height=height,
-             safety=False,
-             samples=1,
-             sampler=generation.SAMPLER_K_DPMPP_2M, # Choose which sampler we want to denoise our generation with.
-             # Defaults to k_dpmpp_2m if not specified. Clip Guidance only supports ancestral samplers.
-             # (Available Samplers: ddim, plms, k_euler, k_euler_ancestral, k_heun, k_dpm_2, k_dpm_2_ancestral, k_dpmpp_2s_ancestral, k_lms, k_dpmpp_2m, k_dpmpp_sde)
+ class TextToImage:
+     def __init__(self):
+         if not torch.cuda.is_available():
+             raise ValueError("CUDA is not available, but TextToVideo model requires CUDA.")
+         self.pipeline = DiffusionPipeline.from_pretrained(
+             TEXT_TO_IMAGE_MODEL, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
          )
-         for resp in answers:
-             for artifact in resp.artifacts:
-                 if artifact.finish_reason == generation.FILTER:
-                     raise RuntimeError(
-                         "Your request activated the API's safety filters and could not be processed."
-                         "Please modify the prompt and try again."
-                     )
-                 if artifact.type == generation.ARTIFACT_IMAGE:
-                     img = Image.open(io.BytesIO(artifact.binary))
-                 else:
-                     raise ValueError(f"Unknown artifact type: {artifact.type}")
-         return img
+         self.pipeline.to("cuda")
+
+     def generate_image(self, prompt: str) -> Image.Image:
+         image = self.pipeline(prompt=prompt).images[0]
+         return image
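`TextToImage` now runs Stable Diffusion XL locally through `diffusers` instead of calling the Stability API, and it refuses to start without CUDA. A sketch; prompt and filename are illustrative:

```python
from videopython.generation.image import TextToImage

generator = TextToImage()  # requires a CUDA device and downloads SDXL weights
image = generator.generate_image("A golden retriever running on a beach")
image.save("retriever.png")  # PIL.Image.Image
```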
videopython/generation/video.py CHANGED
@@ -10,13 +10,12 @@ IMAGE_TO_VIDEO_MODEL = "stabilityai/stable-video-diffusion-img2vid-xt"


  class TextToVideo:
-     def __init__(self, gpu_optimized: bool = True):
-         self.pipeline = DiffusionPipeline.from_pretrained(
-             TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16 if gpu_optimized else torch.float32
-         )
+     def __init__(self):
+         if not torch.cuda.is_available():
+             raise ValueError("CUDA is not available, but TextToVideo model requires CUDA.")
+         self.pipeline = DiffusionPipeline.from_pretrained(TEXT_TO_VIDEO_MODEL, torch_dtype=torch.float16)
          self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
-         if gpu_optimized:
-             self.pipeline.enable_model_cpu_offload()
+         self.pipeline.to("cuda")

      def generate_video(
          self, prompt: str, num_steps: int = 25, height: int = 320, width: int = 576, num_frames: int = 24
@@ -39,7 +38,6 @@ class ImageToVideo:
          self.pipeline = DiffusionPipeline.from_pretrained(
              IMAGE_TO_VIDEO_MODEL, torch_dtype=torch.float16, variant="fp16"
          ).to("cuda")
-         self.pipeline.enable_model_cpu_offload()

      def generate_video(self, image: Image, fps: int = 24) -> Video:
          video_frames = self.pipeline(image=image, fps=fps, output_type="np").frames[0]
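Both generators now require CUDA up front and keep the whole pipeline on the GPU instead of relying on CPU offload. A sketch that combines them, following the new README:

```python
from videopython.generation import ImageToVideo, TextToImage

image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
video = ImageToVideo().generate_video(image=image, fps=24)
print(video.metadata)
```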
videopython/utils/image.py ADDED
@@ -0,0 +1,275 @@
+ from typing import Literal
+
+ import cv2
+ import numpy as np
+ from PIL import Image, ImageDraw, ImageFont
+
+ from videopython.base.exceptions import OutOfBoundsError
+ from videopython.base.video import Video
+
+
+ class ImageText:
+     def __init__(
+         self,
+         image_size: tuple[int, int] = (1080, 1920), # (width, height)
+         mode: str = "RGBA",
+         background: tuple[int, int, int, int] = (0, 0, 0, 0), # Transparent background
+     ):
+         self.image_size = image_size
+         self.image = Image.new(mode, image_size, color=background)
+         self._draw = ImageDraw.Draw(self.image)
+
+     @property
+     def img_array(self) -> np.ndarray:
+         return np.array(self.image)
+
+     def save(self, filename: str) -> None:
+         self.image.save(filename)
+
+     def _fit_font_width(self, text: str, font: str, max_width: int) -> int:
+         """Find the maximum font size where the text width is less than or equal to max_width."""
+         font_size = 1
+         text_width = self.get_text_size(font, font_size, text)[0]
+         while text_width < max_width:
+             font_size += 1
+             text_width = self.get_text_size(font, font_size, text)[0]
+         max_font_size = font_size - 1
+         if max_font_size < 1:
+             raise ValueError(f"Max height {max_width} is too small for any font size!")
+         return max_font_size
+
+     def _fit_font_height(self, text: str, font: str, max_height: int) -> int:
+         """Find the maximum font size where the text height is less than or equal to max_height."""
+         font_size = 1
+         text_height = self.get_text_size(font, font_size, text)[1]
+         while text_height < max_height:
+             font_size += 1
+             text_height = self.get_text_size(font, font_size, text)[1]
+         max_font_size = font_size - 1
+         if max_font_size < 1:
+             raise ValueError(f"Max height {max_height} is too small for any font size!")
+         return max_font_size
+
+     def _get_font_size(
+         self,
+         text: str,
+         font: str,
+         max_width: int | None = None,
+         max_height: int | None = None,
+     ) -> int:
+         """Get maximum font size for `text` to fill in the `max_width` and `max_height`."""
+         if max_width is None and max_height is None:
+             raise ValueError("You need to pass max_width or max_height")
+         if max_width is not None:
+             width_font_size = self._fit_font_width(text, font, max_width)
+         if max_height is not None:
+             height_font_size = self._fit_font_height(text, font, max_height)
+         return min([size for size in [width_font_size, height_font_size] if size is not None])
+
+     def write_text(
+         self,
+         text: str,
+         font_filename: str,
+         xy: tuple[int, int],
+         font_size: int | None = 11,
+         color: tuple[int, int, int] = (0, 0, 0),
+         max_width: int | None = None,
+         max_height: int | None = None,
+     ) -> tuple[int, int]:
+         x, y = xy
+         if font_size is None and (max_width is None or max_height is None):
+             raise ValueError(f"Must set either `font_size`, or both `max_width` and `max_height`!")
+         elif font_size is None:
+             font_size = self._get_font_size(text, font_filename, max_width, max_height)
+         text_size = self.get_text_size(font_filename, font_size, text)
+         if (text_size[0] + x > self.image_size[0]) or (text_size[1] + y > self.image_size[1]):
+             raise OutOfBoundsError(f"Font size `{font_size}` is too big, text won't fit!")
+         font = ImageFont.truetype(font_filename, font_size)
+         self._draw.text((x, y), text, font=font, fill=color)
+         return text_size
+
+     def get_text_size(self, font_filename: str, font_size: int, text: str) -> tuple[int, int]:
+         """Return bounding box size of the rendered `text` with `font_filename` and `font_size`."""
+         font = ImageFont.truetype(font_filename, font_size)
+         return font.getbbox(text)[2:]
+
+     def _split_lines_by_width(
+         self,
+         text: str,
+         font_filename: str,
+         font_size: int,
+         box_width: int,
+     ) -> list[str]:
+         """Split the `text` into lines of maximum `box_width`."""
+         words = text.split()
+         split_lines: list[list[str]] = []
+         current_line: list[str] = []
+         for word in words:
+             new_line = " ".join(current_line + [word])
+             size = self.get_text_size(font_filename, font_size, new_line)
+             if size[0] <= box_width:
+                 current_line.append(word)
+             else:
+                 split_lines.append(current_line)
+                 current_line = [word]
+         if current_line:
+             split_lines.append(current_line)
+         lines = [" ".join(line) for line in split_lines]
+         return lines
+
+     def write_text_box(
+         self,
+         text: str,
+         font_filename: str,
+         xy: tuple[int, int],
+         box_width: int,
+         font_size: int = 11,
+         text_color: tuple[int, int, int] = (0, 0, 0),
+         background_color: None | tuple[int, int, int, int] = None,
+         background_padding: int = 0,
+         place: Literal["left", "right", "center"] = "left",
+     ) -> tuple[int, int]:
+         """Write text in box described by upper-left corner and maxium width of the box.
+
+         Args:
+             text: Text to be written inside the box.
+             font_filename: Path to the font file.
+             xy: X and Y coordinates describing upper-left of the box containing the text.
+             box_width: Pixel width of the box containing the text.
+             font_size: Font size.
+             text_color: RGB color of the text.
+             background_color: If set, adds background color to the text box. Expects RGBA values.
+             background_padding: Number of padding pixels to add when adding text background color.
+             place: Strategy for justifying the text inside the container box. Defaults to "left".
+
+         Returns:
+             Lower-left corner of the written text box.
+         """
+         x, y = xy
+         lines = self._split_lines_by_width(text, font_filename, font_size, box_width)
+         # Run checks to see if the text will fit
+         if x + box_width > self.image_size[0]:
+             raise OutOfBoundsError(f"Box width {box_width} is too big for the image width {self.image_size[0]}!")
+         lines_height = sum([self.get_text_size(font_filename, font_size, line)[1] for line in lines])
+         if y + lines_height > self.image_size[1]:
+             available_space = self.image_size[1] - y
+             raise OutOfBoundsError(f"Text height {lines_height} is too big for the available space {available_space}!")
+         # Write lines
+         current_text_height = y
+         for line in lines:
+             line_size = self.get_text_size(font_filename, font_size, line)
+             # Write line text into the image
+             if place == "left":
+                 self.write_text(
+                     text=line,
+                     font_filename=font_filename,
+                     xy=(x, current_text_height),
+                     font_size=font_size,
+                     color=text_color,
+                 )
+             elif place == "right":
+                 x_left = x + box_width - line_size[0]
+                 self.write_text(
+                     text=line,
+                     font_filename=font_filename,
+                     xy=(x_left, current_text_height),
+                     font_size=font_size,
+                     color=text_color,
+                 )
+             elif place == "center":
+                 x_left = int(x + ((box_width - line_size[0]) / 2))
+                 self.write_text(
+                     text=line,
+                     font_filename=font_filename,
+                     xy=(x_left, current_text_height),
+                     font_size=font_size,
+                     color=text_color,
+                 )
+             else:
+                 raise ValueError(f"Place {place} is not supported. Use one of: `left`, `right` or `center`!")
+             # Increment text height
+             current_text_height += line_size[1]
+         # Add background color for the text if set
+         if background_color is not None:
+             if len(background_color) != 4:
+                 raise ValueError(f"Text background color {background_color} must be RGBA!")
+             img = self.img_array
+             # Find bounding rectangle for written text
+             box_slice = img[y:current_text_height, x : x + box_width]
+             text_mask = np.any(box_slice != 0, axis=2).astype(np.uint8)
+             xmin, xmax, ymin, ymax = self._find_smallest_bounding_rect(text_mask)
+             # Get global bounding box position
+             xmin += x - background_padding
+             xmax += x + background_padding
+             ymin += y - background_padding
+             ymax += y + background_padding
+             # Make sure we are inside image, cut to image if not
+             xmin = max(0, xmin)
+             ymin = max(0, ymin)
+             xmax = min(xmax, self.image_size[0])
+             ymax = min(ymax, self.image_size[1])
+             # Slice the bounding box and find text mask
+             bbox_slice = img[ymin:ymax, xmin:xmax]
+             bbox_text_mask = np.any(bbox_slice != 0, axis=2).astype(np.uint8)
+             # Add background color outside of text
+             bbox_slice[~bbox_text_mask.astype(bool)] = background_color
+             # Blur nicely with semi-transparent pixels from the font
+             text_slice = bbox_slice[bbox_text_mask.astype(bool)]
+             text_background = text_slice[:, :3] * (np.expand_dims(text_slice[:, -1], axis=1) / 255)
+             color_background = (1 - (np.expand_dims(text_slice[:, -1], axis=1) / 255)) * background_color
+             faded_background = text_background[:, :3] + color_background[:, :3]
+             text_slice[:, :3] = faded_background
+             text_slice[:, -1] = 255
+             bbox_slice[bbox_text_mask.astype(bool)] = text_slice
+             # Set image with the background color
+             self.image = Image.fromarray(img)
+         return (x, current_text_height)
+
+     def _find_smallest_bounding_rect(self, mask: np.ndarray) -> tuple[int, int, int, int]:
+         """Find the smallest bounding rectangle for the mask."""
+         rows = np.any(mask, axis=1)
+         cols = np.any(mask, axis=0)
+         ymin, ymax = np.where(rows)[0][[0, -1]]
+         xmin, xmax = np.where(cols)[0][[0, -1]]
+         return xmin, xmax, ymin, ymax
+
+
+ class SlideOverImage:
+     def __init__(
+         self,
+         direction: Literal["left", "right"],
+         video_shape: tuple[int, int] = (1080, 1920),
+         fps: float = 24.0,
+         length_seconds: float = 1.0,
+     ) -> None:
+         self.direction = direction
+         self.video_width, self.video_height = video_shape
+         self.fps = fps
+         self.length_seconds = length_seconds
+
+     def slide(self, image: np.ndarray) -> Video:
+         image = self._resize(image)
+         max_offset = image.shape[1] - self.video_width
+         frame_count = round(self.fps * self.length_seconds)
+
+         deltas = np.linspace(0, max_offset, frame_count)
+         frames = []
+
+         for delta in deltas:
+             if self.direction == "right":
+                 frame = image[:, round(delta) : round(delta) + self.video_width]
+             elif self.direction == "left":
+                 frame = image[:, image.shape[1] - round(delta) - self.video_width : image.shape[1] - round(delta)]
+             frames.append(frame)
+
+         return Video.from_frames(frames=np.stack(frames, axis=0), fps=self.fps)
+
+     def _resize(self, image: np.ndarray) -> np.ndarray:
+         resize_factor = image.shape[0] / self.video_height
+         resize_dims = (round(image.shape[1] / resize_factor), round(image.shape[0] / resize_factor)) # width, height
+         image = cv2.resize(image, resize_dims)
+         if self.video_height > image.shape[0] or self.video_width > image.shape[1]:
+             raise ValueError(
+                 f"Image `{image.shape}` is too small for the video frame `({self.video_width}, {self.video_height})`!"
+             )
+         return image
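`videopython/utils/image.py` is new: `ImageText` renders text (optionally wrapped into a box with a background fill) onto a transparent RGBA canvas, and `SlideOverImage` pans a crop window across a still image to produce a `Video`. A sketch; the font file and panorama image are illustrative and must exist locally:

```python
import cv2

from videopython.utils.image import ImageText, SlideOverImage

# Render a centered caption onto a transparent 1080x1920 canvas.
canvas = ImageText(image_size=(1080, 1920))
canvas.write_text_box(
    "Hello videopython",
    font_filename="DejaVuSans.ttf",  # hypothetical font path
    xy=(100, 200),
    box_width=880,
    font_size=64,
    text_color=(255, 255, 255),
    place="center",
)
canvas.save("caption.png")

# Pan left-to-right over a wide still image to get a 2-second, 24 fps clip.
panorama = cv2.imread("panorama.jpg")  # hypothetical image, still wide after resizing to 1920 px height
clip = SlideOverImage(direction="right", video_shape=(1080, 1920), fps=24.0, length_seconds=2.0).slide(panorama)
```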
videopython-0.1.41.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: videopython
- Version: 0.1.3
+ Version: 0.1.41
  Summary: Minimal video generation and processing library.
  Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
  License: Apache License
@@ -199,7 +199,7 @@ License: Apache License
  Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
  Project-URL: Bug Reports, https://github.com/bartwojtowicz/videopython/issues
  Project-URL: Source, https://github.com/bartwojtowicz/videopython/
- Keywords: videopython,video,movie,opencv,generation,editing
+ Keywords: python,videopython,video,movie,opencv,generation,editing
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.10
@@ -211,13 +211,22 @@ License-File: LICENSE
  Requires-Dist: click >=8.1.7
  Requires-Dist: numpy >=1.25.2
  Requires-Dist: opencv-python >=4.9.0.80
- Requires-Dist: pytest >=7.4.0
- Requires-Dist: transformers >=4.38.1
- Requires-Dist: diffusers >=0.26.3
- Requires-Dist: torch >=2.1.0
- Requires-Dist: stability-sdk >=0.8.5
- Requires-Dist: openai ==1.3.5
+ Requires-Dist: pillow >=10.3.0
  Requires-Dist: pydub >=0.25.1
+ Requires-Dist: tqdm >=4.66.3
+ Provides-Extra: dev
+ Requires-Dist: black ==24.3.0 ; extra == 'dev'
+ Requires-Dist: isort ==5.12.0 ; extra == 'dev'
+ Requires-Dist: mypy ==1.8.0 ; extra == 'dev'
+ Requires-Dist: pytest ==7.4.0 ; extra == 'dev'
+ Requires-Dist: types-Pillow ==10.2.0.20240213 ; extra == 'dev'
+ Requires-Dist: types-tqdm ==4.66.0.20240106 ; extra == 'dev'
+ Requires-Dist: pydub-stubs ==0.25.1.1 ; extra == 'dev'
+ Provides-Extra: generation
+ Requires-Dist: accelerate >=0.29.2 ; extra == 'generation'
+ Requires-Dist: diffusers >=0.26.3 ; extra == 'generation'
+ Requires-Dist: torch >=2.1.0 ; extra == 'generation'
+ Requires-Dist: transformers >=4.38.1 ; extra == 'generation'

  # About

@@ -235,41 +244,40 @@ sudo apt-get install ffmpeg

  ### Install with pip
  ```bash
- pip install videopython
+ pip install videopython[generation]
  ```
+ > You can install without `[generation]` dependencies for basic video handling and processing.
+ > The funcionalities found in `videopython.generation` won't work.

  ## Basic Usage
+ > Using Nvidia A40 or better is recommended for the `videopython.generation` module.

  ```python
- from videopython.base.video import Video
- from videopython.base.transitions import FadeTransition
+ # Generate image and animate it
+ from videopython.generation import ImageToVideo
+ from videopython.generation import TextToImage

- # Load video
- video = Video.from_path("tests/test_data/fast_benchmark.mp4")
- print(video.metadata)
- print(video.frames.shape) # Video is based on numpy representation of frames
+ image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
+ video = ImageToVideo().generate_video(image=image, fps=24)

- # Generate videos
- video1 = Video.from_prompt("Dogs playing in the snow.")
- video2 = Video.from_prompt("Dogs going back home.")
+ # Video generation directly from prompt
+ from videopython.generation import TextToVideo
+ video_gen = TextToVideo()
+ video = video_gen.generate_video("Dogs playing in the snow")
+ for _ in range(10):
+     video += video_gen.generate_video("Dogs playing in the snow")

- # Add videos
- combined_video = video1 + video2
- print(combined_video.metadata)
+ # Cut the first 2 seconds
+ from videopython.base.transforms import CutSeconds
+ transformed_video = CutSeconds(start_second=0, end_second=2).apply(video.copy())

- # Apply fade transition between videos
- fade = FadeTransition(0.5) # 0.5s effect time
- faded_video = fade.apply(videos=(video1, video2))
- print(faded_video.metadata)
+ # Upsample to 30 FPS
+ from videopython.base.transforms import ResampleFPS
+ transformed_video = ResampleFPS(new_fps=30).apply(transformed_video)

- # Add audio from file
- faded_video.add_audio_from_file("tests/test_data/test_audio.mp3")
+ # Resize to 1000x1000
+ from videopython.base.transforms import Resize
+ transformed_video = Resize(width=1000, height=1000).apply(transformed_video)

- # Save to a file
- faded_video.save("my_video.mp4")
- ```
-
- ### Running Unit Tests
- ```bash
- PYTHONPATH=./src/ pytest
+ filepath = transformed_video.save()
  ```
videopython-0.1.41.dist-info/RECORD ADDED
@@ -0,0 +1,19 @@
+ videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
+ videopython/base/effects.py,sha256=ZFUWrgVWTn4uWpxPfTQSQQKEZN5ns4btMofOZNHCeQc,7540
+ videopython/base/exceptions.py,sha256=68_16lUPOR9_zhWdeBGS8_NFI32VbrcoDbN5KHHg0_w,44
+ videopython/base/transforms.py,sha256=yDtM1uZOacB0OLPQnSGHl5upoNNeN0dPWXe0hJPeV-I,6004
+ videopython/base/transitions.py,sha256=zYsxIgiVfN9P-CoGWUWRYFBr_0inX1sAJ02gyIEQ678,3694
+ videopython/base/video.py,sha256=kG-juKN-da5NzV89YxZl5JkyMTJFkgPceh4yuAUnsQs,11099
+ videopython/generation/__init__.py,sha256=Qse024UgiS9OxXzbbInyZ-9cpfI4enR2Dcds4lLDpNA,201
+ videopython/generation/audio.py,sha256=BTc-3vJ5e6D0lt2OPo2hfOcUqhNXIcvRLNoo2oQ470M,777
+ videopython/generation/image.py,sha256=i8zJm0WXn_Pykby9Urw1kzDcla6ArYhRgG-ueRdoAJ0,675
+ videopython/generation/video.py,sha256=WMFKKUSfIkQmxL6xhUb-MeAiHU6uOF_oFpmf69H8V8g,1827
+ videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ videopython/utils/common.py,sha256=F-30YoKUwWDI7HiJUWw0gRFUguhShSVaxT0aFfvpifg,936
+ videopython/utils/image.py,sha256=8m1uzyfrj5Kdbw7IZyqwj-6NXK4KH-szJxtN_EQva4s,12084
+ videopython-0.1.41.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
+ videopython-0.1.41.dist-info/METADATA,sha256=Hu511G-WnKEwOn9gG_53ywH1NDAH2pA5d_LzFUuYmfM,15659
+ videopython-0.1.41.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ videopython-0.1.41.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
+ videopython-0.1.41.dist-info/RECORD,,
videopython-0.1.41.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.42.0)
+ Generator: bdist_wheel (0.43.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

videopython-0.1.3.dist-info/RECORD REMOVED
@@ -1,17 +0,0 @@
- videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- videopython/base/compose.py,sha256=pti12VY3Yg7TZZiENPF6veM8POWssfsK8ePDdGlhAhA,1968
- videopython/base/effects.py,sha256=DpA8V89Es7YWPEq72l_h_D7MG1QYf1iuslAl-QgzZx8,2153
- videopython/base/transforms.py,sha256=DQcG8tZ8nlGj3khlp3v4C0MISpRY2rZr-6B6GtPZykE,4251
- videopython/base/transitions.py,sha256=efuJdls2xJVpXV8RGaFd--ii8cLUPz6FdmhSvOjaiTM,2275
- videopython/base/video.py,sha256=40leF8bSjNIhP_L8loOh9ptlZNTZAZ95Dgv9FH4mSz4,10791
- videopython/generation/__init__.py,sha256=Qse024UgiS9OxXzbbInyZ-9cpfI4enR2Dcds4lLDpNA,201
- videopython/generation/audio.py,sha256=YPqUdAcB0mGCt0mgFrxzupX08xx0O_qwfVdjFGlAxaw,985
- videopython/generation/image.py,sha256=B-TlrNXFu18NnMi3KO5fjk0paTSmIsQk400iZb76K8w,2507
- videopython/generation/video.py,sha256=4P4DhHS-_eDColsXK6YefSdoQbU3Ce0n6fHuY5zewYI,1874
- videopython/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- videopython/utils/common.py,sha256=F-30YoKUwWDI7HiJUWw0gRFUguhShSVaxT0aFfvpifg,936
- videopython-0.1.3.dist-info/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
- videopython-0.1.3.dist-info/METADATA,sha256=xj8k5j3qPIVKgXbr4uTi6ad2BSs9j6-V6baonpQKoJI,14709
- videopython-0.1.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- videopython-0.1.3.dist-info/top_level.txt,sha256=OikTGG8Swfw_syz--1atAn5KQ4GH9Pye17eATGred-Q,12
- videopython-0.1.3.dist-info/RECORD,,