videopython 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff shows the content of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- videopython/ai/understanding/transcribe.py +48 -19
- videopython/base/combine.py +45 -0
- videopython/base/text/__init__.py +0 -0
- videopython/{utils/text.py → base/text/overlay.py} +383 -8
- videopython/base/text/transcription.py +121 -0
- videopython/base/utils.py +6 -0
- videopython/base/video.py +164 -77
- videopython-0.5.0.dist-info/METADATA +194 -0
- {videopython-0.4.0.dist-info → videopython-0.5.0.dist-info}/RECORD +11 -12
- videopython/base/compose.py +0 -55
- videopython/base/transcription.py +0 -13
- videopython/utils/__init__.py +0 -3
- videopython/utils/common.py +0 -31
- videopython/utils/image.py +0 -47
- videopython-0.4.0.dist-info/METADATA +0 -118
- {videopython-0.4.0.dist-info → videopython-0.5.0.dist-info}/WHEEL +0 -0
- {videopython-0.4.0.dist-info → videopython-0.5.0.dist-info}/licenses/LICENSE +0 -0
videopython/base/text/transcription.py ADDED

@@ -0,0 +1,121 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+@dataclass
+class TranscriptionWord:
+    start: float
+    end: float
+    word: str
+
+
+@dataclass
+class TranscriptionSegment:
+    start: float
+    end: float
+    text: str
+    words: list[TranscriptionWord]
+
+
+@dataclass
+class Transcription:
+    segments: list[TranscriptionSegment]
+
+    def offset(self, time: float) -> Transcription:
+        """Return a new Transcription with all timings offset by the provided time value."""
+        offset_segments = []
+
+        for segment in self.segments:
+            offset_words = []
+            for word in segment.words:
+                offset_words.append(TranscriptionWord(start=word.start + time, end=word.end + time, word=word.word))
+
+            offset_segments.append(
+                TranscriptionSegment(
+                    start=segment.start + time, end=segment.end + time, text=segment.text, words=offset_words
+                )
+            )
+
+        return Transcription(segments=offset_segments)
+
+    def standardize_segments(self, *, time: float | None = None, num_words: int | None = None) -> Transcription:
+        """Return a new Transcription with standardized segments.
+
+        Args:
+            time: Maximum duration in seconds for each segment
+            num_words: Exact number of words per segment
+
+        Raises:
+            ValueError: If both time and num_words are provided or if neither is provided
+        """
+        if (time is None) == (num_words is None):
+            raise ValueError("Exactly one of 'time' or 'num_words' must be provided")
+
+        if time is not None and time <= 0:
+            raise ValueError("Time must be positive")
+
+        if num_words is not None and num_words <= 0:
+            raise ValueError("Number of words must be positive")
+
+        # Collect all words from all segments
+        all_words = []
+        for segment in self.segments:
+            all_words.extend(segment.words)
+
+        if not all_words:
+            return Transcription(segments=[])
+
+        standardized_segments = []
+
+        if time is not None:
+            # Group words by time constraint
+            current_words = []
+            current_start = None
+
+            for word in all_words:
+                if current_start is None:
+                    current_start = word.start
+                    current_words = [word]
+                elif word.end - current_start <= time:
+                    current_words.append(word)
+                else:
+                    # Create segment from current words
+                    if current_words:
+                        segment_text = " ".join(w.word for w in current_words)
+                        standardized_segments.append(
+                            TranscriptionSegment(
+                                start=current_start,
+                                end=current_words[-1].end,
+                                text=segment_text,
+                                words=current_words.copy(),
+                            )
+                        )
+
+                    # Start new segment
+                    current_start = word.start
+                    current_words = [word]
+
+            # Add final segment
+            if current_words:
+                segment_text = " ".join(w.word for w in current_words)
+                standardized_segments.append(
+                    TranscriptionSegment(
+                        start=current_start,  # type: ignore
+                        end=current_words[-1].end,
+                        text=segment_text,
+                        words=current_words.copy(),
+                    )
+                )
+        elif num_words is not None:
+            # Group words by word count constraint
+            for i in range(0, len(all_words), num_words):
+                segment_words = all_words[i : i + num_words]
+                segment_text = " ".join(w.word for w in segment_words)
+                standardized_segments.append(
+                    TranscriptionSegment(
+                        start=segment_words[0].start, end=segment_words[-1].end, text=segment_text, words=segment_words
+                    )
+                )
+
+        return Transcription(segments=standardized_segments)
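The file is self-contained dataclasses, so the new API can be exercised directly. A minimal sketch of both methods (the word timings below are invented for illustration):

```python
from videopython.base.text.transcription import Transcription, TranscriptionSegment, TranscriptionWord

# Invented timings for a single five-word segment.
words = [
    TranscriptionWord(start=0.0, end=0.4, word="Dogs"),
    TranscriptionWord(start=0.4, end=0.7, word="playing"),
    TranscriptionWord(start=0.7, end=0.9, word="in"),
    TranscriptionWord(start=0.9, end=1.1, word="the"),
    TranscriptionWord(start=1.1, end=1.5, word="park"),
]
transcription = Transcription(
    segments=[TranscriptionSegment(start=0.0, end=1.5, text="Dogs playing in the park", words=words)]
)

# Regroup into segments of exactly two words each.
by_words = transcription.standardize_segments(num_words=2)
print([s.text for s in by_words.segments])  # ['Dogs playing', 'in the', 'park']

# Shift every timing, e.g. after prepending a 3-second intro clip.
shifted = transcription.offset(3.0)
print(shifted.segments[0].words[0].start)  # 3.0
```

Note that `standardize_segments` flattens all words first, so original segment boundaries do not survive regrouping.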
videopython/base/video.py CHANGED

@@ -11,7 +11,7 @@ from typing import Literal, get_args
 import numpy as np
 from soundpython import Audio
 
-from videopython.utils
+from videopython.base.utils import generate_random_name
 
 ALLOWED_VIDEO_FORMATS = Literal["mp4", "avi", "mov", "mkv", "webm"]
 
@@ -134,15 +134,20 @@ class VideoMetadata:
 
 
 class Video:
-    def __init__(self):
-        self.
-        self.
-
+    def __init__(self, frames: np.ndarray, fps: int | float, audio: Audio | None = None):
+        self.frames = frames
+        self.fps = fps
+        if audio:
+            self.audio = audio
+        else:
+            self.audio = Audio.create_silent(
+                duration_seconds=round(self.total_seconds, 2), stereo=True, sample_rate=44100
+            )
 
     @classmethod
-    def from_path(
-
-
+    def from_path(
+        cls, path: str, read_batch_size: int = 100, start_second: float | None = None, end_second: float | None = None
+    ) -> Video:
         try:
             # Get video metadata using VideoMetadata.from_path
             metadata = VideoMetadata.from_path(path)
@@ -150,77 +155,154 @@ class Video:
             width = metadata.width
             height = metadata.height
             fps = metadata.fps
-
-
-            #
-
-            "
-
-
-
-            "
-
-
-
-
-
-
-            "-
-
-            ]
-
-            #
+            total_duration = metadata.total_seconds
+
+            # Validate time bounds
+            if start_second is not None and start_second < 0:
+                raise ValueError("start_second must be non-negative")
+            if end_second is not None and end_second > total_duration:
+                raise ValueError(f"end_second ({end_second}) exceeds video duration ({total_duration})")
+            if start_second is not None and end_second is not None and start_second >= end_second:
+                raise ValueError("start_second must be less than end_second")
+
+            # Build FFmpeg command with improved segment handling
+            ffmpeg_cmd = ["ffmpeg"]
+
+            # Add seek option BEFORE input for more efficient seeking
+            if start_second is not None:
+                ffmpeg_cmd.extend(["-ss", str(start_second)])
+
+            ffmpeg_cmd.extend(["-i", path])
+
+            # Add duration AFTER input for more precise timing
+            if end_second is not None and start_second is not None:
+                duration = end_second - start_second
+                ffmpeg_cmd.extend(["-t", str(duration)])
+            elif end_second is not None:
+                ffmpeg_cmd.extend(["-t", str(end_second)])
+
+            # Output format settings - removed problematic -vsync 0
+            ffmpeg_cmd.extend(
+                [
+                    "-f",
+                    "rawvideo",
+                    "-pix_fmt",
+                    "rgb24",
+                    "-vcodec",
+                    "rawvideo",
+                    "-avoid_negative_ts",
+                    "make_zero",  # Handle timing issues
+                    "-y",
+                    "pipe:1",
+                ]
+            )
+
+            # Start FFmpeg process with stderr redirected to avoid deadlock
             process = subprocess.Popen(
                 ffmpeg_cmd,
                 stdout=subprocess.PIPE,
-                stderr=subprocess.
-                bufsize=10**8,  # Use large buffer
+                stderr=subprocess.DEVNULL,  # Redirect stderr to avoid deadlock
+                bufsize=10**8,  # Use large buffer for efficient I/O
             )
 
             # Calculate frame size in bytes
             frame_size = width * height * 3  # 3 bytes per pixel for RGB
 
-            #
-
-
-
-
-
-
-
-
-                raw_data = process.stdout.read(frame_size * batch_size)  # type: ignore
-                if not raw_data:
-                    break
-
-                # Convert raw bytes to numpy array and reshape
-                batch_frames = np.frombuffer(raw_data, dtype=np.uint8)
-                batch_frames = batch_frames.reshape(-1, height, width, 3)
-
-                # Store batch in pre-allocated array
-                frames[frame_idx:batch_end] = batch_frames
-
-            # Clean up FFmpeg process
-            process.stdout.close()  # type: ignore
-            process.stderr.close()  # type: ignore
-            process.wait()
+            # Estimate frame count for pre-allocation
+            if start_second is not None and end_second is not None:
+                estimated_duration = end_second - start_second
+            elif end_second is not None:
+                estimated_duration = end_second
+            elif start_second is not None:
+                estimated_duration = total_duration - start_second
+            else:
+                estimated_duration = total_duration
 
-
-
+            # Add 10% buffer to handle frame rate variations and rounding
+            estimated_frames = int(estimated_duration * fps * 1.1) + 10
 
-
-
+            # Pre-allocate numpy array
+            frames = np.empty((estimated_frames, height, width, 3), dtype=np.uint8)
+            frames_read = 0
 
-            # Load audio
             try:
-
+                while frames_read < estimated_frames:
+                    # Calculate remaining frames to read
+                    remaining_frames = estimated_frames - frames_read
+                    batch_size = min(read_batch_size, remaining_frames)
+
+                    # Read batch of data
+                    batch_data = process.stdout.read(frame_size * batch_size)  # type: ignore
+
+                    if not batch_data:
+                        break
+
+                    # Convert to numpy array
+                    batch_frames = np.frombuffer(batch_data, dtype=np.uint8)
+
+                    # Calculate how many complete frames we got
+                    complete_frames = len(batch_frames) // (height * width * 3)
+
+                    if complete_frames == 0:
+                        break
+
+                    # Only keep complete frames
+                    complete_data = batch_frames[: complete_frames * height * width * 3]
+                    batch_frames_array = complete_data.reshape(complete_frames, height, width, 3)
+
+                    # Check if we have room in pre-allocated array
+                    if frames_read + complete_frames > estimated_frames:
+                        # Need to expand array - this should be rare with our buffer
+                        new_size = max(estimated_frames * 2, frames_read + complete_frames + 100)
+                        new_frames = np.empty((new_size, height, width, 3), dtype=np.uint8)
+                        new_frames[:frames_read] = frames[:frames_read]
+                        frames = new_frames
+                        estimated_frames = new_size
+
+                    # Store batch in pre-allocated array
+                    end_idx = frames_read + complete_frames
+                    frames[frames_read:end_idx] = batch_frames_array
+                    frames_read += complete_frames
+
+            finally:
+                # Ensure process is properly terminated
+                if process.poll() is None:
+                    process.terminate()
+                    try:
+                        process.wait(timeout=5)
+                    except subprocess.TimeoutExpired:
+                        process.kill()
+                        process.wait()
+
+                # Clean up pipes
+                if process.stdout:
+                    process.stdout.close()
+
+            # Check if FFmpeg had an error (non-zero return code)
+            if process.returncode not in (0, None) and frames_read == 0:
+                raise ValueError(f"FFmpeg failed to process video (return code: {process.returncode})")
+
+            if frames_read == 0:
+                raise ValueError("No frames were read from the video")
+
+            # Trim the pre-allocated array to actual frames read
+            frames = frames[:frames_read]  # type: ignore
+
+            # Load audio for the specified segment
+            try:
+                audio = Audio.from_file(path)
+                # Slice audio to match the video segment
+                if start_second is not None or end_second is not None:
+                    audio_start = start_second if start_second is not None else 0
+                    audio_end = end_second if end_second is not None else audio.metadata.duration_seconds
+                    audio = audio.slice(start_seconds=audio_start, end_seconds=audio_end)
             except Exception:
                 print(f"No audio found for `{path}`, adding silent track!")
-
-
-            )
+                # Create silent audio based on actual frames read
+                segment_duration = frames_read / fps
+                audio = Audio.create_silent(duration_seconds=round(segment_duration, 2), stereo=True, sample_rate=44100)
 
-            return
+            return cls(frames=frames, fps=fps, audio=audio)
 
         except VideoMetadataError as e:
             raise ValueError(f"Error getting video metadata: {e}")
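The rewritten loader validates the requested window up front, seeks with `-ss` before the input (fast, keyframe-based) and bounds the read with `-t` after it (precise), then reads raw RGB frames from the pipe in batches and slices the audio to the same window. A usage sketch of the new signature (the file path is a placeholder):

```python
from videopython.base.video import Video

# Decode only seconds 2.0-5.0 of the file. The audio track is sliced to
# the same window; if the file has no audio, a silent track is substituted.
clip = Video.from_path("input.mp4", start_second=2.0, end_second=5.0)

# Without bounds, the whole video is read from the FFmpeg pipe in batches
# of `read_batch_size` frames.
full = Video.from_path("input.mp4", read_batch_size=200)
```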
@@ -231,32 +313,23 @@
 
     @classmethod
     def from_frames(cls, frames: np.ndarray, fps: float) -> Video:
-        new_vid = cls()
         if frames.ndim != 4:
             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
         elif frames.shape[-1] == 4:
             frames = frames[:, :, :, :3]
         elif frames.shape[-1] != 3:
             raise ValueError(f"Unsupported number of dimensions: {frames.shape}!")
-
-        new_vid.fps = fps
-        new_vid.audio = Audio.create_silent(
-            duration_seconds=round(new_vid.total_seconds, 2), stereo=True, sample_rate=44100
-        )
-        return new_vid
+        return cls(frames=frames, fps=fps)
 
     @classmethod
     def from_image(cls, image: np.ndarray, fps: float = 24.0, length_seconds: float = 1.0) -> Video:
-        new_vid = cls()
         if len(image.shape) == 3:
             image = np.expand_dims(image, axis=0)
-
-
-        new_vid.audio = Audio.create_silent(duration_seconds=length_seconds, stereo=True, sample_rate=44100)
-        return new_vid
+        frames = np.repeat(image, round(length_seconds * fps), axis=0)
+        return cls(frames=frames, fps=fps)
 
     def copy(self) -> Video:
-        copied = Video
+        copied = Video.from_frames(self.frames.copy(), self.fps)
         copied.audio = self.audio  # Audio objects are immutable, no need to copy
         return copied
 
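With the constructor now accepting frames directly, the factory methods reduce to thin wrappers around it. A small sketch with synthetic frames:

```python
import numpy as np

from videopython.base.video import Video

# A single black 720p RGB frame, repeated into a 2-second clip at 24 fps.
image = np.zeros((720, 1280, 3), dtype=np.uint8)
video = Video.from_image(image, fps=24.0, length_seconds=2.0)
assert video.frames.shape == (48, 720, 1280, 3)

# from_frames accepts RGBA input and drops the alpha channel.
rgba = np.zeros((10, 720, 1280, 4), dtype=np.uint8)
video = Video.from_frames(rgba, fps=24.0)
assert video.frames.shape[-1] == 3
```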
@@ -376,6 +449,20 @@ class Video:
             raise
 
     def add_audio(self, audio: Audio, overlay: bool = True) -> None:
+        video_duration = self.total_seconds
+        audio_duration = audio.metadata.duration_seconds
+
+        if audio_duration > video_duration:
+            audio = audio.slice(start_seconds=0, end_seconds=video_duration)
+        elif audio_duration < video_duration:
+            silence_duration = video_duration - audio_duration
+            silence = Audio.create_silent(
+                duration_seconds=silence_duration,
+                stereo=audio.metadata.channels == 2,
+                sample_rate=audio.metadata.sample_rate,
+            )
+            audio = audio.concat(silence)
+
         if self.audio.is_silent:
             self.audio = audio
         elif overlay:
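The new preamble means `add_audio` always length-matches the track before mixing: audio longer than the video is trimmed, and shorter audio is padded with silence matching its channel count and sample rate. A sketch of the resulting behaviour (paths and durations are placeholders):

```python
from soundpython import Audio

from videopython.base.video import Video

video = Video.from_path("input.mp4")   # say, 10 seconds long
audio = Audio.from_file("music.mp3")   # say, 7 seconds long

# The 7-second track is padded with 3 seconds of silence before mixing,
# so the audio duration always ends up equal to video.total_seconds.
video.add_audio(audio, overlay=True)
```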
videopython-0.5.0.dist-info/METADATA ADDED

@@ -0,0 +1,194 @@
+Metadata-Version: 2.4
+Name: videopython
+Version: 0.5.0
+Summary: Minimal video generation and processing library.
+Project-URL: Homepage, https://github.com/bartwojtowicz/videopython/
+Project-URL: Repository, https://github.com/bartwojtowicz/videopython/
+Project-URL: Documentation, https://github.com/bartwojtowicz/videopython/
+Author-email: Bartosz Wójtowicz <bartoszwojtowicz@outlook.com>, Bartosz Rudnikowicz <bartoszrudnikowicz840@gmail.com>, Piotr Pukisz <piotr.pukisz@gmail.com>
+License: Apache-2.0
+License-File: LICENSE
+Keywords: ai,editing,generation,movie,opencv,python,shorts,video,videopython
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: <3.13,>=3.10
+Requires-Dist: numpy>=1.25.2
+Requires-Dist: opencv-python>=4.9.0.80
+Requires-Dist: pillow>=10.3.0
+Requires-Dist: soundpython>=0.1.11
+Requires-Dist: tqdm>=4.66.3
+Provides-Extra: ai
+Requires-Dist: accelerate>=0.29.2; extra == 'ai'
+Requires-Dist: diffusers>=0.26.3; extra == 'ai'
+Requires-Dist: numba>=0.61.0; extra == 'ai'
+Requires-Dist: openai-whisper>=20240930; extra == 'ai'
+Requires-Dist: torch>=2.1.0; extra == 'ai'
+Requires-Dist: transformers>=4.38.1; extra == 'ai'
+Provides-Extra: dev
+Requires-Dist: mypy>=1.8.0; extra == 'dev'
+Requires-Dist: pytest-cov>=6.1.1; extra == 'dev'
+Requires-Dist: pytest>=7.4.0; extra == 'dev'
+Requires-Dist: ruff>=0.1.14; extra == 'dev'
+Requires-Dist: types-pillow>=10.2.0.20240213; extra == 'dev'
+Requires-Dist: types-tqdm>=4.66.0.20240106; extra == 'dev'
+Description-Content-Type: text/markdown
+
+# About
+
+Videopython is a minimal video generation and processing library designed with short-form videos in mind, with a focus on simplicity and ease of use for both humans and AI agents.
+
+# Setup
+
+## Install ffmpeg
+```bash
+# Install with brew for macOS:
+brew install ffmpeg
+# Install with apt-get for Ubuntu:
+sudo apt-get install ffmpeg
+```
+
+## Install library
+
+```bash
+# Install with your favourite package manager
+uv add videopython --extra ai
+
+# pip install works as well :)
+pip install videopython[ai]
+```
+
+> You can install without the `[ai]` dependencies for basic video handling and processing.
+> The functionalities found in `videopython.ai` won't work in that case.
+
+# Usage examples
+
+## Basic video editing
+
+```python
+from videopython.base.video import Video
+
+# Load videos and print metadata
+video1 = Video.from_path("tests/test_data/small_video.mp4")
+print(video1)
+
+video2 = Video.from_path("tests/test_data/big_video.mp4")
+print(video2)
+
+# Define the transformations
+from videopython.base.transforms import CutSeconds, ResampleFPS, Resize, TransformationPipeline
+
+pipeline = TransformationPipeline(
+    [CutSeconds(start=1.5, end=6.5), ResampleFPS(fps=30), Resize(width=1000, height=1000)]
+)
+video1 = pipeline.run(video1)
+video2 = pipeline.run(video2)
+
+# Combine videos, add audio and save
+from videopython.base.transitions import FadeTransition
+
+fade = FadeTransition(effect_time_seconds=3.0)
+video = fade.apply(videos=(video1, video2))
+video.add_audio_from_file("tests/test_data/test_audio.mp3")
+
+savepath = video.save()
+```
+
+## AI powered examples
+
+### Video Generation
+
+> Using an Nvidia A40 or better is recommended for the `videopython.ai` module.
+```python
+# Generate image and animate it
+from videopython.ai.generation import ImageToVideo
+from videopython.ai.generation import TextToImage
+
+image = TextToImage().generate_image(prompt="Golden Retriever playing in the park")
+video = ImageToVideo().generate_video(image=image, fps=24)
+
+# Video generation directly from prompt
+from videopython.ai.generation import TextToVideo
+video_gen = TextToVideo()
+video = video_gen.generate_video("Dogs playing in the park")
+for _ in range(10):
+    video += video_gen.generate_video("Dogs playing in the park")
+```
+
+### Audio generation
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate music on top of video
+from videopython.ai.generation import TextToMusic
+text_to_music = TextToMusic()
+audio = text_to_music.generate_audio("Happy dogs playing together in a park", max_new_tokens=256)
+video.add_audio(audio=audio)
+
+# Add TTS on top of video
+from videopython.ai.generation import TextToSpeech
+text_to_speech = TextToSpeech()
+audio = text_to_speech.generate_audio("Woof woof woof! Woooooof!")
+video.add_audio(audio=audio)
+```
+
+### Generate and overlay subtitles
+```python
+from videopython.base.video import Video
+video = Video.from_path("<PATH_TO_VIDEO>")
+
+# Generate transcription with timestamps
+from videopython.ai.understanding.transcribe import CreateTranscription
+transcription = CreateTranscription("base").transcribe(video)
+# Initialise the object for overlaying. See `TranscriptionOverlay` for detailed configuration options.
+from videopython.base.text.overlay import TranscriptionOverlay
+transcription_overlay = TranscriptionOverlay(font_filename="src/tests/test_data/test_font.ttf")
+
+video = transcription_overlay.apply(video, transcription)
+video.save()
+```
+
+# Development notes
+
+## Project structure
+
+Source code of the project can be found under the `src/` directory, along with separate directories for unit tests and mypy stubs.
+```
+.
+└── src
+    ├── stubs # Contains stubs for mypy
+    ├── tests # Unit tests
+    └── videopython # Library code
+```
+
+----
+
+The `videopython` library is divided into two separate high-level modules:
+* `videopython.base`: Contains base classes for handling videos and for basic video editing. There are no imports from `videopython.ai` within the `base` module, which allows users to install lightweight base dependencies to do simple video operations.
+* `videopython.ai`: Contains AI-powered functionalities for video generation. It has its own `ai` dependency group, which contains all dependencies required to run AI models.
+
+## Running locally
+
+We are using [uv](https://docs.astral.sh/uv/) as the project and package manager. Once you clone the repo and install uv locally, you can use it to sync the dependencies.
+```bash
+uv sync --all-extras
+```
+
+To run the unit tests, you can simply run:
+```bash
+uv run pytest
+```
+
+We also use [Ruff](https://docs.astral.sh/ruff/) for linting/formatting and [mypy](https://github.com/python/mypy) as the type checker.
+```bash
+# Run formatting
+uv run ruff format
+# Run linting and apply fixes
+uv run ruff check --fix
+# Run type checks
+uv run mypy src/
+```
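The README's subtitle example composes naturally with the new `standardize_segments` API to control caption length before overlaying. A sketch (it assumes `transcribe` returns the `Transcription` type shown earlier, as the README's example implies; paths are placeholders):

```python
from videopython.ai.understanding.transcribe import CreateTranscription
from videopython.base.text.overlay import TranscriptionOverlay
from videopython.base.video import Video

video = Video.from_path("input.mp4")
transcription = CreateTranscription("base").transcribe(video)

# Regroup the model's sentence-level segments into short three-word captions.
captions = transcription.standardize_segments(num_words=3)

transcription_overlay = TranscriptionOverlay(font_filename="test_font.ttf")
video = transcription_overlay.apply(video, captions)
video.save()
```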
{videopython-0.4.0.dist-info → videopython-0.5.0.dist-info}/RECORD CHANGED

@@ -6,20 +6,19 @@ videopython/ai/generation/audio.py,sha256=CNf6ZeV3iU4CU0Kq8HtDLwLPP2ABq9AGQD1TBO
 videopython/ai/generation/image.py,sha256=gS0zqzyIoCvjTjfks31ApG8lX0nUKXWRRgFGGLN4RjM,654
 videopython/ai/generation/video.py,sha256=206YON_XjPTYyjIJ3j5uBgd_yHmCDg7SqbkIU9GzEgw,1831
 videopython/ai/understanding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/ai/understanding/transcribe.py,sha256=
+videopython/ai/understanding/transcribe.py,sha256=hm2f5Fm1O_tMrSmUlcUdl_rQRhc5Sz_kaV4tnJ4IxbQ,2557
 videopython/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-videopython/base/
+videopython/base/combine.py,sha256=XC_pzyhbIh6h0fmxX1LhhhtlmOBbUQX9Y4EtDJqQn8g,1900
 videopython/base/effects.py,sha256=1RbRLTQD0V26DBc4jbRCDI4eGr6-TyBdX-Ia2USKxmc,7554
 videopython/base/exceptions.py,sha256=68_16lUPOR9_zhWdeBGS8_NFI32VbrcoDbN5KHHg0_w,44
-videopython/base/transcription.py,sha256=FloqvY-OlBQPOCkPnSx6R7azn4smD5-JYd-pMNssuYw,196
 videopython/base/transforms.py,sha256=FDh-8EgQoZxB6Gv-T15kZGctcu9_4XHsTy_n7kgxlQw,5828
 videopython/base/transitions.py,sha256=P1bBsxugf5i0JEtx7MoRgxWSIDcBli-0QucRwBIFGqs,3687
-videopython/base/
-videopython/
-videopython/
-videopython/
-videopython/
-videopython-0.
-videopython-0.
-videopython-0.
-videopython-0.
+videopython/base/utils.py,sha256=bAwIagHvd1NWu8UYAsS-pDm38E4R8qRfeHvWk-O2__0,125
+videopython/base/video.py,sha256=RxKHmR39EEvBa5m2xFDNj4_mq213RUG3NQ_lhk5U-PA,20462
+videopython/base/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+videopython/base/text/overlay.py,sha256=EiBDSsnn2pSGeWGajblUxovcP_IdA6gk2zZ5rsjhdI8,44434
+videopython/base/text/transcription.py,sha256=9c3FRBr7RkialHhdfSwEX303QnIt1sCSiXoId9_DRkk,4246
+videopython-0.5.0.dist-info/METADATA,sha256=FTo8Bo3YLhp9bGTrctiehMMksQwecH1DN84JO5RydyU,6574
+videopython-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+videopython-0.5.0.dist-info/licenses/LICENSE,sha256=nJL9jVOt2MSW7swNDq4Y6oD_n9bLI0B0afr8ougtZ6s,10832
+videopython-0.5.0.dist-info/RECORD,,