yta-video-opengl 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_video_opengl/complete/__init__.py +0 -0
- yta_video_opengl/complete/timeline.py +271 -0
- yta_video_opengl/complete/track.py +403 -0
- yta_video_opengl/complete/video_on_track.py +149 -0
- yta_video_opengl/reader/__init__.py +190 -42
- yta_video_opengl/reader/cache.py +299 -25
- yta_video_opengl/tests.py +11 -0
- yta_video_opengl/utils.py +151 -1
- yta_video_opengl/writer.py +17 -3
- {yta_video_opengl-0.0.10.dist-info → yta_video_opengl-0.0.12.dist-info}/METADATA +1 -1
- yta_video_opengl-0.0.12.dist-info/RECORD +20 -0
- yta_video_opengl-0.0.10.dist-info/RECORD +0 -16
- {yta_video_opengl-0.0.10.dist-info → yta_video_opengl-0.0.12.dist-info}/LICENSE +0 -0
- {yta_video_opengl-0.0.10.dist-info → yta_video_opengl-0.0.12.dist-info}/WHEEL +0 -0
yta_video_opengl/reader/cache.py
CHANGED
@@ -16,17 +16,22 @@ memory all those frames to be handled fast. It
|
|
16
16
|
will remove the old frames if needed to use only
|
17
17
|
the 'size' we set when creating it.
|
18
18
|
"""
|
19
|
-
from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index
|
19
|
+
from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index, index_to_pts
|
20
|
+
from yta_video_frame_time import T
|
20
21
|
from av.container import InputContainer
|
21
22
|
from av.video.stream import VideoStream
|
22
23
|
from av.audio.stream import AudioStream
|
23
24
|
from av.video.frame import VideoFrame
|
24
25
|
from av.audio.frame import AudioFrame
|
25
26
|
from yta_validation.parameter import ParameterValidator
|
27
|
+
from yta_validation import PythonValidator
|
26
28
|
from fractions import Fraction
|
27
29
|
from collections import OrderedDict
|
28
30
|
from typing import Union
|
29
31
|
|
32
|
+
import numpy as np
|
33
|
+
import math
|
34
|
+
|
30
35
|
|
31
36
|
class VideoFrameCache:
|
32
37
|
"""
|
@@ -60,11 +65,11 @@ class VideoFrameCache:
|
|
60
65
|
self,
|
61
66
|
container: InputContainer,
|
62
67
|
stream: Union[VideoStream, AudioStream],
|
63
|
-
size: int =
|
68
|
+
size: Union[int, None] = None
|
64
69
|
):
|
65
70
|
ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
|
66
71
|
ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
|
67
|
-
ParameterValidator.
|
72
|
+
ParameterValidator.validate_positive_int('size', size)
|
68
73
|
|
69
74
|
self.container: InputContainer = container
|
70
75
|
"""
|
@@ -78,7 +83,7 @@ class VideoFrameCache:
|
|
78
83
|
"""
|
79
84
|
The cache ordered dictionary.
|
80
85
|
"""
|
81
|
-
self.size = size
|
86
|
+
self.size: Union[int, None] = size
|
82
87
|
"""
|
83
88
|
The size (in number of frames) of the cache.
|
84
89
|
"""
|
@@ -99,6 +104,31 @@ class VideoFrameCache:
|
|
99
104
|
if packet.is_keyframe:
|
100
105
|
self.key_frames_pts.append(packet.pts)
|
101
106
|
|
107
|
+
# The cache size will be auto-calculated to
|
108
|
+
# use the amount of frames of the biggest
|
109
|
+
# interval of frames that belongs to a key
|
110
|
+
# frame, or a value by default
|
111
|
+
fps = (
|
112
|
+
float(self.stream.average_rate)
|
113
|
+
if PythonValidator.is_instance_of(self.stream, VideoStream) else
|
114
|
+
float(self.stream.rate)
|
115
|
+
)
|
116
|
+
# Intervals, but in number of frames
|
117
|
+
intervals = np.diff(
|
118
|
+
# Intervals of time between keyframes
|
119
|
+
np.array(self.key_frames_pts) * self.stream.time_base
|
120
|
+
) * fps
|
121
|
+
|
122
|
+
self.size = (
|
123
|
+
math.ceil(np.max(intervals))
|
124
|
+
if intervals.size > 0 else
|
125
|
+
(
|
126
|
+
self.size or
|
127
|
+
# TODO: Make this 'default_size' a setting or something
|
128
|
+
60
|
129
|
+
)
|
130
|
+
)
|
131
|
+
|
102
132
|
self.container.seek(0)
|
103
133
|
|
104
134
|
def _get_nearest_keyframe_fps(
|
@@ -117,10 +147,29 @@ class VideoFrameCache:
|
|
117
147
|
if key_frame_pts <= pts
|
118
148
|
])
|
119
149
|
|
120
|
-
def
|
150
|
+
def _store_frame_in_cache(
    self,
    frame: Union[VideoFrame, AudioFrame]
) -> Union[VideoFrame, AudioFrame]:
    """
    Put the given 'frame' in the cache, using its
    'pts' as the key, unless that 'pts' is already
    stored. When the insertion makes the cache
    bigger than 'self.size' the oldest inserted
    entry is dropped.

    The same 'frame' received is always returned.
    """
    key = frame.pts

    # Already cached, nothing to store nor to clean
    if key in self.cache:
        return frame

    self.cache[key] = frame

    # 'last = False' pops from the front of the
    # ordered dict, which is the oldest insertion
    if len(self.cache) > self.size:
        self.cache.popitem(last = False)

    return frame
|
168
|
+
|
169
|
+
def get_frame_from_pts(
|
121
170
|
self,
|
122
171
|
pts: int
|
123
|
-
):
|
172
|
+
) -> Union[VideoFrame, AudioFrame, None]:
|
124
173
|
"""
|
125
174
|
Get the frame that has the provided 'pts'.
|
126
175
|
|
@@ -132,6 +181,9 @@ class VideoFrameCache:
|
|
132
181
|
This method must be called when the frame
|
133
182
|
requested is not stored in the caché.
|
134
183
|
"""
|
184
|
+
if pts in self.cache:
|
185
|
+
return self.cache[pts]
|
186
|
+
|
135
187
|
# Look for the most near key frame
|
136
188
|
key_frame_pts = self._get_nearest_keyframe_fps(pts)
|
137
189
|
|
@@ -145,18 +197,14 @@ class VideoFrameCache:
|
|
145
197
|
continue
|
146
198
|
|
147
199
|
# Store in cache if needed
|
148
|
-
|
149
|
-
# TODO: The 'format' must be dynamic
|
150
|
-
self.cache[frame.pts] = frame.to_ndarray(format = "rgb24")
|
151
|
-
|
152
|
-
# Clean cache if full
|
153
|
-
if len(self.cache) > self.size:
|
154
|
-
self.cache.popitem(last = False)
|
200
|
+
self._store_frame_in_cache(frame)
|
155
201
|
|
156
202
|
if frame.pts >= pts:
|
157
203
|
decoded = self.cache[frame.pts]
|
158
204
|
break
|
159
205
|
|
206
|
+
# TODO: Is this working? We need previous
|
207
|
+
# frames to be able to decode...
|
160
208
|
return decoded
|
161
209
|
|
162
210
|
def get_frame(
|
@@ -167,13 +215,29 @@ class VideoFrameCache:
|
|
167
215
|
Get the frame with the given 'index' from
|
168
216
|
the cache.
|
169
217
|
"""
|
170
|
-
# TODO: Maybe we can accept '
|
171
|
-
|
218
|
+
# TODO: Maybe we can accept 'pts' also
|
219
|
+
pts = index_to_pts(index, self.time_base, self.fps)
|
220
|
+
|
221
|
+
return (
|
222
|
+
self.cache[pts]
|
223
|
+
if pts in self.cache else
|
224
|
+
self.get_frame_from_pts(pts)
|
225
|
+
)
|
226
|
+
|
227
|
+
def get_frame_from_t(
    self,
    t: float
) -> Union[VideoFrame, AudioFrame]:
    """
    Get the frame that corresponds to the 't'
    time moment provided. The 't' is turned
    into a 'pts' with the cache time base and
    the frame is read from the cache if it is
    stored there, or obtained with the
    'get_frame_from_pts' method if not.
    """
    pts = t_to_pts(t, self.time_base)

    if pts in self.cache:
        return self.cache[pts]

    return self.get_frame_from_pts(pts)
|
178
242
|
|
179
243
|
def get_frames(
|
@@ -185,11 +249,49 @@ class VideoFrameCache:
|
|
185
249
|
Get all the frames in the range between
|
186
250
|
the provided 'start' and 'end' time in
|
187
251
|
seconds.
|
252
|
+
|
253
|
+
This method is an iterator that yields
|
254
|
+
the frame, its t and its index.
|
255
|
+
"""
|
256
|
+
# We use the cache as iterator if all the frames
|
257
|
+
# requested are stored there
|
258
|
+
# TODO: I think this is not ok... I will never
|
259
|
+
# have all the pts from here stored, as they come
|
260
|
+
# from 't' that is different...
|
261
|
+
|
262
|
+
"""
|
263
|
+
Feel free to move this explanation to other
|
264
|
+
place, its about the duration.
|
265
|
+
|
266
|
+
The stream 'duration' parameter is measured
|
267
|
+
on ticks, the amount of ticks that the
|
268
|
+
stream lasts. Here below is an example:
|
269
|
+
|
270
|
+
- Duration raw: 529200
|
271
|
+
- Time base: 1/44100
|
272
|
+
- Duration (seconds): 12.0
|
188
273
|
"""
|
189
|
-
|
190
|
-
#
|
191
|
-
|
192
|
-
|
274
|
+
|
275
|
+
# The 'duration' is on pts ticks
|
276
|
+
duration = float(self.stream.duration * self.stream.time_base)
|
277
|
+
print(f'duration of the whole stream: {str(duration)}s, asking for [{str(start)}, {str(end)})')
|
278
|
+
# TODO: I think it would be better to
|
279
|
+
# receive and work with pts instead of
|
280
|
+
# 't' time moments...
|
281
|
+
# pts_list = [
|
282
|
+
# t_to_pts(t, self.time_base)
|
283
|
+
# for t in T.get_frame_indexes(duration, self.fps, start, end)
|
284
|
+
# ]
|
285
|
+
|
286
|
+
# if all(
|
287
|
+
# pts in self.cache
|
288
|
+
# for pts in pts_list
|
289
|
+
# ):
|
290
|
+
# for pts in pts_list:
|
291
|
+
# yield self.cache[pts]
|
292
|
+
|
293
|
+
# If not all, we ignore the cache because we
|
294
|
+
# need to decode and they are all consecutive
|
193
295
|
start = t_to_pts(start, self.time_base)
|
194
296
|
end = (
|
195
297
|
t_to_pts(end, self.time_base)
|
@@ -206,16 +308,85 @@ class VideoFrameCache:
|
|
206
308
|
if frame.pts is None:
|
207
309
|
continue
|
208
310
|
|
209
|
-
|
311
|
+
# We store all the frames in cache
|
312
|
+
self._store_frame_in_cache(frame)
|
313
|
+
|
314
|
+
print(frame)
|
315
|
+
frame_end_pts = frame.pts + int(frame.samples * (1 / self.stream.sample_rate) / self.time_base)
|
316
|
+
#frame_end_pts = frame.pts + int(frame.samples)
|
317
|
+
#frame_end_pts = frame.pts + int(frame.samples / (self.stream.sample_rate * self.time_base))
|
318
|
+
print(f' Frame from [{str(frame.pts)}, {str(frame_end_pts)}] and looking for [{str(start)}, {str(end)}]')
|
319
|
+
|
320
|
+
# For the next comments imagine we are looking
|
321
|
+
# for the [1.0, 2.0) audio time range
|
322
|
+
# Previous frame and nothing is inside
|
323
|
+
if frame_end_pts <= start:
|
324
|
+
# From 0.25 to 1.0
|
210
325
|
continue
|
211
326
|
|
327
|
+
# We finished, nothing is inside and its after
|
212
328
|
if (
|
213
329
|
end is not None and
|
214
|
-
frame.pts
|
330
|
+
frame.pts >= end
|
215
331
|
):
|
332
|
+
# From 2.0 to 2.75
|
216
333
|
return
|
334
|
+
|
335
|
+
# We need: from 1 to 2
|
336
|
+
# Audio is:
|
337
|
+
# - from 0 to 0.75 (Not included, omit)
|
338
|
+
# - from 0.5 to 1.5 (Included, take 1.0 to 1.5)
|
339
|
+
# - from 0.5 to 2.5 (Included, take 1.0 to 2.0)
|
340
|
+
# - from 1.25 to 1.5 (Included, take 1.25 to 1.5)
|
341
|
+
# - from 1.25 to 2.5 (Included, take 1.25 to 2.0)
|
342
|
+
# - from 2.5 to 3.5 (Not included, omit)
|
343
|
+
|
344
|
+
# Here below, at least a part is inside
|
345
|
+
if (
|
346
|
+
frame.pts < start and
|
347
|
+
frame_end_pts > start
|
348
|
+
):
|
349
|
+
# A part at the end is included
|
350
|
+
end_time = (
|
351
|
+
# From 0.5 to 1.5 => take 1.0 to 1.5
|
352
|
+
frame_end_pts
|
353
|
+
if frame_end_pts <= end else
|
354
|
+
# From 0.5 to 2.5 => take 1.0 to 2.0
|
355
|
+
end
|
356
|
+
)
|
357
|
+
print('A part at the end is included.')
|
358
|
+
# TODO: I'm using too much 'pts_to_t'
|
359
|
+
frame = trim_audio_frame_pts(
|
360
|
+
frame = frame,
|
361
|
+
start_pts = start,
|
362
|
+
end_pts = end_time,
|
363
|
+
time_base = self.time_base
|
364
|
+
)
|
365
|
+
elif (
|
366
|
+
frame.pts >= start and
|
367
|
+
frame.pts < end
|
368
|
+
):
|
369
|
+
end_time = (
|
370
|
+
# From 1.25 to 1.5 => take 1.25 to 1.5
|
371
|
+
frame_end_pts
|
372
|
+
if frame_end_pts <= end else
|
373
|
+
# From 1.25 to 2.5 => take 1.25 to 2.0
|
374
|
+
end
|
375
|
+
)
|
376
|
+
# A part at the beginning is included
|
377
|
+
print('A part at the begining is included.')
|
378
|
+
# TODO: I'm using too much 'pts_to_t'
|
379
|
+
frame = trim_audio_frame_pts(
|
380
|
+
frame = frame,
|
381
|
+
start_pts = frame.pts,
|
382
|
+
end_pts = end_time,
|
383
|
+
time_base = self.time_base
|
384
|
+
)
|
385
|
+
|
386
|
+
# If the whole frame is in, pass it as it is
|
217
387
|
|
218
388
|
# TODO: Maybe send a @dataclass instead (?)
|
389
|
+
# TODO: Do I really need these 't' and 'index' (?)
|
219
390
|
yield (
|
220
391
|
frame,
|
221
392
|
pts_to_t(frame.pts, self.time_base),
|
@@ -230,4 +401,107 @@ class VideoFrameCache:
|
|
230
401
|
"""
|
231
402
|
self.cache.clear()
|
232
403
|
|
233
|
-
return self
|
404
|
+
return self
|
405
|
+
|
406
|
+
|
407
|
+
|
408
|
+
import av
|
409
|
+
import numpy as np
|
410
|
+
|
411
|
+
import av
|
412
|
+
|
413
|
+
|
414
|
+
|
415
|
+
def trim_audio_frame_pts(
    frame: av.AudioFrame,
    start_pts: int,
    end_pts: int,
    time_base
) -> av.AudioFrame:
    """
    Trim an AudioFrame to keep only the samples that
    are inside the [start_pts, end_pts) range, both
    limits expressed in 'time_base' ticks (pts).

    A new AudioFrame is returned with the same format,
    layout and sample rate than the original one, the
    'time_base' provided, and the first tick that has
    been kept as its 'pts'.

    A ValueError is raised if the frame and the range
    requested do not share any sample.
    """
    sample_rate = frame.sample_rate
    n_samples = frame.samples

    # Amount of samples that fit in one tick. The pts
    # offsets must go through this factor because a tick
    # is one sample only when time_base == 1 / sample_rate
    samples_per_tick = time_base * sample_rate

    # Position, inside this frame, of the first sample to
    # keep and of the first one to discard, limited to the
    # samples the frame really has
    start_idx = max(0, int((start_pts - frame.pts) * samples_per_tick))
    end_idx = min(n_samples, int((end_pts - frame.pts) * samples_per_tick))

    if start_idx >= end_idx:
        raise ValueError(
            f'The frame starting at pts {frame.pts} with {n_samples} samples '
            f'does not overlap the [{start_pts}, {end_pts}) range requested.'
        )

    samples = frame.to_ndarray()

    # Planar formats give one row per channel, but packed
    # formats give a single row with the channels
    # interleaved, so each sample uses one column per channel
    columns_per_sample = (
        1
        if frame.format.is_planar else
        len(frame.layout.channels)
    )
    cut_samples = samples[
        :,
        start_idx * columns_per_sample:end_idx * columns_per_sample
    ]

    new_frame = av.AudioFrame.from_ndarray(
        cut_samples,
        format = frame.format,
        layout = frame.layout
    )
    new_frame.sample_rate = sample_rate
    # The new frame starts where the cut really starts
    new_frame.pts = max(frame.pts, start_pts)
    new_frame.time_base = time_base

    return new_frame
|
465
|
+
|
466
|
+
|
467
|
+
|
468
|
+
def trim_audio_frame_t(
    frame: av.AudioFrame,
    start_time: float,
    end_time: float,
    time_base
) -> Union[av.AudioFrame, None]:
    """
    Trim an AudioFrame to keep only the samples that
    are inside the [start_time, end_time) range, both
    limits expressed in seconds.

    A new AudioFrame is returned with the same format,
    layout and sample rate than the original one, the
    'time_base' provided, and a 'pts' that points to
    the moment in which the cut starts.

    None is returned if the frame and the range
    requested do not share any sample.
    """
    sample_rate = frame.sample_rate
    # Samples per channel, which is not the array width
    # when the format is packed
    n_samples = frame.samples

    frame_start = float(frame.pts * time_base)
    frame_end = frame_start + (n_samples / sample_rate)

    # Overlap, in seconds
    cut_start = max(frame_start, start_time)
    cut_end = min(frame_end, end_time)

    if cut_start >= cut_end:
        return None

    start_idx = int((cut_start - frame_start) * sample_rate)
    # When the cut reaches the end of the frame we take
    # the last sample directly, the float operation could
    # give 'n_samples - 1' by a rounding error
    end_idx = (
        n_samples
        if cut_end >= frame_end else
        min(n_samples, int((cut_end - frame_start) * sample_rate))
    )

    if start_idx >= end_idx:
        # The overlap is smaller than one sample
        return None

    samples = frame.to_ndarray()

    # Planar formats give one row per channel, but packed
    # formats give a single row with the channels
    # interleaved, so each sample uses one column per channel
    columns_per_sample = (
        1
        if frame.format.is_planar else
        len(frame.layout.channels)
    )
    cut_samples = samples[
        :,
        start_idx * columns_per_sample:end_idx * columns_per_sample
    ]

    new_frame = av.AudioFrame.from_ndarray(
        cut_samples,
        format = frame.format,
        layout = frame.layout
    )
    new_frame.sample_rate = sample_rate
    # Rounded, a plain 'int' would turn a float result
    # like 1023.9999 into the previous tick
    new_frame.pts = round(cut_start / time_base)
    new_frame.time_base = time_base

    return new_frame
|
yta_video_opengl/tests.py
CHANGED
@@ -582,6 +582,17 @@ def video_modified_stored():
|
|
582
582
|
from yta_video_opengl.classes import WavingFrame, BreathingFrame, HandheldFrame, OrbitingFrame, RotatingInCenterFrame, StrangeTvFrame, GlitchRgbFrame, WavingNode
|
583
583
|
from yta_video_opengl.utils import texture_to_frame, frame_to_texture
|
584
584
|
from yta_video_opengl.video import Video
|
585
|
+
from yta_video_opengl.complete.timeline import Timeline
|
586
|
+
|
587
|
+
video = Video(VIDEO_PATH, 0.25, 0.75)
|
588
|
+
timeline = Timeline()
|
589
|
+
timeline.add_video(Video(VIDEO_PATH, 0.25, 0.75), 0.5)
|
590
|
+
# This is successfully raising an exception
|
591
|
+
#timeline.add_video(Video(VIDEO_PATH, 0.25, 0.75), 0.6)
|
592
|
+
timeline.add_video(Video(VIDEO_PATH, 0.25, 0.75), 1.5)
|
593
|
+
timeline.render(OUTPUT_PATH)
|
594
|
+
|
595
|
+
return
|
585
596
|
|
586
597
|
Video(VIDEO_PATH, 0.25, 0.75).save_as(OUTPUT_PATH)
|
587
598
|
|
yta_video_opengl/utils.py
CHANGED
@@ -4,6 +4,7 @@ from av.video.stream import VideoStream
|
|
4
4
|
from av.audio.stream import AudioStream
|
5
5
|
from av.video.frame import VideoFrame
|
6
6
|
from typing import Union
|
7
|
+
from fractions import Fraction
|
7
8
|
|
8
9
|
import av
|
9
10
|
import numpy as np
|
@@ -332,6 +333,17 @@ def pts_to_index(
|
|
332
333
|
"""
|
333
334
|
return int(round(pts_to_t(pts, stream_time_base) * fps))
|
334
335
|
|
336
|
+
def index_to_pts(
    index: int,
    stream_time_base: 'Fraction',
    fps: float
) -> int:
    """
    Transform a frame index into a 'pts' packet
    timestamp.

    The result is rounded to the nearest tick
    because, when 'fps' is a float, the division
    can give a value slightly below the real one
    (299.99999999999994 instead of 300) and a
    truncation would return the previous tick.
    """
    return round(index / fps / stream_time_base)
|
346
|
+
|
335
347
|
def pts_to_t(
|
336
348
|
pts: int,
|
337
349
|
stream_time_base: 'Fraction'
|
@@ -340,4 +352,142 @@ def pts_to_t(
|
|
340
352
|
Transform a 'pts' packet timestamp to a 't'
|
341
353
|
time moment.
|
342
354
|
"""
|
343
|
-
return pts * stream_time_base
|
355
|
+
return pts * stream_time_base
|
356
|
+
|
357
|
+
|
358
|
+
# TODO: Move this to another utils
|
359
|
+
def get_silent_audio_frame(
    sample_rate,
    layout="stereo",
    nb_samples=1024,
    format="s16"
):
    """
    Get an AudioFrame of 'nb_samples' samples of
    silence (all zeros) with the 'layout', 'format'
    and 'sample_rate' provided.

    The formats accepted are 's16', 's16p', 'flt'
    and 'fltp'. A ValueError is raised with any
    other one.
    """
    dtype_by_format = {
        's16': np.int16,
        's16p': np.int16,
        'flt': np.float32,
        'fltp': np.float32
    }

    if format not in dtype_by_format:
        raise ValueError(f"Formato no soportado: {format}")

    # Number of channels of the layout
    channels = len(av.AudioLayout(layout).channels)

    # The planar formats (names ending in 'p') need one
    # row per channel, and the packed ones need a single
    # row with all the channels interleaved
    shape = (
        (channels, nb_samples)
        if format.endswith('p') else
        (1, nb_samples * channels)
    )
    silent_array = np.zeros(shape, dtype = dtype_by_format[format])

    frame = av.AudioFrame.from_ndarray(silent_array, format=format, layout=layout)
    frame.sample_rate = sample_rate

    return frame
|
402
|
+
|
403
|
+
def get_black_background_video_frame(
    size: tuple[int, int] = (1920, 1080),
    format: str = 'rgb24'
):
    """
    Get a VideoFrame that is completely black, with
    the 'size' provided as (width, height) and the
    pixel 'format' given.

    The array is built with 3 channels, so only the
    formats with 3 channels (like 'rgb24') are valid.
    """
    width, height = size[0], size[1]

    return av.VideoFrame.from_ndarray(
        # TODO: What if we want alpha (?)
        # The image arrays are (rows, columns, channels),
        # so the height must be the first dimension
        array = np.zeros((height, width, 3), dtype = np.uint8),
        format = format
    )
|
412
|
+
|
413
|
+
def get_audio_frame_pts_range(
    frame: av.AudioFrame,
    do_in_seconds: bool = False
):
    """
    Get the [start_pts, end_pts) range of the
    pyav AudioFrame, or in seconds if the
    'do_in_seconds' parameter is True.

    The time base of the frame is used for the
    conversions, or '1 / sample_rate' if the
    frame has no time base set.

    An Exception is raised if the frame has no
    'pts'.
    """
    if frame.pts is None:
        raise Exception('No "pts" found.')

    time_base = (
        frame.time_base
        if frame.time_base else
        Fraction(1, frame.sample_rate)
    )

    # Exact duration of the frame in seconds
    duration = Fraction(frame.samples, frame.sample_rate)

    # First and last tick. Remember that the last
    # one is not included. The duration is turned
    # into ticks, as one tick is one sample only
    # when the time base is '1 / sample_rate'
    start_pts = frame.pts
    end_pts = start_pts + round(duration / time_base)

    if do_in_seconds:
        start_time = start_pts * time_base

        return (
            float(start_time),
            float(start_time + duration)
        )

    return (
        start_pts,
        end_pts
    )
|
452
|
+
|
453
|
+
def audio_frames_and_remainder_per_video_frame(
    fps: float,
    sample_rate: int,
    nb_samples: int
):
    """
    Calculate how many complete audio frames, and
    how many remaining samples, are needed to fill
    the duration of 1 video frame.

    Args:
        fps (float): Frames per second of the video
        sample_rate (int): Audio sample rate (Hz)
        nb_samples (int): Samples per audio frame

    Returns:
        (int, int): (full_frames, remaining_samples)
    """
    # Audio samples that last the same as one video
    # frame, which lasts '1.0 / fps' seconds
    samples_per_video_frame = round(sample_rate * (1.0 / fps))

    # Quotient: audio frames that are completely
    # filled. Remainder: samples that are not enough
    # to fill another one
    return divmod(samples_per_video_frame, nb_samples)
|
483
|
+
|
484
|
+
# # Usage below:
|
485
|
+
# fps = 30
|
486
|
+
# sample_rate = 44100
|
487
|
+
# nb_samples = 1024
|
488
|
+
|
489
|
+
# full, rem = audio_frames_and_remainder_per_video_frame(fps, sample_rate, nb_samples)
|
490
|
+
# # This will return (1, 446)
|
491
|
+
|
492
|
+
|
493
|
+
|
yta_video_opengl/writer.py
CHANGED
@@ -55,10 +55,13 @@ class VideoWriter:
|
|
55
55
|
self.video_stream: VideoStream = self.output.add_stream(
|
56
56
|
# TODO: Maybe 'libx264' as default 'codec_name' (?)
|
57
57
|
codec_name = codec_name,
|
58
|
-
rate = fps,
|
58
|
+
rate = int(fps),
|
59
59
|
options = options
|
60
60
|
)
|
61
61
|
|
62
|
+
# We need to force this or it will not work
|
63
|
+
self.video_stream.time_base = Fraction(1, int(fps))
|
64
|
+
|
62
65
|
if size is not None:
|
63
66
|
self.video_stream.width = size[0]
|
64
67
|
self.video_stream.height = size[1]
|
@@ -88,17 +91,27 @@ class VideoWriter:
|
|
88
91
|
|
89
92
|
def set_audio_stream(
    self,
    codec_name: Union[str, None],
    fps: float = 44_100.0
    # TODO: Add more if needed
) -> 'VideoWriter':
    """
    Add a new audio stream to the output, with the
    'codec_name' given and the 'fps' (turned into an
    int) as its rate, and keep it as the audio stream
    of this writer, replacing the previous one set.

    The writer itself is returned.
    """
    # TODO: Check what else we can set. The channels,
    # layout, sample rate and sample format are not
    # configured here
    stream_rate = int(fps)
    new_stream = self.output.add_stream(
        codec_name = codec_name,
        rate = stream_rate
    )
    self.audio_stream: AudioStream = new_stream

    # TODO: Add more if needed

    return self
|
@@ -190,6 +203,7 @@ class VideoWriter:
|
|
190
203
|
# TODO: What strategy should we adopt with
|
191
204
|
# the packets that cannot be handled
|
192
205
|
# properly (?)
|
206
|
+
print('Invalid packet')
|
193
207
|
print(packet)
|
194
208
|
pass
|
195
209
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
yta_video_opengl/__init__.py,sha256=ycAx_XYMVDfkuObSvtW6irQ0Wo-fgxEz3fjIRMe8PpY,205
|
2
|
+
yta_video_opengl/classes.py,sha256=t5-Tfc7ecvHl8JlVBp_FVzZT6ole6Ly5-FeBBH7wcxo,37742
|
3
|
+
yta_video_opengl/complete/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
yta_video_opengl/complete/timeline.py,sha256=mf5QH7pFHvQCfDFpZd4HhyHSinvU-RDtvRhXaKOTNuY,9096
|
5
|
+
yta_video_opengl/complete/track.py,sha256=vDGjkMxYY9y_KHFZb-kvN0K1Yt3_foiR_eTB3fIJxVY,12923
|
6
|
+
yta_video_opengl/complete/video_on_track.py,sha256=jbRXX6KMwtvshcW1ce2FRAVKlVRHYxyyY-IOzKSO8-I,4364
|
7
|
+
yta_video_opengl/nodes/__init__.py,sha256=TZ-ZO05PZ0_ABq675E22_PngLWOe-_w5s1cLlV3NbWM,3469
|
8
|
+
yta_video_opengl/nodes/audio/__init__.py,sha256=4nKkC70k1UgLcCSPqFWm3cKdaJM0KUmQTwGWv1xFarQ,2926
|
9
|
+
yta_video_opengl/nodes/video/__init__.py,sha256=gSoaoEmjdQmyRwH18mf5z3NAhap3S0RgbeBbfBXi4jc,132
|
10
|
+
yta_video_opengl/nodes/video/opengl.py,sha256=K2pyCJEd9z4gnZqJetKyGPbtHuBzFsx74ZYyzhSqYPo,8510
|
11
|
+
yta_video_opengl/reader/__init__.py,sha256=da4Jqtqi7xjbjgfzwJpXIMkILz0SKLedt8KDFqmy0Is,20848
|
12
|
+
yta_video_opengl/reader/cache.py,sha256=KOFU3BtymZeel3FIvgxrWm9siUlakfNZdJzQUbFxavg,16657
|
13
|
+
yta_video_opengl/tests.py,sha256=p2Pq4o2H0DMZkV7HNNNAlebSjrDMHKTKk0d_weiiPHQ,26221
|
14
|
+
yta_video_opengl/utils.py,sha256=znlkvL5xjQeeN37cqFHNILBgop4W1PQTfFFudyt8-60,14614
|
15
|
+
yta_video_opengl/video.py,sha256=3n7jgZab7PUSOpODoaH4iNg0sy7NMRo_OaJ4Zj8u0NM,5855
|
16
|
+
yta_video_opengl/writer.py,sha256=wcDVL6Av-16kgx1X_LCAgKboa1eGnKvXaKuGPOsky-s,8880
|
17
|
+
yta_video_opengl-0.0.12.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
18
|
+
yta_video_opengl-0.0.12.dist-info/METADATA,sha256=Q-stok3ZF1NXMjfNxzSA8GBjPs6-yb4BnrPg1cxjTds,671
|
19
|
+
yta_video_opengl-0.0.12.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
20
|
+
yta_video_opengl-0.0.12.dist-info/RECORD,,
|