yta-video-opengl 0.0.7__tar.gz → 0.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/PKG-INFO +1 -1
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/pyproject.toml +1 -1
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/reader/__init__.py +151 -8
- yta_video_opengl-0.0.9/src/yta_video_opengl/reader/cache.py +233 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/tests.py +5 -3
- yta_video_opengl-0.0.9/src/yta_video_opengl/utils.py +343 -0
- yta_video_opengl-0.0.9/src/yta_video_opengl/video.py +170 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/writer.py +14 -0
- yta_video_opengl-0.0.7/src/yta_video_opengl/reader/cache.py +0 -155
- yta_video_opengl-0.0.7/src/yta_video_opengl/utils.py +0 -100
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/LICENSE +0 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/README.md +0 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/__init__.py +0 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/classes.py +0 -0

{yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/reader/__init__.py

```diff
@@ -3,6 +3,7 @@ A video reader using the PyAv (av) library
 that, using ffmpeg, detects the video.
 """
 from yta_video_opengl.reader.cache import VideoFrameCache
+from yta_video_opengl.utils import iterate_stream_frames_demuxing
 from yta_validation import PythonValidator
 from av.video.frame import VideoFrame
 from av.audio.frame import AudioFrame
@@ -285,7 +286,7 @@ class VideoReader:
         The fps of the audio.
         """
         # TODO: What if no audio (?)
-        return self.audio_stream.
+        return self.audio_stream.rate
 
     @property
     def time_base(
@@ -303,8 +304,38 @@ class VideoReader:
         """
         The time base of the audio.
         """
+        # TODO: What if no audio (?)
         return self.audio_stream.time_base
 
+    @property
+    def duration(
+        self
+    ) -> Union[float, None]:
+        """
+        The duration of the video.
+        """
+        return (
+            float(self.video_stream.duration * self.video_stream.time_base)
+            if self.video_stream.duration else
+            # TODO: What to do in this case (?)
+            None
+        )
+
+    @property
+    def audio_duration(
+        self
+    ) -> Union[float, None]:
+        """
+        The duration of the audio.
+        """
+        # TODO: What if no audio (?)
+        return (
+            float(self.audio_stream.duration * self.audio_stream.time_base)
+            if self.audio_stream.duration else
+            # TODO: What to do in this case (?)
+            None
+        )
+
     @property
     def size(
         self
@@ -367,9 +398,14 @@ class VideoReader:
         """
         The stream that includes the audio.
         """
-        self.
+        self.video_cache: VideoFrameCache = None
         """
-        The frame cache system to optimize
+        The video frame cache system to optimize
+        the way we access to the frames.
+        """
+        self.audio_cache: VideoFrameCache = None
+        """
+        The audio frame cache system to optimize
         the way we access to the frames.
         """
 
@@ -403,7 +439,28 @@ class VideoReader:
         self.video_stream.thread_type = 'AUTO'
         self.audio_stream = self.container.streams.audio[0]
         self.audio_stream.thread_type = 'AUTO'
-        self.
+        self.video_cache = VideoFrameCache(self.container, self.video_stream)
+        self.audio_cache = VideoFrameCache(self.container, self.audio_stream)
+
+    def seek(
+        self,
+        pts,
+        stream = None
+    ) -> 'VideoReader':
+        """
+        Call the container '.seek()' method with
+        the given 'pts' packet time stamp.
+        """
+        stream = (
+            self.video_stream
+            if stream is None else
+            stream
+        )
+
+        # TODO: Is 'offset' actually a 'pts' (?)
+        self.container.seek(pts, stream = stream)
+
+        return self
 
     def iterate(
         self
@@ -454,7 +511,58 @@ class VideoReader:
                 yield VideoReaderFrame(frame)
             else:
                 # Return the packet as it is
-                yield VideoReaderPacket(packet)
+                yield VideoReaderPacket(packet)
+
+    # These methods below are using the demux
+    def iterate_video_frames(
+        self,
+        start_pts: int = 0,
+        end_pts: Union[int, None] = None
+    ):
+        """
+        Iterate over the video stream packets and
+        decode only the ones in the expected range,
+        so only those frames are decoded (which is
+        an expensive process).
+
+        This method returns a tuple of 3 elements:
+        - `frame` as a `VideoFrame` instance
+        - `t` as the frame time moment
+        - `index` as the frame index
+        """
+        for frame in iterate_stream_frames_demuxing(
+            container = self.container,
+            video_stream = self.video_stream,
+            audio_stream = None,
+            start_pts = start_pts,
+            end_pts = end_pts
+        ):
+            yield frame
+
+    def iterate_audio_frames(
+        self,
+        start_pts: int = 0,
+        end_pts: Union[int, None] = None
+    ):
+        """
+        Iterate over the audio stream packets and
+        decode only the ones in the expected range,
+        so only those frames are decoded (which is
+        an expensive process).
+
+        This method returns a tuple of 3 elements:
+        - `frame` as a `AudioFrame` instance
+        - `t` as the frame time moment
+        - `index` as the frame index
+        """
+        for frame in iterate_stream_frames_demuxing(
+            container = self.container,
+            video_stream = None,
+            audio_stream = self.audio_stream,
+            start_pts = start_pts,
+            end_pts = end_pts
+        ):
+            yield frame
 
     # TODO: Will we use this (?)
     def get_frame(
@@ -462,10 +570,45 @@ class VideoReader:
         index: int
     ) -> 'VideoFrame':
         """
-        Get the frame with the given 'index',
-        the cache system.
+        Get the video frame with the given 'index',
+        using the video cache system.
+        """
+        return self.video_cache.get_frame(index)
+
+    # TODO: Will we use this (?)
+    def get_audio_frame(
+        self,
+        index: int
+    ) -> 'VideoFrame':
+        """
+        Get the audio frame with the given 'index',
+        using the audio cache system.
+        """
+        return self.video_cache.get_frame(index)
+
+    def get_frames(
+        self,
+        start: float = 0.0,
+        end: Union[float, None] = None
+    ):
+        """
+        Iterator to get the video frames in between
+        the provided 'start' and 'end' time moments.
+        """
+        for frame in self.video_cache.get_frames(start, end):
+            yield frame
+
+    def get_audio_frames(
+        self,
+        start: float = 0.0,
+        end: Union[float, None] = None
+    ):
+        """
+        Iterator to get the audio frames in between
+        the provided 'start' and 'end' time moments.
         """
-
+        for frame in self.audio_cache.get_frames(start, end):
+            yield frame
 
     def close(
         self
```
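
Taken together, the 0.0.9 reader gains two access paths: cache-backed lookups (`get_frame`/`get_frames`, served by the `VideoFrameCache` instances built in the constructor) and demux-based iteration (`iterate_video_frames`/`iterate_audio_frames`). A minimal usage sketch, assuming a local file; the path and time range are placeholders, not package fixtures:

```python
# Hedged sketch of driving the new VideoReader API, based only on the
# signatures in the diff above; 'video.mp4' is a placeholder input.
from yta_video_opengl.reader import VideoReader

reader = VideoReader('video.mp4')

# Cache-backed path: frames come from the VideoFrameCache instances.
for frame, t, index in reader.get_frames(start = 0.0, end = 1.0):
    print(f'video frame {index} at t = {t}')

# Demux path: packets are demuxed and only in-range frames are decoded
# (end_pts defaults to None, meaning "until the stream ends").
for frame, t, index in reader.iterate_video_frames(start_pts = 0):
    print(f'video frame {index} at t = {t}')

reader.close()
```

Both paths yield the same `(frame, t, index)` tuples, so callers can switch between them without reshaping their loop bodies.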

yta_video_opengl-0.0.9/src/yta_video_opengl/reader/cache.py (new file)

```diff
@@ -0,0 +1,233 @@
+"""
+The pyav container stores the information based
+on the packets timestamps (called 'pts'). Some
+of the packets are considered key_frames because
+they include those key frames.
+
+Also, this library uses those key frames to start
+decodifying from there to the next one, obtaining
+all the frames in between able to be read and
+modified.
+
+This cache system will look for the range of
+frames that belong to the key frame related to the
+frame we are requesting in the moment, keeping in
+memory all those frames to be handled fast. It
+will remove the old frames if needed to use only
+the 'size' we set when creating it.
+"""
+from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index
+from av.container import InputContainer
+from av.video.stream import VideoStream
+from av.audio.stream import AudioStream
+from av.video.frame import VideoFrame
+from av.audio.frame import AudioFrame
+from yta_validation.parameter import ParameterValidator
+from fractions import Fraction
+from collections import OrderedDict
+from typing import Union
+
+
+class VideoFrameCache:
+    """
+    Class to manage the frames cache of a video
+    within a video reader instance.
+    """
+
+    @property
+    def fps(
+        self
+    ) -> float:
+        """
+        The frames per second as a float.
+        """
+        return (
+            float(self.stream.average_rate)
+            if self.stream.type == 'video' else
+            float(self.stream.rate)
+        )
+
+    @property
+    def time_base(
+        self
+    ) -> Union[Fraction, None]:
+        """
+        The time base of the stream.
+        """
+        return self.stream.time_base
+
+    def __init__(
+        self,
+        container: InputContainer,
+        stream: Union[VideoStream, AudioStream],
+        size: int = 50
+    ):
+        ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
+        ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
+        ParameterValidator.validate_mandatory_positive_int('size', size)
+
+        self.container: InputContainer = container
+        """
+        The pyav container.
+        """
+        self.stream: Union[VideoStream, AudioStream] = stream
+        """
+        The pyav stream.
+        """
+        self.cache: OrderedDict = OrderedDict()
+        """
+        The cache ordered dictionary.
+        """
+        self.size = size
+        """
+        The size (in number of frames) of the cache.
+        """
+        self.key_frames_pts: list[int] = []
+        """
+        The list that contains the timestamps of the
+        key frame packets, ordered from begining to
+        end.
+        """
+
+        self._prepare()
+
+    def _prepare(
+        self
+    ):
+        # Index key frames
+        for packet in self.container.demux(self.stream):
+            if packet.is_keyframe:
+                self.key_frames_pts.append(packet.pts)
+
+        self.container.seek(0)
+
+    def _get_nearest_keyframe_fps(
+        self,
+        pts: int
+    ):
+        """
+        Get the fps of the keyframe that is the
+        nearest to the provided 'pts'. Useful to
+        seek and start decoding frames from that
+        keyframe.
+        """
+        return max([
+            key_frame_pts
+            for key_frame_pts in self.key_frames_pts
+            if key_frame_pts <= pts
+        ])
+
+    def _get_frame_by_pts(
+        self,
+        pts: int
+    ):
+        """
+        Get the frame that has the provided 'pts'.
+
+        This method will start decoding frames from the
+        most near key frame (the one with the nearer
+        pts) until the one requested is found. All those
+        frames will be stored in cache.
+
+        This method must be called when the frame
+        requested is not stored in the caché.
+        """
+        # Look for the most near key frame
+        key_frame_pts = self._get_nearest_keyframe_fps(pts)
+
+        # Go to the key frame that includes it
+        self.container.seek(key_frame_pts, stream = self.stream)
+
+        decoded = None
+        for frame in self.container.decode(self.stream):
+            # TODO: Could 'frame' be None (?)
+            if frame.pts is None:
+                continue
+
+            # Store in cache if needed
+            if frame.pts not in self.cache:
+                # TODO: The 'format' must be dynamic
+                self.cache[frame.pts] = frame.to_ndarray(format = "rgb24")
+
+            # Clean cache if full
+            if len(self.cache) > self.size:
+                self.cache.popitem(last = False)
+
+            if frame.pts >= pts:
+                decoded = self.cache[frame.pts]
+                break
+
+        return decoded
+
+    def get_frame(
+        self,
+        index: int
+    ) -> Union[VideoFrame, AudioFrame]:
+        """
+        Get the frame with the given 'index' from
+        the cache.
+        """
+        # TODO: Maybe we can accept 't' and 'pts' also
+        target_pts = int(index / self.fps / self.time_base)
+
+        return (
+            self.cache[target_pts]
+            if target_pts in self.cache else
+            self._get_frame_by_pts(target_pts)
+        )
+
+    def get_frames(
+        self,
+        start: float = 0,
+        end: Union[float, None] = None
+    ):
+        """
+        Get all the frames in the range between
+        the provided 'start' and 'end' time in
+        seconds.
+        """
+        # TODO: I create this method by default using
+        # the cache. Think about how to implement it
+        # and apply it here, please.
+        # Go to the nearest key frame
+        start = t_to_pts(start, self.time_base)
+        end = (
+            t_to_pts(end, self.time_base)
+            if end is not None else
+            None
+        )
+        key_frame_pts = self._get_nearest_keyframe_fps(start)
+
+        # Go to the nearest key frame to start decoding
+        self.container.seek(key_frame_pts, stream = self.stream)
+
+        for packet in self.container.demux(self.stream):
+            for frame in packet.decode():
+                if frame.pts is None:
+                    continue
+
+                if frame.pts < start:
+                    continue
+
+                if (
+                    end is not None and
+                    frame.pts > end
+                ):
+                    return
+
+                # TODO: Maybe send a @dataclass instead (?)
+                yield (
+                    frame,
+                    pts_to_t(frame.pts, self.time_base),
+                    pts_to_index(frame.pts, self.time_base, self.fps)
+                )
+
+    def clear(
+        self
+    ) -> 'VideoFrameCache':
+        """
+        Clear the cache by removing all the items.
+        """
+        self.cache.clear()
+
+        return self
```
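
The index-to-pts arithmetic in `get_frame` is the pivot of the cache: a frame index is turned into seconds via the fps, then into stream ticks via the time base. A standalone worked example of that mapping and its inverse (the 25 fps and 1/12800 time base are illustrative values, not taken from any real stream):

```python
# Worked example (not package code) of the index -> pts mapping that
# VideoFrameCache.get_frame performs, plus the pts -> t -> index path
# that mirrors pts_to_t / pts_to_index from utils.py.
from fractions import Fraction

fps = 25.0                      # hypothetical stream frame rate
time_base = Fraction(1, 12800)  # hypothetical stream time base

index = 50                                  # i.e. t = 2.0 seconds
target_pts = int(index / fps / time_base)   # same formula as get_frame
print(target_pts)                           # 25600

t = target_pts * time_base                  # pts_to_t: Fraction(2, 1)
print(int(round(float(t) * fps)))           # pts_to_index: back to 50
```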

{yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/tests.py

```diff
@@ -581,6 +581,11 @@ def video_modified_stored():
 
     from yta_video_opengl.classes import WavingFrame, BreathingFrame, HandheldFrame, OrbitingFrame, RotatingInCenterFrame, StrangeTvFrame, GlitchRgbFrame, WavingNode
     from yta_video_opengl.utils import texture_to_frame, frame_to_texture
+    from yta_video_opengl.video import Video
+
+    Video(VIDEO_PATH, 0.25, 0.75).save_as(OUTPUT_PATH)
+
+    return
 
     video = VideoReader(VIDEO_PATH)
     video_writer = (
@@ -636,12 +641,9 @@ def video_modified_stored():
         elif is_video_frame:
             with Timer(is_silent_as_context = True) as timer:
                 t = T.video_frame_index_to_video_frame_time(frame_index, float(video.fps))
-
                 # This is another way of getting 't'
                 #t = float(frame_or_packet.pts * video.time_base)
 
-                # TODO: Pass the frame as a texture
-
                 video_writer.mux_video_frame(
                     frame = texture_to_frame(
                         texture = node.process(
```

yta_video_opengl-0.0.9/src/yta_video_opengl/utils.py (new file)

```diff
@@ -0,0 +1,343 @@
+from yta_validation import PythonValidator
+from av.container import InputContainer
+from av.video.stream import VideoStream
+from av.audio.stream import AudioStream
+from av.video.frame import VideoFrame
+from typing import Union
+
+import av
+import numpy as np
+import moderngl
+
+
+def frame_to_texture(
+    frame: Union['VideoFrame', 'np.ndarray'],
+    context: moderngl.Context,
+    numpy_format: str = 'rgb24'
+):
+    """
+    Transform the given 'frame' to an opengl
+    texture. The frame can be a VideoFrame
+    instance (from pyav library) or a numpy
+    array.
+    """
+    # To numpy RGB inverted for opengl
+    frame: np.ndarray = (
+        np.flipud(frame.to_ndarray(format = numpy_format))
+        if PythonValidator.is_instance_of(frame, 'VideoFrame') else
+        np.flipud(frame)
+    )
+
+    return context.texture(
+        size = (frame.shape[1], frame.shape[0]),
+        components = frame.shape[2],
+        data = frame.tobytes()
+    )
+
+# TODO: I should make different methods to
+# obtain a VideoFrame or a numpy array frame
+def texture_to_frame(
+    texture: moderngl.Texture
+) -> 'VideoFrame':
+    """
+    Transform an opengl texture into a pyav
+    VideoFrame instance.
+    """
+    # RGBA8
+    data = texture.read(alignment = 1)
+    frame = np.frombuffer(data, dtype = np.uint8).reshape((texture.size[1], texture.size[0], 4))
+    # Opengl gives it with the y inverted
+    frame = np.flipud(frame)
+    # TODO: This can be returned as a numpy frame
+
+    # This is if we need an 'av' VideoFrame (to
+    # export through the demuxer, for example)
+    frame = av.VideoFrame.from_ndarray(frame, format = 'rgba')
+    # TODO: Make this customizable
+    frame = frame.reformat(format = 'yuv420p')
+
+    return frame
+
+def get_fullscreen_quad_vao(
+    context: moderngl.Context,
+    program: moderngl.Program
+) -> moderngl.VertexArray:
+    """
+    Get the vertex array object of a quad, by
+    using the vertices, the indexes, the vbo,
+    the ibo and the vao content.
+    """
+    # Quad vertices in NDC (-1..1) with texture
+    # coords (0..1)
+    """
+    The UV coordinates to build the quad we
+    will use to represent the frame by
+    applying it as a texture.
+    """
+    vertices = np.array([
+        # pos.x, pos.y, tex.u, tex.v
+        -1.0, -1.0, 0.0, 0.0,  # vertex 0 - bottom left
+         1.0, -1.0, 1.0, 0.0,  # vertex 1 - bottom right
+        -1.0,  1.0, 0.0, 1.0,  # vertex 2 - top left
+         1.0,  1.0, 1.0, 1.0,  # vertex 3 - top right
+    ], dtype = 'f4')
+
+    """
+    The indexes of the vertices (see 'vertices'
+    property) to build the 2 opengl triangles
+    that will represent the quad we need for
+    the frame.
+    """
+    indices = np.array([
+        0, 1, 2,
+        2, 1, 3
+    ], dtype = 'i4')
+
+    vbo = context.buffer(vertices.tobytes())
+    ibo = context.buffer(indices.tobytes())
+
+    vao_content = [
+        # 2 floats position, 2 floats texcoords
+        (vbo, '2f 2f', 'in_vert', 'in_texcoord'),
+    ]
+
+    return context.vertex_array(program, vao_content, ibo)
+
+def iterate_streams_packets(
+    container: 'InputContainer',
+    video_stream: 'VideoStream',
+    audio_stream: 'AudioStream',
+    video_start_pts: int = 0,
+    video_end_pts: Union[int, None] = None,
+    audio_start_pts: int = 0,
+    audio_end_pts: Union[int, None] = None
+):
+    """
+    Iterate over the provided 'stream' packets
+    and yield the ones in the expected range.
+    This is nice when trying to copy a stream
+    without modifications.
+    """
+    # 'video_start_pts' and 'audio_start_pts' must
+    # be 0 or a positive tps
+
+    if (
+        video_stream is None and
+        audio_stream is None
+    ):
+        raise Exception('No streams provided.')
+
+    # We only need to seek on video
+    if video_stream is not None:
+        container.seek(video_start_pts, stream = video_stream)
+    if audio_stream is not None:
+        container.seek(audio_start_pts, stream = audio_stream)
+
+    stream = [
+        stream
+        for stream in (video_stream, audio_stream)
+        if stream
+    ]
+
+    """
+    Apparently, if we ignore some packets based
+    on the 'pts', we can be ignoring information
+    that is needed for the next frames to be
+    decoded, so we need to decode them all...
+
+    If we can find some strategy to seek not for
+    the inmediate but some before and read from
+    that one to avoid reading all of the packets
+    we could save some time, but at what cost?
+    We cannot skip any crucial frame so we need
+    to know how many we can skip, and that sounds
+    a bit difficult depending on the codec.
+    """
+    stream_finished: str = ''
+    for packet in container.demux(stream):
+        if packet.pts is None:
+            continue
+
+        # TODO: We cannot skip like this, we need to
+        # look for the nearest keyframe to be able
+        # to decode the frames later. Take a look at
+        # the VideoFrameCache class and use it.
+
+        # start_pts = (
+        #     video_start_pts
+        #     if packet.stream.type == 'video' else
+        #     audio_start_pts
+        # )
+        # end_pts = (
+        #     video_end_pts
+        #     if packet.stream.type == 'video' else
+        #     audio_end_pts
+        # )
+
+        # if packet.pts < start_pts:
+        #     continue
+
+        # if (
+        #     end_pts is not None and
+        #     packet.pts > end_pts
+        # ):
+        #     if (
+        #         stream_finished != '' and
+        #         (
+        #             # Finish if only one stream
+        #             stream_finished != packet.stream.type or
+        #             video_stream is None or
+        #             audio_stream is None
+        #         )
+        #     ):
+        #         # We have yielded all the frames in the
+        #         # expected range, no more needed
+        #         return
+
+        #     stream_finished = packet.stream.type
+        #     continue
+
+        yield packet
+
+def iterate_stream_frames_demuxing(
+    container: 'InputContainer',
+    video_stream: 'VideoStream',
+    audio_stream: 'AudioStream',
+    video_start_pts : int = 0,
+    video_end_pts: Union[int, None] = None,
+    audio_start_pts: int = 0,
+    audio_end_pts: Union[int, None] = None
+):
+    """
+    Iterate over the provided 'stream' packets
+    and decode only the ones in the expected
+    range, so only those frames are decoded
+    (which is an expensive process).
+
+    This method returns a tuple of 3 elements:
+    - `frame` as a `VideoFrame` instance
+    - `t` as the frame time moment
+    - `index` as the frame index
+
+    You can easy transform the frame received
+    to a numpy array by using this:
+    - `frame.to_ndarray(format = format)`
+    """
+    # 'start_pts' must be 0 or a positive tps
+    # 'end_pts' must be None or a positive tps
+
+    # We cannot skip packets or we will lose
+    # information needed to build the video
+    for packet in iterate_streams_packets(
+        container = container,
+        video_stream = video_stream,
+        audio_stream = audio_stream,
+        video_start_pts = video_start_pts,
+        video_end_pts = video_end_pts,
+        audio_start_pts = audio_start_pts,
+        audio_end_pts = audio_end_pts
+    ):
+        # Only valid and in range packets here
+        # Here only the accepted ones
+        stream_finished: str = ''
+        for frame in packet.decode():
+            if frame.pts is None:
+                continue
+
+            time_base = (
+                video_stream.time_base
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.time_base
+            )
+
+            average_rate = (
+                video_stream.average_rate
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.rate
+            )
+
+            start_pts = (
+                video_start_pts
+                if packet.stream.type == 'video' else
+                audio_start_pts
+            )
+
+            end_pts = (
+                video_end_pts
+                if packet.stream.type == 'video' else
+                audio_end_pts
+            )
+
+            if frame.pts < start_pts:
+                continue
+
+            if (
+                end_pts is not None and
+                frame.pts > end_pts
+            ):
+                if (
+                    stream_finished != '' and
+                    (
+                        # Finish if only one stream
+                        stream_finished != packet.stream.type or
+                        video_stream is None or
+                        audio_stream is None
+                    )
+                ):
+                    # We have yielded all the frames in the
+                    # expected range, no more needed
+                    return
+
+                stream_finished = packet.stream.type
+                continue
+
+            time_base = (
+                video_stream.time_base
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.time_base
+            )
+
+            average_rate = (
+                video_stream.average_rate
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.rate
+            )
+
+            # TODO: Maybe send a @dataclass instead (?)
+            yield (
+                frame,
+                pts_to_t(frame.pts, time_base),
+                pts_to_index(frame.pts, time_base, average_rate)
+            )
+
+def t_to_pts(
+    t: float,
+    stream_time_base: 'Fraction'
+) -> int:
+    """
+    Transform a 't' time moment (in seconds) to
+    a packet timestamp (pts) understandable by
+    the pyav library.
+    """
+    return int((t + 0.000001) / stream_time_base)
+
+def pts_to_index(
+    pts: int,
+    stream_time_base: 'Fraction',
+    fps: float
+) -> int:
+    """
+    Transform a 'pts' packet timestamp to a
+    frame index.
+    """
+    return int(round(pts_to_t(pts, stream_time_base) * fps))
+
+def pts_to_t(
+    pts: int,
+    stream_time_base: 'Fraction'
+) -> float:
+    """
+    Transform a 'pts' packet timestamp to a 't'
+    time moment.
+    """
+    return pts * stream_time_base
```
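
The two texture helpers are near-inverses: `frame_to_texture` uploads a vertically flipped RGB(A) array, while `texture_to_frame` reads RGBA bytes back, un-flips them, and wraps the result as a yuv420p `av.VideoFrame`. A hedged round-trip sketch, assuming moderngl can create a standalone (headless) context on the machine; the 64x36 buffers are illustrative stand-ins for real frames:

```python
# Round-trip sketch (not package code) exercising the helpers above.
import numpy as np
import moderngl
from yta_video_opengl.utils import frame_to_texture, texture_to_frame

context = moderngl.create_standalone_context()

# numpy path of frame_to_texture: a 3-component RGB frame
rgb = (np.random.rand(36, 64, 3) * 255).astype(np.uint8)
texture = frame_to_texture(rgb, context)
print(texture.size, texture.components)    # (64, 36) 3

# texture_to_frame reads 4 components, so feed it an RGBA texture
rgba = context.texture((64, 36), components = 4)
rgba.write(np.zeros((36, 64, 4), dtype = np.uint8).tobytes())
frame = texture_to_frame(rgba)
print(frame.format.name)                   # yuv420p, per the final reformat
```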

yta_video_opengl-0.0.9/src/yta_video_opengl/video.py (new file)

```diff
@@ -0,0 +1,170 @@
+from yta_video_opengl.reader import VideoReader
+from yta_video_opengl.writer import VideoWriter
+from yta_video_opengl.utils import iterate_stream_frames_demuxing
+from yta_validation import PythonValidator
+from typing import Union
+
+
+# TODO: Where can I obtain this dynamically (?)
+PIXEL_FORMAT = 'yuv420p'
+
+# TODO: Maybe rename to 'Media' (?)
+class Video:
+    """
+    Class to wrap the functionality related to
+    handling and modifying a video.
+    """
+
+    @property
+    def start_pts(
+        self
+    ) -> int:
+        """
+        The start packet time stamp (pts), needed
+        to optimize the packet iteration process.
+        """
+        return int(self.start / self.reader.time_base)
+
+    @property
+    def end_pts(
+        self
+    ) -> Union[int, None]:
+        """
+        The end packet time stamp (pts), needed to
+        optimize the packet iteration process.
+        """
+        return (
+            int(self.end / self.reader.time_base)
+            # TODO: What do we do if no duration (?)
+            if self.duration is not None else
+            None
+        )
+
+    @property
+    def audio_start_pts(
+        self
+    ) -> int:
+        """
+        The start packet time stamp (pts), needed
+        to optimize the packet iteration process.
+        """
+        return int(self.start / self.reader.audio_time_base)
+
+    @property
+    def audio_end_pts(
+        self
+    ) -> Union[int, None]:
+        """
+        The end packet time stamp (pts), needed to
+        optimize the packet iteration process.
+        """
+        return (
+            int(self.end / self.reader.audio_time_base)
+            # TODO: What do we do if no duration (?)
+            if self.duration is not None else
+            None
+        )
+
+    @property
+    def duration(
+        self
+    ):
+        """
+        The duration of the video.
+        """
+        return self.end - self.start
+
+    @property
+    def frames(
+        self
+    ):
+        """
+        Iterator to yield all the frames, one by
+        one, within the range defined by the
+        'start' and 'end' parameters provided when
+        instantiating it.
+
+        This method returns a tuple of 3 elements:
+        - `frame` as a `VideoFrame` instance
+        - `t` as the frame time moment
+        - `index` as the frame index
+        """
+        for frame in self.reader.get_frames(self.start, self.end):
+            yield frame
+
+        for frame in self.reader.get_audio_frames(self.start, self.end):
+            yield frame
+
+        # for frame in iterate_stream_frames_demuxing(
+        #     container = self.reader.container,
+        #     video_stream = self.reader.video_stream,
+        #     audio_stream = self.reader.audio_stream,
+        #     video_start_pts = self.start_pts,
+        #     video_end_pts = self.end_pts,
+        #     audio_start_pts = self.audio_start_pts,
+        #     audio_end_pts = self.audio_end_pts
+        # ):
+        #     yield frame
+
+    def __init__(
+        self,
+        filename: str,
+        start: float = 0.0,
+        end: Union[float, None] = None
+    ):
+        self.filename: str = filename
+        """
+        The filename of the original video.
+        """
+        # TODO: Detect the 'pixel_format' from the
+        # extension (?)
+        self.reader: VideoReader = VideoReader(self.filename)
+        """
+        The pyav video reader.
+        """
+        self.start: float = start
+        """
+        The time moment 't' in which the video
+        should start.
+        """
+        self.end: Union[float, None] = (
+            # TODO: Is this 'end' ok (?)
+            self.reader.duration
+            if end is None else
+            end
+        )
+        """
+        The time moment 't' in which the video
+        should end.
+        """
+
+    def save_as(
+        self,
+        filename: str
+    ) -> 'Video':
+        writer = VideoWriter(filename)
+        #writer.set_video_stream(self.reader.video_stream.codec.name, self.reader.fps, self.reader.size, PIXEL_FORMAT)
+        writer.set_video_stream_from_template(self.reader.video_stream)
+        writer.set_audio_stream_from_template(self.reader.audio_stream)
+
+        # TODO: I need to process the audio also, so
+        # build a method that do the same but for
+        # both streams at the same time
+        for frame, t, index in self.frames:
+            if PythonValidator.is_instance_of(frame, 'VideoFrame'):
+                print(f'Saving video frame {str(index)}, with t = {str(t)}')
+                writer.mux_video_frame(
+                    frame = frame
+                )
+            else:
+                print(f'Saving audio frame {str(index)} ({str(round(float(t * self.reader.fps), 2))}), with t = {str(t)}')
+                writer.mux_audio_frame(
+                    frame = frame
+                )
+
+        writer.mux_audio_frame(None)
+        writer.mux_video_frame(None)
+
+        # TODO: Maybe move this to the '__del__' (?)
+        writer.output.close()
+        self.reader.container.close()
```
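
Driving the class end to end matches the one-liner this release adds to `tests.py`: decode a sub-second window of a clip and remux both streams into a new file. The paths below are placeholders:

```python
# Same call shape as the tests.py addition in this release; 'input.mp4'
# and 'output.mp4' stand in for the VIDEO_PATH/OUTPUT_PATH constants.
from yta_video_opengl.video import Video

Video('input.mp4', 0.25, 0.75).save_as('output.mp4')
```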

{yta_video_opengl-0.0.7 → yta_video_opengl-0.0.9}/src/yta_video_opengl/writer.py

```diff
@@ -114,9 +114,23 @@ class VideoWriter:
         You can pass the audio stream as it was
         obtained from the reader.
         """
+        self.audio_stream: AudioStream = self.output.add_stream(
+            codec_name = template.codec_context.name,
+            rate = template.codec_context.rate
+        )
+        self.audio_stream.codec_context.format = template.codec_context.format
+        self.audio_stream.codec_context.layout = template.codec_context.layout
+        self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
+
+        return self
+
+        # This below is not working
         self.audio_stream: AudioStream = self.output.add_stream_from_template(
             template
         )
+        # TODO: Is this actually needed (?)
+        # Force this 'rate'
+        self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
 
         return self
 
```
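
The manual setup above sidesteps `add_stream_from_template`, which the in-code comment flags as not working for audio here (the old path is now dead code after the early `return self`). Outside the class, the same recipe looks like this hedged PyAV sketch; `'in.mp4'`/`'out.mp4'` are placeholders, and forcing the time base to 1/sample_rate makes one stream tick correspond to one audio sample:

```python
# Standalone sketch of the manual audio-stream setup used by the writer:
# copy codec name, rate, sample format and channel layout by hand.
from fractions import Fraction
import av

source = av.open('in.mp4')
template = source.streams.audio[0]

output = av.open('out.mp4', 'w')
audio_stream = output.add_stream(
    codec_name = template.codec_context.name,
    rate = template.codec_context.rate
)
audio_stream.codec_context.format = template.codec_context.format
audio_stream.codec_context.layout = template.codec_context.layout
# One tick per audio sample, as in set_audio_stream_from_template
audio_stream.time_base = Fraction(1, template.codec_context.rate)
```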
|

yta_video_opengl-0.0.7/src/yta_video_opengl/reader/cache.py (file removed in 0.0.9)

```diff
@@ -1,155 +0,0 @@
-"""
-The pyav container stores the information based
-on the packets timestamps (called 'pts'). Some
-of the packets are considered key_frames because
-they include those key frames.
-
-Also, this library uses those key frames to start
-decodifying from there to the next one, obtaining
-all the frames in between able to be read and
-modified.
-
-This cache system will look for the range of
-frames that belong to the key frame related to the
-frame we are requesting in the moment, keeping in
-memory all those frames to be handled fast. It
-will remove the old frames if needed to use only
-the 'size' we set when creating it.
-"""
-from collections import OrderedDict
-
-
-class VideoFrameCache:
-    """
-    Class to manage the frames cache of a video
-    within a video reader instance.
-    """
-
-    @property
-    def container(
-        self
-    ) -> 'InputContainer':
-        """
-        Shortcut to the video reader instance container.
-        """
-        return self.reader_instance.container
-
-    @property
-    def stream(
-        self
-    ) -> 'VideoStream':
-        """
-        Shortcut to the video reader instance video
-        stream.
-        """
-        return self.reader_instance.video_stream
-
-    def __init__(
-        self,
-        reader: 'VideoReader',
-        size: int = 50
-    ):
-        self.reader_instance: 'VideoReader' = reader
-        """
-        The video reader instance this cache belongs
-        to.
-        """
-        self.cache: OrderedDict = OrderedDict()
-        """
-        The cache ordered dictionary.
-        """
-        self.size = size
-        """
-        The size (in number of frames) of the cache.
-        """
-        self.key_frames_pts: list[int] = []
-        """
-        The list that contains the timestamps of the
-        key frame packets, ordered from begining to
-        end.
-        """
-
-        # Index key frames
-        for packet in self.container.demux(self.stream):
-            if packet.is_keyframe:
-                self.key_frames_pts.append(packet.pts)
-
-        self.container.seek(0)
-        # TODO: Maybe this is better (?)
-        #self.reader_instance.reset()
-
-    def _get_frame_by_pts(
-        self,
-        target_pts
-    ):
-        """
-        Get the frame that has the provided 'target_pts'.
-
-        This method will start decoding frames from the
-        most near key frame (the one with the nearer
-        pts) until the one requested is found. All those
-        frames will be stored in cache.
-
-        This method must be called when the frame
-        requested is not stored in the caché.
-        """
-        # Look for the most near key frame
-        key_frame_pts = max([
-            key_frame_pts
-            for key_frame_pts in self.key_frames_pts
-            if key_frame_pts <= target_pts
-        ])
-
-        # Go to the key frame that includes it
-        self.container.seek(key_frame_pts, stream = self.stream)
-
-        decoded = None
-        for frame in self.container.decode(self.stream):
-            # TODO: Could 'frame' be None (?)
-            pts = frame.pts
-            if pts is None:
-                continue
-
-            # Store in cache if needed
-            if pts not in self.cache:
-                # TODO: The 'format' must be dynamic
-                self.cache[pts] = frame.to_ndarray(format = "rgb24")
-
-            # Clean cache if full
-            if len(self.cache) > self.size:
-                self.cache.popitem(last = False)
-
-            if pts >= target_pts:
-                decoded = self.cache[pts]
-                break
-
-        return decoded
-
-    def get_frame(
-        self,
-        index: int
-    ) -> 'VideoFrame':
-        """
-        Get the frame with the given 'index' from
-        the cache.
-        """
-        # Convert frame_number to PTS (internal timestamps)
-        time_base = self.stream.time_base
-        fps = float(self.stream.average_rate)
-        target_pts = int(index / fps / time_base)
-
-        return (
-            self.cache[target_pts]
-            if target_pts in self.cache else
-            self._get_frame_by_pts(target_pts)
-        )
-
-    def clear(
-        self
-    ) -> 'VideoFrameCache':
-        """
-        Clear the cache by removing all the items.
-        """
-        self.cache.clear()
-
-        return self
```

yta_video_opengl-0.0.7/src/yta_video_opengl/utils.py (file removed in 0.0.9)

```diff
@@ -1,100 +0,0 @@
-from yta_validation import PythonValidator
-from typing import Union
-
-import av
-import numpy as np
-import moderngl
-
-
-def frame_to_texture(
-    frame: Union['VideoFrame', 'np.ndarray'],
-    context: moderngl.Context,
-    numpy_format: str = 'rgb24'
-):
-    """
-    Transform the given 'frame' to an opengl
-    texture. The frame can be a VideoFrame
-    instance (from pyav library) or a numpy
-    array.
-    """
-    # To numpy RGB inverted for opengl
-    frame: np.ndarray = (
-        np.flipud(frame.to_ndarray(format = numpy_format))
-        if PythonValidator.is_instance_of(frame, 'VideoFrame') else
-        np.flipud(frame)
-    )
-
-    return context.texture(
-        size = (frame.shape[1], frame.shape[0]),
-        components = frame.shape[2],
-        data = frame.tobytes()
-    )
-
-# TODO: I should make different methods to
-# obtain a VideoFrame or a numpy array frame
-def texture_to_frame(
-    texture: moderngl.Texture
-) -> 'VideoFrame':
-    """
-    Transform an opengl texture into a pyav
-    VideoFrame instance.
-    """
-    # RGBA8
-    data = texture.read(alignment = 1)
-    frame = np.frombuffer(data, dtype = np.uint8).reshape((texture.size[1], texture.size[0], 4))
-    # Opengl gives it with the y inverted
-    frame = np.flipud(frame)
-    # TODO: This can be returned as a numpy frame
-
-    # This is if we need an 'av' VideoFrame (to
-    # export through the demuxer, for example)
-    frame = av.VideoFrame.from_ndarray(frame, format = 'rgba')
-    # TODO: Make this customizable
-    frame = frame.reformat(format = 'yuv420p')
-
-    return frame
-
-def get_fullscreen_quad_vao(
-    context: moderngl.Context,
-    program: moderngl.Program
-) -> moderngl.VertexArray:
-    """
-    Get the vertex array object of a quad, by
-    using the vertices, the indexes, the vbo,
-    the ibo and the vao content.
-    """
-    # Quad vertices in NDC (-1..1) with texture
-    # coords (0..1)
-    """
-    The UV coordinates to build the quad we
-    will use to represent the frame by
-    applying it as a texture.
-    """
-    vertices = np.array([
-        # pos.x, pos.y, tex.u, tex.v
-        -1.0, -1.0, 0.0, 0.0,  # vertex 0 - bottom left
-         1.0, -1.0, 1.0, 0.0,  # vertex 1 - bottom right
-        -1.0,  1.0, 0.0, 1.0,  # vertex 2 - top left
-         1.0,  1.0, 1.0, 1.0,  # vertex 3 - top right
-    ], dtype = 'f4')
-
-    """
-    The indexes of the vertices (see 'vertices'
-    property) to build the 2 opengl triangles
-    that will represent the quad we need for
-    the frame.
-    """
-    indices = np.array([
-        0, 1, 2,
-        2, 1, 3
-    ], dtype = 'i4')
-
-    vbo = context.buffer(vertices.tobytes())
-    ibo = context.buffer(indices.tobytes())
-
-    vao_content = [
-        # 2 floats position, 2 floats texcoords
-        (vbo, '2f 2f', 'in_vert', 'in_texcoord'),
-    ]
-
-    return context.vertex_array(program, vao_content, ibo)
```

The remaining files are without changes: LICENSE, README.md, src/yta_video_opengl/__init__.py and src/yta_video_opengl/classes.py.