yta-video-opengl 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_video_opengl/classes.py +1275 -0
- yta_video_opengl/{reader.py → reader/__init__.py} +118 -16
- yta_video_opengl/reader/cache.py +155 -0
- yta_video_opengl/tests.py +694 -55
- yta_video_opengl/utils.py +100 -0
- yta_video_opengl/writer.py +40 -2
- {yta_video_opengl-0.0.5.dist-info → yta_video_opengl-0.0.7.dist-info}/METADATA +1 -1
- yta_video_opengl-0.0.7.dist-info/RECORD +11 -0
- yta_video_opengl-0.0.5.dist-info/RECORD +0 -8
- {yta_video_opengl-0.0.5.dist-info → yta_video_opengl-0.0.7.dist-info}/LICENSE +0 -0
- {yta_video_opengl-0.0.5.dist-info → yta_video_opengl-0.0.7.dist-info}/WHEEL +0 -0
@@ -2,6 +2,7 @@
|
|
2
2
|
A video reader using the PyAv (av) library
|
3
3
|
that, using ffmpeg, detects the video.
|
4
4
|
"""
|
5
|
+
from yta_video_opengl.reader.cache import VideoFrameCache
|
5
6
|
from yta_validation import PythonValidator
|
6
7
|
from av.video.frame import VideoFrame
|
7
8
|
from av.audio.frame import AudioFrame
|
@@ -31,7 +32,7 @@ class VideoReaderFrame:
|
|
31
32
|
Flag to indicate if the instance is a video
|
32
33
|
frame.
|
33
34
|
"""
|
34
|
-
return PythonValidator.is_instance_of(self.
|
35
|
+
return PythonValidator.is_instance_of(self.value, VideoFrame)
|
35
36
|
|
36
37
|
@property
|
37
38
|
def is_audio(
|
@@ -41,18 +42,37 @@ class VideoReaderFrame:
|
|
41
42
|
Flag to indicate if the instance is an audio
|
42
43
|
frame.
|
43
44
|
"""
|
44
|
-
return PythonValidator.is_instance_of(self.
|
45
|
+
return PythonValidator.is_instance_of(self.value, AudioFrame)
|
46
|
+
|
47
|
+
@property
def as_numpy(
    self
):
    """
    The frame as a numpy array.

    Video frames are converted using the instance
    'pixel_format'. Audio frames are converted with
    no 'format', as that parameter only applies to
    the video frame conversion.
    """
    if self.is_audio:
        # Audio frames have no pixel format, so asking
        # for one would fail or be ignored.
        return self.value.to_ndarray()

    return self.value.to_ndarray(format = self.pixel_format)
|
45
55
|
|
46
56
|
def __init__(
|
47
57
|
self,
|
48
58
|
# TODO: Add the type, please
|
49
|
-
|
59
|
+
frame: any,
|
60
|
+
t: float = None,
|
61
|
+
pixel_format: str = 'rgb24'
|
50
62
|
):
|
51
|
-
self.
|
63
|
+
self.value: Union[AudioFrame, VideoFrame] = frame
|
52
64
|
"""
|
53
65
|
The frame content, that can be audio or video
|
54
66
|
frame.
|
55
67
|
"""
|
68
|
+
self.t: float = t
|
69
|
+
"""
|
70
|
+
The 't' time moment of the frame.
|
71
|
+
"""
|
72
|
+
self.pixel_format: str = pixel_format
|
73
|
+
"""
|
74
|
+
The pixel format of the frame.
|
75
|
+
"""
|
56
76
|
|
57
77
|
@dataclass
|
58
78
|
class VideoReaderPacket:
|
@@ -70,7 +90,7 @@ class VideoReaderPacket:
|
|
70
90
|
Flag to indicate if the packet includes video
|
71
91
|
frames or not.
|
72
92
|
"""
|
73
|
-
return self.
|
93
|
+
return self.value.stream.type == 'video'
|
74
94
|
|
75
95
|
@property
|
76
96
|
def is_audio(
|
@@ -80,13 +100,13 @@ class VideoReaderPacket:
|
|
80
100
|
Flag to indicate if the packet includes audio
|
81
101
|
frames or not.
|
82
102
|
"""
|
83
|
-
return self.
|
103
|
+
return self.value.stream.type == 'audio'
|
84
104
|
|
85
105
|
def __init__(
|
86
106
|
self,
|
87
|
-
|
107
|
+
packet: Packet
|
88
108
|
):
|
89
|
-
self.
|
109
|
+
self.value: Packet = packet
|
90
110
|
"""
|
91
111
|
The packet, that can include video or audio
|
92
112
|
frames and can be decoded.
|
@@ -99,7 +119,7 @@ class VideoReaderPacket:
|
|
99
119
|
Get the frames but decoded, perfect to make
|
100
120
|
modifications and encode to save them again.
|
101
121
|
"""
|
102
|
-
return self.
|
122
|
+
return self.value.decode()
|
103
123
|
|
104
124
|
|
105
125
|
class VideoReader:
|
@@ -267,6 +287,24 @@ class VideoReader:
|
|
267
287
|
# TODO: What if no audio (?)
|
268
288
|
return self.audio_stream.average_rate
|
269
289
|
|
290
|
+
@property
def time_base(
    self
) -> Fraction:
    """
    The time base of the video stream, which is
    the unit in which its 'pts' are expressed.
    """
    video_time_base = self.video_stream.time_base

    return video_time_base
|
298
|
+
|
299
|
+
@property
def audio_time_base(
    self
) -> Fraction:
    """
    The time base of the audio stream, which is
    the unit in which its 'pts' are expressed.
    """
    audio_time_base = self.audio_stream.time_base

    return audio_time_base
|
307
|
+
|
270
308
|
@property
|
271
309
|
def size(
|
272
310
|
self
|
@@ -302,29 +340,70 @@ class VideoReader:
|
|
302
340
|
|
303
341
|
def __init__(
    self,
    filename: str,
    # Use 'rgba' if alpha channel
    pixel_format: str = 'rgb24'
):
    """
    Store the source 'filename' and the requested
    'pixel_format', declare the container, streams
    and cache as empty and load them through
    'reset'.
    """
    self.filename: str = filename
    """
    The filename of the video file we are reading.
    """
    self.pixel_format: str = pixel_format
    """
    The pixel format requested for the frames
    ('rgb24' by default, 'rgba' if the alpha
    channel is needed).
    """
    self.container: InputContainer = None
    """
    The av input container of the file, that
    holds both the video and the audio. It is
    None until 'reset' sets it.
    """
    self.video_stream: VideoStream = None
    """
    The video stream of the container.
    """
    # TODO: What if no audio (?)
    self.audio_stream: AudioStream = None
    """
    The audio stream of the container.
    """
    self.cache: VideoFrameCache = None
    """
    The frame cache used to speed up the access
    to the frames.
    """

    # TODO: Maybe read the first frame here, store
    # it and reset, so it is in memory from the very
    # beginning. It would be difficult to obtain
    # once we have iterated.
    # The attributes above are actually loaded here
    self.reset()
|
385
|
+
|
386
|
+
def reset(
    self
) -> 'VideoReader':
    """
    Go back to the first frame of the video.

    If the container is already set it is only
    rewound (it is not closed nor opened again).
    If there is no container yet the file is
    opened and the streams and the frame cache
    are loaded.

    Returns the instance itself.
    """
    # NOTE(review): the nesting below was rebuilt from
    # a flattened diff view. Confirm that the streams
    # and the cache are only set when the file is
    # opened.
    if self.container is not None:
        # TODO: Maybe accept forcing it (?)
        self.container.seek(0)
    else:
        self.container = av_open(self.filename)
        # TODO: Should this be 'AUTO' (?)
        self.video_stream = self.container.streams.video[0]
        self.video_stream.thread_type = 'AUTO'
        # TODO: What if no audio (?) This fails if the
        # file has no audio stream
        self.audio_stream = self.container.streams.audio[0]
        self.audio_stream.thread_type = 'AUTO'
        self.cache = VideoFrameCache(self)

    # The signature announces the instance as the
    # return value
    return self
|
328
407
|
|
329
408
|
def iterate(
|
330
409
|
self
|
@@ -334,7 +413,11 @@ class VideoReader:
|
|
334
413
|
(already decoded).
|
335
414
|
"""
|
336
415
|
for frame in self.frame_iterator:
|
337
|
-
yield VideoReaderFrame(
|
416
|
+
yield VideoReaderFrame(
|
417
|
+
frame = frame,
|
418
|
+
t = float(frame.pts * self.time_base),
|
419
|
+
pixel_format = self.pixel_format
|
420
|
+
)
|
338
421
|
|
339
422
|
def iterate_with_audio(
|
340
423
|
self,
|
@@ -373,6 +456,25 @@ class VideoReader:
|
|
373
456
|
# Return the packet as it is
|
374
457
|
yield VideoReaderPacket(packet)
|
375
458
|
|
459
|
+
# TODO: Will we use this (?)
|
460
|
+
def get_frame(
    self,
    index: int
) -> 'Union[np.ndarray, None]':
    """
    Get the frame with the given 'index', using
    the cache system.

    The result is whatever the cache returns for
    that 'index': the frame as a numpy array, or
    None if no frame was found.
    """
    return self.cache.get_frame(index)
|
469
|
+
|
470
|
+
def close(
    self
) -> None:
    """
    Close the container to free it.

    The container reference is dropped once it is
    closed, so a later 'reset' opens the file again
    instead of seeking on a closed container.
    Calling this method with no container does
    nothing.
    """
    if self.container is None:
        return

    self.container.close()
    self.container = None
|
477
|
+
|
376
478
|
|
377
479
|
|
378
480
|
|
@@ -0,0 +1,155 @@
|
|
1
|
+
"""
|
2
|
+
The pyav container stores the information based
|
3
|
+
on the packets timestamps (called 'pts'). Some
|
4
|
+
of the packets are considered key_frames because
|
5
|
+
they include those key frames.
|
6
|
+
|
7
|
+
Also, this library uses those key frames to start
|
8
|
+
decoding from there to the next one, obtaining
|
9
|
+
all the frames in between able to be read and
|
10
|
+
modified.
|
11
|
+
|
12
|
+
This cache system will look for the range of
|
13
|
+
frames that belong to the key frame related to the
|
14
|
+
frame we are requesting in the moment, keeping in
|
15
|
+
memory all those frames to be handled fast. It
|
16
|
+
will remove the old frames if needed to use only
|
17
|
+
the 'size' we set when creating it.
|
18
|
+
"""
|
19
|
+
from collections import OrderedDict
|
20
|
+
|
21
|
+
|
22
|
+
class VideoFrameCache:
    """
    Class to manage the frames cache of a video
    within a video reader instance.

    The timestamps ('pts') of the key frame packets
    are indexed when the instance is created. When a
    frame that is not cached is requested, the
    container seeks to the closest previous key frame
    and decodes forward until the requested frame is
    reached, storing all the frames decoded on the
    way as numpy arrays. The oldest stored frames are
    removed when there are more than 'size'.
    """

    @property
    def container(
        self
    ) -> 'InputContainer':
        """
        Shortcut to the video reader instance container.
        """
        return self.reader_instance.container

    @property
    def stream(
        self
    ) -> 'VideoStream':
        """
        Shortcut to the video reader instance video
        stream.
        """
        return self.reader_instance.video_stream

    def __init__(
        self,
        reader: 'VideoReader',
        size: int = 50
    ):
        """
        Create the cache for the given 'reader',
        reading all the packets of its video stream
        once to index the key frames and seeking back
        to the beginning after that.
        """
        self.reader_instance: 'VideoReader' = reader
        """
        The video reader instance this cache belongs
        to.
        """
        self.cache: OrderedDict = OrderedDict()
        """
        The cache ordered dictionary, that maps the
        'pts' of a frame to the frame as a numpy
        array.
        """
        self.size = size
        """
        The size (in number of frames) of the cache.
        """
        self.key_frames_pts: list[int] = []
        """
        The list that contains the timestamps of the
        key frame packets, ordered from beginning to
        end.
        """

        # Index key frames. Packets with no 'pts' are
        # skipped because they cannot be compared with
        # a target 'pts' later.
        for packet in self.container.demux(self.stream):
            if (
                packet.is_keyframe and
                packet.pts is not None
            ):
                self.key_frames_pts.append(packet.pts)

        # Demuxing consumed the container, go back
        self.container.seek(0)
        # TODO: Maybe this is better (?)
        #self.reader_instance.reset()

    def _get_frame_by_pts(
        self,
        target_pts
    ):
        """
        Get the frame that has the provided 'target_pts',
        as a numpy array, or None if the decoding ends
        before reaching it.

        This method will start decoding frames from the
        closest key frame at or before 'target_pts' and
        return the first frame whose pts is equal to or
        greater than it. All the frames decoded on the
        way will be stored in cache.

        This method must be called when the frame
        requested is not stored in the cache.
        """
        # Look for the closest key frame that is not after
        # the target. If there is none we use the first
        # key frame indexed, or the target itself when no
        # key frame was indexed at all, instead of failing
        # on an empty 'max'.
        previous_key_frames_pts = [
            key_frame_pts
            for key_frame_pts in self.key_frames_pts
            if key_frame_pts <= target_pts
        ]

        if previous_key_frames_pts:
            key_frame_pts = max(previous_key_frames_pts)
        elif self.key_frames_pts:
            key_frame_pts = min(self.key_frames_pts)
        else:
            key_frame_pts = target_pts

        # Go to the key frame that includes it
        self.container.seek(key_frame_pts, stream = self.stream)

        # Use the pixel format of the reader, or 'rgb24'
        # if the reader does not define one
        pixel_format = getattr(self.reader_instance, 'pixel_format', 'rgb24')

        for frame in self.container.decode(self.stream):
            # TODO: Could 'frame' be None (?)
            pts = frame.pts
            if pts is None:
                continue

            # Store in cache if needed. We keep a local
            # reference because the entry could be removed
            # by the cleaning below (when 'size' is lower
            # than 1).
            frame_as_numpy = self.cache.get(pts)
            if frame_as_numpy is None:
                frame_as_numpy = frame.to_ndarray(format = pixel_format)
                self.cache[pts] = frame_as_numpy

            # Clean cache if full, removing the oldest item
            if len(self.cache) > self.size:
                self.cache.popitem(last = False)

            if pts >= target_pts:
                return frame_as_numpy

        return None

    def get_frame(
        self,
        index: int
    ) -> 'Union[np.ndarray, None]':
        """
        Get the frame with the given 'index' as a numpy
        array, from the cache if stored or decoding it
        (and caching it) if not. It is None if the frame
        was not found.
        """
        # Turn the frame index into a pts (the internal
        # timestamp). The rate is not turned into a float
        # so the operation is exact and the truncation
        # below cannot land one unit before the real pts.
        # NOTE(review): assumes 'average_rate' and
        # 'time_base' are rational numbers, confirm
        time_base = self.stream.time_base
        fps = self.stream.average_rate
        target_pts = int(index / fps / time_base)

        return (
            self.cache[target_pts]
            if target_pts in self.cache else
            self._get_frame_by_pts(target_pts)
        )

    def clear(
        self
    ) -> 'VideoFrameCache':
        """
        Clear the cache by removing all the items and
        return the instance itself.
        """
        self.cache.clear()

        return self
|