yta-video-opengl 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_video_opengl/reader/__init__.py +151 -8
- yta_video_opengl/reader/cache.py +111 -33
- yta_video_opengl/tests.py +5 -3
- yta_video_opengl/utils.py +244 -1
- yta_video_opengl/video.py +170 -0
- yta_video_opengl/writer.py +14 -0
- {yta_video_opengl-0.0.7.dist-info → yta_video_opengl-0.0.9.dist-info}/METADATA +1 -1
- yta_video_opengl-0.0.9.dist-info/RECORD +12 -0
- yta_video_opengl-0.0.7.dist-info/RECORD +0 -11
- {yta_video_opengl-0.0.7.dist-info → yta_video_opengl-0.0.9.dist-info}/LICENSE +0 -0
- {yta_video_opengl-0.0.7.dist-info → yta_video_opengl-0.0.9.dist-info}/WHEEL +0 -0
@@ -3,6 +3,7 @@ A video reader using the PyAv (av) library
|
|
3
3
|
that, using ffmpeg, detects the video.
|
4
4
|
"""
|
5
5
|
from yta_video_opengl.reader.cache import VideoFrameCache
|
6
|
+
from yta_video_opengl.utils import iterate_stream_frames_demuxing
|
6
7
|
from yta_validation import PythonValidator
|
7
8
|
from av.video.frame import VideoFrame
|
8
9
|
from av.audio.frame import AudioFrame
|
@@ -285,7 +286,7 @@ class VideoReader:
|
|
285
286
|
The fps of the audio.
|
286
287
|
"""
|
287
288
|
# TODO: What if no audio (?)
|
288
|
-
return self.audio_stream.
|
289
|
+
return self.audio_stream.rate
|
289
290
|
|
290
291
|
@property
|
291
292
|
def time_base(
|
@@ -303,8 +304,38 @@ class VideoReader:
|
|
303
304
|
"""
|
304
305
|
The time base of the audio.
|
305
306
|
"""
|
307
|
+
# TODO: What if no audio (?)
|
306
308
|
return self.audio_stream.time_base
|
307
309
|
|
310
|
+
@property
|
311
|
+
def duration(
|
312
|
+
self
|
313
|
+
) -> Union[float, None]:
|
314
|
+
"""
|
315
|
+
The duration of the video.
|
316
|
+
"""
|
317
|
+
return (
|
318
|
+
float(self.video_stream.duration * self.video_stream.time_base)
|
319
|
+
if self.video_stream.duration else
|
320
|
+
# TODO: What to do in this case (?)
|
321
|
+
None
|
322
|
+
)
|
323
|
+
|
324
|
+
@property
|
325
|
+
def audio_duration(
|
326
|
+
self
|
327
|
+
) -> Union[float, None]:
|
328
|
+
"""
|
329
|
+
The duration of the audio.
|
330
|
+
"""
|
331
|
+
# TODO: What if no audio (?)
|
332
|
+
return (
|
333
|
+
float(self.audio_stream.duration * self.audio_stream.time_base)
|
334
|
+
if self.audio_stream.duration else
|
335
|
+
# TODO: What to do in this case (?)
|
336
|
+
None
|
337
|
+
)
|
338
|
+
|
308
339
|
@property
|
309
340
|
def size(
|
310
341
|
self
|
@@ -367,9 +398,14 @@ class VideoReader:
|
|
367
398
|
"""
|
368
399
|
The stream that includes the audio.
|
369
400
|
"""
|
370
|
-
self.
|
401
|
+
self.video_cache: VideoFrameCache = None
|
371
402
|
"""
|
372
|
-
The frame cache system to optimize
|
403
|
+
The video frame cache system to optimize
|
404
|
+
the way we access the frames.
|
405
|
+
"""
|
406
|
+
self.audio_cache: VideoFrameCache = None
|
407
|
+
"""
|
408
|
+
The audio frame cache system to optimize
|
373
409
|
the way we access the frames.
|
374
410
|
"""
|
375
411
|
|
@@ -403,7 +439,28 @@ class VideoReader:
|
|
403
439
|
self.video_stream.thread_type = 'AUTO'
|
404
440
|
self.audio_stream = self.container.streams.audio[0]
|
405
441
|
self.audio_stream.thread_type = 'AUTO'
|
406
|
-
self.
|
442
|
+
self.video_cache = VideoFrameCache(self.container, self.video_stream)
|
443
|
+
self.audio_cache = VideoFrameCache(self.container, self.audio_stream)
|
444
|
+
|
445
|
+
def seek(
|
446
|
+
self,
|
447
|
+
pts,
|
448
|
+
stream = None
|
449
|
+
) -> 'VideoReader':
|
450
|
+
"""
|
451
|
+
Call the container '.seek()' method with
|
452
|
+
the given 'pts' packet time stamp.
|
453
|
+
"""
|
454
|
+
stream = (
|
455
|
+
self.video_stream
|
456
|
+
if stream is None else
|
457
|
+
stream
|
458
|
+
)
|
459
|
+
|
460
|
+
# TODO: Is 'offset' actually a 'pts' (?)
|
461
|
+
self.container.seek(pts, stream = stream)
|
462
|
+
|
463
|
+
return self
|
407
464
|
|
408
465
|
def iterate(
|
409
466
|
self
|
@@ -454,7 +511,58 @@ class VideoReader:
|
|
454
511
|
yield VideoReaderFrame(frame)
|
455
512
|
else:
|
456
513
|
# Return the packet as it is
|
457
|
-
yield VideoReaderPacket(packet)
|
514
|
+
yield VideoReaderPacket(packet)
|
515
|
+
|
516
|
+
# These methods below are using the demux
|
517
|
+
def iterate_video_frames(
|
518
|
+
self,
|
519
|
+
start_pts: int = 0,
|
520
|
+
end_pts: Union[int, None] = None
|
521
|
+
):
|
522
|
+
"""
|
523
|
+
Iterate over the video stream packets and
|
524
|
+
decode only the ones in the expected range,
|
525
|
+
so only those frames are decoded (which is
|
526
|
+
an expensive process).
|
527
|
+
|
528
|
+
This method returns a tuple of 3 elements:
|
529
|
+
- `frame` as a `VideoFrame` instance
|
530
|
+
- `t` as the frame time moment
|
531
|
+
- `index` as the frame index
|
532
|
+
"""
|
533
|
+
for frame in iterate_stream_frames_demuxing(
|
534
|
+
container = self.container,
|
535
|
+
video_stream = self.video_stream,
|
536
|
+
audio_stream = None,
|
537
|
+
start_pts = start_pts,
|
538
|
+
end_pts = end_pts
|
539
|
+
):
|
540
|
+
yield frame
|
541
|
+
|
542
|
+
def iterate_audio_frames(
|
543
|
+
self,
|
544
|
+
start_pts: int = 0,
|
545
|
+
end_pts: Union[int, None] = None
|
546
|
+
):
|
547
|
+
"""
|
548
|
+
Iterate over the audio stream packets and
|
549
|
+
decode only the ones in the expected range,
|
550
|
+
so only those frames are decoded (which is
|
551
|
+
an expensive process).
|
552
|
+
|
553
|
+
This method returns a tuple of 3 elements:
|
554
|
+
- `frame` as an `AudioFrame` instance
|
555
|
+
- `t` as the frame time moment
|
556
|
+
- `index` as the frame index
|
557
|
+
"""
|
558
|
+
for frame in iterate_stream_frames_demuxing(
|
559
|
+
container = self.container,
|
560
|
+
video_stream = None,
|
561
|
+
audio_stream = self.audio_stream,
|
562
|
+
start_pts = start_pts,
|
563
|
+
end_pts = end_pts
|
564
|
+
):
|
565
|
+
yield frame
|
458
566
|
|
459
567
|
# TODO: Will we use this (?)
|
460
568
|
def get_frame(
|
@@ -462,10 +570,45 @@ class VideoReader:
|
|
462
570
|
index: int
|
463
571
|
) -> 'VideoFrame':
|
464
572
|
"""
|
465
|
-
Get the frame with the given 'index',
|
466
|
-
the cache system.
|
573
|
+
Get the video frame with the given 'index',
|
574
|
+
using the video cache system.
|
575
|
+
"""
|
576
|
+
return self.video_cache.get_frame(index)
|
577
|
+
|
578
|
+
# TODO: Will we use this (?)
|
579
|
+
def get_audio_frame(
|
580
|
+
self,
|
581
|
+
index: int
|
582
|
+
) -> 'VideoFrame':
|
583
|
+
"""
|
584
|
+
Get the audio frame with the given 'index',
|
585
|
+
using the audio cache system.
|
586
|
+
"""
|
587
|
+
return self.video_cache.get_frame(index)
|
588
|
+
|
589
|
+
def get_frames(
|
590
|
+
self,
|
591
|
+
start: float = 0.0,
|
592
|
+
end: Union[float, None] = None
|
593
|
+
):
|
594
|
+
"""
|
595
|
+
Iterator to get the video frames in between
|
596
|
+
the provided 'start' and 'end' time moments.
|
597
|
+
"""
|
598
|
+
for frame in self.video_cache.get_frames(start, end):
|
599
|
+
yield frame
|
600
|
+
|
601
|
+
def get_audio_frames(
|
602
|
+
self,
|
603
|
+
start: float = 0.0,
|
604
|
+
end: Union[float, None] = None
|
605
|
+
):
|
606
|
+
"""
|
607
|
+
Iterator to get the audio frames in between
|
608
|
+
the provided 'start' and 'end' time moments.
|
467
609
|
"""
|
468
|
-
|
610
|
+
for frame in self.audio_cache.get_frames(start, end):
|
611
|
+
yield frame
|
469
612
|
|
470
613
|
def close(
|
471
614
|
self
|
yta_video_opengl/reader/cache.py
CHANGED
@@ -16,7 +16,16 @@ memory all those frames to be handled fast. It
|
|
16
16
|
will remove the old frames if needed to use only
|
17
17
|
the 'size' we set when creating it.
|
18
18
|
"""
|
19
|
+
from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index
|
20
|
+
from av.container import InputContainer
|
21
|
+
from av.video.stream import VideoStream
|
22
|
+
from av.audio.stream import AudioStream
|
23
|
+
from av.video.frame import VideoFrame
|
24
|
+
from av.audio.frame import AudioFrame
|
25
|
+
from yta_validation.parameter import ParameterValidator
|
26
|
+
from fractions import Fraction
|
19
27
|
from collections import OrderedDict
|
28
|
+
from typing import Union
|
20
29
|
|
21
30
|
|
22
31
|
class VideoFrameCache:
|
@@ -26,33 +35,44 @@ class VideoFrameCache:
|
|
26
35
|
"""
|
27
36
|
|
28
37
|
@property
|
29
|
-
def
|
38
|
+
def fps(
|
30
39
|
self
|
31
|
-
) ->
|
40
|
+
) -> float:
|
32
41
|
"""
|
33
|
-
|
42
|
+
The frames per second as a float.
|
34
43
|
"""
|
35
|
-
return
|
44
|
+
return (
|
45
|
+
float(self.stream.average_rate)
|
46
|
+
if self.stream.type == 'video' else
|
47
|
+
float(self.stream.rate)
|
48
|
+
)
|
36
49
|
|
37
50
|
@property
|
38
|
-
def
|
51
|
+
def time_base(
|
39
52
|
self
|
40
|
-
) ->
|
53
|
+
) -> Union[Fraction, None]:
|
41
54
|
"""
|
42
|
-
|
43
|
-
stream.
|
55
|
+
The time base of the stream.
|
44
56
|
"""
|
45
|
-
return self.
|
57
|
+
return self.stream.time_base
|
46
58
|
|
47
59
|
def __init__(
|
48
60
|
self,
|
49
|
-
|
61
|
+
container: InputContainer,
|
62
|
+
stream: Union[VideoStream, AudioStream],
|
50
63
|
size: int = 50
|
51
64
|
):
|
52
|
-
|
65
|
+
ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
|
66
|
+
ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
|
67
|
+
ParameterValidator.validate_mandatory_positive_int('size', size)
|
68
|
+
|
69
|
+
self.container: InputContainer = container
|
70
|
+
"""
|
71
|
+
The pyav container.
|
53
72
|
"""
|
54
|
-
|
55
|
-
|
73
|
+
self.stream: Union[VideoStream, AudioStream] = stream
|
74
|
+
"""
|
75
|
+
The pyav stream.
|
56
76
|
"""
|
57
77
|
self.cache: OrderedDict = OrderedDict()
|
58
78
|
"""
|
@@ -69,21 +89,40 @@ class VideoFrameCache:
|
|
69
89
|
end.
|
70
90
|
"""
|
71
91
|
|
92
|
+
self._prepare()
|
93
|
+
|
94
|
+
def _prepare(
|
95
|
+
self
|
96
|
+
):
|
72
97
|
# Index key frames
|
73
98
|
for packet in self.container.demux(self.stream):
|
74
99
|
if packet.is_keyframe:
|
75
100
|
self.key_frames_pts.append(packet.pts)
|
76
101
|
|
77
102
|
self.container.seek(0)
|
78
|
-
|
79
|
-
|
103
|
+
|
104
|
+
def _get_nearest_keyframe_fps(
|
105
|
+
self,
|
106
|
+
pts: int
|
107
|
+
):
|
108
|
+
"""
|
109
|
+
Get the pts of the keyframe that is the
|
110
|
+
nearest to the provided 'pts'. Useful to
|
111
|
+
seek and start decoding frames from that
|
112
|
+
keyframe.
|
113
|
+
"""
|
114
|
+
return max([
|
115
|
+
key_frame_pts
|
116
|
+
for key_frame_pts in self.key_frames_pts
|
117
|
+
if key_frame_pts <= pts
|
118
|
+
])
|
80
119
|
|
81
120
|
def _get_frame_by_pts(
|
82
121
|
self,
|
83
|
-
|
122
|
+
pts: int
|
84
123
|
):
|
85
124
|
"""
|
86
|
-
Get the frame that has the provided '
|
125
|
+
Get the frame that has the provided 'pts'.
|
87
126
|
|
88
127
|
This method will start decoding frames from the
|
89
128
|
most near key frame (the one with the nearer
|
@@ -94,11 +133,7 @@ class VideoFrameCache:
|
|
94
133
|
requested is not stored in the cache.
|
95
134
|
"""
|
96
135
|
# Look for the nearest key frame
|
97
|
-
key_frame_pts =
|
98
|
-
key_frame_pts
|
99
|
-
for key_frame_pts in self.key_frames_pts
|
100
|
-
if key_frame_pts <= target_pts
|
101
|
-
])
|
136
|
+
key_frame_pts = self._get_nearest_keyframe_fps(pts)
|
102
137
|
|
103
138
|
# Go to the key frame that includes it
|
104
139
|
self.container.seek(key_frame_pts, stream = self.stream)
|
@@ -106,21 +141,20 @@ class VideoFrameCache:
|
|
106
141
|
decoded = None
|
107
142
|
for frame in self.container.decode(self.stream):
|
108
143
|
# TODO: Could 'frame' be None (?)
|
109
|
-
|
110
|
-
if pts is None:
|
144
|
+
if frame.pts is None:
|
111
145
|
continue
|
112
146
|
|
113
147
|
# Store in cache if needed
|
114
|
-
if pts not in self.cache:
|
148
|
+
if frame.pts not in self.cache:
|
115
149
|
# TODO: The 'format' must be dynamic
|
116
|
-
self.cache[pts] = frame.to_ndarray(format = "rgb24")
|
150
|
+
self.cache[frame.pts] = frame.to_ndarray(format = "rgb24")
|
117
151
|
|
118
152
|
# Clean cache if full
|
119
153
|
if len(self.cache) > self.size:
|
120
154
|
self.cache.popitem(last = False)
|
121
155
|
|
122
|
-
if pts >=
|
123
|
-
decoded = self.cache[pts]
|
156
|
+
if frame.pts >= pts:
|
157
|
+
decoded = self.cache[frame.pts]
|
124
158
|
break
|
125
159
|
|
126
160
|
return decoded
|
@@ -128,21 +162,65 @@ class VideoFrameCache:
|
|
128
162
|
def get_frame(
|
129
163
|
self,
|
130
164
|
index: int
|
131
|
-
) ->
|
165
|
+
) -> Union[VideoFrame, AudioFrame]:
|
132
166
|
"""
|
133
167
|
Get the frame with the given 'index' from
|
134
168
|
the cache.
|
135
169
|
"""
|
136
|
-
#
|
137
|
-
|
138
|
-
fps = float(self.stream.average_rate)
|
139
|
-
target_pts = int(index / fps / time_base)
|
170
|
+
# TODO: Maybe we can accept 't' and 'pts' also
|
171
|
+
target_pts = int(index / self.fps / self.time_base)
|
140
172
|
|
141
173
|
return (
|
142
174
|
self.cache[target_pts]
|
143
175
|
if target_pts in self.cache else
|
144
176
|
self._get_frame_by_pts(target_pts)
|
145
177
|
)
|
178
|
+
|
179
|
+
def get_frames(
|
180
|
+
self,
|
181
|
+
start: float = 0,
|
182
|
+
end: Union[float, None] = None
|
183
|
+
):
|
184
|
+
"""
|
185
|
+
Get all the frames in the range between
|
186
|
+
the provided 'start' and 'end' time in
|
187
|
+
seconds.
|
188
|
+
"""
|
189
|
+
# TODO: I create this method by default using
|
190
|
+
# the cache. Think about how to implement it
|
191
|
+
# and apply it here, please.
|
192
|
+
# Go to the nearest key frame
|
193
|
+
start = t_to_pts(start, self.time_base)
|
194
|
+
end = (
|
195
|
+
t_to_pts(end, self.time_base)
|
196
|
+
if end is not None else
|
197
|
+
None
|
198
|
+
)
|
199
|
+
key_frame_pts = self._get_nearest_keyframe_fps(start)
|
200
|
+
|
201
|
+
# Go to the nearest key frame to start decoding
|
202
|
+
self.container.seek(key_frame_pts, stream = self.stream)
|
203
|
+
|
204
|
+
for packet in self.container.demux(self.stream):
|
205
|
+
for frame in packet.decode():
|
206
|
+
if frame.pts is None:
|
207
|
+
continue
|
208
|
+
|
209
|
+
if frame.pts < start:
|
210
|
+
continue
|
211
|
+
|
212
|
+
if (
|
213
|
+
end is not None and
|
214
|
+
frame.pts > end
|
215
|
+
):
|
216
|
+
return
|
217
|
+
|
218
|
+
# TODO: Maybe send a @dataclass instead (?)
|
219
|
+
yield (
|
220
|
+
frame,
|
221
|
+
pts_to_t(frame.pts, self.time_base),
|
222
|
+
pts_to_index(frame.pts, self.time_base, self.fps)
|
223
|
+
)
|
146
224
|
|
147
225
|
def clear(
|
148
226
|
self
|
yta_video_opengl/tests.py
CHANGED
@@ -581,6 +581,11 @@ def video_modified_stored():
|
|
581
581
|
|
582
582
|
from yta_video_opengl.classes import WavingFrame, BreathingFrame, HandheldFrame, OrbitingFrame, RotatingInCenterFrame, StrangeTvFrame, GlitchRgbFrame, WavingNode
|
583
583
|
from yta_video_opengl.utils import texture_to_frame, frame_to_texture
|
584
|
+
from yta_video_opengl.video import Video
|
585
|
+
|
586
|
+
Video(VIDEO_PATH, 0.25, 0.75).save_as(OUTPUT_PATH)
|
587
|
+
|
588
|
+
return
|
584
589
|
|
585
590
|
video = VideoReader(VIDEO_PATH)
|
586
591
|
video_writer = (
|
@@ -636,12 +641,9 @@ def video_modified_stored():
|
|
636
641
|
elif is_video_frame:
|
637
642
|
with Timer(is_silent_as_context = True) as timer:
|
638
643
|
t = T.video_frame_index_to_video_frame_time(frame_index, float(video.fps))
|
639
|
-
|
640
644
|
# This is another way of getting 't'
|
641
645
|
#t = float(frame_or_packet.pts * video.time_base)
|
642
646
|
|
643
|
-
# TODO: Pass the frame as a texture
|
644
|
-
|
645
647
|
video_writer.mux_video_frame(
|
646
648
|
frame = texture_to_frame(
|
647
649
|
texture = node.process(
|
yta_video_opengl/utils.py
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
from yta_validation import PythonValidator
|
2
|
+
from av.container import InputContainer
|
3
|
+
from av.video.stream import VideoStream
|
4
|
+
from av.audio.stream import AudioStream
|
5
|
+
from av.video.frame import VideoFrame
|
2
6
|
from typing import Union
|
3
7
|
|
4
8
|
import av
|
@@ -97,4 +101,243 @@ def get_fullscreen_quad_vao(
|
|
97
101
|
(vbo, '2f 2f', 'in_vert', 'in_texcoord'),
|
98
102
|
]
|
99
103
|
|
100
|
-
return context.vertex_array(program, vao_content, ibo)
|
104
|
+
return context.vertex_array(program, vao_content, ibo)
|
105
|
+
|
106
|
+
def iterate_streams_packets(
|
107
|
+
container: 'InputContainer',
|
108
|
+
video_stream: 'VideoStream',
|
109
|
+
audio_stream: 'AudioStream',
|
110
|
+
video_start_pts: int = 0,
|
111
|
+
video_end_pts: Union[int, None] = None,
|
112
|
+
audio_start_pts: int = 0,
|
113
|
+
audio_end_pts: Union[int, None] = None
|
114
|
+
):
|
115
|
+
"""
|
116
|
+
Iterate over the provided 'stream' packets
|
117
|
+
and yield the ones in the expected range.
|
118
|
+
This is nice when trying to copy a stream
|
119
|
+
without modifications.
|
120
|
+
"""
|
121
|
+
# 'video_start_pts' and 'audio_start_pts' must
|
122
|
+
# be 0 or a positive pts
|
123
|
+
|
124
|
+
if (
|
125
|
+
video_stream is None and
|
126
|
+
audio_stream is None
|
127
|
+
):
|
128
|
+
raise Exception('No streams provided.')
|
129
|
+
|
130
|
+
# We only need to seek on video
|
131
|
+
if video_stream is not None:
|
132
|
+
container.seek(video_start_pts, stream = video_stream)
|
133
|
+
if audio_stream is not None:
|
134
|
+
container.seek(audio_start_pts, stream = audio_stream)
|
135
|
+
|
136
|
+
stream = [
|
137
|
+
stream
|
138
|
+
for stream in (video_stream, audio_stream)
|
139
|
+
if stream
|
140
|
+
]
|
141
|
+
|
142
|
+
"""
|
143
|
+
Apparently, if we ignore some packets based
|
144
|
+
on the 'pts', we can be ignoring information
|
145
|
+
that is needed for the next frames to be
|
146
|
+
decoded, so we need to decode them all...
|
147
|
+
|
148
|
+
If we can find some strategy to seek not for
|
149
|
+
the immediate but some before and read from
|
150
|
+
that one to avoid reading all of the packets
|
151
|
+
we could save some time, but at what cost?
|
152
|
+
We cannot skip any crucial frame so we need
|
153
|
+
to know how many we can skip, and that sounds
|
154
|
+
a bit difficult depending on the codec.
|
155
|
+
"""
|
156
|
+
stream_finished: str = ''
|
157
|
+
for packet in container.demux(stream):
|
158
|
+
if packet.pts is None:
|
159
|
+
continue
|
160
|
+
|
161
|
+
# TODO: We cannot skip like this, we need to
|
162
|
+
# look for the nearest keyframe to be able
|
163
|
+
# to decode the frames later. Take a look at
|
164
|
+
# the VideoFrameCache class and use it.
|
165
|
+
|
166
|
+
# start_pts = (
|
167
|
+
# video_start_pts
|
168
|
+
# if packet.stream.type == 'video' else
|
169
|
+
# audio_start_pts
|
170
|
+
# )
|
171
|
+
# end_pts = (
|
172
|
+
# video_end_pts
|
173
|
+
# if packet.stream.type == 'video' else
|
174
|
+
# audio_end_pts
|
175
|
+
# )
|
176
|
+
|
177
|
+
# if packet.pts < start_pts:
|
178
|
+
# continue
|
179
|
+
|
180
|
+
# if (
|
181
|
+
# end_pts is not None and
|
182
|
+
# packet.pts > end_pts
|
183
|
+
# ):
|
184
|
+
# if (
|
185
|
+
# stream_finished != '' and
|
186
|
+
# (
|
187
|
+
# # Finish if only one stream
|
188
|
+
# stream_finished != packet.stream.type or
|
189
|
+
# video_stream is None or
|
190
|
+
# audio_stream is None
|
191
|
+
# )
|
192
|
+
# ):
|
193
|
+
# # We have yielded all the frames in the
|
194
|
+
# # expected range, no more needed
|
195
|
+
# return
|
196
|
+
|
197
|
+
# stream_finished = packet.stream.type
|
198
|
+
# continue
|
199
|
+
|
200
|
+
yield packet
|
201
|
+
|
202
|
+
def iterate_stream_frames_demuxing(
|
203
|
+
container: 'InputContainer',
|
204
|
+
video_stream: 'VideoStream',
|
205
|
+
audio_stream: 'AudioStream',
|
206
|
+
video_start_pts : int = 0,
|
207
|
+
video_end_pts: Union[int, None] = None,
|
208
|
+
audio_start_pts: int = 0,
|
209
|
+
audio_end_pts: Union[int, None] = None
|
210
|
+
):
|
211
|
+
"""
|
212
|
+
Iterate over the provided 'stream' packets
|
213
|
+
and decode only the ones in the expected
|
214
|
+
range, so only those frames are decoded
|
215
|
+
(which is an expensive process).
|
216
|
+
|
217
|
+
This method returns a tuple of 3 elements:
|
218
|
+
- `frame` as a `VideoFrame` instance
|
219
|
+
- `t` as the frame time moment
|
220
|
+
- `index` as the frame index
|
221
|
+
|
222
|
+
You can easily transform the frame received
|
223
|
+
to a numpy array by using this:
|
224
|
+
- `frame.to_ndarray(format = format)`
|
225
|
+
"""
|
226
|
+
# 'start_pts' must be 0 or a positive pts
|
227
|
+
# 'end_pts' must be None or a positive pts
|
228
|
+
|
229
|
+
# We cannot skip packets or we will lose
|
230
|
+
# information needed to build the video
|
231
|
+
for packet in iterate_streams_packets(
|
232
|
+
container = container,
|
233
|
+
video_stream = video_stream,
|
234
|
+
audio_stream = audio_stream,
|
235
|
+
video_start_pts = video_start_pts,
|
236
|
+
video_end_pts = video_end_pts,
|
237
|
+
audio_start_pts = audio_start_pts,
|
238
|
+
audio_end_pts = audio_end_pts
|
239
|
+
):
|
240
|
+
# Only valid and in range packets here
|
241
|
+
# Here only the accepted ones
|
242
|
+
stream_finished: str = ''
|
243
|
+
for frame in packet.decode():
|
244
|
+
if frame.pts is None:
|
245
|
+
continue
|
246
|
+
|
247
|
+
time_base = (
|
248
|
+
video_stream.time_base
|
249
|
+
if PythonValidator.is_instance_of(frame, VideoFrame) else
|
250
|
+
audio_stream.time_base
|
251
|
+
)
|
252
|
+
|
253
|
+
average_rate = (
|
254
|
+
video_stream.average_rate
|
255
|
+
if PythonValidator.is_instance_of(frame, VideoFrame) else
|
256
|
+
audio_stream.rate
|
257
|
+
)
|
258
|
+
|
259
|
+
start_pts = (
|
260
|
+
video_start_pts
|
261
|
+
if packet.stream.type == 'video' else
|
262
|
+
audio_start_pts
|
263
|
+
)
|
264
|
+
|
265
|
+
end_pts = (
|
266
|
+
video_end_pts
|
267
|
+
if packet.stream.type == 'video' else
|
268
|
+
audio_end_pts
|
269
|
+
)
|
270
|
+
|
271
|
+
if frame.pts < start_pts:
|
272
|
+
continue
|
273
|
+
|
274
|
+
if (
|
275
|
+
end_pts is not None and
|
276
|
+
frame.pts > end_pts
|
277
|
+
):
|
278
|
+
if (
|
279
|
+
stream_finished != '' and
|
280
|
+
(
|
281
|
+
# Finish if only one stream
|
282
|
+
stream_finished != packet.stream.type or
|
283
|
+
video_stream is None or
|
284
|
+
audio_stream is None
|
285
|
+
)
|
286
|
+
):
|
287
|
+
# We have yielded all the frames in the
|
288
|
+
# expected range, no more needed
|
289
|
+
return
|
290
|
+
|
291
|
+
stream_finished = packet.stream.type
|
292
|
+
continue
|
293
|
+
|
294
|
+
time_base = (
|
295
|
+
video_stream.time_base
|
296
|
+
if PythonValidator.is_instance_of(frame, VideoFrame) else
|
297
|
+
audio_stream.time_base
|
298
|
+
)
|
299
|
+
|
300
|
+
average_rate = (
|
301
|
+
video_stream.average_rate
|
302
|
+
if PythonValidator.is_instance_of(frame, VideoFrame) else
|
303
|
+
audio_stream.rate
|
304
|
+
)
|
305
|
+
|
306
|
+
# TODO: Maybe send a @dataclass instead (?)
|
307
|
+
yield (
|
308
|
+
frame,
|
309
|
+
pts_to_t(frame.pts, time_base),
|
310
|
+
pts_to_index(frame.pts, time_base, average_rate)
|
311
|
+
)
|
312
|
+
|
313
|
+
def t_to_pts(
|
314
|
+
t: float,
|
315
|
+
stream_time_base: 'Fraction'
|
316
|
+
) -> int:
|
317
|
+
"""
|
318
|
+
Transform a 't' time moment (in seconds) to
|
319
|
+
a packet timestamp (pts) understandable by
|
320
|
+
the pyav library.
|
321
|
+
"""
|
322
|
+
return int((t + 0.000001) / stream_time_base)
|
323
|
+
|
324
|
+
def pts_to_index(
|
325
|
+
pts: int,
|
326
|
+
stream_time_base: 'Fraction',
|
327
|
+
fps: float
|
328
|
+
) -> int:
|
329
|
+
"""
|
330
|
+
Transform a 'pts' packet timestamp to a
|
331
|
+
frame index.
|
332
|
+
"""
|
333
|
+
return int(round(pts_to_t(pts, stream_time_base) * fps))
|
334
|
+
|
335
|
+
def pts_to_t(
|
336
|
+
pts: int,
|
337
|
+
stream_time_base: 'Fraction'
|
338
|
+
) -> float:
|
339
|
+
"""
|
340
|
+
Transform a 'pts' packet timestamp to a 't'
|
341
|
+
time moment.
|
342
|
+
"""
|
343
|
+
return pts * stream_time_base
|
@@ -0,0 +1,170 @@
|
|
1
|
+
from yta_video_opengl.reader import VideoReader
|
2
|
+
from yta_video_opengl.writer import VideoWriter
|
3
|
+
from yta_video_opengl.utils import iterate_stream_frames_demuxing
|
4
|
+
from yta_validation import PythonValidator
|
5
|
+
from typing import Union
|
6
|
+
|
7
|
+
|
8
|
+
# TODO: Where can I obtain this dynamically (?)
|
9
|
+
PIXEL_FORMAT = 'yuv420p'
|
10
|
+
|
11
|
+
# TODO: Maybe rename to 'Media' (?)
|
12
|
+
class Video:
|
13
|
+
"""
|
14
|
+
Class to wrap the functionality related to
|
15
|
+
handling and modifying a video.
|
16
|
+
"""
|
17
|
+
|
18
|
+
@property
|
19
|
+
def start_pts(
|
20
|
+
self
|
21
|
+
) -> int:
|
22
|
+
"""
|
23
|
+
The start packet time stamp (pts), needed
|
24
|
+
to optimize the packet iteration process.
|
25
|
+
"""
|
26
|
+
return int(self.start / self.reader.time_base)
|
27
|
+
|
28
|
+
@property
|
29
|
+
def end_pts(
|
30
|
+
self
|
31
|
+
) -> Union[int, None]:
|
32
|
+
"""
|
33
|
+
The end packet time stamp (pts), needed to
|
34
|
+
optimize the packet iteration process.
|
35
|
+
"""
|
36
|
+
return (
|
37
|
+
int(self.end / self.reader.time_base)
|
38
|
+
# TODO: What do we do if no duration (?)
|
39
|
+
if self.duration is not None else
|
40
|
+
None
|
41
|
+
)
|
42
|
+
|
43
|
+
@property
|
44
|
+
def audio_start_pts(
|
45
|
+
self
|
46
|
+
) -> int:
|
47
|
+
"""
|
48
|
+
The start packet time stamp (pts), needed
|
49
|
+
to optimize the packet iteration process.
|
50
|
+
"""
|
51
|
+
return int(self.start / self.reader.audio_time_base)
|
52
|
+
|
53
|
+
@property
|
54
|
+
def audio_end_pts(
|
55
|
+
self
|
56
|
+
) -> Union[int, None]:
|
57
|
+
"""
|
58
|
+
The end packet time stamp (pts), needed to
|
59
|
+
optimize the packet iteration process.
|
60
|
+
"""
|
61
|
+
return (
|
62
|
+
int(self.end / self.reader.audio_time_base)
|
63
|
+
# TODO: What do we do if no duration (?)
|
64
|
+
if self.duration is not None else
|
65
|
+
None
|
66
|
+
)
|
67
|
+
|
68
|
+
@property
|
69
|
+
def duration(
|
70
|
+
self
|
71
|
+
):
|
72
|
+
"""
|
73
|
+
The duration of the video.
|
74
|
+
"""
|
75
|
+
return self.end - self.start
|
76
|
+
|
77
|
+
@property
|
78
|
+
def frames(
|
79
|
+
self
|
80
|
+
):
|
81
|
+
"""
|
82
|
+
Iterator to yield all the frames, one by
|
83
|
+
one, within the range defined by the
|
84
|
+
'start' and 'end' parameters provided when
|
85
|
+
instantiating it.
|
86
|
+
|
87
|
+
This method returns a tuple of 3 elements:
|
88
|
+
- `frame` as a `VideoFrame` instance
|
89
|
+
- `t` as the frame time moment
|
90
|
+
- `index` as the frame index
|
91
|
+
"""
|
92
|
+
for frame in self.reader.get_frames(self.start, self.end):
|
93
|
+
yield frame
|
94
|
+
|
95
|
+
for frame in self.reader.get_audio_frames(self.start, self.end):
|
96
|
+
yield frame
|
97
|
+
|
98
|
+
# for frame in iterate_stream_frames_demuxing(
|
99
|
+
# container = self.reader.container,
|
100
|
+
# video_stream = self.reader.video_stream,
|
101
|
+
# audio_stream = self.reader.audio_stream,
|
102
|
+
# video_start_pts = self.start_pts,
|
103
|
+
# video_end_pts = self.end_pts,
|
104
|
+
# audio_start_pts = self.audio_start_pts,
|
105
|
+
# audio_end_pts = self.audio_end_pts
|
106
|
+
# ):
|
107
|
+
# yield frame
|
108
|
+
|
109
|
+
def __init__(
|
110
|
+
self,
|
111
|
+
filename: str,
|
112
|
+
start: float = 0.0,
|
113
|
+
end: Union[float, None] = None
|
114
|
+
):
|
115
|
+
self.filename: str = filename
|
116
|
+
"""
|
117
|
+
The filename of the original video.
|
118
|
+
"""
|
119
|
+
# TODO: Detect the 'pixel_format' from the
|
120
|
+
# extension (?)
|
121
|
+
self.reader: VideoReader = VideoReader(self.filename)
|
122
|
+
"""
|
123
|
+
The pyav video reader.
|
124
|
+
"""
|
125
|
+
self.start: float = start
|
126
|
+
"""
|
127
|
+
The time moment 't' in which the video
|
128
|
+
should start.
|
129
|
+
"""
|
130
|
+
self.end: Union[float, None] = (
|
131
|
+
# TODO: Is this 'end' ok (?)
|
132
|
+
self.reader.duration
|
133
|
+
if end is None else
|
134
|
+
end
|
135
|
+
)
|
136
|
+
"""
|
137
|
+
The time moment 't' in which the video
|
138
|
+
should end.
|
139
|
+
"""
|
140
|
+
|
141
|
+
def save_as(
|
142
|
+
self,
|
143
|
+
filename: str
|
144
|
+
) -> 'Video':
|
145
|
+
writer = VideoWriter(filename)
|
146
|
+
#writer.set_video_stream(self.reader.video_stream.codec.name, self.reader.fps, self.reader.size, PIXEL_FORMAT)
|
147
|
+
writer.set_video_stream_from_template(self.reader.video_stream)
|
148
|
+
writer.set_audio_stream_from_template(self.reader.audio_stream)
|
149
|
+
|
150
|
+
# TODO: I need to process the audio also, so
|
151
|
+
# build a method that does the same but for
|
152
|
+
# both streams at the same time
|
153
|
+
for frame, t, index in self.frames:
|
154
|
+
if PythonValidator.is_instance_of(frame, 'VideoFrame'):
|
155
|
+
print(f'Saving video frame {str(index)}, with t = {str(t)}')
|
156
|
+
writer.mux_video_frame(
|
157
|
+
frame = frame
|
158
|
+
)
|
159
|
+
else:
|
160
|
+
print(f'Saving audio frame {str(index)} ({str(round(float(t * self.reader.fps), 2))}), with t = {str(t)}')
|
161
|
+
writer.mux_audio_frame(
|
162
|
+
frame = frame
|
163
|
+
)
|
164
|
+
|
165
|
+
writer.mux_audio_frame(None)
|
166
|
+
writer.mux_video_frame(None)
|
167
|
+
|
168
|
+
# TODO: Maybe move this to the '__del__' (?)
|
169
|
+
writer.output.close()
|
170
|
+
self.reader.container.close()
|
yta_video_opengl/writer.py
CHANGED
@@ -114,9 +114,23 @@ class VideoWriter:
|
|
114
114
|
You can pass the audio stream as it was
|
115
115
|
obtained from the reader.
|
116
116
|
"""
|
117
|
+
self.audio_stream: AudioStream = self.output.add_stream(
|
118
|
+
codec_name = template.codec_context.name,
|
119
|
+
rate = template.codec_context.rate
|
120
|
+
)
|
121
|
+
self.audio_stream.codec_context.format = template.codec_context.format
|
122
|
+
self.audio_stream.codec_context.layout = template.codec_context.layout
|
123
|
+
self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
|
124
|
+
|
125
|
+
return self
|
126
|
+
|
127
|
+
# This below is not working
|
117
128
|
self.audio_stream: AudioStream = self.output.add_stream_from_template(
|
118
129
|
template
|
119
130
|
)
|
131
|
+
# TODO: Is this actually needed (?)
|
132
|
+
# Force this 'rate'
|
133
|
+
self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
|
120
134
|
|
121
135
|
return self
|
122
136
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
yta_video_opengl/__init__.py,sha256=ycAx_XYMVDfkuObSvtW6irQ0Wo-fgxEz3fjIRMe8PpY,205
|
2
|
+
yta_video_opengl/classes.py,sha256=VUw73kfz8kxYLE0x0LxNHqFekF3CklcyofCNN-z57Lg,37706
|
3
|
+
yta_video_opengl/reader/__init__.py,sha256=rAWISZ7OzDnzar0At-LCfDA-MmWzax2jT2l5gySv4aw,16911
|
4
|
+
yta_video_opengl/reader/cache.py,sha256=UKhZvgY80ySuOYH52ikco6affsm8bjP656EroVR9Utg,6960
|
5
|
+
yta_video_opengl/tests.py,sha256=NZ-W1ak-ygwL9wATzEXtlCeCZX74ij_TZhktetMnOD4,25810
|
6
|
+
yta_video_opengl/utils.py,sha256=y0N1mS9FjpB4nFnx00K7sIs5EsqMkTe8C0bzLXZe9YM,10479
|
7
|
+
yta_video_opengl/video.py,sha256=Y14-Bsq7AH0GenwbPk61giD9eLHZDmWeZvP_iZn0e7w,5182
|
8
|
+
yta_video_opengl/writer.py,sha256=7xglz8xHOXMtWkctzuB21Y-e9xWFXYcklt3jVUN4svQ,8198
|
9
|
+
yta_video_opengl-0.0.9.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
10
|
+
yta_video_opengl-0.0.9.dist-info/METADATA,sha256=iYodm7r8DJD5lpPAs92lAClPcJ4dFrS94Dv5WjHcx5Q,670
|
11
|
+
yta_video_opengl-0.0.9.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
12
|
+
yta_video_opengl-0.0.9.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
yta_video_opengl/__init__.py,sha256=ycAx_XYMVDfkuObSvtW6irQ0Wo-fgxEz3fjIRMe8PpY,205
|
2
|
-
yta_video_opengl/classes.py,sha256=VUw73kfz8kxYLE0x0LxNHqFekF3CklcyofCNN-z57Lg,37706
|
3
|
-
yta_video_opengl/reader/__init__.py,sha256=npLoSfHPCbwAkmrVjOZn-7a9OHXZ6mPKxZj7BiDaFX4,12638
|
4
|
-
yta_video_opengl/reader/cache.py,sha256=Y3lQrirQJz7zFeiJQeJnkzyghYeMahkpKzsouzB90VI,4421
|
5
|
-
yta_video_opengl/tests.py,sha256=D8Rw0e2J0AbIA1IhinTope-z5tZdW5FQFFasBHBQvm8,25765
|
6
|
-
yta_video_opengl/utils.py,sha256=YEsvtZLingxU49-PSPZeESLMz7oMvlQxMBIe_vnyGJc,2939
|
7
|
-
yta_video_opengl/writer.py,sha256=fFkAz3r26wud2lJqKiP0EW24Ep6cZ1uTrQkc0tt8RsE,7562
|
8
|
-
yta_video_opengl-0.0.7.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
9
|
-
yta_video_opengl-0.0.7.dist-info/METADATA,sha256=Ga9Avta2a0bHcUwUZ_OvfYTN8rO11O2CtbmFt4btYBk,670
|
10
|
-
yta_video_opengl-0.0.7.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
11
|
-
yta_video_opengl-0.0.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|