yta-video-opengl 0.0.7__tar.gz → 0.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/PKG-INFO +1 -1
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/pyproject.toml +1 -1
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/src/yta_video_opengl/reader/__init__.py +104 -2
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/src/yta_video_opengl/tests.py +5 -3
- yta_video_opengl-0.0.8/src/yta_video_opengl/utils.py +343 -0
- yta_video_opengl-0.0.8/src/yta_video_opengl/video.py +164 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/src/yta_video_opengl/writer.py +14 -0
- yta_video_opengl-0.0.7/src/yta_video_opengl/utils.py +0 -100
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/LICENSE +0 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/README.md +0 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/src/yta_video_opengl/__init__.py +0 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/src/yta_video_opengl/classes.py +0 -0
- {yta_video_opengl-0.0.7 → yta_video_opengl-0.0.8}/src/yta_video_opengl/reader/cache.py +0 -0
`src/yta_video_opengl/reader/__init__.py`

```diff
@@ -3,6 +3,7 @@ A video reader using the PyAv (av) library
 that, using ffmpeg, detects the video.
 """
 from yta_video_opengl.reader.cache import VideoFrameCache
+from yta_video_opengl.utils import iterate_stream_frames_demuxing
 from yta_validation import PythonValidator
 from av.video.frame import VideoFrame
 from av.audio.frame import AudioFrame
```
`src/yta_video_opengl/reader/__init__.py`

```diff
@@ -285,7 +286,7 @@ class VideoReader:
         The fps of the audio.
         """
         # TODO: What if no audio (?)
-        return self.audio_stream.
+        return self.audio_stream.rate
 
     @property
     def time_base(
```
`src/yta_video_opengl/reader/__init__.py`

```diff
@@ -303,8 +304,38 @@ class VideoReader:
         """
         The time base of the audio.
         """
+        # TODO: What if no audio (?)
         return self.audio_stream.time_base
 
+    @property
+    def duration(
+        self
+    ) -> Union[float, None]:
+        """
+        The duration of the video.
+        """
+        return (
+            float(self.video_stream.duration * self.video_stream.time_base)
+            if self.video_stream.duration else
+            # TODO: What to do in this case (?)
+            None
+        )
+
+    @property
+    def audio_duration(
+        self
+    ) -> Union[float, None]:
+        """
+        The duration of the audio.
+        """
+        # TODO: What if no audio (?)
+        return (
+            float(self.audio_stream.duration * self.audio_stream.time_base)
+            if self.audio_stream.duration else
+            # TODO: What to do in this case (?)
+            None
+        )
+
     @property
     def size(
         self
```
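The two new properties apply the usual PyAV conversion from time-base units to seconds. A quick worked check of that arithmetic (the numbers are hypothetical, not taken from the package):

```python
from fractions import Fraction

# Hypothetical stream values: a track whose time base is
# 1/15360 and whose duration is 76800 time-base units.
time_base = Fraction(1, 15360)
duration = 76800

# The same conversion the new 'duration' properties perform:
# seconds = stream.duration * stream.time_base
print(float(duration * time_base))  # 5.0
```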
`src/yta_video_opengl/reader/__init__.py`

```diff
@@ -405,6 +436,26 @@ class VideoReader:
         self.audio_stream.thread_type = 'AUTO'
         self.cache = VideoFrameCache(self)
 
+    def seek(
+        self,
+        pts,
+        stream = None
+    ) -> 'VideoReader':
+        """
+        Call the container '.seek()' method with
+        the given 'pts' packet time stamp.
+        """
+        stream = (
+            self.video_stream
+            if stream is None else
+            stream
+        )
+
+        # TODO: Is 'offset' actually a 'pts' (?)
+        self.container.seek(pts, stream = stream)
+
+        return self
+
     def iterate(
         self
     ) -> 'Iterator[Union[VideoFrame, AudioFrame]]':
```
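The new `seek()` expects a `pts` expressed in the target stream's time base, not a time in seconds, so callers have to convert first. A minimal usage sketch, assuming a local `video.mp4` test file and using the `t_to_pts` helper added in `utils.py` later in this diff:

```python
from yta_video_opengl.reader import VideoReader
from yta_video_opengl.utils import t_to_pts

# 'video.mp4' is a placeholder for any local test file.
reader = VideoReader('video.mp4')

# Convert t = 2.5s into the video stream's time base and
# jump there with the new 'seek()' method.
reader.seek(t_to_pts(2.5, reader.video_stream.time_base))
```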
`src/yta_video_opengl/reader/__init__.py`

```diff
@@ -454,7 +505,58 @@ class VideoReader:
                 yield VideoReaderFrame(frame)
             else:
                 # Return the packet as it is
-                yield VideoReaderPacket(packet)
+                yield VideoReaderPacket(packet)
+
+    # These methods below are using the demux
+    def iterate_video_frames(
+        self,
+        start_pts: int = 0,
+        end_pts: Union[int, None] = None
+    ):
+        """
+        Iterate over the video stream packets and
+        decode only the ones in the expected range,
+        so only those frames are decoded (which is
+        an expensive process).
+
+        This method yields a tuple of 3 elements:
+        - `frame` as a `VideoFrame` instance
+        - `t` as the frame time moment
+        - `index` as the frame index
+        """
+        for frame in iterate_stream_frames_demuxing(
+            container = self.container,
+            video_stream = self.video_stream,
+            audio_stream = None,
+            video_start_pts = start_pts,
+            video_end_pts = end_pts
+        ):
+            yield frame
+
+    def iterate_audio_frames(
+        self,
+        start_pts: int = 0,
+        end_pts: Union[int, None] = None
+    ):
+        """
+        Iterate over the audio stream packets and
+        decode only the ones in the expected range,
+        so only those frames are decoded (which is
+        an expensive process).
+
+        This method yields a tuple of 3 elements:
+        - `frame` as an `AudioFrame` instance
+        - `t` as the frame time moment
+        - `index` as the frame index
+        """
+        for frame in iterate_stream_frames_demuxing(
+            container = self.container,
+            video_stream = None,
+            audio_stream = self.audio_stream,
+            audio_start_pts = start_pts,
+            audio_end_pts = end_pts
+        ):
+            yield frame
 
     # TODO: Will we use this (?)
     def get_frame(
```
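A minimal sketch of how these iterators could be driven, assuming a local `video.mp4` test file; each iteration yields the `(frame, t, index)` tuple described in the docstrings:

```python
from yta_video_opengl.reader import VideoReader
from yta_video_opengl.utils import t_to_pts

# 'video.mp4' is a placeholder for any local test file.
reader = VideoReader('video.mp4')

# Decode only the first second of video: both bounds are
# pts values in the video stream's time base.
last_pts = t_to_pts(1.0, reader.video_stream.time_base)

for frame, t, index in reader.iterate_video_frames(0, last_pts):
    print(f'frame {index} at t = {float(t):.3f}s: {frame.width}x{frame.height}')
```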
`src/yta_video_opengl/tests.py`

```diff
@@ -581,6 +581,11 @@ def video_modified_stored():
 
     from yta_video_opengl.classes import WavingFrame, BreathingFrame, HandheldFrame, OrbitingFrame, RotatingInCenterFrame, StrangeTvFrame, GlitchRgbFrame, WavingNode
     from yta_video_opengl.utils import texture_to_frame, frame_to_texture
+    from yta_video_opengl.video import Video
+
+    Video(VIDEO_PATH, 0, 0.5).save_as(OUTPUT_PATH)
+
+    return
 
     video = VideoReader(VIDEO_PATH)
     video_writer = (
```
`src/yta_video_opengl/tests.py`

```diff
@@ -636,12 +641,9 @@ def video_modified_stored():
             elif is_video_frame:
                 with Timer(is_silent_as_context = True) as timer:
                     t = T.video_frame_index_to_video_frame_time(frame_index, float(video.fps))
-
                     # This is another way of getting 't'
                     #t = float(frame_or_packet.pts * video.time_base)
 
-                    # TODO: Pass the frame as a texture
-
                     video_writer.mux_video_frame(
                         frame = texture_to_frame(
                             texture = node.process(
```
`yta_video_opengl-0.0.8/src/yta_video_opengl/utils.py` (new file)

```diff
@@ -0,0 +1,343 @@
+from yta_validation import PythonValidator
+from av.container import InputContainer
+from av.video.stream import VideoStream
+from av.audio.stream import AudioStream
+from av.video.frame import VideoFrame
+from typing import Union
+
+import av
+import numpy as np
+import moderngl
+
+
+def frame_to_texture(
+    frame: Union['VideoFrame', 'np.ndarray'],
+    context: moderngl.Context,
+    numpy_format: str = 'rgb24'
+):
+    """
+    Transform the given 'frame' to an opengl
+    texture. The frame can be a VideoFrame
+    instance (from the pyav library) or a numpy
+    array.
+    """
+    # To numpy RGB inverted for opengl
+    frame: np.ndarray = (
+        np.flipud(frame.to_ndarray(format = numpy_format))
+        if PythonValidator.is_instance_of(frame, 'VideoFrame') else
+        np.flipud(frame)
+    )
+
+    return context.texture(
+        size = (frame.shape[1], frame.shape[0]),
+        components = frame.shape[2],
+        data = frame.tobytes()
+    )
+
+# TODO: I should make different methods to
+# obtain a VideoFrame or a numpy array frame
+def texture_to_frame(
+    texture: moderngl.Texture
+) -> 'VideoFrame':
+    """
+    Transform an opengl texture into a pyav
+    VideoFrame instance.
+    """
+    # RGBA8
+    data = texture.read(alignment = 1)
+    frame = np.frombuffer(data, dtype = np.uint8).reshape((texture.size[1], texture.size[0], 4))
+    # Opengl gives it with the y inverted
+    frame = np.flipud(frame)
+    # TODO: This can be returned as a numpy frame
+
+    # This is if we need an 'av' VideoFrame (to
+    # export through the demuxer, for example)
+    frame = av.VideoFrame.from_ndarray(frame, format = 'rgba')
+    # TODO: Make this customizable
+    frame = frame.reformat(format = 'yuv420p')
+
+    return frame
+
+def get_fullscreen_quad_vao(
+    context: moderngl.Context,
+    program: moderngl.Program
+) -> moderngl.VertexArray:
+    """
+    Get the vertex array object of a quad, by
+    using the vertices, the indexes, the vbo,
+    the ibo and the vao content.
+    """
+    # Quad vertices in NDC (-1..1) with texture
+    # coords (0..1)
+    """
+    The UV coordinates to build the quad we
+    will use to represent the frame by
+    applying it as a texture.
+    """
+    vertices = np.array([
+        # pos.x, pos.y, tex.u, tex.v
+        -1.0, -1.0, 0.0, 0.0,  # vertex 0 - bottom left
+         1.0, -1.0, 1.0, 0.0,  # vertex 1 - bottom right
+        -1.0,  1.0, 0.0, 1.0,  # vertex 2 - top left
+         1.0,  1.0, 1.0, 1.0,  # vertex 3 - top right
+    ], dtype = 'f4')
+
+    """
+    The indexes of the vertices (see 'vertices'
+    property) to build the 2 opengl triangles
+    that will represent the quad we need for
+    the frame.
+    """
+    indices = np.array([
+        0, 1, 2,
+        2, 1, 3
+    ], dtype = 'i4')
+
+    vbo = context.buffer(vertices.tobytes())
+    ibo = context.buffer(indices.tobytes())
+
+    vao_content = [
+        # 2 floats position, 2 floats texcoords
+        (vbo, '2f 2f', 'in_vert', 'in_texcoord'),
+    ]
+
+    return context.vertex_array(program, vao_content, ibo)
+
+def iterate_streams_packets(
+    container: 'InputContainer',
+    video_stream: 'VideoStream',
+    audio_stream: 'AudioStream',
+    video_start_pts: int = 0,
+    video_end_pts: Union[int, None] = None,
+    audio_start_pts: int = 0,
+    audio_end_pts: Union[int, None] = None
+):
+    """
+    Iterate over the provided 'stream' packets
+    and yield the ones in the expected range.
+    This is nice when trying to copy a stream
+    without modifications.
+    """
+    # 'video_start_pts' and 'audio_start_pts' must
+    # be 0 or a positive pts
+
+    if (
+        video_stream is None and
+        audio_stream is None
+    ):
+        raise Exception('No streams provided.')
+
+    # We only need to seek on video
+    if video_stream is not None:
+        container.seek(video_start_pts, stream = video_stream)
+    if audio_stream is not None:
+        container.seek(audio_start_pts, stream = audio_stream)
+
+    stream = [
+        stream
+        for stream in (video_stream, audio_stream)
+        if stream
+    ]
+
+    """
+    Apparently, if we ignore some packets based
+    on the 'pts', we can be ignoring information
+    that is needed for the next frames to be
+    decoded, so we need to decode them all...
+
+    If we could find some strategy to seek not to
+    the immediate packet but to some packets
+    before, and read from there, we would avoid
+    reading all of the packets and save some
+    time, but at what cost? We cannot skip any
+    crucial frame, so we need to know how many we
+    can skip, which depends on the codec.
+    """
+    stream_finished: str = ''
+    for packet in container.demux(stream):
+        if packet.pts is None:
+            continue
+
+        # TODO: We cannot skip like this, we need to
+        # look for the nearest keyframe to be able
+        # to decode the frames later. Take a look at
+        # the VideoFrameCache class and use it.
+
+        # start_pts = (
+        #     video_start_pts
+        #     if packet.stream.type == 'video' else
+        #     audio_start_pts
+        # )
+        # end_pts = (
+        #     video_end_pts
+        #     if packet.stream.type == 'video' else
+        #     audio_end_pts
+        # )
+
+        # if packet.pts < start_pts:
+        #     continue
+
+        # if (
+        #     end_pts is not None and
+        #     packet.pts > end_pts
+        # ):
+        #     if (
+        #         stream_finished != '' and
+        #         (
+        #             # Finish if only one stream
+        #             stream_finished != packet.stream.type or
+        #             video_stream is None or
+        #             audio_stream is None
+        #         )
+        #     ):
+        #         # We have yielded all the frames in the
+        #         # expected range, no more needed
+        #         return
+
+        #     stream_finished = packet.stream.type
+        #     continue
+
+        yield packet
+
+def iterate_stream_frames_demuxing(
+    container: 'InputContainer',
+    video_stream: 'VideoStream',
+    audio_stream: 'AudioStream',
+    video_start_pts: int = 0,
+    video_end_pts: Union[int, None] = None,
+    audio_start_pts: int = 0,
+    audio_end_pts: Union[int, None] = None
+):
+    """
+    Iterate over the provided 'stream' packets
+    and decode only the ones in the expected
+    range, so only those frames are decoded
+    (which is an expensive process).
+
+    This method yields a tuple of 3 elements:
+    - `frame` as a `VideoFrame` instance
+    - `t` as the frame time moment
+    - `index` as the frame index
+
+    You can easily transform the frame received
+    to a numpy array by using this:
+    - `frame.to_ndarray(format = format)`
+    """
+    # 'start_pts' must be 0 or a positive pts
+    # 'end_pts' must be None or a positive pts
+
+    # We cannot skip packets or we will lose
+    # information needed to build the video
+    stream_finished: str = ''
+    for packet in iterate_streams_packets(
+        container = container,
+        video_stream = video_stream,
+        audio_stream = audio_stream,
+        video_start_pts = video_start_pts,
+        video_end_pts = video_end_pts,
+        audio_start_pts = audio_start_pts,
+        audio_end_pts = audio_end_pts
+    ):
+        # Only valid and in range packets here,
+        # the accepted ones
+        for frame in packet.decode():
+            if frame.pts is None:
+                continue
+
+            time_base = (
+                video_stream.time_base
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.time_base
+            )
+
+            average_rate = (
+                video_stream.average_rate
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.rate
+            )
+
+            start_pts = (
+                video_start_pts
+                if packet.stream.type == 'video' else
+                audio_start_pts
+            )
+
+            end_pts = (
+                video_end_pts
+                if packet.stream.type == 'video' else
+                audio_end_pts
+            )
+
+            if frame.pts < start_pts:
+                continue
+
+            if (
+                end_pts is not None and
+                frame.pts > end_pts
+            ):
+                if (
+                    stream_finished != '' and
+                    (
+                        # Finish if only one stream
+                        stream_finished != packet.stream.type or
+                        video_stream is None or
+                        audio_stream is None
+                    )
+                ):
+                    # We have yielded all the frames in the
+                    # expected range, no more needed
+                    return
+
+                stream_finished = packet.stream.type
+                continue
+
+            time_base = (
+                video_stream.time_base
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.time_base
+            )
+
+            average_rate = (
+                video_stream.average_rate
+                if PythonValidator.is_instance_of(frame, VideoFrame) else
+                audio_stream.rate
+            )
+
+            # TODO: Maybe send a @dataclass instead (?)
+            yield (
+                frame,
+                pts_to_t(frame.pts, time_base),
+                pts_to_index(frame.pts, time_base, average_rate)
+            )
+
+def t_to_pts(
+    t: float,
+    stream_time_base: 'Fraction'
+) -> int:
+    """
+    Transform a 't' time moment (in seconds) to
+    a packet timestamp (pts) understandable by
+    the pyav library.
+    """
+    return int((t + 0.000001) / stream_time_base)
+
+def pts_to_index(
+    pts: int,
+    stream_time_base: 'Fraction',
+    fps: float
+) -> int:
+    """
+    Transform a 'pts' packet timestamp to a
+    frame index.
+    """
+    return int(round(pts_to_t(pts, stream_time_base) * fps))
+
+def pts_to_t(
+    pts: int,
+    stream_time_base: 'Fraction'
+) -> float:
+    """
+    Transform a 'pts' packet timestamp to a 't'
+    time moment.
+    """
+    return pts * stream_time_base
```
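The three helpers at the bottom are the glue between seconds, `pts` values and frame indexes; the small `+ 0.000001` in `t_to_pts` nudges values that float error leaves just under a frame boundary. A quick round-trip check with hypothetical stream values (a 30 fps stream with a 1/15360 time base):

```python
from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index
from fractions import Fraction

# Hypothetical stream values, not taken from a real file.
time_base = Fraction(1, 15360)

pts = t_to_pts(2.5, time_base)              # 2.5s -> 38400 time-base units
print(float(pts_to_t(pts, time_base)))      # 2.5 (round trip)
print(pts_to_index(pts, time_base, 30.0))   # 75: frame index at 30 fps
```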
`yta_video_opengl-0.0.8/src/yta_video_opengl/video.py` (new file)

```diff
@@ -0,0 +1,164 @@
+from yta_video_opengl.reader import VideoReader
+from yta_video_opengl.writer import VideoWriter
+from yta_video_opengl.utils import iterate_stream_frames_demuxing
+from yta_validation import PythonValidator
+from typing import Union
+
+
+# TODO: Where can I obtain this dynamically (?)
+PIXEL_FORMAT = 'yuv420p'
+
+# TODO: Maybe rename to 'Media' (?)
+class Video:
+    """
+    Class to wrap the functionality related to
+    handling and modifying a video.
+    """
+
+    @property
+    def start_pts(
+        self
+    ) -> int:
+        """
+        The start packet time stamp (pts), needed
+        to optimize the packet iteration process.
+        """
+        return int(self.start / self.reader.time_base)
+
+    @property
+    def end_pts(
+        self
+    ) -> Union[int, None]:
+        """
+        The end packet time stamp (pts), needed to
+        optimize the packet iteration process.
+        """
+        return (
+            int(self.end / self.reader.time_base)
+            # TODO: What do we do if no duration (?)
+            if self.duration is not None else
+            None
+        )
+
+    @property
+    def audio_start_pts(
+        self
+    ) -> int:
+        """
+        The start packet time stamp (pts), needed
+        to optimize the packet iteration process.
+        """
+        return int(self.start / self.reader.audio_time_base)
+
+    @property
+    def audio_end_pts(
+        self
+    ) -> Union[int, None]:
+        """
+        The end packet time stamp (pts), needed to
+        optimize the packet iteration process.
+        """
+        return (
+            int(self.end / self.reader.audio_time_base)
+            # TODO: What do we do if no duration (?)
+            if self.duration is not None else
+            None
+        )
+
+    @property
+    def duration(
+        self
+    ):
+        """
+        The duration of the video.
+        """
+        return self.end - self.start
+
+    @property
+    def frames(
+        self
+    ):
+        """
+        Iterator to yield all the frames, one by
+        one, within the range defined by the
+        'start' and 'end' parameters provided when
+        instantiating it.
+
+        This method yields a tuple of 3 elements:
+        - `frame` as a `VideoFrame` instance
+        - `t` as the frame time moment
+        - `index` as the frame index
+        """
+        for frame in iterate_stream_frames_demuxing(
+            container = self.reader.container,
+            video_stream = self.reader.video_stream,
+            audio_stream = self.reader.audio_stream,
+            video_start_pts = self.start_pts,
+            video_end_pts = self.end_pts,
+            audio_start_pts = self.audio_start_pts,
+            audio_end_pts = self.audio_end_pts
+        ):
+            yield frame
+
+    def __init__(
+        self,
+        filename: str,
+        start: float = 0.0,
+        end: Union[float, None] = None
+    ):
+        self.filename: str = filename
+        """
+        The filename of the original video.
+        """
+        # TODO: Detect the 'pixel_format' from the
+        # extension (?)
+        self.reader: VideoReader = VideoReader(self.filename)
+        """
+        The pyav video reader.
+        """
+        self.start: float = start
+        """
+        The time moment 't' in which the video
+        should start.
+        """
+        self.end: Union[float, None] = (
+            # TODO: Is this 'end' ok (?)
+            self.reader.duration
+            if end is None else
+            end
+        )
+        """
+        The time moment 't' in which the video
+        should end.
+        """
+
+    def save_as(
+        self,
+        filename: str
+    ) -> 'Video':
+        writer = VideoWriter(filename)
+        #writer.set_video_stream(self.reader.video_stream.codec.name, self.reader.fps, self.reader.size, PIXEL_FORMAT)
+        writer.set_video_stream_from_template(self.reader.video_stream)
+        writer.set_audio_stream_from_template(self.reader.audio_stream)
+
+        # TODO: I need to process the audio also, so
+        # build a method that does the same but for
+        # both streams at the same time
+        for frame, t, index in self.frames:
+            if PythonValidator.is_instance_of(frame, 'VideoFrame'):
+                print(f'Saving video frame {str(index)}, with t = {str(t)}')
+                writer.mux_video_frame(
+                    frame = frame
+                )
+            else:
+                print(f'Saving audio frame {str(index)} ({str(round(float(t * self.reader.fps), 2))}), with t = {str(t)}')
+                writer.mux_audio_frame(
+                    frame = frame
+                )
+
+        writer.mux_audio_frame(None)
+        writer.mux_video_frame(None)
+
+        # TODO: Maybe move this to the '__del__' (?)
+        writer.output.close()
+        self.reader.container.close()
```
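The new `Video` class is the high-level entry point this release adds, wiring the reader, the demux iteration and the writer together. A minimal usage sketch, matching the call the updated test makes ('video.mp4' and 'out.mp4' are placeholder paths):

```python
from yta_video_opengl.video import Video

# Keep only the first half second of the clip and
# re-encode it into a new file.
Video('video.mp4', start = 0, end = 0.5).save_as('out.mp4')
```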
`src/yta_video_opengl/writer.py`

```diff
@@ -114,9 +114,23 @@ class VideoWriter:
         You can pass the audio stream as it was
         obtained from the reader.
         """
+        self.audio_stream: AudioStream = self.output.add_stream(
+            codec_name = template.codec_context.name,
+            rate = template.codec_context.rate
+        )
+        self.audio_stream.codec_context.format = template.codec_context.format
+        self.audio_stream.codec_context.layout = template.codec_context.layout
+        self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
+
+        return self
+
+        # This below is not working
         self.audio_stream: AudioStream = self.output.add_stream_from_template(
             template
         )
+        # TODO: Is this actually needed (?)
+        # Force this 'rate'
+        self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
 
         return self
 
```
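The template-based path is kept but short-circuited behind an early `return`: the comment notes `add_stream_from_template` is "not working", so the new code rebuilds the audio stream by hand from the template's codec context and forces a 1/sample-rate time base. A standalone sketch of that manual pattern with plain PyAV ('in.mp4' and 'out.mp4' are placeholders):

```python
from fractions import Fraction

import av

# Copy the audio codec parameters from an input stream by
# hand instead of relying on 'add_stream_from_template'.
source = av.open('in.mp4')
template = source.streams.audio[0]

output = av.open('out.mp4', 'w')
stream = output.add_stream(
    codec_name = template.codec_context.name,
    rate = template.codec_context.rate
)
stream.codec_context.format = template.codec_context.format
stream.codec_context.layout = template.codec_context.layout
# Force a time base that matches the sample rate.
stream.time_base = Fraction(1, template.codec_context.rate)
```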
`yta_video_opengl-0.0.7/src/yta_video_opengl/utils.py` (deleted; superseded by the new `utils.py` above)

```diff
@@ -1,100 +0,0 @@
-from yta_validation import PythonValidator
-from typing import Union
-
-import av
-import numpy as np
-import moderngl
-
-
-def frame_to_texture(
-    frame: Union['VideoFrame', 'np.ndarray'],
-    context: moderngl.Context,
-    numpy_format: str = 'rgb24'
-):
-    """
-    Transform the given 'frame' to an opengl
-    texture. The frame can be a VideoFrame
-    instance (from pyav library) or a numpy
-    array.
-    """
-    # To numpy RGB inverted for opengl
-    frame: np.ndarray = (
-        np.flipud(frame.to_ndarray(format = numpy_format))
-        if PythonValidator.is_instance_of(frame, 'VideoFrame') else
-        np.flipud(frame)
-    )
-
-    return context.texture(
-        size = (frame.shape[1], frame.shape[0]),
-        components = frame.shape[2],
-        data = frame.tobytes()
-    )
-
-# TODO: I should make different methods to
-# obtain a VideoFrame or a numpy array frame
-def texture_to_frame(
-    texture: moderngl.Texture
-) -> 'VideoFrame':
-    """
-    Transform an opengl texture into a pyav
-    VideoFrame instance.
-    """
-    # RGBA8
-    data = texture.read(alignment = 1)
-    frame = np.frombuffer(data, dtype = np.uint8).reshape((texture.size[1], texture.size[0], 4))
-    # Opengl gives it with the y inverted
-    frame = np.flipud(frame)
-    # TODO: This can be returned as a numpy frame
-
-    # This is if we need an 'av' VideoFrame (to
-    # export through the demuxer, for example)
-    frame = av.VideoFrame.from_ndarray(frame, format = 'rgba')
-    # TODO: Make this customizable
-    frame = frame.reformat(format = 'yuv420p')
-
-    return frame
-
-def get_fullscreen_quad_vao(
-    context: moderngl.Context,
-    program: moderngl.Program
-) -> moderngl.VertexArray:
-    """
-    Get the vertex array object of a quad, by
-    using the vertices, the indexes, the vbo,
-    the ibo and the vao content.
-    """
-    # Quad vertices in NDC (-1..1) with texture
-    # coords (0..1)
-    """
-    The UV coordinates to build the quad we
-    will use to represent the frame by
-    applying it as a texture.
-    """
-    vertices = np.array([
-        # pos.x, pos.y, tex.u, tex.v
-        -1.0, -1.0, 0.0, 0.0, # vertex 0 - bottom left
-         1.0, -1.0, 1.0, 0.0, # vertex 1 - bottom right
-        -1.0,  1.0, 0.0, 1.0, # vertex 2 - top left
-         1.0,  1.0, 1.0, 1.0, # vertex 3 - top right
-    ], dtype = 'f4')
-
-    """
-    The indexes of the vertices (see 'vertices'
-    property) to build the 2 opengl triangles
-    that will represent the quad we need for
-    the frame.
-    """
-    indices = np.array([
-        0, 1, 2,
-        2, 1, 3
-    ], dtype = 'i4')
-
-    vbo = context.buffer(vertices.tobytes())
-    ibo = context.buffer(indices.tobytes())
-
-    vao_content = [
-        # 2 floats position, 2 floats texcoords
-        (vbo, '2f 2f', 'in_vert', 'in_texcoord'),
-    ]
-
-    return context.vertex_array(program, vao_content, ibo)
```