yta-video-opengl 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: yta-video-opengl
- Version: 0.0.3
+ Version: 0.0.5
  Summary: Youtube Autonomous Video OpenGL Module
  Author: danialcala94
  Author-email: danielalcalavalera@gmail.com
@@ -1,6 +1,6 @@
  [project]
  name = "yta-video-opengl"
- version = "0.0.3"
+ version = "0.0.5"
  description = "Youtube Autonomous Video OpenGL Module"
  authors = [
      {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -0,0 +1,390 @@
+ """
+ A video reader based on the PyAV (av) library,
+ which uses ffmpeg to decode the video.
+ """
+ from yta_validation import PythonValidator
+ from av.video.frame import VideoFrame
+ from av.audio.frame import AudioFrame
+ from av.packet import Packet
+ from av.video.stream import VideoStream
+ from av.audio.stream import AudioStream
+ from av.container.input import InputContainer
+ from fractions import Fraction
+ from av import open as av_open
+ from typing import Union
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class VideoReaderFrame:
+     """
+     Class to wrap a decoded frame, video or
+     audio, of a video that is being read.
+     """
+
+     @property
+     def is_video(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the instance is a video
+         frame.
+         """
+         return PythonValidator.is_instance_of(self.data, VideoFrame)
+
+     @property
+     def is_audio(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the instance is an audio
+         frame.
+         """
+         return PythonValidator.is_instance_of(self.data, AudioFrame)
+
+     def __init__(
+         self,
+         data: Union[AudioFrame, VideoFrame]
+     ):
+         self.data: Union[AudioFrame, VideoFrame] = data
+         """
+         The frame content, which can be an audio
+         or a video frame.
+         """
+
+ @dataclass
+ class VideoReaderPacket:
+     """
+     Class to wrap a packet of a video that is
+     being read, which can contain video or audio
+     frames.
+     """
+
+     @property
+     def is_video(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the packet includes video
+         frames or not.
+         """
+         return self.data.stream.type == 'video'
+
+     @property
+     def is_audio(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the packet includes audio
+         frames or not.
+         """
+         return self.data.stream.type == 'audio'
+
+     def __init__(
+         self,
+         data: Packet
+     ):
+         self.data: Packet = data
+         """
+         The packet, which can include video or audio
+         frames and can be decoded.
+         """
+
+     def decode(
+         self
+     ) -> list[Union[VideoFrame, AudioFrame]]:
+         """
+         Get the decoded frames, perfect to modify
+         them and encode them again to be saved.
+         """
+         return self.data.decode()
+
+
+ class VideoReader:
+     """
+     Class to read video files with the PyAV (av)
+     library, which uses ffmpeg in the background.
+     """
+
+     @property
+     def frame_iterator(
+         self
+     ) -> 'Iterator[VideoFrame]':
+         """
+         Iterator to iterate over all the decoded
+         video frames.
+         """
+         return self.container.decode(self.video_stream)
+
+     @property
+     def next_frame(
+         self
+     ) -> Union[VideoFrame, None]:
+         """
+         Get the next video frame (decoded) from the
+         iterator.
+         """
+         return next(self.frame_iterator)
+
+     @property
+     def audio_frame_iterator(
+         self
+     ) -> 'Iterator[AudioFrame]':
+         """
+         Iterator to iterate over all the decoded
+         audio frames.
+         """
+         return self.container.decode(self.audio_stream)
+
+     @property
+     def next_audio_frame(
+         self
+     ) -> Union[AudioFrame, None]:
+         """
+         Get the next audio frame (decoded) from the
+         iterator.
+         """
+         return next(self.audio_frame_iterator)
+
+     @property
+     def packet_iterator(
+         self
+     ) -> 'Iterator[Packet]':
+         """
+         Iterator to iterate over all the video frames
+         as packets (not decoded).
+         """
+         return self.container.demux(self.video_stream)
+
+     @property
+     def next_packet(
+         self
+     ) -> Union[Packet, None]:
+         """
+         Get the next video packet (not decoded) from
+         the iterator.
+         """
+         return next(self.packet_iterator)
+
+     @property
+     def audio_packet_iterator(
+         self
+     ) -> 'Iterator[Packet]':
+         """
+         Iterator to iterate over all the audio frames
+         as packets (not decoded).
+         """
+         return self.container.demux(self.audio_stream)
+
+     @property
+     def next_audio_packet(
+         self
+     ) -> Union[Packet, None]:
+         """
+         Get the next audio packet (not decoded) from
+         the iterator.
+         """
+         return next(self.audio_packet_iterator)
+
+     @property
+     def packet_with_audio_iterator(
+         self
+     ) -> 'Iterator[Packet]':
+         """
+         Iterator to iterate over all the video frames
+         as packets (not decoded), also including the
+         audio as packets.
+         """
+         return self.container.demux((self.video_stream, self.audio_stream))
+
+     @property
+     def next_packet_with_audio(
+         self
+     ) -> Union[Packet, None]:
+         """
+         Get the next video (or audio) frames packet
+         from the iterator. Depending on the position,
+         the packet can be video or audio.
+         """
+         return next(self.packet_with_audio_iterator)
+
+     @property
+     def codec_name(
+         self
+     ) -> str:
+         """
+         Get the name of the video codec.
+         """
+         return self.video_stream.codec_context.name
+
+     @property
+     def audio_codec_name(
+         self
+     ) -> str:
+         """
+         Get the name of the audio codec.
+         """
+         return self.audio_stream.codec_context.name
+
+     @property
+     def number_of_frames(
+         self
+     ) -> int:
+         """
+         The number of frames in the video.
+         """
+         return self.video_stream.frames
+
+     @property
+     def number_of_audio_frames(
+         self
+     ) -> int:
+         """
+         The number of frames in the audio.
+         """
+         return self.audio_stream.frames
+
+     @property
+     def fps(
+         self
+     ) -> Fraction:
+         """
+         The fps of the video.
+         """
+         # They return it as a Fraction but...
+         return self.video_stream.average_rate
+
+     @property
+     def audio_fps(
+         self
+     ) -> Fraction:
+         """
+         The fps of the audio.
+         """
+         # TODO: What if no audio (?)
+         return self.audio_stream.average_rate
+
+     @property
+     def size(
+         self
+     ) -> tuple[int, int]:
+         """
+         The size of the video in a (width, height) format.
+         """
+         return (
+             self.video_stream.width,
+             self.video_stream.height
+         )
+
+     @property
+     def width(
+         self
+     ) -> int:
+         """
+         The width of the video, in pixels.
+         """
+         return self.size[0]
+
+     @property
+     def height(
+         self
+     ) -> int:
+         """
+         The height of the video, in pixels.
+         """
+         return self.size[1]
+
+     # Any audio-related property has to be
+     # named with the 'audio_' prefix
+
+     def __init__(
+         self,
+         filename: str
+     ):
+         self.filename: str = filename
+         """
+         The filename of the video source.
+         """
+         self.container: InputContainer = av_open(filename)
+         """
+         The PyAV input container of the video
+         (which also includes the audio) we are
+         reading.
+         """
+         self.video_stream: VideoStream = self.container.streams.video[0]
+         """
+         The stream that includes the video.
+         """
+         self.video_stream.thread_type = 'AUTO'
+         # TODO: What if no audio (?)
+         self.audio_stream: AudioStream = self.container.streams.audio[0]
+         """
+         The stream that includes the audio.
+         """
+         self.audio_stream.thread_type = 'AUTO'
+
+     def iterate(
+         self
+     ) -> 'Iterator[VideoReaderFrame]':
+         """
+         Iterator to iterate over the video frames
+         (already decoded).
+         """
+         for frame in self.frame_iterator:
+             yield VideoReaderFrame(frame)
+
+     def iterate_with_audio(
+         self,
+         do_decode_video: bool = True,
+         do_decode_audio: bool = False
+     ) -> 'Iterator[Union[VideoReaderFrame, VideoReaderPacket, None]]':
+         """
+         Iterator to iterate over the video and audio
+         packets, decoded only when the corresponding
+         parameter is set to True.
+
+         If the packet is decoded, each frame will be
+         returned individually as a VideoReaderFrame
+         instance. If not, the whole packet is returned
+         as a VideoReaderPacket instance.
+         """
+         for packet in self.packet_with_audio_iterator:
+             is_video = packet.stream.type == 'video'
+
+             do_decode = (
+                 (
+                     is_video and
+                     do_decode_video
+                 ) or
+                 (
+                     not is_video and
+                     do_decode_audio
+                 )
+             )
+
+             if do_decode:
+                 for frame in packet.decode():
+                     # Return each frame decoded
+                     yield VideoReaderFrame(frame)
+             else:
+                 # Return the packet as it is
+                 yield VideoReaderPacket(packet)
+
+
+ """
+ When reading packets directly from the stream
+ we can receive packets with size=0, but we need
+ to process and decode (or yield) them. It is
+ only when we are passing packets to the muxer
+ that we need to ignore the empty ones
+ (size=0).
+
+ TODO: Do we need to ignore them all? For now,
+ not ignoring them causes exceptions, and
+ ignoring them makes it work perfectly.
+ """
@@ -0,0 +1,179 @@
+ """
+ Manual tests that work and that are useful to
+ learn about the code, refactor it and build
+ classes.
+ """
+ from yta_validation import PythonValidator
+ from yta_video_opengl.reader import VideoReader
+ from yta_video_opengl.writer import VideoWriter
+ from yta_timer import Timer
+ from yta_video_frame_time import T
+
+ import av
+ import moderngl
+ import numpy as np
+
+
+ def video_modified_stored():
+     VIDEO_PATH = "test_files/test_1.mp4"
+     OUTPUT_PATH = "test_files/output.mp4"
+     AMP = 0.05
+     FREQ = 10.0
+     SPEED = 2.0
+
+     # ModernGL context without a window
+     context = moderngl.create_standalone_context()
+
+     # Wave shader: vertex and fragment
+     program = context.program(
+         vertex_shader = '''
+         #version 330
+         in vec2 in_pos;
+         in vec2 in_uv;
+         out vec2 v_uv;
+         void main() {
+             v_uv = in_uv;
+             gl_Position = vec4(in_pos, 0.0, 1.0);
+         }
+         ''',
+         fragment_shader = '''
+         #version 330
+         uniform sampler2D tex;
+         uniform float time;
+         uniform float amp;
+         uniform float freq;
+         uniform float speed;
+         in vec2 v_uv;
+         out vec4 f_color;
+         void main() {
+             float wave = sin(v_uv.x * freq + time * speed) * amp;
+             vec2 uv = vec2(v_uv.x, v_uv.y + wave);
+             f_color = texture(tex, uv);
+         }
+         '''
+     )
+
+     # Quad
+     vertices = np.array([
+         -1, -1, 0.0, 0.0,
+          1, -1, 1.0, 0.0,
+         -1,  1, 0.0, 1.0,
+          1,  1, 1.0, 1.0,
+     ], dtype = 'f4')
+     vbo = context.buffer(vertices.tobytes())
+     vao = context.simple_vertex_array(program, vbo, 'in_pos', 'in_uv')
+
+     video = VideoReader(VIDEO_PATH)
+
+     # TODO: This has to be dynamic, but
+     # according to what (?)
+     NUMPY_FORMAT = 'rgb24'
+     # TODO: Where do we obtain this from (?)
+     VIDEO_CODEC_NAME = 'libx264'
+     # TODO: Where do we obtain this from (?)
+     PIXEL_FORMAT = 'yuv420p'
+
+     # Framebuffer to render into
+     fbo = context.simple_framebuffer(video.size)
+     fbo.use()
+
+     # Decode the first frame and use it as texture
+     first_frame = video.next_frame
+     # We need to reset the reader so it points to
+     # the first frame again...
+     # TODO: Improve this by, maybe, storing the first
+     # frame in memory so we can append it later, or
+     # by using '.seek(0)' even though it might not be
+     # accurate
+     video = VideoReader(VIDEO_PATH)
+
+     # Most OpenGL textures expect the origin in the
+     # lower left corner
+     # TODO: What if alpha (?)
+     image = np.flipud(first_frame.to_ndarray(format = NUMPY_FORMAT))
+     texture = context.texture((image.shape[1], image.shape[0]), 3, image.tobytes())
+
+     texture.build_mipmaps()
+
+     # Uniforms
+     program['amp'].value = AMP
+     program['freq'].value = FREQ
+     program['speed'].value = SPEED
+
+     # Writer with H.264 codec
+     video_writer = (
+         VideoWriter(OUTPUT_PATH)
+         .set_video_stream(VIDEO_CODEC_NAME, video.fps, video.size, PIXEL_FORMAT)
+         .set_audio_stream_from_template(video.audio_stream)
+     )
+
+     frame_index = 0
+     for frame_or_packet in video.iterate_with_audio(
+         do_decode_video = True,
+         do_decode_audio = False
+     ):
+         # What we get here depends on the parameters
+         # we passed to the method above
+         is_video_frame = PythonValidator.is_instance_of(frame_or_packet, 'VideoReaderFrame')
+         is_audio_packet = PythonValidator.is_instance_of(frame_or_packet, 'VideoReaderPacket')
+
+         # To simplify the process
+         if frame_or_packet is not None:
+             frame_or_packet = frame_or_packet.data
+
+         if is_audio_packet:
+             video_writer.mux(frame_or_packet)
+         elif is_video_frame:
+             with Timer(is_silent_as_context = True) as timer:
+
+                 def process_frame(
+                     frame: 'VideoFrame'
+                 ):
+                     # Set the uniforms we need for the
+                     # opengl effect we are applying (check
+                     # the program code)
+                     program['time'].value = T.video_frame_index_to_video_frame_time(frame_index, float(video.fps))
+
+                     # To numpy RGB, inverted for OpenGL
+                     img_array = np.flipud(
+                         frame.to_ndarray(format = NUMPY_FORMAT)
+                     )
+
+                     # Create the texture
+                     texture = context.texture((img_array.shape[1], img_array.shape[0]), 3, img_array.tobytes())
+                     texture.use()
+
+                     # Render with the shader to the framebuffer
+                     fbo.use()
+                     vao.render(moderngl.TRIANGLE_STRIP)
+
+                     # Processed GPU result to numpy
+                     processed_data = np.frombuffer(
+                         fbo.read(components = 3, alignment = 1), dtype = np.uint8
+                     )
+                     # Invert numpy back to a normal frame
+                     processed_data = np.flipud(
+                         processed_data.reshape((img_array.shape[0], img_array.shape[1], 3))
+                     )
+
+                     # Back to VideoFrame, ready to be muxed
+                     frame = av.VideoFrame.from_ndarray(processed_data, format = NUMPY_FORMAT)
+                     # TODO: What is this for (?)
+                     #out_frame.pict_type = 'NONE'
+                     return frame
+
+                 video_writer.mux_video_frame(process_frame(frame_or_packet))
+
+             print(f'Frame {str(frame_index)}: {timer.time_elapsed_str}s')
+             frame_index += 1
+
+     # Even when this loop has finished, the muxer
+     # may still be holding some packets waiting to
+     # be written. Here we tell the muxer to process
+     # all those packets.
+     video_writer.mux_video_frame(None)
+
+     # TODO: Maybe move this to the '__del__' (?)
+     video_writer.output.close()
+     video.container.close()
+     print(f'Saved as "{OUTPUT_PATH}".')
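
As a closing reference, a minimal sketch of what mux_video_frame(None) is assumed to do internally (the VideoWriter implementation is not part of this diff): in PyAV, encoding a None frame drains the packets still buffered by the codec so they can be muxed.

# Hypothetical flush helper; 'stream' is assumed to be the
# output av video stream and 'container' the output container
def flush_video_stream(container, stream):
    # Encoding None drains the encoder's internal buffer
    for packet in stream.encode(None):
        container.mux(packet)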