yta-video-opengl 0.0.3__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: yta-video-opengl
- Version: 0.0.3
+ Version: 0.0.4
  Summary: Youtube Autonomous Video OpenGL Module
  Author: danialcala94
  Author-email: danielalcalavalera@gmail.com
@@ -1,6 +1,6 @@
  [project]
  name = "yta-video-opengl"
- version = "0.0.3"
+ version = "0.0.4"
  description = "Youtube Autonomous Video OpenGL Module"
  authors = [
      {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -0,0 +1,419 @@
+ """
+ A video reader based on the PyAV (av) library,
+ which uses ffmpeg to decode the video.
+ """
+ from yta_validation import PythonValidator
+ from av.video.frame import VideoFrame
+ from av.audio.frame import AudioFrame
+ from av.packet import Packet
+ from av.video.stream import VideoStream
+ from av.audio.stream import AudioStream
+ from av.container.input import InputContainer
+ from fractions import Fraction
+ from av import open as av_open
+ from typing import Iterator, Union
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class VideoReaderFrame:
+     """
+     Class to wrap a decoded frame, video or
+     audio, of a video that is being read.
+     """
+
+     @property
+     def is_video(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the instance is a video
+         frame.
+         """
+         return PythonValidator.is_instance_of(self.data, VideoFrame)
+
+     @property
+     def is_audio(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the instance is an audio
+         frame.
+         """
+         return PythonValidator.is_instance_of(self.data, AudioFrame)
+
+     def __init__(
+         self,
+         data: Union[AudioFrame, VideoFrame]
+     ):
+         self.data: Union[AudioFrame, VideoFrame] = data
+         """
+         The frame content, which can be an audio or
+         a video frame.
+         """
+
+ @dataclass
+ class VideoReaderPacket:
+     """
+     Class to wrap a packet of a video that is
+     being read, which can contain video or audio
+     frames.
+     """
+
+     @property
+     def is_video(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the packet includes video
+         frames or not.
+         """
+         return self.data.stream.type == 'video'
+
+     @property
+     def is_audio(
+         self
+     ) -> bool:
+         """
+         Flag to indicate if the packet includes audio
+         frames or not.
+         """
+         return self.data.stream.type == 'audio'
+
+     def __init__(
+         self,
+         data: Packet
+     ):
+         self.data: Packet = data
+         """
+         The packet, which can include video or audio
+         frames and can be decoded.
+         """
+
+     def decode(
+         self
+     ) -> list[Union[VideoFrame, AudioFrame]]:
+         """
+         Get the decoded frames, perfect for making
+         modifications and encoding them again to
+         save them.
+         """
+         return self.data.decode()
+
+
+ class VideoReader:
+     """
+     Class to read video files with the PyAV (av)
+     library, which uses ffmpeg in the background.
+     """
+
+     @property
+     def frame_iterator(
+         self
+     ) -> 'Iterator[VideoFrame]':
+         """
+         Iterator to iterate over all the decoded
+         video frames.
+         """
+         return self.container.decode(self.video_stream)
+
+     @property
+     def next_frame(
+         self
+     ) -> Union[VideoFrame, None]:
+         """
+         Get the next video frame (decoded) from the
+         iterator.
+         """
+         return next(self.frame_iterator)
+
+     @property
+     def audio_frame_iterator(
+         self
+     ) -> 'Iterator[AudioFrame]':
+         """
+         Iterator to iterate over all the decoded
+         audio frames.
+         """
+         return self.container.decode(self.audio_stream)
+
+     @property
+     def next_audio_frame(
+         self
+     ) -> Union[AudioFrame, None]:
+         """
+         Get the next audio frame (decoded) from the
+         iterator.
+         """
+         return next(self.audio_frame_iterator)
+
+     @property
+     def packet_iterator(
+         self
+     ) -> 'Iterator[Packet]':
+         """
+         Iterator to iterate over all the video frames
+         as packets (not decoded).
+         """
+         return self.container.demux(self.video_stream)
+
+     @property
+     def next_packet(
+         self
+     ) -> Union[Packet, None]:
+         """
+         Get the next video packet (not decoded) from
+         the iterator.
+         """
+         return next(self.packet_iterator)
+
+     @property
+     def audio_packet_iterator(
+         self
+     ) -> 'Iterator[Packet]':
+         """
+         Iterator to iterate over all the audio frames
+         as packets (not decoded).
+         """
+         return self.container.demux(self.audio_stream)
+
+     @property
+     def next_audio_packet(
+         self
+     ) -> Union[Packet, None]:
+         """
+         Get the next audio packet (not decoded) from
+         the iterator.
+         """
+         return next(self.audio_packet_iterator)
+
+     @property
+     def packet_with_audio_iterator(
+         self
+     ) -> 'Iterator[Packet]':
+         """
+         Iterator to iterate over all the video frames
+         as packets (not decoded), also including the
+         audio as packets.
+         """
+         return self.container.demux((self.video_stream, self.audio_stream))
+
+     @property
+     def next_packet_with_audio(
+         self
+     ) -> Union[Packet, None]:
+         """
+         Get the next video (or audio) frames packet
+         from the iterator. Depending on the position,
+         the packet can be video or audio.
+         """
+         return next(self.packet_with_audio_iterator)
+
+     @property
+     def codec_name(
+         self
+     ) -> str:
+         """
+         Get the name of the video codec.
+         """
+         return self.video_stream.codec_context.name
+
+     @property
+     def audio_codec_name(
+         self
+     ) -> str:
+         """
+         Get the name of the audio codec.
+         """
+         return self.audio_stream.codec_context.name
+
+     @property
+     def number_of_frames(
+         self
+     ) -> int:
+         """
+         The number of frames in the video.
+         """
+         return self.video_stream.frames
+
+     @property
+     def number_of_audio_frames(
+         self
+     ) -> int:
+         """
+         The number of frames in the audio.
+         """
+         return self.audio_stream.frames
+
+     @property
+     def fps(
+         self
+     ) -> Fraction:
+         """
+         The fps of the video.
+         """
+         # They return it as a Fraction but...
+         return self.video_stream.average_rate
+
+     @property
+     def audio_fps(
+         self
+     ) -> Fraction:
+         """
+         The fps of the audio.
+         """
+         # TODO: What if no audio (?)
+         return self.audio_stream.average_rate
+
+     @property
+     def size(
+         self
+     ) -> tuple[int, int]:
+         """
+         The size of the video in a (width, height) format.
+         """
+         return (
+             self.video_stream.width,
+             self.video_stream.height
+         )
+
+     @property
+     def width(
+         self
+     ) -> int:
+         """
+         The width of the video, in pixels.
+         """
+         return self.size[0]
+
+     @property
+     def height(
+         self
+     ) -> int:
+         """
+         The height of the video, in pixels.
+         """
+         return self.size[1]
+
+     # Any property related to audio must be
+     # named with the 'audio_' prefix
+
+     def __init__(
+         self,
+         filename: str
+     ):
+         self.filename: str = filename
+         """
+         The filename of the video source.
+         """
+         self.container: InputContainer = av_open(filename)
+         """
+         The av input general container of the
+         video (that also includes the audio) we
+         are reading.
+         """
+         self.video_stream: VideoStream = self.container.streams.video[0]
+         """
+         The stream that includes the video.
+         """
+         self.video_stream.thread_type = 'AUTO'
+         # TODO: What if no audio (?)
+         self.audio_stream: AudioStream = self.container.streams.audio[0]
+         """
+         The stream that includes the audio.
+         """
+         self.audio_stream.thread_type = 'AUTO'
+
+     def iterate(
+         self
+     ) -> 'Iterator[VideoReaderFrame]':
+         """
+         Iterator to iterate over the video frames
+         (already decoded).
+         """
+         for frame in self.frame_iterator:
+             yield VideoReaderFrame(frame)
+
+     def iterate_with_audio(
+         self,
+         do_decode_video: bool = True,
+         do_decode_audio: bool = False
+     ) -> 'Iterator[Union[VideoReaderFrame, VideoReaderPacket, None]]':
+         """
+         Iterator to iterate over the video and audio
+         packets, decoded only if the corresponding
+         parameters are set to True.
+
+         If the packet is decoded, each frame will be
+         returned individually as a VideoReaderFrame
+         instance. If not, the whole packet is returned
+         as a VideoReaderPacket instance.
+
+         If the frame is the last one, with size == 0,
+         None will be returned, as it must not be passed
+         to the muxer '.mux()' method.
+         """
+         for packet in self.packet_with_audio_iterator:
+             if packet.size == 0:
+                 # End packet, not for the muxer
+                 yield None
+                 continue
+
+             is_video = packet.stream.type == 'video'
+
+             do_decode = (
+                 (
+                     is_video and
+                     do_decode_video
+                 ) or
+                 (
+                     not is_video and
+                     do_decode_audio
+                 )
+             )
+
+             if do_decode:
+                 for frame in packet.decode():
+                     # Return each frame decoded
+                     yield VideoReaderFrame(frame)
+             else:
+                 # Return the packet as it is
+                 yield VideoReaderPacket(packet)
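
As a usage sketch of the reader added above (a hedged example, assuming the module is importable as yta_video_opengl.reader and that a file like 'test_files/test_1.mp4' exists):

PYTHON_CODE:
from yta_video_opengl.reader import VideoReader

reader = VideoReader('test_files/test_1.mp4')

for item in reader.iterate_with_audio(
    do_decode_video = True,
    do_decode_audio = False
):
    # None is the end (flush) packet, which must not be muxed
    if item is None:
        continue

    if item.is_video:
        # A decoded av.VideoFrame wrapped in a VideoReaderFrame
        print(item.data.pts)
    else:
        # A non-decoded av.Packet wrapped in a VideoReaderPacket
        print(item.data.size)

reader.container.close()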
+
+
+ """
+ Read this below if you want to combine videos
+ that have not been written to disk yet (maybe
+ a composition in moviepy or something similar).
+
+ Use a pipe (without writing the whole file):
+ You can launch an FFmpeg process that sends the video to PyAV through stdin as an unencoded stream (rawvideo, for example), so you don't have to write the final file.
+ Example:
+
+ PYTHON_CODE:
+ import subprocess
+ import av
+
+ # FFmpeg produces raw frames on stdout
+ ffmpeg_proc = subprocess.Popen(
+     [
+         "ffmpeg",
+         "-i", "-",          # Read from stdin
+         "-f", "rawvideo",
+         "-pix_fmt", "rgba",
+         "-"
+     ],
+     stdin = subprocess.PIPE,
+     stdout = subprocess.PIPE
+ )
+
+ # Here you would send the combined data from your program to ffmpeg_proc.stdin,
+ # and you could read it with PyAV or process the pixel arrays directly
+
+ This is the most common approach for real-time video pipelines.
+ """
@@ -0,0 +1,175 @@
+ """
+ Manual tests that work and that are useful for
+ learning about the code, refactoring it and
+ building classes.
+ """
+ from yta_validation import PythonValidator
+ from yta_video_opengl.reader import VideoReader
+ from yta_video_opengl.writer import VideoWriter
+ from yta_timer import Timer
+ from yta_video_frame_time import T
+
+ import av
+ import moderngl
+ import numpy as np
+
+
+ def video_modified_stored():
+     VIDEO_PATH = "test_files/test_1.mp4"
+     OUTPUT_PATH = "test_files/output.mp4"
+     AMP = 0.05
+     FREQ = 10.0
+     SPEED = 2.0
+
+     # ModernGL context without window
+     context = moderngl.create_standalone_context()
+
+     # Wave shader, vertex and fragment
+     program = context.program(
+         vertex_shader = '''
+             #version 330
+             in vec2 in_pos;
+             in vec2 in_uv;
+             out vec2 v_uv;
+             void main() {
+                 v_uv = in_uv;
+                 gl_Position = vec4(in_pos, 0.0, 1.0);
+             }
+         ''',
+         fragment_shader = '''
+             #version 330
+             uniform sampler2D tex;
+             uniform float time;
+             uniform float amp;
+             uniform float freq;
+             uniform float speed;
+             in vec2 v_uv;
+             out vec4 f_color;
+             void main() {
+                 float wave = sin(v_uv.x * freq + time * speed) * amp;
+                 vec2 uv = vec2(v_uv.x, v_uv.y + wave);
+                 f_color = texture(tex, uv);
+             }
+         '''
+     )
+
+     # Quad
+     vertices = np.array([
+         -1, -1, 0.0, 0.0,
+          1, -1, 1.0, 0.0,
+         -1,  1, 0.0, 1.0,
+          1,  1, 1.0, 1.0,
+     ], dtype = 'f4')
+     vbo = context.buffer(vertices.tobytes())
+     vao = context.simple_vertex_array(program, vbo, 'in_pos', 'in_uv')
+
+     video = VideoReader(VIDEO_PATH)
+
+     print(video.number_of_frames)
+     print(video.number_of_audio_frames)
+
+     # TODO: This has to be dynamic, but
+     # according to what (?)
+     NUMPY_FORMAT = 'rgb24'
+     # TODO: Where do we obtain this from (?)
+     VIDEO_CODEC_NAME = 'libx264'
+     # TODO: Where do we obtain this from (?)
+     PIXEL_FORMAT = 'yuv420p'
+
+     # Framebuffer to render
+     fbo = context.simple_framebuffer(video.size)
+     fbo.use()
+
+     # Decode the first frame and use it as texture
+     first_frame = video.next_frame
+
+     # Most OpenGL textures expect the origin in
+     # the lower left corner
+     # TODO: What if alpha (?)
+     image = np.flipud(first_frame.to_ndarray(format = NUMPY_FORMAT))
+     texture = context.texture((image.shape[1], image.shape[0]), 3, image.tobytes())
+     texture.build_mipmaps()
+
+     # Uniforms
+     program['amp'].value = AMP
+     program['freq'].value = FREQ
+     program['speed'].value = SPEED
+
+     # Writer with H.264 codec
+     video_writer = (
+         VideoWriter(OUTPUT_PATH)
+         .set_video_stream(VIDEO_CODEC_NAME, video.fps, video.size, PIXEL_FORMAT)
+         .set_audio_stream_from_template(video.audio_stream)
+     )
+
+     frame_index = 0
+     for frame_or_packet in video.iterate_with_audio(
+         do_decode_video = True,
+         do_decode_audio = False
+     ):
+         # This below is because of the parameters we
+         # passed to the method
+         is_video_frame = PythonValidator.is_instance_of(frame_or_packet, 'VideoReaderFrame')
+         is_audio_packet = PythonValidator.is_instance_of(frame_or_packet, 'VideoReaderPacket')
+
+         # To simplify the process
+         if frame_or_packet is not None:
+             frame_or_packet = frame_or_packet.data
+
+         if is_audio_packet:
+             video_writer.mux(frame_or_packet)
+         elif is_video_frame:
+             with Timer(is_silent_as_context = True) as timer:
+
+                 def process_frame(
+                     frame: 'VideoFrame'
+                 ):
+                     # Add some variables if we need them, for
+                     # the opengl change we are applying (check
+                     # the program code)
+                     program['time'].value = T.video_frame_index_to_video_frame_time(frame_index, float(video.fps))
+
+                     # To numpy RGB, inverted for OpenGL
+                     img_array = np.flipud(
+                         frame.to_ndarray(format = NUMPY_FORMAT)
+                     )
+
+                     # Create texture
+                     texture = context.texture((img_array.shape[1], img_array.shape[0]), 3, img_array.tobytes())
+                     texture.use()
+
+                     # Render with shader to frame buffer
+                     fbo.use()
+                     vao.render(moderngl.TRIANGLE_STRIP)
+
+                     # Processed GPU result to numpy
+                     processed_data = np.frombuffer(
+                         fbo.read(components = 3, alignment = 1), dtype = np.uint8
+                     )
+                     # Invert numpy back to a normal frame
+                     processed_data = np.flipud(
+                         processed_data.reshape((img_array.shape[0], img_array.shape[1], 3))
+                     )
+
+                     # To VideoFrame and to buffer
+                     frame = av.VideoFrame.from_ndarray(processed_data, format = NUMPY_FORMAT)
+                     # TODO: What is this for (?)
+                     #out_frame.pict_type = 'NONE'
+                     return frame
+
+                 video_writer.mux_video_frame(process_frame(frame_or_packet))
+
+             print(f'Frame {str(frame_index)}: {timer.time_elapsed_str}s')
+             frame_index += 1
+
+     # Even when this loop has finished, the muxer
+     # may still have some packets waiting to be
+     # written. Here we tell the muxer to process
+     # all those pending packets.
+     video_writer.mux_video_frame(None)
+
+     # TODO: Maybe move this to the '__del__' (?)
+     video_writer.output.close()
+     video.container.close()
+     print(f'Saved as "{OUTPUT_PATH}".')
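
The VideoWriter class used by this test is not part of this diff. As a hedged illustration of the flushing behaviour the final comments describe, a minimal PyAV-based writer could look like this (a sketch, not the package's actual implementation; codec, rate and size are illustrative):

PYTHON_CODE:
import av

output = av.open('test_files/output.mp4', mode = 'w')
stream = output.add_stream('libx264', rate = 30)
stream.width = 1920
stream.height = 1080
stream.pix_fmt = 'yuv420p'

def mux_video_frame(frame):
    # Encoding with frame = None makes the encoder flush
    # its buffered packets, as in the test above
    for packet in stream.encode(frame):
        output.mux(packet)

# ... call mux_video_frame(frame) for each processed frame ...
mux_video_frame(None)
output.close()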