yta-video-opengl 0.0.7__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: yta-video-opengl
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: Youtube Autonomous Video OpenGL Module
5
5
  Author: danialcala94
6
6
  Author-email: danielalcalavalera@gmail.com
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "yta-video-opengl"
3
- version = "0.0.7"
3
+ version = "0.0.9"
4
4
  description = "Youtube Autonomous Video OpenGL Module"
5
5
  authors = [
6
6
  {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -3,6 +3,7 @@ A video reader using the PyAv (av) library
3
3
  that, using ffmpeg, detects the video.
4
4
  """
5
5
  from yta_video_opengl.reader.cache import VideoFrameCache
6
+ from yta_video_opengl.utils import iterate_stream_frames_demuxing
6
7
  from yta_validation import PythonValidator
7
8
  from av.video.frame import VideoFrame
8
9
  from av.audio.frame import AudioFrame
@@ -285,7 +286,7 @@ class VideoReader:
285
286
  The fps of the audio.
286
287
  """
287
288
  # TODO: What if no audio (?)
288
- return self.audio_stream.average_rate
289
+ return self.audio_stream.rate
289
290
 
290
291
  @property
291
292
  def time_base(
@@ -303,8 +304,38 @@ class VideoReader:
303
304
  """
304
305
  The time base of the audio.
305
306
  """
307
+ # TODO: What if no audio (?)
306
308
  return self.audio_stream.time_base
307
309
 
310
@property
def duration(
    self
) -> Union[float, None]:
    """
    The duration of the video stream as a float,
    obtained by multiplying the stream 'duration'
    by the stream 'time_base'.

    It is None when the stream reports no duration
    (or a duration that evaluates to false).
    """
    stream = self.video_stream
    ticks = stream.duration

    # TODO: What to do in this case (?)
    if not ticks:
        return None

    # The stream 'duration' is expressed in 'time_base' units
    return float(ticks * stream.time_base)
323
+
324
@property
def audio_duration(
    self
) -> Union[float, None]:
    """
    The duration of the audio stream as a float,
    obtained by multiplying the stream 'duration'
    by the stream 'time_base'.

    It is None when the stream reports no duration
    (or a duration that evaluates to false).
    """
    # TODO: What if no audio (?)
    stream = self.audio_stream
    ticks = stream.duration

    # TODO: What to do in this case (?)
    if not ticks:
        return None

    # The stream 'duration' is expressed in 'time_base' units
    return float(ticks * stream.time_base)
338
+
308
339
  @property
309
340
  def size(
310
341
  self
@@ -367,9 +398,14 @@ class VideoReader:
367
398
  """
368
399
  The stream that includes the audio.
369
400
  """
370
- self.cache: VideoFrameCache = None
401
+ self.video_cache: VideoFrameCache = None
371
402
  """
372
- The frame cache system to optimize
403
+ The video frame cache system to optimize
404
+ the way we access to the frames.
405
+ """
406
+ self.audio_cache: VideoFrameCache = None
407
+ """
408
+ The audio frame cache system to optimize
373
409
  the way we access to the frames.
374
410
  """
375
411
 
@@ -403,7 +439,28 @@ class VideoReader:
403
439
  self.video_stream.thread_type = 'AUTO'
404
440
  self.audio_stream = self.container.streams.audio[0]
405
441
  self.audio_stream.thread_type = 'AUTO'
406
- self.cache = VideoFrameCache(self)
442
+ self.video_cache = VideoFrameCache(self.container, self.video_stream)
443
+ self.audio_cache = VideoFrameCache(self.container, self.audio_stream)
444
+
445
def seek(
    self,
    pts,
    stream = None
) -> 'VideoReader':
    """
    Move the container to the given 'pts' packet
    time stamp by calling its '.seek()' method.

    The video stream is used as the reference when
    no 'stream' is provided. The instance itself is
    returned so the calls can be chained.
    """
    if stream is None:
        stream = self.video_stream

    # TODO: Is 'offset' actually a 'pts' (?)
    self.container.seek(pts, stream = stream)

    return self
407
464
 
408
465
  def iterate(
409
466
  self
@@ -454,7 +511,58 @@ class VideoReader:
454
511
  yield VideoReaderFrame(frame)
455
512
  else:
456
513
  # Return the packet as it is
457
- yield VideoReaderPacket(packet)
514
+ yield VideoReaderPacket(packet)
515
+
516
+ # These methods below are using the demux
517
def iterate_video_frames(
    self,
    start_pts: int = 0,
    end_pts: Union[int, None] = None
):
    """
    Iterate over the video stream packets, decode
    them and yield the frames whose 'pts' is in the
    range defined by 'start_pts' and 'end_pts' (no
    upper limit when 'end_pts' is None).

    This method yields tuples of 3 elements:
    - `frame` as a `VideoFrame` instance
    - `t` as the frame time moment
    - `index` as the frame index
    """
    # The helper names its range parameters per stream
    # ('video_*' and 'audio_*'). It has no 'start_pts'
    # nor 'end_pts' parameters, so passing those names
    # raises a TypeError.
    yield from iterate_stream_frames_demuxing(
        container = self.container,
        video_stream = self.video_stream,
        audio_stream = None,
        video_start_pts = start_pts,
        video_end_pts = end_pts
    )
541
+
542
def iterate_audio_frames(
    self,
    start_pts: int = 0,
    end_pts: Union[int, None] = None
):
    """
    Iterate over the audio stream packets, decode
    them and yield the frames whose 'pts' is in the
    range defined by 'start_pts' and 'end_pts' (no
    upper limit when 'end_pts' is None).

    This method yields tuples of 3 elements:
    - `frame` as an `AudioFrame` instance
    - `t` as the frame time moment
    - `index` as the frame index
    """
    # The helper names its range parameters per stream
    # ('video_*' and 'audio_*'). It has no 'start_pts'
    # nor 'end_pts' parameters, so passing those names
    # raises a TypeError.
    yield from iterate_stream_frames_demuxing(
        container = self.container,
        video_stream = None,
        audio_stream = self.audio_stream,
        audio_start_pts = start_pts,
        audio_end_pts = end_pts
    )
458
566
 
459
567
  # TODO: Will we use this (?)
460
568
  def get_frame(
@@ -462,10 +570,45 @@ class VideoReader:
462
570
  index: int
463
571
  ) -> 'VideoFrame':
464
572
  """
465
- Get the frame with the given 'index', using
466
- the cache system.
573
+ Get the video frame with the given 'index',
574
+ using the video cache system.
575
+ """
576
+ return self.video_cache.get_frame(index)
577
+
578
+ # TODO: Will we use this (?)
579
def get_audio_frame(
    self,
    index: int
) -> 'AudioFrame':
    """
    Get the audio frame with the given 'index',
    using the audio cache system.
    """
    # The audio cache must be used here: the video
    # cache would return a video frame instead.
    return self.audio_cache.get_frame(index)
588
+
589
def get_frames(
    self,
    start: float = 0.0,
    end: Union[float, None] = None
):
    """
    Iterator that yields, one by one, whatever the
    video cache gives for the range in between the
    'start' and 'end' time moments provided.
    """
    yield from self.video_cache.get_frames(start, end)
600
+
601
def get_audio_frames(
    self,
    start: float = 0.0,
    end: Union[float, None] = None
):
    """
    Iterator that yields, one by one, whatever the
    audio cache gives for the range in between the
    'start' and 'end' time moments provided.
    """
    yield from self.audio_cache.get_frames(start, end)
469
612
 
470
613
  def close(
471
614
  self
@@ -0,0 +1,233 @@
1
+ """
2
+ The pyav container stores the information based
3
+ on the packets timestamps (called 'pts'). Some
4
+ of the packets are considered key_frames because
5
+ they include those key frames.
6
+
7
+ Also, this library uses those key frames to start
8
+ decoding from there to the next one, obtaining
9
+ all the frames in between able to be read and
10
+ modified.
11
+
12
+ This cache system will look for the range of
13
+ frames that belong to the key frame related to the
14
+ frame we are requesting in the moment, keeping in
15
+ memory all those frames to be handled fast. It
16
+ will remove the old frames if needed to use only
17
+ the 'size' we set when creating it.
18
+ """
19
+ from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index
20
+ from av.container import InputContainer
21
+ from av.video.stream import VideoStream
22
+ from av.audio.stream import AudioStream
23
+ from av.video.frame import VideoFrame
24
+ from av.audio.frame import AudioFrame
25
+ from yta_validation.parameter import ParameterValidator
26
+ from fractions import Fraction
27
+ from collections import OrderedDict
28
+ from typing import Union
29
+
30
+
31
class VideoFrameCache:
    """
    Class to manage the frames cache of one stream
    (video or audio) of a pyav container.
    """

    @property
    def fps(
        self
    ) -> float:
        """
        The frames per second as a float: the
        'average_rate' of a video stream or the
        'rate' of any other stream.
        """
        return (
            float(self.stream.average_rate)
            if self.stream.type == 'video' else
            float(self.stream.rate)
        )

    @property
    def time_base(
        self
    ) -> Union[Fraction, None]:
        """
        The time base of the stream.
        """
        return self.stream.time_base

    def __init__(
        self,
        container: 'InputContainer',
        stream: Union['VideoStream', 'AudioStream'],
        size: int = 50
    ):
        ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
        ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
        ParameterValidator.validate_mandatory_positive_int('size', size)

        self.container: 'InputContainer' = container
        """
        The pyav container.
        """
        self.stream: Union['VideoStream', 'AudioStream'] = stream
        """
        The pyav stream.
        """
        self.cache: OrderedDict = OrderedDict()
        """
        The cache ordered dictionary, that maps the
        'pts' of a frame to the frame as a numpy array.
        """
        self.size = size
        """
        The size (in number of frames) of the cache.
        """
        self.key_frames_pts: list[int] = []
        """
        The list that contains the timestamps of the
        key frame packets, ordered from beginning to
        end.
        """

        self._prepare()

    def _prepare(
        self
    ):
        """
        Index the 'pts' of all the key frame packets
        of the stream and move the container back to
        the beginning.
        """
        # Packets without 'pts' are skipped because a
        # None value cannot be compared with the 'pts'
        # requested when looking for the nearest key frame
        self.key_frames_pts.extend(
            packet.pts
            for packet in self.container.demux(self.stream)
            if (
                packet.is_keyframe and
                packet.pts is not None
            )
        )

        self.container.seek(0)

    def _get_nearest_keyframe_fps(
        self,
        pts: int
    ):
        """
        Get the 'pts' (not the fps, despite the name) of
        the key frame that is the nearest one at or before
        the provided 'pts'. Useful to seek and start
        decoding frames from that key frame.

        If there is no key frame at or before the 'pts',
        the first key frame indexed is returned, or 0 if
        no key frame was indexed at all.
        """
        return max(
            (
                key_frame_pts
                for key_frame_pts in self.key_frames_pts
                if key_frame_pts <= pts
            ),
            # Avoid the ValueError that 'max' raises with
            # an empty sequence
            default = min(self.key_frames_pts, default = 0)
        )

    def _get_frame_by_pts(
        self,
        pts: int
    ):
        """
        Get the first frame whose 'pts' is equal to or
        greater than the provided 'pts', as a numpy array,
        or None if no frame was found.

        This method will start decoding frames from the
        nearest key frame (the one with the nearest 'pts'
        at or before the one requested) until the one
        requested is found. All those frames will be
        stored in cache.

        This method must be called when the frame
        requested is not stored in the cache.
        """
        # Look for the nearest key frame
        key_frame_pts = self._get_nearest_keyframe_fps(pts)

        # Go to the key frame that includes it
        self.container.seek(key_frame_pts, stream = self.stream)

        is_video = self.stream.type == 'video'
        decoded = None
        for frame in self.container.decode(self.stream):
            # TODO: Could 'frame' be None (?)
            if frame.pts is None:
                continue

            # Store in cache if needed
            if frame.pts not in self.cache:
                # TODO: The 'format' must be dynamic
                self.cache[frame.pts] = (
                    frame.to_ndarray(format = "rgb24")
                    if is_video else
                    # 'rgb24' is a pixel format, so it is
                    # not sent when converting audio frames
                    frame.to_ndarray()
                )

                # Clean cache if full (the oldest is removed)
                if len(self.cache) > self.size:
                    self.cache.popitem(last = False)

            if frame.pts >= pts:
                decoded = self.cache[frame.pts]
                break

        return decoded

    def get_frame(
        self,
        index: int
    ) -> Union['np.ndarray', None]:
        """
        Get the frame with the given 'index' from
        the cache, decoding it (and storing it in
        the cache) if it is not there yet.

        The frame is returned as the numpy array that
        is stored in the cache.
        """
        # TODO: Maybe we can accept 't' and 'pts' also
        target_pts = int(index / self.fps / self.time_base)

        return (
            self.cache[target_pts]
            if target_pts in self.cache else
            self._get_frame_by_pts(target_pts)
        )

    def get_frames(
        self,
        start: float = 0,
        end: Union[float, None] = None
    ):
        """
        Get all the frames in the range between
        the provided 'start' and 'end' time in
        seconds (until the last frame when 'end'
        is None).

        This method yields tuples of 3 elements:
        - `frame` as the pyav frame decoded
        - `t` as the frame time moment
        - `index` as the frame index
        """
        # TODO: I create this method by default using
        # the cache. Think about how to implement it
        # and apply it here, please.
        # NOTE: The cache is neither read nor written
        # here by now, the frames are always decoded.
        start_pts = t_to_pts(start, self.time_base)
        end_pts = (
            t_to_pts(end, self.time_base)
            if end is not None else
            None
        )
        key_frame_pts = self._get_nearest_keyframe_fps(start_pts)

        # Go to the nearest key frame to start decoding
        self.container.seek(key_frame_pts, stream = self.stream)

        for packet in self.container.demux(self.stream):
            for frame in packet.decode():
                if frame.pts is None:
                    continue

                # Decoded only to reach the first one we want
                if frame.pts < start_pts:
                    continue

                if (
                    end_pts is not None and
                    frame.pts > end_pts
                ):
                    return

                # TODO: Maybe send a @dataclass instead (?)
                yield (
                    frame,
                    pts_to_t(frame.pts, self.time_base),
                    pts_to_index(frame.pts, self.time_base, self.fps)
                )

    def clear(
        self
    ) -> 'VideoFrameCache':
        """
        Clear the cache by removing all the items.

        The instance itself is returned.
        """
        self.cache.clear()

        return self
@@ -581,6 +581,11 @@ def video_modified_stored():
581
581
 
582
582
  from yta_video_opengl.classes import WavingFrame, BreathingFrame, HandheldFrame, OrbitingFrame, RotatingInCenterFrame, StrangeTvFrame, GlitchRgbFrame, WavingNode
583
583
  from yta_video_opengl.utils import texture_to_frame, frame_to_texture
584
+ from yta_video_opengl.video import Video
585
+
586
+ Video(VIDEO_PATH, 0.25, 0.75).save_as(OUTPUT_PATH)
587
+
588
+ return
584
589
 
585
590
  video = VideoReader(VIDEO_PATH)
586
591
  video_writer = (
@@ -636,12 +641,9 @@ def video_modified_stored():
636
641
  elif is_video_frame:
637
642
  with Timer(is_silent_as_context = True) as timer:
638
643
  t = T.video_frame_index_to_video_frame_time(frame_index, float(video.fps))
639
-
640
644
  # This is another way of getting 't'
641
645
  #t = float(frame_or_packet.pts * video.time_base)
642
646
 
643
- # TODO: Pass the frame as a texture
644
-
645
647
  video_writer.mux_video_frame(
646
648
  frame = texture_to_frame(
647
649
  texture = node.process(
@@ -0,0 +1,343 @@
1
+ from yta_validation import PythonValidator
2
+ from av.container import InputContainer
3
+ from av.video.stream import VideoStream
4
+ from av.audio.stream import AudioStream
5
+ from av.video.frame import VideoFrame
6
+ from typing import Union
7
+
8
+ import av
9
+ import numpy as np
10
+ import moderngl
11
+
12
+
13
def frame_to_texture(
    frame: Union['VideoFrame', 'np.ndarray'],
    context: moderngl.Context,
    numpy_format: str = 'rgb24'
):
    """
    Build an opengl texture, in the given 'context',
    with the content of the 'frame' provided, that
    can be a pyav VideoFrame instance or a numpy
    array.

    A VideoFrame is turned into a numpy array by
    using the 'numpy_format' given.
    """
    if PythonValidator.is_instance_of(frame, 'VideoFrame'):
        frame = frame.to_ndarray(format = numpy_format)

    # The rows are vertically inverted for opengl
    pixels: np.ndarray = np.flipud(frame)
    width = pixels.shape[1]
    height = pixels.shape[0]
    components = pixels.shape[2]

    return context.texture(
        size = (width, height),
        components = components,
        data = pixels.tobytes()
    )
36
+
37
+ # TODO: I should make different methods to
38
+ # obtain a VideoFrame or a numpy array frame
39
def texture_to_frame(
    texture: moderngl.Texture
) -> 'VideoFrame':
    """
    Read the opengl 'texture' given and build a
    pyav VideoFrame instance with its content,
    reformatted to 'yuv420p'.
    """
    # The content is handled as RGBA8 (4 components
    # of 1 byte each per pixel)
    raw = texture.read(alignment = 1)
    width, height = texture.size[0], texture.size[1]
    pixels = np.frombuffer(raw, dtype = np.uint8).reshape((height, width, 4))

    # Opengl gives it with the y inverted
    pixels = np.flipud(pixels)
    # TODO: This can be returned as a numpy frame

    # This is if we need an 'av' VideoFrame (to
    # export through the demuxer, for example)
    rgba_frame = av.VideoFrame.from_ndarray(pixels, format = 'rgba')

    # TODO: Make this customizable
    return rgba_frame.reformat(format = 'yuv420p')
60
+
61
def get_fullscreen_quad_vao(
    context: moderngl.Context,
    program: moderngl.Program
) -> moderngl.VertexArray:
    """
    Build, in the 'context' given and for the
    'program' provided, the vertex array object
    of a quad that goes from -1 to 1 in both axis,
    with texture coordinates from 0 to 1.
    """
    # The quad corners, with the UV coordinates used
    # to apply the frame as a texture
    corners = np.array([
        # pos.x, pos.y, tex.u, tex.v
        -1.0, -1.0, 0.0, 0.0,  # vertex 0 - bottom left
        1.0, -1.0, 1.0, 0.0,  # vertex 1 - bottom right
        -1.0, 1.0, 0.0, 1.0,  # vertex 2 - top left
        1.0, 1.0, 1.0, 1.0,  # vertex 3 - top right
    ], dtype = 'f4')

    # The indexes of the corners that build the 2
    # opengl triangles that represent the quad
    triangles = np.array([
        0, 1, 2,
        2, 1, 3
    ], dtype = 'i4')

    vertex_buffer = context.buffer(corners.tobytes())
    index_buffer = context.buffer(triangles.tobytes())

    return context.vertex_array(
        program,
        [
            # 2 floats position, 2 floats texcoords
            (vertex_buffer, '2f 2f', 'in_vert', 'in_texcoord'),
        ],
        index_buffer
    )
105
+
106
def iterate_streams_packets(
    container: 'InputContainer',
    video_stream: 'VideoStream',
    audio_stream: 'AudioStream',
    video_start_pts: int = 0,
    video_end_pts: Union[int, None] = None,
    audio_start_pts: int = 0,
    audio_end_pts: Union[int, None] = None
):
    """
    Seek each stream provided to its start 'pts'
    and yield, one by one, all the packets of those
    streams that have a 'pts', until the container
    has no more packets.

    At least one of 'video_stream' and 'audio_stream'
    must be provided or an Exception is raised (when
    the iteration starts).

    The 'video_end_pts' and 'audio_end_pts' parameters
    are accepted but not applied by now: the packets
    are not filtered by range (see the comments in
    the code).
    """
    # 'video_start_pts' and 'audio_start_pts' must
    # be 0 or a positive pts

    if (
        video_stream is None and
        audio_stream is None
    ):
        raise Exception('No streams provided.')

    # Each stream provided is moved to its own start
    if video_stream is not None:
        container.seek(video_start_pts, stream = video_stream)
    if audio_stream is not None:
        container.seek(audio_start_pts, stream = audio_stream)

    streams = [
        stream
        for stream in (video_stream, audio_stream)
        if stream is not None
    ]

    # Apparently, if we ignore some packets based
    # on the 'pts', we can be ignoring information
    # that is needed for the next frames to be
    # decoded, so we need to decode them all...
    #
    # If we can find some strategy to seek not for
    # the immediate one but for some before and read
    # from that one to avoid reading all of the
    # packets we could save some time, but at what
    # cost? We cannot skip any crucial frame so we
    # need to know how many we can skip, and that
    # sounds a bit difficult depending on the codec.
    #
    # TODO: We cannot skip by 'pts' here, we need to
    # look for the nearest keyframe to be able to
    # decode the frames later. Take a look at the
    # VideoFrameCache class and use it.
    for packet in container.demux(streams):
        if packet.pts is None:
            continue

        yield packet
201
+
202
def iterate_stream_frames_demuxing(
    container: 'InputContainer',
    video_stream: 'VideoStream',
    audio_stream: 'AudioStream',
    video_start_pts : int = 0,
    video_end_pts: Union[int, None] = None,
    audio_start_pts: int = 0,
    audio_end_pts: Union[int, None] = None
):
    """
    Iterate over the packets of the streams
    provided, decode them and yield only the
    frames whose 'pts' is in the expected range
    of its stream. All the packets read are
    decoded, but the iteration stops once every
    stream provided has gone past its end 'pts'.

    This method yields tuples of 3 elements:
    - `frame` as a `VideoFrame` (or `AudioFrame`) instance
    - `t` as the frame time moment
    - `index` as the frame index

    You can easily transform the frame received
    to a numpy array by using this:
    - `frame.to_ndarray(format = format)`
    """
    # 'start_pts' must be 0 or a positive pts
    # 'end_pts' must be None or a positive pts

    has_single_stream = (
        video_stream is None or
        audio_stream is None
    )
    # The type of the last stream that went past its
    # end. It must be kept in between packets or the
    # early exit below could never be reached.
    stream_finished: str = ''

    # We cannot skip packets or we will lose
    # information needed to build the video
    for packet in iterate_streams_packets(
        container = container,
        video_stream = video_stream,
        audio_stream = audio_stream,
        video_start_pts = video_start_pts,
        video_end_pts = video_end_pts,
        audio_start_pts = audio_start_pts,
        audio_end_pts = audio_end_pts
    ):
        # All the frames of a packet belong to the
        # stream of that packet
        stream_type = packet.stream.type
        is_video = stream_type == 'video'

        start_pts = (
            video_start_pts
            if is_video else
            audio_start_pts
        )
        end_pts = (
            video_end_pts
            if is_video else
            audio_end_pts
        )
        time_base = (
            video_stream.time_base
            if is_video else
            audio_stream.time_base
        )
        average_rate = (
            video_stream.average_rate
            if is_video else
            audio_stream.rate
        )

        for frame in packet.decode():
            if frame.pts is None:
                continue

            if frame.pts < start_pts:
                continue

            if (
                end_pts is not None and
                frame.pts > end_pts
            ):
                if (
                    stream_finished != '' and
                    (
                        # Finish if only one stream
                        has_single_stream or
                        stream_finished != stream_type
                    )
                ):
                    # We have yielded all the frames in the
                    # expected range, no more needed
                    return

                stream_finished = stream_type
                continue

            # TODO: Maybe send a @dataclass instead (?)
            yield (
                frame,
                pts_to_t(frame.pts, time_base),
                pts_to_index(frame.pts, time_base, average_rate)
            )
312
+
313
def t_to_pts(
    t: float,
    stream_time_base: 'Fraction'
) -> int:
    """
    Get the packet timestamp (pts), understandable
    by the pyav library, that corresponds to the
    't' time moment (in seconds) given, according
    to the 'stream_time_base' provided.
    """
    # A very small amount is added before dividing
    # (presumably to compensate the float precision
    # issues before truncating to int)
    shifted_t = t + 0.000001

    return int(shifted_t / stream_time_base)
323
+
324
def pts_to_index(
    pts: int,
    stream_time_base: 'Fraction',
    fps: float
) -> int:
    """
    Get the frame index that corresponds to the
    'pts' packet timestamp given, which is the
    time moment of that 'pts' multiplied by the
    'fps' and rounded to the nearest integer.
    """
    t = pts_to_t(pts, stream_time_base)

    return int(round(t * fps))
334
+
335
def pts_to_t(
    pts: int,
    stream_time_base: 'Fraction'
) -> float:
    """
    Get the 't' time moment that corresponds to
    the 'pts' packet timestamp given, which is the
    'pts' multiplied by the 'stream_time_base'.

    The result of the multiplication is returned
    as it is, with no conversion.
    """
    t = pts * stream_time_base

    return t
@@ -0,0 +1,170 @@
1
+ from yta_video_opengl.reader import VideoReader
2
+ from yta_video_opengl.writer import VideoWriter
3
+ from yta_video_opengl.utils import iterate_stream_frames_demuxing
4
+ from yta_validation import PythonValidator
5
+ from typing import Union
6
+
7
+
8
+ # TODO: Where can I obtain this dynamically (?)
9
+ PIXEL_FORMAT = 'yuv420p'
10
+
11
+ # TODO: Maybe rename to 'Media' (?)
12
class Video:
    """
    Class to wrap the functionality related to
    handling and modifying a video, limited to
    the ['start', 'end'] time range provided.
    """

    @property
    def start_pts(
        self
    ) -> int:
        """
        The start packet time stamp (pts) of the
        video, needed to optimize the packet
        iteration process.
        """
        return int(self.start / self.reader.time_base)

    @property
    def end_pts(
        self
    ) -> Union[int, None]:
        """
        The end packet time stamp (pts) of the
        video, needed to optimize the packet
        iteration process. It is None when the
        'end' is unknown.
        """
        return (
            int(self.end / self.reader.time_base)
            # TODO: What do we do if no duration (?)
            if self.end is not None else
            None
        )

    @property
    def audio_start_pts(
        self
    ) -> int:
        """
        The start packet time stamp (pts) of the
        audio, needed to optimize the packet
        iteration process.
        """
        return int(self.start / self.reader.audio_time_base)

    @property
    def audio_end_pts(
        self
    ) -> Union[int, None]:
        """
        The end packet time stamp (pts) of the
        audio, needed to optimize the packet
        iteration process. It is None when the
        'end' is unknown.
        """
        return (
            int(self.end / self.reader.audio_time_base)
            # TODO: What do we do if no duration (?)
            if self.end is not None else
            None
        )

    @property
    def duration(
        self
    ) -> Union[float, None]:
        """
        The duration of the video, which is the
        difference between the 'end' and the 'start',
        or None when the 'end' is unknown.
        """
        # The 'end' is None when it was not provided
        # and the reader could not give a duration, so
        # it cannot be used in the subtraction.
        return (
            self.end - self.start
            if self.end is not None else
            None
        )

    @property
    def frames(
        self
    ):
        """
        Iterator to yield all the frames, one by
        one, within the range defined by the
        'start' and 'end' parameters provided when
        instantiating it. All the video frames are
        yielded first, and then all the audio frames.

        This method yields tuples of 3 elements:
        - `frame` as a `VideoFrame` (or `AudioFrame`) instance
        - `t` as the frame time moment
        - `index` as the frame index
        """
        yield from self.reader.get_frames(self.start, self.end)
        yield from self.reader.get_audio_frames(self.start, self.end)

    def __init__(
        self,
        filename: str,
        start: float = 0.0,
        end: Union[float, None] = None
    ):
        self.filename: str = filename
        """
        The filename of the original video.
        """
        # TODO: Detect the 'pixel_format' from the
        # extension (?)
        self.reader: VideoReader = VideoReader(self.filename)
        """
        The pyav video reader.
        """
        self.start: float = start
        """
        The time moment 't' in which the video
        should start.
        """
        self.end: Union[float, None] = (
            # TODO: Is this 'end' ok (?)
            self.reader.duration
            if end is None else
            end
        )
        """
        The time moment 't' in which the video
        should end. It is the duration given by
        the reader (that can be None) if no 'end'
        is provided.
        """

    def save_as(
        self,
        filename: str
    ) -> 'Video':
        """
        Write the frames of the video, within the
        range defined by 'start' and 'end', to the
        'filename' given, using output streams that
        are built from the ones of the reader.

        The output and the reader container are
        closed when finished, even if the process
        fails. The instance itself is returned.
        """
        writer = VideoWriter(filename)

        try:
            writer.set_video_stream_from_template(self.reader.video_stream)
            writer.set_audio_stream_from_template(self.reader.audio_stream)

            # TODO: I need to process the audio also, so
            # build a method that do the same but for
            # both streams at the same time
            for frame, t, index in self.frames:
                if PythonValidator.is_instance_of(frame, 'VideoFrame'):
                    print(f'Saving video frame {str(index)}, with t = {str(t)}')
                    writer.mux_video_frame(
                        frame = frame
                    )
                else:
                    print(f'Saving audio frame {str(index)} ({str(round(float(t * self.reader.fps), 2))}), with t = {str(t)}')
                    writer.mux_audio_frame(
                        frame = frame
                    )

            # Presumably this flushes the frames pending
            # in the encoders (verify against VideoWriter)
            writer.mux_audio_frame(None)
            writer.mux_video_frame(None)
        finally:
            # TODO: Maybe move this to the '__del__' (?)
            try:
                writer.output.close()
            finally:
                self.reader.container.close()

        return self
@@ -114,9 +114,23 @@ class VideoWriter:
114
114
  You can pass the audio stream as it was
115
115
  obtained from the reader.
116
116
  """
117
+ self.audio_stream: AudioStream = self.output.add_stream(
118
+ codec_name = template.codec_context.name,
119
+ rate = template.codec_context.rate
120
+ )
121
+ self.audio_stream.codec_context.format = template.codec_context.format
122
+ self.audio_stream.codec_context.layout = template.codec_context.layout
123
+ self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
124
+
125
+ return self
126
+
127
+ # This below is not working
117
128
  self.audio_stream: AudioStream = self.output.add_stream_from_template(
118
129
  template
119
130
  )
131
+ # TODO: Is this actually needed (?)
132
+ # Force this 'rate'
133
+ self.audio_stream.time_base = Fraction(1, template.codec_context.rate)
120
134
 
121
135
  return self
122
136
 
@@ -1,155 +0,0 @@
1
- """
2
- The pyav container stores the information based
3
- on the packets timestamps (called 'pts'). Some
4
- of the packets are considered key_frames because
5
- they include those key frames.
6
-
7
- Also, this library uses those key frames to start
8
- decodifying from there to the next one, obtaining
9
- all the frames in between able to be read and
10
- modified.
11
-
12
- This cache system will look for the range of
13
- frames that belong to the key frame related to the
14
- frame we are requesting in the moment, keeping in
15
- memory all those frames to be handled fast. It
16
- will remove the old frames if needed to use only
17
- the 'size' we set when creating it.
18
- """
19
- from collections import OrderedDict
20
-
21
-
22
- class VideoFrameCache:
23
- """
24
- Class to manage the frames cache of a video
25
- within a video reader instance.
26
- """
27
-
28
- @property
29
- def container(
30
- self
31
- ) -> 'InputContainer':
32
- """
33
- Shortcut to the video reader instance container.
34
- """
35
- return self.reader_instance.container
36
-
37
- @property
38
- def stream(
39
- self
40
- ) -> 'VideoStream':
41
- """
42
- Shortcut to the video reader instance video
43
- stream.
44
- """
45
- return self.reader_instance.video_stream
46
-
47
- def __init__(
48
- self,
49
- reader: 'VideoReader',
50
- size: int = 50
51
- ):
52
- self.reader_instance: 'VideoReader' = reader
53
- """
54
- The video reader instance this cache belongs
55
- to.
56
- """
57
- self.cache: OrderedDict = OrderedDict()
58
- """
59
- The cache ordered dictionary.
60
- """
61
- self.size = size
62
- """
63
- The size (in number of frames) of the cache.
64
- """
65
- self.key_frames_pts: list[int] = []
66
- """
67
- The list that contains the timestamps of the
68
- key frame packets, ordered from begining to
69
- end.
70
- """
71
-
72
- # Index key frames
73
- for packet in self.container.demux(self.stream):
74
- if packet.is_keyframe:
75
- self.key_frames_pts.append(packet.pts)
76
-
77
- self.container.seek(0)
78
- # TODO: Maybe this is better (?)
79
- #self.reader_instance.reset()
80
-
81
- def _get_frame_by_pts(
82
- self,
83
- target_pts
84
- ):
85
- """
86
- Get the frame that has the provided 'target_pts'.
87
-
88
- This method will start decoding frames from the
89
- most near key frame (the one with the nearer
90
- pts) until the one requested is found. All those
91
- frames will be stored in cache.
92
-
93
- This method must be called when the frame
94
- requested is not stored in the caché.
95
- """
96
- # Look for the most near key frame
97
- key_frame_pts = max([
98
- key_frame_pts
99
- for key_frame_pts in self.key_frames_pts
100
- if key_frame_pts <= target_pts
101
- ])
102
-
103
- # Go to the key frame that includes it
104
- self.container.seek(key_frame_pts, stream = self.stream)
105
-
106
- decoded = None
107
- for frame in self.container.decode(self.stream):
108
- # TODO: Could 'frame' be None (?)
109
- pts = frame.pts
110
- if pts is None:
111
- continue
112
-
113
- # Store in cache if needed
114
- if pts not in self.cache:
115
- # TODO: The 'format' must be dynamic
116
- self.cache[pts] = frame.to_ndarray(format = "rgb24")
117
-
118
- # Clean cache if full
119
- if len(self.cache) > self.size:
120
- self.cache.popitem(last = False)
121
-
122
- if pts >= target_pts:
123
- decoded = self.cache[pts]
124
- break
125
-
126
- return decoded
127
-
128
- def get_frame(
129
- self,
130
- index: int
131
- ) -> 'VideoFrame':
132
- """
133
- Get the frame with the given 'index' from
134
- the cache.
135
- """
136
- # convertir frame_number a PTS (timestamps internos)
137
- time_base = self.stream.time_base
138
- fps = float(self.stream.average_rate)
139
- target_pts = int(index / fps / time_base)
140
-
141
- return (
142
- self.cache[target_pts]
143
- if target_pts in self.cache else
144
- self._get_frame_by_pts(target_pts)
145
- )
146
-
147
- def clear(
148
- self
149
- ) -> 'VideoFrameCache':
150
- """
151
- Clear the cache by removing all the items.
152
- """
153
- self.cache.clear()
154
-
155
- return self
@@ -1,100 +0,0 @@
1
- from yta_validation import PythonValidator
2
- from typing import Union
3
-
4
- import av
5
- import numpy as np
6
- import moderngl
7
-
8
-
9
- def frame_to_texture(
10
- frame: Union['VideoFrame', 'np.ndarray'],
11
- context: moderngl.Context,
12
- numpy_format: str = 'rgb24'
13
- ):
14
- """
15
- Transform the given 'frame' to an opengl
16
- texture. The frame can be a VideoFrame
17
- instance (from pyav library) or a numpy
18
- array.
19
- """
20
- # To numpy RGB inverted for opengl
21
- frame: np.ndarray = (
22
- np.flipud(frame.to_ndarray(format = numpy_format))
23
- if PythonValidator.is_instance_of(frame, 'VideoFrame') else
24
- np.flipud(frame)
25
- )
26
-
27
- return context.texture(
28
- size = (frame.shape[1], frame.shape[0]),
29
- components = frame.shape[2],
30
- data = frame.tobytes()
31
- )
32
-
33
- # TODO: I should make different methods to
34
- # obtain a VideoFrame or a numpy array frame
35
- def texture_to_frame(
36
- texture: moderngl.Texture
37
- ) -> 'VideoFrame':
38
- """
39
- Transform an opengl texture into a pyav
40
- VideoFrame instance.
41
- """
42
- # RGBA8
43
- data = texture.read(alignment = 1)
44
- frame = np.frombuffer(data, dtype = np.uint8).reshape((texture.size[1], texture.size[0], 4))
45
- # Opengl gives it with the y inverted
46
- frame = np.flipud(frame)
47
- # TODO: This can be returned as a numpy frame
48
-
49
- # This is if we need an 'av' VideoFrame (to
50
- # export through the demuxer, for example)
51
- frame = av.VideoFrame.from_ndarray(frame, format = 'rgba')
52
- # TODO: Make this customizable
53
- frame = frame.reformat(format = 'yuv420p')
54
-
55
- return frame
56
-
57
- def get_fullscreen_quad_vao(
58
- context: moderngl.Context,
59
- program: moderngl.Program
60
- ) -> moderngl.VertexArray:
61
- """
62
- Get the vertex array object of a quad, by
63
- using the vertices, the indexes, the vbo,
64
- the ibo and the vao content.
65
- """
66
- # Quad vertices in NDC (-1..1) with texture
67
- # coords (0..1)
68
- """
69
- The UV coordinates to build the quad we
70
- will use to represent the frame by
71
- applying it as a texture.
72
- """
73
- vertices = np.array([
74
- # pos.x, pos.y, tex.u, tex.v
75
- -1.0, -1.0, 0.0, 0.0, # vertex 0 - bottom left
76
- 1.0, -1.0, 1.0, 0.0, # vertex 1 - bottom right
77
- -1.0, 1.0, 0.0, 1.0, # vertex 2 - top left
78
- 1.0, 1.0, 1.0, 1.0, # vertex 3 - top right
79
- ], dtype = 'f4')
80
-
81
- """
82
- The indexes of the vertices (see 'vertices'
83
- property) to build the 2 opengl triangles
84
- that will represent the quad we need for
85
- the frame.
86
- """
87
- indices = np.array([
88
- 0, 1, 2,
89
- 2, 1, 3
90
- ], dtype = 'i4')
91
-
92
- vbo = context.buffer(vertices.tobytes())
93
- ibo = context.buffer(indices.tobytes())
94
-
95
- vao_content = [
96
- # 2 floats position, 2 floats texcoords
97
- (vbo, '2f 2f', 'in_vert', 'in_texcoord'),
98
- ]
99
-
100
- return context.vertex_array(program, vao_content, ibo)