yta-video-opengl 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
  A video reader using the PyAv (av) library
3
3
  that, using ffmpeg, detects the video.
4
4
  """
5
+ from yta_video_opengl.reader.cache import VideoFrameCache
5
6
  from yta_validation import PythonValidator
6
7
  from av.video.frame import VideoFrame
7
8
  from av.audio.frame import AudioFrame
@@ -31,7 +32,7 @@ class VideoReaderFrame:
31
32
  Flag to indicate if the instance is a video
32
33
  frame.
33
34
  """
34
- return PythonValidator.is_instance_of(self.data, VideoFrame)
35
+ return PythonValidator.is_instance_of(self.value, VideoFrame)
35
36
 
36
37
  @property
37
38
  def is_audio(
@@ -41,18 +42,37 @@ class VideoReaderFrame:
41
42
  Flag to indicate if the instance is an audio
42
43
  frame.
43
44
  """
44
- return PythonValidator.is_instance_of(self.data, AudioFrame)
45
+ return PythonValidator.is_instance_of(self.value, AudioFrame)
46
+
47
+ @property
48
+ def as_numpy(
49
+ self
50
+ ):
51
+ """
52
+ The frame as a numpy array.
53
+ """
54
+ return self.value.to_ndarray(format = self.pixel_format)
45
55
 
46
56
  def __init__(
47
57
  self,
48
58
  # TODO: Add the type, please
49
- data: any
59
+ frame: any,
60
+ t: float = None,
61
+ pixel_format: str = 'rgb24'
50
62
  ):
51
- self.data: Union[AudioFrame, VideoFrame] = data
63
+ self.value: Union[AudioFrame, VideoFrame] = frame
52
64
  """
53
65
  The frame content, that can be audio or video
54
66
  frame.
55
67
  """
68
+ self.t: float = t
69
+ """
70
+ The 't' time moment of the frame.
71
+ """
72
+ self.pixel_format: str = pixel_format
73
+ """
74
+ The pixel format of the frame.
75
+ """
56
76
 
57
77
  @dataclass
58
78
  class VideoReaderPacket:
@@ -70,7 +90,7 @@ class VideoReaderPacket:
70
90
  Flag to indicate if the packet includes video
71
91
  frames or not.
72
92
  """
73
- return self.data.stream.type == 'video'
93
+ return self.value.stream.type == 'video'
74
94
 
75
95
  @property
76
96
  def is_audio(
@@ -80,13 +100,13 @@ class VideoReaderPacket:
80
100
  Flag to indicate if the packet includes audio
81
101
  frames or not.
82
102
  """
83
- return self.data.stream.type == 'audio'
103
+ return self.value.stream.type == 'audio'
84
104
 
85
105
  def __init__(
86
106
  self,
87
- data: Packet
107
+ packet: Packet
88
108
  ):
89
- self.data: Packet = data
109
+ self.value: Packet = packet
90
110
  """
91
111
  The packet, that can include video or audio
92
112
  frames and can be decoded.
@@ -99,7 +119,7 @@ class VideoReaderPacket:
99
119
  Get the frames but decoded, perfect to make
100
120
  modifications and encode to save them again.
101
121
  """
102
- return self.data.decode()
122
+ return self.value.decode()
103
123
 
104
124
 
105
125
  class VideoReader:
@@ -267,6 +287,24 @@ class VideoReader:
267
287
  # TODO: What if no audio (?)
268
288
  return self.audio_stream.average_rate
269
289
 
290
+ @property
291
+ def time_base(
292
+ self
293
+ ) -> Fraction:
294
+ """
295
+ The time base of the video.
296
+ """
297
+ return self.video_stream.time_base
298
+
299
+ @property
300
+ def audio_time_base(
301
+ self
302
+ ) -> Fraction:
303
+ """
304
+ The time base of the audio.
305
+ """
306
+ return self.audio_stream.time_base
307
+
270
308
  @property
271
309
  def size(
272
310
  self
@@ -302,29 +340,70 @@ class VideoReader:
302
340
 
303
341
  def __init__(
304
342
  self,
305
- filename: str
343
+ filename: str,
344
+ # Use 'rgba' if alpha channel
345
+ pixel_format: str = 'rgb24'
306
346
  ):
307
347
  self.filename: str = filename
308
348
  """
309
349
  The filename of the video source.
310
350
  """
311
- self.container: InputContainer = av_open(filename)
351
+ self.pixel_format: str = pixel_format
352
+ """
353
+ The pixel format.
354
+ """
355
+ self.container: InputContainer = None
312
356
  """
313
357
  The av input general container of the
314
358
  video (that also includes the audio) we
315
359
  are reading.
316
360
  """
317
- self.video_stream: VideoStream = self.container.streams.video[0]
361
+ self.video_stream: VideoStream = None
318
362
  """
319
363
  The stream that includes the video.
320
364
  """
321
- self.video_stream.thread_type = 'AUTO'
322
365
  # TODO: What if no audio (?)
323
- self.audio_stream: AudioStream = self.container.streams.audio[0]
366
+ self.audio_stream: AudioStream = None
324
367
  """
325
368
  The stream that includes the audio.
326
369
  """
327
- self.audio_stream.thread_type = 'AUTO'
370
+ self.cache: VideoFrameCache = None
371
+ """
372
+ The frame cache system to optimize
373
+ the way we access the frames.
374
+ """
375
+
376
+ # TODO: Maybe we can read the first
377
+ # frame, store it and reset, so we have
378
+ # it in memory since the first moment.
379
+ # We should do it here because if we
380
+ # iterate in some moment and then we
381
+ # want to obtain it... it will be
382
+ # difficult.
383
+ # Let's load the variables
384
+ self.reset()
385
+
386
+ def reset(
387
+ self
388
+ ) -> 'VideoReader':
389
+ """
390
+ Reset the reader: seek the open container back
391
+ to 0, or open the file if it is not open yet.
392
+
393
+ This will also return to the first frame.
394
+ """
395
+ if self.container is not None:
396
+ # TODO: Maybe accept forcing it (?)
397
+ self.container.seek(0)
398
+ #self.container.close()
399
+ else:
400
+ self.container = av_open(self.filename)
401
+ # TODO: Should this be 'AUTO' (?)
402
+ self.video_stream = self.container.streams.video[0]
403
+ self.video_stream.thread_type = 'AUTO'
404
+ self.audio_stream = self.container.streams.audio[0]
405
+ self.audio_stream.thread_type = 'AUTO'
406
+ self.cache = VideoFrameCache(self)
328
407
 
329
408
  def iterate(
330
409
  self
@@ -334,7 +413,11 @@ class VideoReader:
334
413
  (already decoded).
335
414
  """
336
415
  for frame in self.frame_iterator:
337
- yield VideoReaderFrame(frame)
416
+ yield VideoReaderFrame(
417
+ frame = frame,
418
+ t = float(frame.pts * self.time_base),
419
+ pixel_format = self.pixel_format
420
+ )
338
421
 
339
422
  def iterate_with_audio(
340
423
  self,
@@ -373,6 +456,25 @@ class VideoReader:
373
456
  # Return the packet as it is
374
457
  yield VideoReaderPacket(packet)
375
458
 
459
+ # TODO: Will we use this (?)
460
+ def get_frame(
461
+ self,
462
+ index: int
463
+ ) -> 'VideoFrame':
464
+ """
465
+ Get the frame with the given 'index', using
466
+ the cache system.
467
+ """
468
+ return self.cache.get_frame(index)
469
+
470
+ def close(
471
+ self
472
+ ) -> None:
473
+ """
474
+ Close the container to free it.
475
+ """
476
+ self.container.close()
477
+
376
478
 
377
479
 
378
480
 
@@ -0,0 +1,155 @@
1
+ """
2
+ The pyav container stores the information based
3
+ on the packets timestamps (called 'pts'). Some
4
+ of the packets are considered key_frames because
5
+ they include those key frames.
6
+
7
+ Also, this library uses those key frames to start
8
+ decoding from there to the next one, obtaining
9
+ all the frames in between able to be read and
10
+ modified.
11
+
12
+ This cache system will look for the range of
13
+ frames that belong to the key frame related to the
14
+ frame we are requesting in the moment, keeping in
15
+ memory all those frames to be handled fast. It
16
+ will remove the old frames if needed to use only
17
+ the 'size' we set when creating it.
18
+ """
19
+ from collections import OrderedDict
20
+
21
+
22
+ class VideoFrameCache:
23
+ """
24
+ Class to manage the frames cache of a video
25
+ within a video reader instance.
26
+ """
27
+
28
+ @property
29
+ def container(
30
+ self
31
+ ) -> 'InputContainer':
32
+ """
33
+ Shortcut to the video reader instance container.
34
+ """
35
+ return self.reader_instance.container
36
+
37
+ @property
38
+ def stream(
39
+ self
40
+ ) -> 'VideoStream':
41
+ """
42
+ Shortcut to the video reader instance video
43
+ stream.
44
+ """
45
+ return self.reader_instance.video_stream
46
+
47
+ def __init__(
48
+ self,
49
+ reader: 'VideoReader',
50
+ size: int = 50
51
+ ):
52
+ self.reader_instance: 'VideoReader' = reader
53
+ """
54
+ The video reader instance this cache belongs
55
+ to.
56
+ """
57
+ self.cache: OrderedDict = OrderedDict()
58
+ """
59
+ The cache ordered dictionary.
60
+ """
61
+ self.size = size
62
+ """
63
+ The size (in number of frames) of the cache.
64
+ """
65
+ self.key_frames_pts: list[int] = []
66
+ """
67
+ The list that contains the timestamps of the
68
+ key frame packets, ordered from beginning to
69
+ end.
70
+ """
71
+
72
+ # Index key frames
73
+ for packet in self.container.demux(self.stream):
74
+ if packet.is_keyframe:
75
+ self.key_frames_pts.append(packet.pts)
76
+
77
+ self.container.seek(0)
78
+ # TODO: Maybe this is better (?)
79
+ #self.reader_instance.reset()
80
+
81
+ def _get_frame_by_pts(
82
+ self,
83
+ target_pts
84
+ ):
85
+ """
86
+ Get the frame that has the provided 'target_pts'.
87
+
88
+ This method will start decoding frames from the
89
+ nearest key frame (the one with the closest
90
+ pts) until the one requested is found. All those
91
+ frames will be stored in cache.
92
+
93
+ This method must be called when the frame
94
+ requested is not stored in the cache.
95
+ """
96
+ # Look for the nearest key frame at or before the target
97
+ key_frame_pts = max([
98
+ key_frame_pts
99
+ for key_frame_pts in self.key_frames_pts
100
+ if key_frame_pts <= target_pts
101
+ ])
102
+
103
+ # Go to the key frame that includes it
104
+ self.container.seek(key_frame_pts, stream = self.stream)
105
+
106
+ decoded = None
107
+ for frame in self.container.decode(self.stream):
108
+ # TODO: Could 'frame' be None (?)
109
+ pts = frame.pts
110
+ if pts is None:
111
+ continue
112
+
113
+ # Store in cache if needed
114
+ if pts not in self.cache:
115
+ # TODO: The 'format' must be dynamic
116
+ self.cache[pts] = frame.to_ndarray(format = "rgb24")
117
+
118
+ # Clean cache if full
119
+ if len(self.cache) > self.size:
120
+ self.cache.popitem(last = False)
121
+
122
+ if pts >= target_pts:
123
+ decoded = self.cache[pts]
124
+ break
125
+
126
+ return decoded
127
+
128
+ def get_frame(
129
+ self,
130
+ index: int
131
+ ) -> 'VideoFrame':
132
+ """
133
+ Get the frame with the given 'index' from
134
+ the cache.
135
+ """
136
+ # Convert the frame index to PTS (internal timestamps)
137
+ time_base = self.stream.time_base
138
+ fps = float(self.stream.average_rate)
139
+ target_pts = int(index / fps / time_base)
140
+
141
+ return (
142
+ self.cache[target_pts]
143
+ if target_pts in self.cache else
144
+ self._get_frame_by_pts(target_pts)
145
+ )
146
+
147
+ def clear(
148
+ self
149
+ ) -> 'VideoFrameCache':
150
+ """
151
+ Clear the cache by removing all the items.
152
+ """
153
+ self.cache.clear()
154
+
155
+ return self