yta-video-opengl 0.0.13__tar.gz → 0.0.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/PKG-INFO +1 -1
  2. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/pyproject.toml +1 -1
  3. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/timeline.py +1 -0
  4. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/track.py +1 -1
  5. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/video_on_track.py +1 -4
  6. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/reader/__init__.py +7 -25
  7. yta_video_opengl-0.0.14/src/yta_video_opengl/reader/cache.py +512 -0
  8. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/t.py +55 -7
  9. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/video.py +8 -3
  10. yta_video_opengl-0.0.13/src/yta_video_opengl/reader/cache.py +0 -529
  11. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/LICENSE +0 -0
  12. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/README.md +0 -0
  13. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/__init__.py +0 -0
  14. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/classes.py +0 -0
  15. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/complete/__init__.py +0 -0
  16. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/__init__.py +0 -0
  17. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/audio/__init__.py +0 -0
  18. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/video/__init__.py +0 -0
  19. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/nodes/video/opengl.py +0 -0
  20. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/tests.py +0 -0
  21. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/utils.py +0 -0
  22. {yta_video_opengl-0.0.13 → yta_video_opengl-0.0.14}/src/yta_video_opengl/writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: yta-video-opengl
3
- Version: 0.0.13
3
+ Version: 0.0.14
4
4
  Summary: Youtube Autonomous Video OpenGL Module
5
5
  Author: danialcala94
6
6
  Author-email: danielalcalavalera@gmail.com
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "yta-video-opengl"
3
- version = "0.0.13"
3
+ version = "0.0.14"
4
4
  description = "Youtube Autonomous Video OpenGL Module"
5
5
  authors = [
6
6
  {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -218,6 +218,7 @@ class Timeline:
218
218
  for t in get_ts(start, end, self.fps):
219
219
  frame = self.get_frame_at(t)
220
220
 
221
+ print(f'Getting t:{str(float(t))}')
221
222
  #print(frame)
222
223
 
223
224
  # We need to adjust our output elements to be
@@ -94,7 +94,7 @@ class _Part:
94
94
  # TODO: By now I'm raising exception to check if
95
95
  # this happens or not because I think it would
96
96
  # be malfunctioning
97
- raise Exception(f'Video is returning None frame at t={str(t)}.')
97
+ raise Exception(f'Video is returning None video frame at t={str(t)}.')
98
98
 
99
99
  return frame
100
100
 
@@ -150,10 +150,7 @@ class VideoOnTrack:
150
150
  )
151
151
 
152
152
  for frame in frames:
153
- # TODO: I am generating a tuple in the
154
- # src\yta_video_opengl\reader\cache.py
155
- # get_frames method... maybe remove it (?)
156
- yield frame[0]
153
+ yield frame
157
154
 
158
155
  # # TODO: This was a simple return before
159
156
  # return (
@@ -620,17 +620,6 @@ class VideoReader:
620
620
  ):
621
621
  yield frame
622
622
 
623
- # TODO: Will we use this (?)
624
- def get_frame(
625
- self,
626
- index: int
627
- ) -> 'VideoFrame':
628
- """
629
- Get the video frame with the given 'index',
630
- using the video cache system.
631
- """
632
- return self.video_cache.get_frame(index)
633
-
634
623
  def get_frame_from_t(
635
624
  self,
636
625
  t: Union[int, float, Fraction]
@@ -639,18 +628,8 @@ class VideoReader:
639
628
  Get the video frame with the given 't' time
640
629
  moment, using the video cache system.
641
630
  """
642
- return self.video_cache.get_frame_from_t(t)
631
+ return self.video_cache.get_video_frame(t)
643
632
 
644
- def get_audio_frame(
645
- self,
646
- index: int
647
- ) -> 'AudioFrame':
648
- """
649
- Get the audio frame with the given 'index',
650
- using the audio cache system.
651
- """
652
- return self.audio_cache.get_frame(index)
653
-
654
633
  def get_audio_frame_from_t(
655
634
  self,
656
635
  t: Union[int, float, Fraction]
@@ -659,7 +638,7 @@ class VideoReader:
659
638
  Get the audio frame with the given 't' time
660
639
  moment, using the audio cache system.
661
640
  """
662
- return self.audio_cache.get_frame_from_t(t)
641
+ return self.audio_cache.get_audio_frame_from_t(t)
663
642
 
664
643
  def get_audio_frames_from_t(
665
644
  self,
@@ -677,7 +656,10 @@ class VideoReader:
677
656
  with more than 1 audio frame).
678
657
  """
679
658
  t: T = T.from_fps(t, self.fps)
680
- for frame in self.audio_cache.get_frames(t.truncated, t.next(1).truncated):
659
+ # We want all the audios that must be played
660
+ # during the video frame that starts in the
661
+ # 't' time moment
662
+ for frame in self.audio_cache.get_audio_frames(t.truncated, t.next(1).truncated):
681
663
  yield frame
682
664
 
683
665
  def get_frames(
@@ -701,7 +683,7 @@ class VideoReader:
701
683
  Iterator to get the audio frames in between
702
684
  the provided 'start' and 'end' time moments.
703
685
  """
704
- for frame in self.audio_cache.get_frames(start, end):
686
+ for frame in self.audio_cache.get_audio_frames(start, end):
705
687
  yield frame
706
688
 
707
689
  def close(
@@ -0,0 +1,512 @@
1
+ """
2
+ The pyav container stores the information based
3
+ on the packets timestamps (called 'pts'). Some
4
+ of the packets are considered key_frames because
5
+ they include those key frames.
6
+
7
+ Also, this library uses those key frames to start
8
+ decodifying from there to the next one, obtaining
9
+ all the frames in between able to be read and
10
+ modified.
11
+
12
+ This cache system will look for the range of
13
+ frames that belong to the key frame related to the
14
+ frame we are requesting in the moment, keeping in
15
+ memory all those frames to be handled fast. It
16
+ will remove the old frames if needed to use only
17
+ the 'size' we set when creating it.
18
+
19
+ A stream can have 'fps = 60' but use another
20
+ different time base that make the pts values go 0,
21
+ 256, 512... for example. The 'time_base' is the
22
+ only accurate way to obtain the pts.
23
+
24
+ Feel free to move this explanation to other
25
+ place, its about the duration.
26
+
27
+ The stream 'duration' parameter is measured
28
+ on ticks, the amount of ticks that the
29
+ stream lasts. Here below is an example:
30
+
31
+ - Duration raw: 529200
32
+ - Time base: 1/44100
33
+ - Duration (seconds): 12.0
34
+ """
35
+ from yta_video_opengl.t import T
36
+ from av.container import InputContainer
37
+ from av.video.stream import VideoStream
38
+ from av.audio.stream import AudioStream
39
+ from av.video.frame import VideoFrame
40
+ from av.audio.frame import AudioFrame
41
+ from av.packet import Packet
42
+ from yta_validation.parameter import ParameterValidator
43
+ from yta_validation import PythonValidator
44
+ from quicktions import Fraction
45
+ from collections import OrderedDict
46
+ from typing import Union
47
+
48
+ import numpy as np
49
+ import math
50
+
51
+
52
+ # TODO: This is not actually a Video
53
+ # cache, is a FrameCache because we
54
+ # create one for video but another
55
+ # one for audio. Rename it please.
56
+ class VideoFrameCache:
57
+ """
58
+ Class to manage the frames cache of a video
59
+ within a video reader instance.
60
+ """
61
+
62
+ @property
63
+ def fps(
64
+ self
65
+ ) -> Union[int, Fraction, None]:
66
+ """
67
+ The frames per second.
68
+ """
69
+ return (
70
+ self.stream.average_rate
71
+ if self.stream.type == 'video' else
72
+ self.stream.rate
73
+ )
74
+
75
+ @property
76
+ def time_base(
77
+ self
78
+ ) -> Union[Fraction, None]:
79
+ """
80
+ The time base of the stream.
81
+ """
82
+ return self.stream.time_base
83
+
84
+ def __init__(
85
+ self,
86
+ container: InputContainer,
87
+ stream: Union[VideoStream, AudioStream],
88
+ size: Union[int, None] = None
89
+ ):
90
+ ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
91
+ ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
92
+ ParameterValidator.validate_positive_int('size', size)
93
+
94
+ self.container: InputContainer = container
95
+ """
96
+ The pyav container.
97
+ """
98
+ self.stream: Union[VideoStream, AudioStream] = stream
99
+ """
100
+ The pyav stream.
101
+ """
102
+ self.cache: OrderedDict = OrderedDict()
103
+ """
104
+ The cache ordered dictionary.
105
+ """
106
+ self.size: Union[int, None] = size
107
+ """
108
+ The size (in number of frames) of the cache.
109
+ """
110
+ self.key_frames_pts: list[int] = []
111
+ """
112
+ The list that contains the timestamps of the
113
+ key frame packets, ordered from begining to
114
+ end.
115
+ """
116
+
117
+ # TODO: This is new, remove this comment if
118
+ # it is ok
119
+ # TODO: This way of obtaining the duration
120
+ # in ticks must be a utils
121
+ self.frame_duration: int = (
122
+ self.stream.duration / self.stream.frames
123
+ if PythonValidator.is_instance_of(stream, VideoStream) else
124
+ # TODO: Is this below ok (?)
125
+ self.stream.frames
126
+ )
127
+ """
128
+ The duration (in ticks) of the frame, that
129
+ is the step between the different pts.
130
+ """
131
+ self._last_packet_accessed: Union[Packet, None] = None
132
+ """
133
+ The last packet that has been accessed
134
+ """
135
+ self._last_frame_read: Union[VideoFrame, AudioFrame, None] = None
136
+ """
137
+ The last frame we have read when decoding.
138
+ Useful to avoid seeking all the time when we
139
+ don't need it.
140
+ """
141
+
142
+ self._prepare()
143
+
144
+ def _prepare(
145
+ self
146
+ ):
147
+ # Index key frames
148
+ for packet in self.container.demux(self.stream):
149
+ if packet.is_keyframe:
150
+ self.key_frames_pts.append(packet.pts)
151
+
152
+ # The cache size will be auto-calculated to
153
+ # use the amount of frames of the biggest
154
+ # interval of frames that belongs to a key
155
+ # frame, or a value by default
156
+ # TODO: Careful if this is too big
157
+ fps = (
158
+ float(self.stream.average_rate)
159
+ if PythonValidator.is_instance_of(self.stream, VideoStream) else
160
+ float(self.stream.rate)
161
+ )
162
+ # Intervals, but in number of frames
163
+ intervals = np.diff(
164
+ # Intervals of time between keyframes
165
+ np.array(self.key_frames_pts) * self.time_base
166
+ ) * fps
167
+
168
+ self.size = (
169
+ math.ceil(np.max(intervals))
170
+ if intervals.size > 0 else
171
+ (
172
+ self.size or
173
+ # TODO: Make this 'default_size' a setting or something
174
+ 60
175
+ )
176
+ )
177
+
178
+ self.container.seek(0)
179
+
180
+ def _get_nearest_keyframe_pts(
181
+ self,
182
+ pts: int
183
+ ):
184
+ """
185
+ Get the fps of the keyframe that is the
186
+ nearest to the provided 'pts'. Useful to
187
+ seek and start decoding frames from that
188
+ keyframe.
189
+ """
190
+ return max([
191
+ key_frame_pts
192
+ for key_frame_pts in self.key_frames_pts
193
+ if key_frame_pts <= pts
194
+ ])
195
+
196
+ def _store_frame_in_cache(
197
+ self,
198
+ frame: Union[VideoFrame, AudioFrame]
199
+ ) -> Union[VideoFrame, AudioFrame]:
200
+ """
201
+ Store the provided 'frame' in cache if it
202
+ is not on it, removing the first item of
203
+ the cache if full.
204
+ """
205
+ if frame.pts not in self.cache:
206
+ self.cache[frame.pts] = frame
207
+
208
+ # Clean cache if full
209
+ if len(self.cache) > self.size:
210
+ self.cache.popitem(last = False)
211
+
212
+ return frame
213
+
214
+ def _seek(
215
+ self,
216
+ pts: int
217
+ ):
218
+ """
219
+ Seek to the given 'pts' only if it is not
220
+ the next 'pts' to the last read, and it
221
+ will also apply a pad to avoid problems
222
+ when reading audio frames.
223
+
224
+ TODO: Apply the padding only to audio
225
+ frame reading (?)
226
+ """
227
+ # I found that it is recommended to
228
+ # read ~100ms before the pts we want to
229
+ # actually read so we obtain the frames
230
+ # clean (this is important in audio)
231
+ # TODO: This is maybe too much for a
232
+ # video and not needed
233
+ pts_pad = int(0.1 / self.time_base)
234
+ self.container.seek(
235
+ offset = max(0, pts - pts_pad),
236
+ stream = self.stream
237
+ )
238
+
239
+ def get_video_frame(
240
+ self,
241
+ t: Union[int, float, Fraction]
242
+ ) -> VideoFrame:
243
+ """
244
+ Get the video frame that is in the 't'
245
+ time moment provided.
246
+ """
247
+ for frame in self.get_video_frames(t):
248
+ return frame
249
+
250
+ def get_video_frames(
251
+ self,
252
+ start: Union[int, float, Fraction] = 0,
253
+ end: Union[int, float, Fraction, None] = None
254
+ ):
255
+ """
256
+ Get all the frames in the range between
257
+ the provided 'start' and 'end' time in
258
+ seconds.
259
+
260
+ This method is an iterator that yields
261
+ the frame, its t and its index.
262
+ """
263
+ start = T(start, self.time_base).truncated
264
+ end = (
265
+ T(end, self.time_base).truncated
266
+ if end is not None else
267
+ # The next frame
268
+ start + (1 / self.fps)
269
+ )
270
+
271
+ key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
272
+
273
+ if (
274
+ self._last_packet_accessed is None or
275
+ self._last_packet_accessed.pts != key_frame_pts
276
+ ):
277
+ self._seek(key_frame_pts)
278
+
279
+ for packet in self.container.demux(self.stream):
280
+ if packet.pts is None:
281
+ continue
282
+
283
+ self._last_packet_accessed = packet
284
+
285
+ for frame in packet.decode():
286
+ if frame.pts is None:
287
+ continue
288
+
289
+ # We store all the frames in cache
290
+ self._store_frame_in_cache(frame)
291
+
292
+ current_frame_time = frame.pts * self.time_base
293
+
294
+ # We want the range [start, end)
295
+ if start <= current_frame_time < end:
296
+ yield frame
297
+
298
+ if current_frame_time >= end:
299
+ break
300
+
301
+ def get_audio_frame_from_t(
302
+ self,
303
+ t: Union[int, float, Fraction]
304
+ ):
305
+ """
306
+ Get the single audio frame that must be
307
+ played at the 't' time moment provided.
308
+ This method is useful to get the single
309
+ audio frame that we need to combine
310
+ when using it in a composition.
311
+
312
+ TODO: Are we actually using this method (?)
313
+ """
314
+ t: T = T(t, self.time_base)
315
+ # We need the just one audio frame
316
+ for frame in self.get_audio_frames(t.truncated, t.next(1).truncated):
317
+ return frame
318
+
319
+ def get_audio_frames_from_t(
320
+ self,
321
+ t: Union[int, float, Fraction]
322
+ ):
323
+ """
324
+ Get all the audio frames that must be
325
+ played at the 't' time moment provided.
326
+ """
327
+ for frame in self.get_audio_frames(t):
328
+ yield frame
329
+
330
+ def get_audio_frames(
331
+ self,
332
+ start: Union[int, float, Fraction] = 0,
333
+ end: Union[int, float, Fraction, None] = None
334
+ ):
335
+ """
336
+ Get all the audio frames in the range
337
+ between the provided 'start' and 'end'
338
+ time (in seconds).
339
+
340
+ This method is an iterator that yields
341
+ the frame, its t and its index.
342
+ """
343
+ # TODO: Is this ok? We are trying to obtain
344
+ # the audio frames for a video frame, so
345
+ # should we use the 'self.time_base' to
346
+ # truncate (?)
347
+ start = T(start, self.time_base).truncated
348
+ end = (
349
+ T(end, self.time_base).truncated
350
+ if end is not None else
351
+ start + (1 / self.fps)
352
+ )
353
+
354
+ key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
355
+
356
+ if (
357
+ self._last_packet_accessed is None or
358
+ self._last_packet_accessed.pts != key_frame_pts
359
+ ):
360
+ self._seek(key_frame_pts)
361
+
362
+ for packet in self.container.demux(self.stream):
363
+ if packet.pts is None:
364
+ continue
365
+
366
+ self._last_packet_accessed = packet
367
+
368
+ for frame in packet.decode():
369
+ if frame.pts is None:
370
+ continue
371
+
372
+ # We store all the frames in cache
373
+ self._store_frame_in_cache(frame)
374
+
375
+ current_frame_time = frame.pts * self.time_base
376
+ # End is not included, its the start of the
377
+ # next frame actually
378
+ frame_end = current_frame_time + (frame.samples / self.stream.sample_rate)
379
+
380
+ # For the next comments imagine we are looking
381
+ # for the [1.0, 2.0) audio time range
382
+ # Previous frame and nothing is inside
383
+ if frame_end <= start:
384
+ # From 0.25 to 1.0
385
+ continue
386
+
387
+ # We finished, nothing is inside and its after
388
+ if current_frame_time >= end:
389
+ # From 2.0 to 2.75
390
+ return
391
+
392
+ # If we need audio from 1 to 2, audio is:
393
+ # - from 0 to 0.75 (Not included, omit)
394
+ # - from 0.5 to 1.5 (Included, take 1.0 to 1.5)
395
+ # - from 0.5 to 2.5 (Included, take 1.0 to 2.0)
396
+ # - from 1.25 to 1.5 (Included, take 1.25 to 1.5)
397
+ # - from 1.25 to 2.5 (Included, take 1.25 to 2.0)
398
+ # - from 2.5 to 3.5 (Not included, omit)
399
+
400
+ # Here below, at least a part is inside
401
+ if (
402
+ current_frame_time < start and
403
+ frame_end > start
404
+ ):
405
+ # A part at the end is included
406
+ end_time = (
407
+ # From 0.5 to 1.5 0> take 1.0 to 1.5
408
+ frame_end
409
+ if frame_end <= end else
410
+ # From 0.5 to 2.5 => take 1.0 to 2.0
411
+ end
412
+ )
413
+ #print('A part at the end is included.')
414
+ frame = trim_audio_frame(
415
+ frame = frame,
416
+ start = start,
417
+ end = end_time,
418
+ time_base = self.time_base
419
+ )
420
+ elif (
421
+ current_frame_time >= start and
422
+ current_frame_time < end
423
+ ):
424
+ end_time = (
425
+ # From 1.25 to 1.5 => take 1.25 to 1.5
426
+ frame_end
427
+ if frame_end <= end else
428
+ # From 1.25 to 2.5 => take 1.25 to 2.0
429
+ end
430
+ )
431
+ # A part at the begining is included
432
+ #print('A part at the begining is included.')
433
+ frame = trim_audio_frame(
434
+ frame = frame,
435
+ start = current_frame_time,
436
+ end = end_time,
437
+ time_base = self.time_base
438
+ )
439
+
440
+ # If the whole frame is in, past as it is
441
+ yield frame
442
+
443
+ def clear(
444
+ self
445
+ ) -> 'VideoFrameCache':
446
+ """
447
+ Clear the cache by removing all the items.
448
+ """
449
+ self.cache.clear()
450
+
451
+ return self
452
+
453
+ def trim_audio_frame(
454
+ frame: AudioFrame,
455
+ start: Union[int, float, Fraction],
456
+ end: Union[int, float, Fraction],
457
+ time_base: Fraction
458
+ ) -> AudioFrame:
459
+ """
460
+ Trim an audio frame to obtain the part between
461
+ [start, end), that is provided in seconds.
462
+ """
463
+ # (channels, n_samples)
464
+ samples = frame.to_ndarray()
465
+ n_samples = samples.shape[1]
466
+
467
+ # In seconds
468
+ frame_start = frame.pts * float(time_base)
469
+ frame_end = frame_start + (n_samples / frame.sample_rate)
470
+
471
+ # Overlapping
472
+ cut_start = max(frame_start, float(start))
473
+ cut_end = min(frame_end, float(end))
474
+
475
+ if cut_start >= cut_end:
476
+ # No overlapping
477
+ return None
478
+
479
+ # To sample indexes
480
+ start_index = int(round((cut_start - frame_start) * frame.sample_rate))
481
+ end_index = int(round((cut_end - frame_start) * frame.sample_rate))
482
+
483
+ new_frame = AudioFrame.from_ndarray(
484
+ # end_index is not included: so [start, end)
485
+ array = samples[:, start_index:end_index],
486
+ format = frame.format,
487
+ layout = frame.layout
488
+ )
489
+
490
+ # Set attributes
491
+ new_frame.sample_rate = frame.sample_rate
492
+ new_frame.time_base = time_base
493
+ new_frame.pts = int(round(cut_start / float(time_base)))
494
+
495
+ return new_frame
496
+
497
+
498
+
499
+ """
500
+ There is a way of editing videos being
501
+ able to arbitrary access to frames, that
502
+ is transforming the source videos to
503
+ intra-frame videos. This is a ffmpeg
504
+ command that can do it:
505
+
506
+ - `ffmpeg -i input.mp4 -c:v libx264 -x264opts keyint=1 -preset fast -crf 18 -c:a copy output_intra.mp4`
507
+
508
+ Once you have the 'output_intra.mp4',
509
+ each packet can decodify its frame
510
+ depending not on the previous one, being
511
+ able to seek and jump easy.
512
+ """
@@ -1,3 +1,14 @@
1
+ """
2
+ This is an example of what a video has:
3
+ - fps = 60
4
+ - time_base = 1 / 15360
5
+ - tick = fps * time_base = 256
6
+
7
+ So, the first pts is 0 and the second
8
+ one is 256. The frame 16 will be 3840,
9
+ that is 256 * 15 (because first index
10
+ is 0).
11
+ """
1
12
  from yta_validation.parameter import ParameterValidator
2
13
  from yta_validation import PythonValidator
3
14
  from yta_validation.number import NumberValidator
@@ -36,7 +47,7 @@ class T:
36
47
  The 't' but as a Fraction that is multiple
37
48
  of the given 'time_base' and rounded (the
38
49
  value could be the same as truncated if it
39
- is closer to the previou value).
50
+ is closer to the previous value).
40
51
  """
41
52
  return round_t(self._t, self.time_base, do_truncate = False)
42
53
 
@@ -96,20 +107,57 @@ class T:
96
107
  """
97
108
  return T(self.truncated + n * self.time_base, self.time_base)
98
109
 
99
- # TODO: Maybe its better to make the '__init__'
100
- # receive the fps and create the 'from_time_base'
101
- # because I think we will provide the fps or the
102
- # sample rate more often
110
+ def previous(
111
+ self,
112
+ n: int = 1
113
+ ) -> 'T':
114
+ """
115
+ Get the value that is 'n' times before the
116
+ 'truncated' property of this instance.
117
+
118
+ Useful when you need the previous value to
119
+ check if the current is the next one or
120
+ similar.
121
+
122
+ Be careful, if the 'truncated' value is 0
123
+ this will give you an unexpected negative
124
+ value.
125
+ """
126
+ return T(self.truncated - n * self.time_base, self.time_base)
127
+
103
128
  @staticmethod
104
129
  def from_fps(
105
130
  t: Union[int, float, Fraction],
106
131
  fps: Union[int, float, Fraction]
107
- ):
132
+ ) -> 'T':
108
133
  """
109
134
  Get the instance but providing the 'fps'
110
- (or sample rate) value directly.
135
+ (or sample rate) value directly, that will
136
+ be turned into a time base.
111
137
  """
112
138
  return T(t, fps_to_time_base(fps))
139
+
140
+ @staticmethod
141
+ def from_pts(
142
+ pts: int,
143
+ time_base: Fraction
144
+ ) -> 'T':
145
+ """
146
+ Get the instance but providing the 'pts'
147
+ and the 'time_base'.
148
+ """
149
+ return T(pts * time_base, time_base)
150
+
151
+
152
+ # TODO: Careful with this below
153
+ """
154
+ To obtain the pts step, or frame duration in
155
+ ticks, you need to apply 2 formulas that are
156
+ different according to if the frame is video
157
+ or audio:
158
+ - Audio: .samples
159
+ - Video: int(round((1 / .fps) / .time_base))
160
+ """
113
161
 
114
162
  def get_ts(
115
163
  start: Union[int, float, Fraction],
@@ -182,15 +182,20 @@ class Video:
182
182
  Get the video frame with the given 't' time
183
183
  moment, using the video cache system.
184
184
  """
185
- return self.reader.video_cache.get_frame_from_t(self._get_real_t(t))
186
-
185
+ return self.reader.video_cache.get_video_frame(self._get_real_t(t))
186
+
187
187
  def get_audio_frame_from_t(
188
188
  self,
189
189
  t: Union[int, float, Fraction]
190
190
  ) -> 'AudioFrame':
191
191
  """
192
192
  Get the audio frame with the given 't' time
193
- moment, using the audio cache system.
193
+ moment, using the audio cache system. This
194
+ method is useful when we need to combine
195
+ many different frames so we can obtain them
196
+ one by one.
197
+
198
+ TODO: Is this actually necessary (?)
194
199
  """
195
200
  return self.reader.audio_cache.get_frame_from_t(self._get_real_t(t))
196
201
 
@@ -1,529 +0,0 @@
1
- """
2
- The pyav container stores the information based
3
- on the packets timestamps (called 'pts'). Some
4
- of the packets are considered key_frames because
5
- they include those key frames.
6
-
7
- Also, this library uses those key frames to start
8
- decodifying from there to the next one, obtaining
9
- all the frames in between able to be read and
10
- modified.
11
-
12
- This cache system will look for the range of
13
- frames that belong to the key frame related to the
14
- frame we are requesting in the moment, keeping in
15
- memory all those frames to be handled fast. It
16
- will remove the old frames if needed to use only
17
- the 'size' we set when creating it.
18
-
19
- A stream can have 'fps = 60' but use another
20
- different time base that make the pts values go 0,
21
- 256, 512... for example. The 'time_base' is the
22
- only accurate way to obtain the pts.
23
- """
24
- from yta_video_opengl.utils import t_to_pts, pts_to_t, pts_to_index, index_to_pts
25
- from yta_video_opengl.t import T
26
- from av.container import InputContainer
27
- from av.video.stream import VideoStream
28
- from av.audio.stream import AudioStream
29
- from av.video.frame import VideoFrame
30
- from av.audio.frame import AudioFrame
31
- from yta_validation.parameter import ParameterValidator
32
- from yta_validation import PythonValidator
33
- from quicktions import Fraction
34
- from collections import OrderedDict
35
- from typing import Union
36
-
37
- import numpy as np
38
- import av
39
- import math
40
-
41
-
42
- # TODO: This is not actually a Video
43
- # cache, is a FrameCache because we
44
- # create one for video but another
45
- # one for audio. Rename it please.
46
- class VideoFrameCache:
47
- """
48
- Class to manage the frames cache of a video
49
- within a video reader instance.
50
- """
51
-
52
- @property
53
- def fps(
54
- self
55
- ) -> float:
56
- """
57
- The frames per second as a float.
58
- """
59
- return (
60
- float(self.stream.average_rate)
61
- if self.stream.type == 'video' else
62
- float(self.stream.rate)
63
- )
64
-
65
- @property
66
- def time_base(
67
- self
68
- ) -> Union[Fraction, None]:
69
- """
70
- The time base of the stream.
71
- """
72
- return self.stream.time_base
73
-
74
- def __init__(
75
- self,
76
- container: InputContainer,
77
- stream: Union[VideoStream, AudioStream],
78
- size: Union[int, None] = None
79
- ):
80
- ParameterValidator.validate_mandatory_instance_of('container', container, InputContainer)
81
- ParameterValidator.validate_mandatory_instance_of('stream', stream, [VideoStream, AudioStream])
82
- ParameterValidator.validate_positive_int('size', size)
83
-
84
- self.container: InputContainer = container
85
- """
86
- The pyav container.
87
- """
88
- self.stream: Union[VideoStream, AudioStream] = stream
89
- """
90
- The pyav stream.
91
- """
92
- self.cache: OrderedDict = OrderedDict()
93
- """
94
- The cache ordered dictionary.
95
- """
96
- self.size: Union[int, None] = size
97
- """
98
- The size (in number of frames) of the cache.
99
- """
100
- self.key_frames_pts: list[int] = []
101
- """
102
- The list that contains the timestamps of the
103
- key frame packets, ordered from begining to
104
- end.
105
- """
106
-
107
- self._prepare()
108
-
109
- def _prepare(
110
- self
111
- ):
112
- # Index key frames
113
- for packet in self.container.demux(self.stream):
114
- if packet.is_keyframe:
115
- self.key_frames_pts.append(packet.pts)
116
-
117
- # The cache size will be auto-calculated to
118
- # use the amount of frames of the biggest
119
- # interval of frames that belongs to a key
120
- # frame, or a value by default
121
- # TODO: Careful if this is too big
122
- fps = (
123
- float(self.stream.average_rate)
124
- if PythonValidator.is_instance_of(self.stream, VideoStream) else
125
- float(self.stream.rate)
126
- )
127
- # Intervals, but in number of frames
128
- intervals = np.diff(
129
- # Intervals of time between keyframes
130
- np.array(self.key_frames_pts) * self.time_base
131
- ) * fps
132
-
133
- self.size = (
134
- math.ceil(np.max(intervals))
135
- if intervals.size > 0 else
136
- (
137
- self.size or
138
- # TODO: Make this 'default_size' a setting or something
139
- 60
140
- )
141
- )
142
-
143
- self.container.seek(0)
144
-
145
- def _get_nearest_keyframe_pts(
146
- self,
147
- pts: int
148
- ):
149
- """
150
- Get the fps of the keyframe that is the
151
- nearest to the provided 'pts'. Useful to
152
- seek and start decoding frames from that
153
- keyframe.
154
- """
155
- return max([
156
- key_frame_pts
157
- for key_frame_pts in self.key_frames_pts
158
- if key_frame_pts <= pts
159
- ])
160
-
161
- def _store_frame_in_cache(
162
- self,
163
- frame: Union[VideoFrame, AudioFrame]
164
- ) -> Union[VideoFrame, AudioFrame]:
165
- """
166
- Store the provided 'frame' in cache if it
167
- is not on it, removing the first item of
168
- the cache if full.
169
- """
170
- if frame.pts not in self.cache:
171
- self.cache[frame.pts] = frame
172
-
173
- # Clean cache if full
174
- if len(self.cache) > self.size:
175
- self.cache.popitem(last = False)
176
-
177
- return frame
178
-
179
- def get_frame_from_pts(
180
- self,
181
- pts: int
182
- ) -> Union[VideoFrame, AudioFrame, None]:
183
- """
184
- Get the frame that has the provided 'pts'.
185
-
186
- This method will start decoding frames from the
187
- most near key frame (the one with the nearer
188
- pts) until the one requested is found. All those
189
- frames will be stored in cache.
190
-
191
- This method must be called when the frame
192
- requested is not stored in the caché.
193
- """
194
- if pts in self.cache:
195
- return self.cache[pts]
196
-
197
- # Look for the most near key frame
198
- key_frame_pts = self._get_nearest_keyframe_pts(pts)
199
-
200
- # Go to the key frame that includes it
201
- # but I read that it is recommended to
202
- # read ~100ms before the pts we want to
203
- # actually read so we obtain the frames
204
- # clean (this is important in audio)
205
- # TODO: This code is repeated, refactor
206
- pts_pad = int(0.1 / self.time_base)
207
- self.container.seek(
208
- offset = max(0, key_frame_pts - pts_pad),
209
- stream = self.stream
210
- )
211
-
212
- decoded = None
213
- for frame in self.container.decode(self.stream):
214
- # TODO: Could 'frame' be None (?)
215
- if frame.pts is None:
216
- continue
217
-
218
- # Store in cache if needed
219
- self._store_frame_in_cache(frame)
220
-
221
- """
222
- The 'frame.pts * frame.time_base' will give
223
- us the index of the frame, and actually the
224
- 'pts' que are looking for seems to be the
225
- index and not a pts.
226
-
227
- TODO: Review all this in all the logic
228
- please.
229
- """
230
- if frame.pts >= pts:
231
- decoded = self.cache[frame.pts]
232
- break
233
-
234
- # TODO: Is this working? We need previous
235
- # frames to be able to decode...
236
- return decoded
237
-
238
- # TODO: I'm not using this method...
239
- def get_frame(
240
- self,
241
- index: int
242
- ) -> Union[VideoFrame, AudioFrame]:
243
- """
244
- Get the frame with the given 'index' from
245
- the cache.
246
- """
247
- # TODO: Maybe we can accept 'pts' also
248
- pts = index_to_pts(index, self.time_base, self.fps)
249
-
250
- return (
251
- self.cache[pts]
252
- if pts in self.cache else
253
- self.get_frame_from_pts(pts)
254
- )
255
-
256
- def get_frame_from_t(
257
- self,
258
- t: Union[int, float, Fraction]
259
- ) -> Union[VideoFrame, AudioFrame]:
260
- """
261
- Get the frame with the given 't' time moment
262
- from the cache.
263
- """
264
- return self.get_frame_from_pts(T(t, self.time_base).truncated_pts)
265
-
266
- def get_frames(
267
- self,
268
- start: Union[int, float, Fraction] = 0,
269
- end: Union[int, float, Fraction, None] = None
270
- ):
271
- """
272
- Get all the frames in the range between
273
- the provided 'start' and 'end' time in
274
- seconds.
275
-
276
- This method is an iterator that yields
277
- the frame, its t and its index.
278
- """
279
- # We use the cache as iterator if all the frames
280
- # requested are stored there
281
- # TODO: I think this is not ok... I will never
282
- # have all the pts form here stored, as they come
283
- # from 't' that is different...
284
-
285
- """
286
- Feel free to move this explanation to other
287
- place, its about the duration.
288
-
289
- The stream 'duration' parameter is measured
290
- on ticks, the amount of ticks that the
291
- stream lasts. Here below is an example:
292
-
293
- - Duration raw: 529200
294
- - Time base: 1/44100
295
- - Duration (seconds): 12.0
296
- """
297
-
298
- # The 'duration' is on pts ticks
299
- duration = float(self.stream.duration * self.time_base)
300
- # TODO: I think it would be better to
301
- # receive and work with pts instead of
302
- # 't' time moments...
303
- # pts_list = [
304
- # t_to_pts(t, self.time_base)
305
- # for t in T.get_frame_indexes(duration, self.fps, start, end)
306
- # ]
307
-
308
- # if all(
309
- # pts in self.cache
310
- # for pts in pts_list
311
- # ):
312
- # for pts in pts_list:
313
- # yield self.cache[pts]
314
-
315
- # If not all, we ignore the cache because we
316
- # need to decode and they are all consecutive
317
- start = T(start, self.time_base).truncated_pts
318
- end = (
319
- T(end, self.time_base).truncated_pts
320
- if end is not None else
321
- None
322
- )
323
- key_frame_pts = self._get_nearest_keyframe_pts(start)
324
-
325
- # Go to the key frame that includes it
326
- # but I read that it is recommended to
327
- # read ~100ms before the pts we want to
328
- # actually read so we obtain the frames
329
- # clean (this is important in audio)
330
- # TODO: This code is repeated, refactor
331
- pts_pad = int(0.1 / self.time_base)
332
- self.container.seek(
333
- offset = max(0, key_frame_pts - pts_pad),
334
- stream = self.stream
335
- )
336
-
337
- for packet in self.container.demux(self.stream):
338
- for frame in packet.decode():
339
- if frame.pts is None:
340
- continue
341
-
342
- # We store all the frames in cache
343
- self._store_frame_in_cache(frame)
344
-
345
- frame_end_pts = frame.pts + int(frame.samples * (1 / self.stream.sample_rate) / self.time_base)
346
- #frame_end_pts = frame.pts + int(frame.samples)
347
- #frame_end_pts = frame.pts + int(frame.samples / (self.stream.sample_rate * self.time_base))
348
-
349
- # For the next comments imagine we are looking
350
- # for the [1.0, 2.0) audio time range
351
- # Previous frame and nothing is inside
352
- if frame_end_pts <= start:
353
- # From 0.25 to 1.0
354
- continue
355
-
356
- # We finished, nothing is inside and its after
357
- if (
358
- end is not None and
359
- frame.pts >= end
360
- ):
361
- # From 2.0 to 2.75
362
- return
363
-
364
- # We need: from 1 to 2
365
- # Audio is:
366
- # - from 0 to 0.75 (Not included, omit)
367
- # - from 0.5 to 1.5 (Included, take 1.0 to 1.5)
368
- # - from 0.5 to 2.5 (Included, take 1.0 to 2.0)
369
- # - from 1.25 to 1.5 (Included, take 1.25 to 1.5)
370
- # - from 1.25 to 2.5 (Included, take 1.25 to 2.0)
371
- # - from 2.5 to 3.5 (Not included, omit)
372
-
373
- # Here below, at least a part is inside
374
- if (
375
- frame.pts < start and
376
- frame_end_pts > start
377
- ):
378
- # A part at the end is included
379
- end_time = (
380
- # From 0.5 to 1.5 => take 1.0 to 1.5
381
- frame_end_pts
382
- if frame_end_pts <= end else
383
- # From 0.5 to 2.5 => take 1.0 to 2.0
384
- end
385
- )
386
- #print('A part at the end is included.')
387
- # TODO: I'm using too much 'pts_to_t'
388
- frame = trim_audio_frame_pts(
389
- frame = frame,
390
- start_pts = start,
391
- end_pts = end_time,
392
- time_base = self.time_base
393
- )
394
- elif (
395
- frame.pts >= start and
396
- frame.pts < end
397
- ):
398
- end_time = (
399
- # From 1.25 to 1.5 => take 1.25 to 1.5
400
- frame_end_pts
401
- if frame_end_pts <= end else
402
- # From 1.25 to 2.5 => take 1.25 to 2.0
403
- end
404
- )
405
- # A part at the beginning is included
406
- #print('A part at the beginning is included.')
407
- # TODO: I'm using too much 'pts_to_t'
408
- frame = trim_audio_frame_pts(
409
- frame = frame,
410
- start_pts = frame.pts,
411
- end_pts = end_time,
412
- time_base = self.time_base
413
- )
414
-
415
- # If the whole frame is in, pass it as it is
416
-
417
- # TODO: Maybe send a @dataclass instead (?)
418
- # TODO: Do I really need these 't' and 'index' (?)
419
- yield (
420
- frame,
421
- pts_to_t(frame.pts, self.time_base),
422
- pts_to_index(frame.pts, self.time_base, self.fps)
423
- )
424
-
425
- def clear(
426
- self
427
- ) -> 'VideoFrameCache':
428
- """
429
- Clear the cache by removing all the items.
430
- """
431
- self.cache.clear()
432
-
433
- return self
434
-
435
-
436
- # TODO: Move this to a utils when refactored
437
- def trim_audio_frame_pts(
438
- frame: av.AudioFrame,
439
- start_pts: int,
440
- end_pts: int,
441
- time_base
442
- ) -> av.AudioFrame:
443
- """
444
- Trim an AudioFrame to keep only the part between [start_pts, end_pts] in ticks (PTS).
445
- """
446
- samples = frame.to_ndarray() # (channels, n_samples)
447
- n_channels, n_samples = samples.shape
448
- sr = frame.sample_rate
449
-
450
- #frame_end_pts = frame.pts + int((n_samples / sr) / time_base)
451
- # TODO: This could be wrong
452
- frame_end_pts = frame.pts + int(frame.samples)
453
-
454
- # overlap in PTS
455
- cut_start_pts = max(frame.pts, start_pts)
456
- cut_end_pts = min(frame_end_pts, end_pts)
457
-
458
- if cut_start_pts >= cut_end_pts:
459
- raise Exception('Oops...')
460
- return None # no overlap
461
-
462
- # convert to sample indices (ticks → seconds → samples)
463
- cut_start_time = (cut_start_pts - frame.pts) * time_base
464
- cut_end_time = (cut_end_pts - frame.pts) * time_base
465
-
466
- start_idx = int(cut_start_time * sr)
467
- end_idx = int(cut_end_time * sr)
468
-
469
- # print(
470
- # f"cutting [{frame.pts}, {frame_end_pts}] "
471
- # f"to [{cut_start_pts}, {cut_end_pts}] "
472
- # f"({start_idx}:{end_idx} / {frame.samples})"
473
- # #f"({start_idx}:{end_idx} / {n_samples})"
474
- # )
475
-
476
- cut_samples = samples[:, start_idx:end_idx]
477
-
478
- # create a new AudioFrame
479
- new_frame = av.AudioFrame.from_ndarray(cut_samples, format=frame.format, layout=frame.layout)
480
- new_frame.sample_rate = sr
481
-
482
- # adjust PTS → corresponds to the actual start of the cut
483
- new_frame.pts = cut_start_pts
484
- new_frame.time_base = time_base
485
-
486
- return new_frame
487
-
488
-
489
-
490
- def trim_audio_frame_t(
491
- frame: av.AudioFrame,
492
- start_time: float,
493
- end_time: float,
494
- time_base
495
- ) -> av.AudioFrame:
496
- """
497
- Trim an AudioFrame to keep only the part between [start_time, end_time] in seconds.
498
- """
499
- samples = frame.to_ndarray() # (channels, n_samples)
500
- n_channels, n_samples = samples.shape
501
- sr = frame.sample_rate
502
-
503
- frame_start = float(frame.pts * time_base)
504
- frame_end = frame_start + (n_samples / sr)
505
-
506
- # compute the overlap in seconds
507
- cut_start = max(frame_start, start_time)
508
- cut_end = min(frame_end, end_time)
509
-
510
- if cut_start >= cut_end:
511
- return None # no overlap
512
-
513
- # convert to sample indices
514
- start_idx = int((cut_start - frame_start) * sr)
515
- end_idx = int((cut_end - frame_start) * sr)
516
-
517
- # print(f'cutting [{str(frame_start)}, {str(frame_end)}] to [{str(float(start_time))}, {str(float(end_time))}] from {str(start_idx)} to {str(end_idx)} of {str(int((frame_end - frame_start) * sr))}')
518
- cut_samples = samples[:, start_idx:end_idx]
519
-
520
- # create a new AudioFrame
521
- new_frame = av.AudioFrame.from_ndarray(cut_samples, format = frame.format, layout = frame.layout)
522
- new_frame.sample_rate = sr
523
-
524
- # adjust PTS → corresponds to the actual start of the cut
525
- new_pts = int(cut_start / time_base)
526
- new_frame.pts = new_pts
527
- new_frame.time_base = time_base
528
-
529
- return new_frame