yta-video-opengl 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
+ """
+ TODO: I don't like the name nor the
+ location of this file, but it is here
+ to encapsulate some functionality
+ related to combining video frames.
+ """
+ import numpy as np
+
+
+ def blend_alpha(
+     bottom,
+     top,
+     alpha = 0.5
+ ):
+     return (alpha * top + (1 - alpha) * bottom).astype(np.uint8)
+
+ def blend_add(
+     bottom,
+     top
+ ):
+     """
+     Brightens the combined image, as if you
+     overlapped two light projectors.
+     """
+     return np.clip(bottom.astype(np.int16) + top.astype(np.int16), 0, 255).astype(np.uint8)
+
+ def blend_multiply(
+     bottom,
+     top
+ ):
+     """
+     Darkens, like projecting two transparencies
+     together.
+     """
+     return ((bottom.astype(np.float32) * top.astype(np.float32)) / 255).astype(np.uint8)
+
+ def blend_screen(
+     bottom,
+     top
+ ):
+     """
+     Does the opposite of Multiply: it brightens
+     the image.
+     """
+     return (255 - ((255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32)) / 255)).astype(np.uint8)
+
+ def blend_overlay(
+     bottom,
+     top
+ ):
+     """
+     A mix between Multiply and Screen, depending
+     on the brightness of each pixel.
+     """
+     b = bottom.astype(np.float32) / 255
+     t = top.astype(np.float32) / 255
+     mask = b < 0.5
+     result = np.zeros_like(b)
+     result[mask] = 2 * b[mask] * t[mask]
+     result[~mask] = 1 - 2 * (1 - b[~mask]) * (1 - t[~mask])
+     return (result * 255).astype(np.uint8)
+
+ def blend_difference(
+     bottom,
+     top
+ ):
+     """
+     Highlights the differences between the two
+     frames.
+     """
+     return np.abs(bottom.astype(np.int16) - top.astype(np.int16)).astype(np.uint8)
+
+ # TODO: This one needs a mask, that's why
+ # it is commented
+ # def blend_mask(
+ #     bottom,
+ #     top,
+ #     mask
+ # ):
+ #     """
+ #     Instead of a fixed alpha you can pass a mask
+ #     (for example, a gradient or a real alpha channel).
+ #
+ #     mask: float32 array between 0 and 1, same size as the frame.
+ #     """
+ #     return (mask * top + (1 - mask) * bottom).astype(np.uint8)
+
+
+
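
For context, all of these helpers take same-shaped uint8 RGB arrays and return uint8. A minimal usage sketch, with two made-up constant frames (not part of the package):

    import numpy as np
    from yta_video_opengl.complete.blend import blend_add, blend_overlay

    # Two made-up frames sharing the same (H, W, 3) uint8 shape
    bottom = np.full((1080, 1920, 3), 64, dtype = np.uint8)
    top = np.full((1080, 1920, 3), 200, dtype = np.uint8)

    # 'blend_add' clips the int16 sum back into [0, 255]
    added = blend_add(bottom, top)
    # 'blend_overlay' multiplies dark pixels and screens bright ones
    overlaid = blend_overlay(bottom, top)

    assert added.dtype == np.uint8 and added.shape == bottom.shape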
@@ -14,9 +14,14 @@ from yta_video_opengl.complete.track import Track
  from yta_video_opengl.video import Video
  from yta_video_opengl.t import get_ts, fps_to_time_base, T
  from yta_validation.parameter import ParameterValidator
+ from av.video.frame import VideoFrame
+ from av.audio.frame import AudioFrame
+ from av.audio.resampler import AudioResampler
  from quicktions import Fraction
  from typing import Union

+ import numpy as np
+

  class Timeline:
      """
@@ -129,8 +134,25 @@ class Timeline:
          # other frame in other track, or to know if
          # I want them as transparent or something

-         # TODO: Combine them, I send the first one by now
-         return next(frames)
+         # TODO: This is just a test function
+         from yta_video_opengl.complete.blend import blend_add
+
+         # TODO: Combine the frames. We force them to
+         # rgb24 to obtain them with the same shape,
+         # but maybe we have to change this because
+         # we also need to handle alphas
+         output_frame = next(frames).to_ndarray(format = 'rgb24')
+         for frame in frames:
+             # Combine them
+             # TODO: We need to detect the frames that
+             # are just empty black frames and leave
+             # them out of the combination process
+             output_frame = blend_add(output_frame, frame.to_ndarray(format = 'rgb24'))
+
+         # TODO: How to build this VideoFrame correctly,
+         # and what about the 'format' (?)
+         # We don't handle pts here, just the image
+         return VideoFrame.from_ndarray(output_frame, format = 'rgb24')

      def get_audio_frames_at(
          self,
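
The combination above leans on PyAV's ndarray roundtrip: decode to 'rgb24' arrays, blend, then rebuild a VideoFrame. A minimal sketch of that pattern, assuming two same-sized frames (the synthetic arrays below are for illustration only):

    import numpy as np
    from av.video.frame import VideoFrame
    from yta_video_opengl.complete.blend import blend_add

    # Stand-ins for frames coming from two tracks
    frame_a = VideoFrame.from_ndarray(np.zeros((720, 1280, 3), dtype = np.uint8), format = 'rgb24')
    frame_b = VideoFrame.from_ndarray(np.full((720, 1280, 3), 90, dtype = np.uint8), format = 'rgb24')

    # 'rgb24' forces a common (H, W, 3) uint8 shape for blending
    blended = blend_add(
        frame_a.to_ndarray(format = 'rgb24'),
        frame_b.to_ndarray(format = 'rgb24')
    )
    combined = VideoFrame.from_ndarray(blended, format = 'rgb24')
    # As the TODO notes, 'pts' and 'time_base' are left for the
    # encoding step; only the image is handled here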
@@ -138,20 +160,151 @@
      ):
          # TODO: What if the different audio streams
          # have also different fps (?)
-         frames = []
+         audio_frames = []
          for track in self.tracks:
              # TODO: Make this work properly
-             audio_frames = track.get_audio_frames_at(t)
+             audio_frames.append(list(track.get_audio_frames_at(t)))

          # TODO: Combine them
-         if audio_frames is not None:
-             frames = audio_frames
-             break
+         # TODO: We need to detect the frames that
+         # are just empty black frames and leave
+         # them out of the combination process
+
+         def mix_audio_frames_by_index(
+             tracks_frames,
+             layout = 'stereo'
+         ):
+             """
+             Combine all the columns of the given
+             matrix of audio frames 'tracks_frames'.
+             The rows are the different tracks and
+             the columns are the frames at that 't'
+             moment, one per track.
+
+             The 'tracks_frames' matrix needs to be
+             pre-processed so there is only 1 single
+             frame per column to combine, so we
+             concatenate the frames first when a
+             column has more than 1.
+             """
+             # TODO: Please, improve and clean all this
+             # code, it is so sh*tty; make utils to
+             # combine frames and those things, not
+             # here... Also make the formats dynamic,
+             # based on the output that is defined here
+             # in the Timeline class.
+             mixed_frames = []
+
+             # Iterate by columns (each row is a track)
+             for frames_at_index in zip(*tracks_frames):
+                 arrays = []
+                 for f in frames_at_index:
+                     # Resample to the expected output values
+                     # TODO: This must be dynamic depending
+                     # on the track values
+                     resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = self.audio_fps)
+                     # 'resample' can return more than 1 frame
+                     resampled = resampler.resample(f)
+
+                     arr = np.concatenate([r.to_ndarray() for r in resampled], axis = 1)
+
+                     # TODO: This below must change depending
+                     # on the expected output; for now it is
+                     # float32, fltp, stereo, 44_100
+                     # Same format
+                     if arr.dtype == np.int16:
+                         arr = arr.astype(np.float32) / 32768.0
+
+                     # Same layout (number of channels)
+                     if arr.shape[0] == 1:
+                         arr = np.repeat(arr, 2, axis = 0)
+                     # elif arr.dtype == np.float32:
+                     #     # Already in [-1, 1], leave it untouched
+                     #     pass
+
+                     arrays.append(arr)
+
+                 # Align the lengths
+                 max_len = max(a.shape[1] for a in arrays)
+                 stacked = []
+                 for a in arrays:
+                     buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
+                     buf[:, :a.shape[1]] = a
+                     stacked.append(buf)
+
+                 # Mix
+                 mix = np.sum(stacked, axis = 0) / len(stacked)
+                 #mix = np.sum(stacked, axis = 0)
+
+                 # Clamp to the [-1, 1] range
+                 mix = np.clip(mix, -1.0, 1.0)
+
+                 # Create the output frame
+                 # TODO: What about the 'format' if they
+                 # are all different (?)
+                 out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
+                 out.sample_rate = self.audio_fps
+                 # TODO: This will be written later, when
+                 # encoding
+                 # out.pts = frames_at_index[0].pts
+                 # out.time_base = frames_at_index[0].time_base
+
+                 print(mix.min(), mix.max())
+
+                 mixed_frames.append(out)
+
+             return mixed_frames
+
+         def combine_audio_frames(frames):
+             """
+             Combine several consecutive AudioFrames
+             into one single frame:
+             - Converts to float32
+             - Concatenates the samples along time
+             - Returns a new AudioFrame
+             """
+             if not frames:
+                 # TODO: This should not happen
+                 return None

-         #from yta_video_opengl.utils import get_silent_audio_frame
-         #make_silent_audio_frame()
-         for frame in frames:
-             yield frame
+             if len(frames) == 1:
+                 return frames
+
+             # Check basic consistency
+             sample_rate = frames[0].sample_rate
+             layout = frames[0].layout.name
+             channels = frames[0].layout.channels
+
+             arrays = []
+             for f in frames:
+                 if f.sample_rate != sample_rate or f.layout.name != layout:
+                     raise ValueError('The frames must have the same sample_rate and layout')
+
+                 # arr = f.to_ndarray() # (channels, samples)
+                 # if arr.dtype == np.int16:
+                 #     arr = arr.astype(np.float32) / 32768.0
+                 # elif arr.dtype != np.float32:
+                 #     arr = arr.astype(np.float32)
+
+                 arrays.append(f.to_ndarray())
+
+             # Concatenate along the samples axis
+             combined = np.concatenate(arrays, axis = 1)
+
+             # Create a new frame
+             out = AudioFrame.from_ndarray(combined, format = frames[0].format.name, layout = layout)
+             out.sample_rate = sample_rate
+
+             return [out]
+
+         # We need only 1 single audio frame per column
+         collapsed = []
+         for frames in audio_frames:
+             collapsed.append(combine_audio_frames(frames))
+
+         # Now, mix column by column (track by track)
+         frames = mix_audio_frames_by_index(collapsed)
+
+         for audio_frame in frames:
+             yield audio_frame

      def render(
          self,
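
At its core, the mixing above resamples every frame to planar float32 ('fltp'), pads the arrays to a common length, averages them and clamps the result. A standalone sketch of that core, with two synthetic stereo tones (all names here are illustrative):

    import numpy as np
    from av.audio.frame import AudioFrame

    sample_rate = 44_100

    def make_tone(freq, samples = 1024):
        # Planar float32 stereo: shape (channels, samples), values in [-1, 1]
        t = np.arange(samples) / sample_rate
        mono = np.sin(2 * np.pi * freq * t).astype(np.float32)
        return np.stack([mono, mono])

    a = make_tone(440.0)
    b = make_tone(220.0)

    # Average and clamp, as 'mix_audio_frames_by_index' does
    mix = np.clip((a + b) / 2, -1.0, 1.0)

    out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = 'stereo')
    out.sample_rate = sample_rate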
@@ -168,10 +321,8 @@
          project will be rendered.
          """
          ParameterValidator.validate_mandatory_string('filename', filename, do_accept_empty = False)
-         # TODO: We need to accept Fraction as number
-         #ParameterValidator.validate_mandatory_positive_number('start', start, do_include_zero = True)
-         # TODO: We need to accept Fraction as number
-         #ParameterValidator.validate_positive_number('end', end, do_include_zero = False)
+         ParameterValidator.validate_mandatory_positive_number('start', start, do_include_zero = True)
+         ParameterValidator.validate_positive_number('end', end, do_include_zero = False)

          # TODO: Limit 'end' a bit...
          end = (
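
This hunk (and the two below) re-enable validations that the old TODOs had disabled because Fraction values were rejected; the change implies ParameterValidator now treats a Fraction as a number. A minimal sketch of that assumed behavior:

    from quicktions import Fraction
    from yta_validation.parameter import ParameterValidator

    # Assumption: a positive 'Fraction' now passes this check,
    # which is why the commented-out calls could come back
    start = Fraction(3, 2)
    ParameterValidator.validate_mandatory_positive_number('start', start, do_include_zero = True)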
@@ -39,10 +39,8 @@ class _Part:
          end: Union[int, float, Fraction],
          video: Union[VideoOnTrack, None] = None
      ):
-         # TODO: We need to accept Fraction as number
-         # ParameterValidator.validate_mandatory_positive_number('start', start, do_include_zero = True)
-         # TODO: We need to accept Fraction as number
-         # ParameterValidator.validate_mandatory_positive_number('end', end, do_include_zero = False)
+         ParameterValidator.validate_mandatory_positive_number('start', start, do_include_zero = True)
+         ParameterValidator.validate_mandatory_positive_number('end', end, do_include_zero = False)
          ParameterValidator.validate_instance_of('video', video, VideoOnTrack)

          self._track: Track = track
@@ -200,6 +198,17 @@ class Track:
                  for video in self.videos
              )
          )
+
+     @property
+     def videos(
+         self
+     ) -> list[VideoOnTrack]:
+         """
+         The list of videos on this track, ordered
+         by their 'start' attribute from first to
+         last.
+         """
+         return sorted(self._videos, key = lambda video: video.start)

      def __init__(
          self,
@@ -212,7 +221,7 @@
          # TODO: Where does it come from (?)
          audio_samples_per_frame: int
      ):
-         self.videos: list[VideoOnTrack] = []
+         self._videos: list[VideoOnTrack] = []
          """
          The list of 'VideoOnTrack' instances that
          must play on this track.
@@ -354,7 +363,7 @@
          else:
              t = self.end

-         self.videos.append(VideoOnTrack(
+         self._videos.append(VideoOnTrack(
              video,
              t
          ))
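
The two hunks above complete the same refactor: the raw list moves to '_videos' so the public 'videos' property can always hand back a start-ordered copy, while appends keep using the private list. A simplified, standalone sketch of the pattern (the Clip/SimpleTrack names are made up):

    class Clip:
        def __init__(self, start: float):
            self.start = start

    class SimpleTrack:
        def __init__(self):
            # Raw storage, in insertion order
            self._clips: list[Clip] = []

        @property
        def clips(self) -> list[Clip]:
            # Always expose a new list ordered by 'start'
            return sorted(self._clips, key = lambda clip: clip.start)

        def add(self, clip: Clip):
            self._clips.append(clip)

    track = SimpleTrack()
    track.add(Clip(2.0))
    track.add(Clip(0.5))
    assert [clip.start for clip in track.clips] == [0.5, 2.0]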
@@ -46,10 +46,7 @@ class VideoOnTrack:
          start: Union[int, float, Fraction] = 0.0
      ):
          ParameterValidator.validate_mandatory_instance_of('video', video, Video)
-         # TODO: Now we need to accept 'Fraction',
-         # from 'fractions' or 'quicktions', as a
-         # number
-         #ParameterValidator.validate_mandatory_positive_number('start', start, do_include_zero = True)
+         ParameterValidator.validate_mandatory_positive_number('start', start, do_include_zero = True)

          self.video: Video = video
          """
@@ -2,7 +2,8 @@
  A video reader using the PyAv (av) library
  that, using ffmpeg, detects the video.
  """
- from yta_video_opengl.reader.cache import VideoFrameCache
+ from yta_video_opengl.reader.cache.video import VideoFrameCache
+ from yta_video_opengl.reader.cache.audio import AudioFrameCache
  from yta_video_opengl.utils import iterate_stream_frames_demuxing
  from yta_video_opengl.t import T
  from yta_validation import PythonValidator
@@ -435,7 +436,7 @@
          The video frame cache system to optimize
          the way we access the frames.
          """
-         self.audio_cache: VideoFrameCache = None
+         self.audio_cache: AudioFrameCache = None
          """
          The audio frame cache system to optimize
          the way we access the frames.
@@ -490,7 +491,7 @@
              raise Exception(f'No video nor audio stream found in the "{self.filename}" file.')

          self.video_cache = VideoFrameCache(self.container, self.video_stream)
-         self.audio_cache = VideoFrameCache(self.container, self.audio_stream)
+         self.audio_cache = AudioFrameCache(self.container, self.audio_stream)

      def seek(
          self,
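
The single cache class is now split per media type, and both the construction above and the renames below imply a shared contract: build from a container plus stream, then serve 'get_frame(t)' and 'get_frames(start, end)'. The diff does not show the cache internals, so this interface sketch is an assumption:

    from abc import ABC, abstractmethod
    from typing import Iterator, Union
    from quicktions import Fraction

    Number = Union[int, float, Fraction]

    class FrameCache(ABC):
        # Assumed contract shared by VideoFrameCache and AudioFrameCache
        def __init__(self, container, stream):
            self.container = container
            self.stream = stream

        @abstractmethod
        def get_frame(self, t: Number):
            ...

        @abstractmethod
        def get_frames(self, start: Number = 0.0, end: Union[Number, None] = None) -> Iterator:
            ...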
@@ -620,16 +621,28 @@
      ):
          yield frame

-     def get_frame_from_t(
+     def get_frame(
          self,
          t: Union[int, float, Fraction]
-     ) -> 'VideoFrame':
+     ) -> VideoFrame:
          """
-         Get the video frame with the given 't' time
-         moment, using the video cache system.
+         Get the video frame that is in the 't' time
+         moment provided.
          """
-         return self.video_cache.get_video_frame(t)
-
+         return self.video_cache.get_frame(t)
+
+     def get_frames(
+         self,
+         start: Union[int, float, Fraction] = 0.0,
+         end: Union[int, float, Fraction, None] = None
+     ):
+         """
+         Iterator to get the video frames in between
+         the provided 'start' and 'end' time moments.
+         """
+         for frame in self.video_cache.get_frames(start, end):
+             yield frame
+
      def get_audio_frame_from_t(
          self,
          t: Union[int, float, Fraction]
@@ -638,7 +651,7 @@
          Get the audio frame with the given 't' time
          moment, using the audio cache system.
          """
-         return self.audio_cache.get_audio_frame_from_t(t)
+         return self.audio_cache.get_frame(t)

      def get_audio_frames_from_t(
          self,
@@ -659,19 +672,7 @@
          # We want all the audios that must be played
          # during the video frame that starts in the
          # 't' time moment
-         for frame in self.audio_cache.get_audio_frames(t.truncated, t.next(1).truncated):
-             yield frame
-
-     def get_frames(
-         self,
-         start: Union[int, float, Fraction] = 0.0,
-         end: Union[int, float, Fraction, None] = None
-     ):
-         """
-         Iterator to get the video frames in between
-         the provided 'start' and 'end' time moments.
-         """
-         for frame in self.video_cache.get_frames(start, end):
+         for frame in self.get_audio_frames(t.truncated, t.next(1).truncated):
              yield frame

      def get_audio_frames(
@@ -683,7 +684,7 @@
          Iterator to get the audio frames in between
          the provided 'start' and 'end' time moments.
          """
-         for frame in self.audio_cache.get_audio_frames(start, end):
+         for frame in self.audio_cache.get_frames(start, end):
              yield frame

      def close(
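
After these renames the reader exposes a symmetric API: 'get_frame'/'get_frames' for video and 'get_audio_frame_from_t'/'get_audio_frames' for audio, each backed by its own cache. A usage sketch, assuming the constructor takes a file path (the path itself is made up):

    from yta_video_opengl.reader import VideoReader  # assumed import path

    reader = VideoReader('input.mp4')

    # Single frames at a 't' time moment
    video_frame = reader.get_frame(1.5)
    audio_frame = reader.get_audio_frame_from_t(1.5)

    # Ranged iterators, video and audio alike
    for frame in reader.get_frames(0.0, 2.0):
        ...
    for frame in reader.get_audio_frames(0.0, 2.0):
        ...

    reader.close()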