yta-video-opengl 0.0.16__tar.gz → 0.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/PKG-INFO +1 -1
  2. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/pyproject.toml +1 -1
  3. yta_video_opengl-0.0.17/src/yta_video_opengl/complete/frame_combinator.py +293 -0
  4. yta_video_opengl-0.0.17/src/yta_video_opengl/complete/frame_generator.py +271 -0
  5. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/timeline.py +73 -136
  6. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/track.py +28 -9
  7. yta_video_opengl-0.0.16/src/yta_video_opengl/complete/blend.py +0 -83
  8. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/LICENSE +0 -0
  9. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/README.md +0 -0
  10. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/__init__.py +0 -0
  11. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/classes.py +0 -0
  12. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/__init__.py +0 -0
  13. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/complete/video_on_track.py +0 -0
  14. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/__init__.py +0 -0
  15. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/audio/__init__.py +0 -0
  16. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/video/__init__.py +0 -0
  17. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/nodes/video/opengl.py +0 -0
  18. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/__init__.py +0 -0
  19. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/__init__.py +0 -0
  20. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/audio.py +0 -0
  21. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/utils.py +0 -0
  22. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/reader/cache/video.py +0 -0
  23. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/t.py +0 -0
  24. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/tests.py +0 -0
  25. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/utils.py +0 -0
  26. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/video.py +0 -0
  27. {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.17}/src/yta_video_opengl/writer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: yta-video-opengl
3
- Version: 0.0.16
3
+ Version: 0.0.17
4
4
  Summary: Youtube Autonomous Video OpenGL Module
5
5
  Author: danialcala94
6
6
  Author-email: danielalcalavalera@gmail.com
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "yta-video-opengl"
3
- version = "0.0.16"
3
+ version = "0.0.17"
4
4
  description = "Youtube Autonomous Video OpenGL Module"
5
5
  authors = [
6
6
  {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -0,0 +1,293 @@
1
+ """
2
+ TODO: I don't like the name nor the
3
+ location of this file, but it is here
4
+ to encapsulate some functionality
5
+ related to combining video frames.
6
+
7
+ Module to contain methods that combine
8
+ video frames. Call them with the 2
9
+ frames you want to combine and you
10
+ will get the combined frame as return.
11
+ """
12
+ from av.audio.resampler import AudioResampler
13
+ from av.audio.frame import AudioFrame
14
+
15
+ import numpy as np
16
+
17
+
18
class VideoFrameCombinator:
    """
    Class to wrap the functionality related
    to combine different video frames.
    """

    @staticmethod
    def blend_alpha(
        bottom: np.ndarray,
        top: np.ndarray,
        alpha = 0.5
    ):
        """
        Weighted mix of both frames, where 'alpha'
        is the weight of the 'top' frame.
        """
        mixed = alpha * top + (1 - alpha) * bottom

        return mixed.astype(np.uint8)

    @staticmethod
    def blend_add(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Brightens the combined image, as if two
        light projectors were overlapped.
        """
        total = bottom.astype(np.int16) + top.astype(np.int16)

        return np.clip(total, 0, 255).astype(np.uint8)

    @staticmethod
    def blend_multiply(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Darkens the image, like projecting two
        transparencies together.
        """
        product = bottom.astype(np.float32) * top.astype(np.float32)

        return (product / 255).astype(np.uint8)

    @staticmethod
    def blend_screen(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        The opposite of multiply, it brightens
        the image.
        """
        inverted = (255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32))

        return (255 - inverted / 255).astype(np.uint8)

    @staticmethod
    def blend_overlay(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Mix between multiply and screen depending
        on the brightness of each pixel.
        """
        b = bottom.astype(np.float32) / 255
        t = top.astype(np.float32) / 255

        # Multiply-like branch for dark pixels,
        # screen-like branch for bright ones
        result = np.where(
            b < 0.5,
            2 * b * t,
            1 - 2 * (1 - b) * (1 - t)
        )

        return (result * 255).astype(np.uint8)

    @staticmethod
    def blend_difference(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Highlights the differences between the
        two frames.
        """
        delta = bottom.astype(np.int16) - top.astype(np.int16)

        return np.abs(delta).astype(np.uint8)

    # TODO: This one needs a mask, thats why
    # it is commented
    # @staticmethod
    # def blend_mask(
    #     bottom,
    #     top,
    #     mask
    # ):
    #     """
    #     Instead of a fixed alpha you can provide a
    #     mask (for example a gradient or a real
    #     alpha channel).
    #
    #     mask: float32 array in [0, 1], same size
    #     as the frame.
    #     """
    #     return (mask * top + (1 - mask) * bottom).astype(np.uint8)
104
class AudioFrameCombinator:
    """
    Class to wrap the functionality related
    to combine different audio frames.
    """

    @staticmethod
    def sum_tracks_frames(
        tracks_frames: list[AudioFrame],
        sample_rate: int = 44100,
        layout: str = 'stereo',
        format: str = 'fltp',
        do_normalize: bool = True
    ) -> AudioFrame:
        """
        Sum all the audio frames from the different
        tracks that are given in the 'tracks_frames'
        list (each column is a single audio frame of
        a track). This must be a list that should
        come from a converted matrix that was
        representing each track in a row and the
        different audio frames for that track on each
        column.

        This method is to sum audio frames of one
        specific 't' time moment of a video.

        The output will be the sum of all the audio
        frames and it will be normalized to avoid
        distortion if 'do_normalize' is True (it is
        recommended).

        Raises an Exception when 'tracks_frames' is
        empty.
        """
        if len(tracks_frames) == 0:
            raise Exception('The "tracks_frames" list of audio frames is empty.')

        arrays = []
        resampler: AudioResampler = AudioResampler(
            format = format,
            layout = layout,
            rate = sample_rate
        )

        for track_frame in tracks_frames:
            # Resample to output format
            # TODO: What if the resampler creates more
            # than one single frame? I don't know what
            # to do... I'll see when it happens
            track_frame = resampler.resample(track_frame)

            if len(track_frame) > 1:
                print('[ ! ] The resampler has given more than 1 frame...')

            track_frame_array = track_frame[0].to_ndarray()

            # Transform to 'float32' [-1, 1]
            # TODO: I think this is because the output
            # is 'fltp' but we have more combinations
            # so this must be refactored
            if track_frame_array.dtype == np.int16:
                track_frame_array = track_frame_array.astype(np.float32) / 32768.0
            elif track_frame_array.dtype != np.float32:
                track_frame_array = track_frame_array.astype(np.float32)

            # Mono to stereo if needed
            # TODO: What if source is 'stereo' and we
            # want mono (?)
            if (
                track_frame_array.shape[0] == 1 and
                layout == 'stereo'
            ):
                track_frame_array = np.repeat(track_frame_array, 2, axis = 0)

            arrays.append(track_frame_array)

        # Same length and fill with zeros if needed
        max_len = max(a.shape[1] for a in arrays)
        stacked = []
        for a in arrays:
            # TODO: Again, this 'float32' is because output
            # is 'fltp' I think...
            buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
            buf[:, :a.shape[1]] = a
            stacked.append(buf)

        # Sum all the sounds
        mix = np.sum(stacked, axis = 0)
        if do_normalize:
            # Avoid distortion and saturation
            mix /= len(stacked)

        # Avoid clipping
        mix = np.clip(mix, -1.0, 1.0)

        out = AudioFrame.from_ndarray(
            array = mix,
            format = format,
            layout = layout
        )
        out.sample_rate = sample_rate

        return out

    # TODO: This method below has been refactored
    # to the 'sum_tracks_frames', so delete it
    # when the one above is working well
    @staticmethod
    def mix_audio_frames_by_index(
        tracks_frames,
        sample_rate: int,
        layout = 'stereo',
    ):
        """
        Combine all the columns of the given
        matrix of audio frames 'tracks_frames'.
        The rows are the different tracks and
        the columns are the frame at that 't'
        moment of each of those tracks.

        The 'tracks_frames' matrix needs to be
        pre-processed to have only 1 single
        frame to combine, so we concatenate
        all the frames if more than 1 per
        column.
        """
        # TODO: Please, improve and clean all this
        # code is so sh*tty, and make utils to
        # combine and those things, not here...
        # Also the formats, make them dynamic and
        # based on the output that is defined here
        # in the Timeline class.
        mixed_frames = []

        # Iterate by columns (each row is a track)
        for frames_at_index in zip(*tracks_frames):
            arrays = []
            for f in frames_at_index:
                # Resample to output expected values
                # TODO: This must be dynamic depending
                # on the track values
                resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = sample_rate)
                # Fix: the resampled result was previously
                # discarded ('arr' was overwritten with the
                # original frame's ndarray right after)
                arr = resampler.resample(f)[0].to_ndarray()

                # TODO: This below must change depending
                # on the expected output, for us and now
                # it is float32, fltp, stereo, 44_100
                # Same format
                if arr.dtype == np.int16:
                    arr = arr.astype(np.float32) / 32768.0

                # Same layout (number of channels)
                if arr.shape[0] == 1:
                    # Fix: this was a 'return' that aborted
                    # the whole mix and gave back a plain
                    # ndarray instead of the frames list
                    arr = np.repeat(arr, 2, axis = 0)

                arrays.append(arr)

            # Align the lengths, filling with zeros
            max_len = max(a.shape[1] for a in arrays)
            stacked = []
            for a in arrays:
                buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
                buf[:, :a.shape[1]] = a
                stacked.append(buf)

            # Mix and normalize to avoid distortion
            mix = np.sum(stacked, axis = 0) / len(stacked)

            # Limit to the [-1, 1] range
            mix = np.clip(mix, -1.0, 1.0)

            # Build the output frame
            # TODO: What about the 'format' if they
            # are all different (?)
            out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
            out.sample_rate = sample_rate
            # TODO: This will be written later when
            # encoding
            # out.pts = frames_at_index[0].pts
            # out.time_base = frames_at_index[0].time_base

            mixed_frames.append(out)

        return mixed_frames
@@ -0,0 +1,271 @@
1
+ """
2
+ The video frames must be built using the
3
+ (height, width) size when giving the numpy
4
+ array that will be used for it. We will
5
+ receive the values as (width, height) but
6
+ we will invert them when needed.
7
+
8
+ TODO: Check because we have a similar
9
+ module in other project or projects.
10
+ """
11
+ from av.video.frame import VideoFrame
12
+ from av.audio.frame import AudioFrame
13
+ from av.audio.layout import AudioLayout
14
+ from typing import Union
15
+
16
+ import numpy as np
17
+
18
+
19
class _FrameGenerator:
    """
    Class to generate frames as numpy arrays.

    The 'size' is received as (width, height)
    but the arrays are built with the
    (height, width, 3) shape, which is what
    pyav expects.
    """

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ):
        """
        Get a numpy array that represents a full
        black frame of the given 'size' and with
        the given 'dtype'.
        """
        # Black is 0 for any dtype
        return np.zeros(
            shape = (size[1], size[0], 3),
            dtype = dtype
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ):
        """
        Get a numpy array that represents a full
        white frame of the given 'size' and with
        the given 'dtype'.
        """
        # Fix: 'np.ones' filled the frame with 1,
        # which is almost black for a uint8 frame.
        # White is the dtype maximum for integer
        # dtypes and 1.0 for float dtypes.
        white_value = (
            np.iinfo(dtype).max
            if np.issubdtype(dtype, np.integer) else
            1.0
        )

        return np.full(
            shape = (size[1], size[0], 3),
            fill_value = white_value,
            dtype = dtype
        )
58
class _BackgroundFrameGenerator:
    """
    Internal class to simplify the way we
    access to the generation of background
    frames from the general generator class.
    """

    def __init__(
        self
    ):
        # Shortcut to the raw numpy frame generator
        self._frame_generator: _FrameGenerator = _FrameGenerator()

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely black
        and of the given 'size'.
        """
        array = self._frame_generator.full_black(size, dtype)

        return numpy_to_video_frame(
            frame = array,
            format = format,
            pts = pts,
            time_base = time_base
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely white
        and of the given 'size'.
        """
        array = self._frame_generator.full_white(size, dtype)

        return numpy_to_video_frame(
            frame = array,
            format = format,
            pts = pts,
            time_base = time_base
        )
111
class VideoFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav video frame.

    This class is useful when we need to
    generate the black background for empty
    parts within the tracks and in other
    situations.
    """

    def __init__(
        self
    ):
        # Shortcut to the background frame creation
        self.background = _BackgroundFrameGenerator()
130
def numpy_to_video_frame(
    frame: np.ndarray,
    format: str = 'rgb24',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> VideoFrame:
    """
    Transform the given numpy 'frame' into a
    pyav video frame with the given 'format'
    and also the 'pts' and/or 'time_base' if
    provided.
    """
    # TODO: What if we want alpha (?)
    video_frame = VideoFrame.from_ndarray(
        array = frame,
        format = format
    )

    if pts is not None:
        video_frame.pts = pts

    if time_base is not None:
        video_frame.time_base = time_base

    return video_frame
156
class AudioFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav audio frame.

    This class is useful when we need to
    generate the silent audio for empty parts
    within the tracks and in other situations.
    """

    def silent(
        self,
        sample_rate: int,
        layout = 'stereo',
        number_of_samples: int = 1024,
        format = 's16',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> AudioFrame:
        """
        Get an audio frame that is completely silent.
        This is useful when we want to fill the empty
        parts of our tracks.

        Raises an Exception when the 'format' has no
        known numpy dtype equivalent.
        """
        dtype = audio_format_to_dtype(format)

        if dtype is None:
            raise Exception(f'The format "{format}" is not accepted.')

        # TODO: Is this raising exception if the
        # 'layout' is not valid? I think yes (?)
        number_of_channels = len(AudioLayout(layout).channels)

        # NOTE(review): it could look like planar
        # formats ('p' suffix) need the sample count
        # multiplied by the channel count, but doing
        # so made the audio last twice as long, so
        # the same 'number_of_samples' is used for
        # both packed and planar formats
        silence = np.zeros(
            shape = (number_of_channels, number_of_samples),
            dtype = dtype
        )

        return numpy_to_audio_frame(
            frame = silence,
            sample_rate = sample_rate,
            layout = layout,
            format = format,
            pts = pts,
            time_base = time_base
        )
217
def numpy_to_audio_frame(
    frame: np.ndarray,
    sample_rate: int,
    layout: str = 'stereo',
    # Fix: the default value had a stray leading
    # space (' s16') that pyav would reject when
    # the parameter was omitted
    format: str = 's16',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> AudioFrame:
    """
    Transform the given numpy 'frame' into a
    pyav audio frame with the given 'sample_rate',
    'layout' and 'format', and also the 'pts'
    and/or 'time_base' if provided.
    """
    audio_frame = AudioFrame.from_ndarray(
        array = frame,
        format = format,
        layout = layout
    )

    audio_frame.sample_rate = sample_rate

    if pts is not None:
        audio_frame.pts = pts

    if time_base is not None:
        audio_frame.time_base = time_base

    return audio_frame
247
# TODO: Maybe transform into a Enum (?)
def audio_format_to_dtype(
    audio_format: str
) -> Union[np.dtype, None]:
    """
    Transform the given 'audio_format' into
    the corresponding numpy dtype value. If
    the 'audio_format' is not accepted this
    method will return None.

    This method must be used when we are
    building the numpy array that will be
    used to build a pyav audio frame because
    the pyav 'audio_format' need a specific
    np.dtype to be built.

    For example, 's16' will return 'np.int16'
    and 'fltp' will return 'np.float32'.
    """
    # Packed and planar ('p' suffix) variants of
    # each ffmpeg sample format share the same
    # per-sample numpy dtype
    return {
        'u8': np.uint8,
        'u8p': np.uint8,
        's16': np.int16,
        's16p': np.int16,
        's32': np.int32,
        's32p': np.int32,
        'flt': np.float32,
        'fltp': np.float32,
        'dbl': np.float64,
        'dblp': np.float64
    }.get(audio_format, None)
@@ -13,10 +13,10 @@ video written).
13
13
  from yta_video_opengl.complete.track import Track
14
14
  from yta_video_opengl.video import Video
15
15
  from yta_video_opengl.t import get_ts, fps_to_time_base, T
16
+ from yta_video_opengl.complete.frame_combinator import AudioFrameCombinator
16
17
  from yta_validation.parameter import ParameterValidator
17
18
  from av.video.frame import VideoFrame
18
19
  from av.audio.frame import AudioFrame
19
- from av.audio.resampler import AudioResampler
20
20
  from quicktions import Fraction
21
21
  from typing import Union
22
22
 
@@ -135,7 +135,7 @@ class Timeline:
135
135
  # I want them as transparent or something
136
136
 
137
137
  # TODO: This is just a test function
138
- from yta_video_opengl.complete.blend import blend_add
138
+ from yta_video_opengl.complete.frame_combinator import VideoFrameCombinator
139
139
 
140
140
  # TODO: Combinate frames, we force them to
141
141
  # rgb24 to obtain them with the same shape,
@@ -147,7 +147,8 @@ class Timeline:
147
147
  # TODO: We need to ignore the frames that
148
148
  # are just empty black frames and use them
149
149
  # not in the combination process
150
- output_frame = blend_add(output_frame, frame.to_ndarray(format = 'rgb24'))
150
+ # TODO: What about the 'format' (?)
151
+ output_frame = VideoFrameCombinator.blend_add(output_frame, frame.to_ndarray(format = 'rgb24'))
151
152
 
152
153
  # TODO: How to build this VideoFrame correctly
153
154
  # and what about the 'format' (?)
@@ -158,150 +159,39 @@ class Timeline:
158
159
  self,
159
160
  t: float
160
161
  ):
162
+ audio_frames = []
163
+ """
164
+ Matrix in which the rows are the different
165
+ tracks we have, and the column includes all
166
+ the audio frames for this 't' time moment
167
+ for the track of that row. We can have more
168
+ than one frame per column per row (track)
169
+ but we need a single frame to combine all
170
+ the tracks.
171
+ """
161
172
  # TODO: What if the different audio streams
162
173
  # have also different fps (?)
163
- audio_frames = []
164
174
  for track in self.tracks:
165
175
  # TODO: Make this work properly
166
176
  audio_frames.append(list(track.get_audio_frames_at(t)))
167
-
168
- # TODO: Combine them
169
177
  # TODO: We need to ignore the frames that
170
178
  # are just empty black frames and use them
171
179
  # not in the combination process
172
180
 
173
- def mix_audio_frames_by_index(
174
- tracks_frames,
175
- layout = 'stereo'
176
- ):
177
- """
178
- Combine all the columns of the given
179
- matrix of audio frames 'tracks_frames'.
180
- The rows are the different tracks and
181
- the columns are the frame at that 't'
182
- moment of each of those tracks.
183
-
184
- The 'tracks_frames' matrix needs to be
185
- pre-processed to have only 1 single
186
- frame to combine, so we concatenate
187
- all the frames if more than 1 per
188
- column.
189
- """
190
- # TODO: Please, improve and clean all this
191
- # code is so sh*tty, and make utils to
192
- # combine and those things, not here...
193
- # Also the formats, make them dynamic and
194
- # based on the output that is defined here
195
- # in the Timeline class.
196
- mixed_frames = []
197
-
198
- # Iterate by columns (each row is a track)
199
- for frames_at_index in zip(*tracks_frames):
200
- arrays = []
201
- for f in frames_at_index:
202
- # Resample to output expected values
203
- # TODO: This must be dynamic depending
204
- # on the track values
205
- resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = self.audio_fps)
206
- arr = resampler.resample(f)
207
-
208
- arr = f.to_ndarray()
209
-
210
- # TODO: This below must change depending
211
- # on the expected output, for us and now
212
- # it is float32, fltp, stereo, 44_100
213
- # Same format
214
- if arr.dtype == np.int16:
215
- arr = arr.astype(np.float32) / 32768.0
216
-
217
- # Same layout (number of channels)
218
- if arr.shape[0] == 1:
219
- return np.repeat(arr, 2, axis = 0)
220
- # elif arr.dtype == np.float32:
221
- # # Ya está en [-1,1], no lo toques
222
- # pass
223
-
224
- arrays.append(arr)
225
-
226
- # Alinear longitudes
227
- max_len = max(a.shape[1] for a in arrays)
228
- stacked = []
229
- for a in arrays:
230
- buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
231
- buf[:, :a.shape[1]] = a
232
- stacked.append(buf)
233
-
234
- # Mezcla
235
- mix = np.sum(stacked, axis = 0) / len(stacked)
236
- #mix = np.sum(stacked, axis = 0)
237
-
238
- # Limitar al rango [-1,1]
239
- mix = np.clip(mix, -1.0, 1.0)
240
-
241
- # Crear frame de salida
242
- # TODO: What about the 'format' if they
243
- # are all different (?)
244
- out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
245
- out.sample_rate = self.audio_fps
246
- # TODO: This will be written later when
247
- # encoding
248
- # out.pts = frames_at_index[0].pts
249
- # out.time_base = frames_at_index[0].time_base
250
-
251
- print(mix.min(), mix.max())
252
-
253
- mixed_frames.append(out)
254
-
255
- return mixed_frames
256
-
257
- def combine_audio_frames(frames):
258
- """
259
- Combina varios AudioFrames consecutivos en uno solo.
260
- - Convierte a float32
261
- - Concatena muestras a lo largo del tiempo
262
- - Devuelve un AudioFrame nuevo
263
- """
264
- if not frames:
265
- # TODO: This should not happen
266
- return None
267
-
268
- if len(frames) == 1:
269
- return frames
270
-
271
- # Verificamos consistencia básica
272
- sample_rate = frames[0].sample_rate
273
- layout = frames[0].layout.name
274
- channels = frames[0].layout.channels
275
-
276
- arrays = []
277
- for f in frames:
278
- if f.sample_rate != sample_rate or f.layout.name != layout:
279
- raise ValueError("Los frames deben tener mismo sample_rate y layout")
280
-
281
- # arr = f.to_ndarray() # (channels, samples)
282
- # if arr.dtype == np.int16:
283
- # arr = arr.astype(np.float32) / 32768.0
284
- # elif arr.dtype != np.float32:
285
- # arr = arr.astype(np.float32)
286
-
287
- arrays.append(f.to_ndarray())
288
-
289
- # Concatenamos por eje de samples
290
- combined = np.concatenate(arrays, axis = 1)
291
-
292
- # Creamos un frame nuevo
293
- out = AudioFrame.from_ndarray(combined, format = frames[0].format, layout = layout)
294
- out.sample_rate = sample_rate
295
-
296
- return [out]
297
-
298
181
  # We need only 1 single audio frame per column
299
- collapsed = []
300
- for frames in audio_frames:
301
- collapsed.append(combine_audio_frames(frames))
182
+ collapsed = [
183
+ concatenate_audio_frames(frames)
184
+ for frames in audio_frames
185
+ ]
302
186
 
303
187
  # Now, mix column by column (track by track)
304
- frames = mix_audio_frames_by_index(collapsed)
188
+ # TODO: I do this to have an iterator, but
189
+ # maybe we need more than one single audio
190
+ # frame because of the size at the original
191
+ # video or something...
192
+ frames = [
193
+ AudioFrameCombinator.sum_tracks_frames(collapsed, self.audio_fps)
194
+ ]
305
195
 
306
196
  for audio_frame in frames:
307
197
  yield audio_frame
@@ -416,4 +306,51 @@ class Timeline:
416
306
 
417
307
  writer.mux_video_frame(None)
418
308
  writer.mux_audio_frame(None)
419
- writer.output.close()
309
+ writer.output.close()
310
+
311
+
312
+ # TODO: I don't know where to put this
313
+ # method because if a bit special
314
+ # TODO: Refactor and move please
315
+ def concatenate_audio_frames(
316
+ frames: list[AudioFrame]
317
+ ) -> AudioFrame:
318
+ """
319
+ Combina varios AudioFrames consecutivos en uno solo.
320
+ - Convierte a float32
321
+ - Concatena muestras a lo largo del tiempo
322
+ - Devuelve un AudioFrame nuevo
323
+ """
324
+ if not frames:
325
+ # TODO: This should not happen
326
+ return None
327
+
328
+ if len(frames) == 1:
329
+ return frames[0]
330
+
331
+ # Verificamos consistencia básica
332
+ sample_rate = frames[0].sample_rate
333
+ layout = frames[0].layout.name
334
+ channels = frames[0].layout.channels
335
+
336
+ arrays = []
337
+ for f in frames:
338
+ if f.sample_rate != sample_rate or f.layout.name != layout:
339
+ raise ValueError("Los frames deben tener mismo sample_rate y layout")
340
+
341
+ # arr = f.to_ndarray() # (channels, samples)
342
+ # if arr.dtype == np.int16:
343
+ # arr = arr.astype(np.float32) / 32768.0
344
+ # elif arr.dtype != np.float32:
345
+ # arr = arr.astype(np.float32)
346
+
347
+ arrays.append(f.to_ndarray())
348
+
349
+ # Concatenamos por eje de samples
350
+ combined = np.concatenate(arrays, axis = 1)
351
+
352
+ # Creamos un frame nuevo
353
+ out = AudioFrame.from_ndarray(combined, format = frames[0].format, layout = layout)
354
+ out.sample_rate = sample_rate
355
+
356
+ return out
@@ -1,8 +1,9 @@
1
1
  from yta_video_opengl.complete.video_on_track import VideoOnTrack
2
2
  from yta_video_opengl.video import Video
3
3
  from yta_video_opengl.t import T
4
- from yta_video_opengl.utils import get_black_background_video_frame, get_silent_audio_frame, audio_frames_and_remainder_per_video_frame
4
+ from yta_video_opengl.utils import audio_frames_and_remainder_per_video_frame
5
5
  from yta_video_opengl.t import fps_to_time_base
6
+ from yta_video_opengl.complete.frame_generator import VideoFrameGenerator, AudioFrameGenerator
6
7
  from yta_validation.parameter import ParameterValidator
7
8
  from quicktions import Fraction
8
9
  from typing import Union
@@ -48,6 +49,20 @@ class _Part:
48
49
  The instance of the track this part belongs
49
50
  to.
50
51
  """
52
+ # TODO: I would like to avoid this 2 instances
53
+ # here, and I think I've done it with static
54
+ # properties in other project, but as I don't
55
+ # remember how and where by now, here it is...
56
+ self._video_frame_generator: VideoFrameGenerator = VideoFrameGenerator()
57
+ """
58
+ Useful internal tool to generate background
59
+ frames for the empty parts.
60
+ """
61
+ self._audio_frame_generator: AudioFrameGenerator = AudioFrameGenerator()
62
+ """
63
+ Useful internal tool to generate silent
64
+ audio frames for the empty parts.
65
+ """
51
66
  self.start: Fraction = Fraction(start)
52
67
  """
53
68
  The start 't' time moment of the part.
@@ -79,7 +94,10 @@ class _Part:
79
94
  #return get_black_background_video_frame(self._track.size)
80
95
  # TODO: This 'time_base' maybe has to be related
81
96
  # to a Timeline general 'time_base' and not the fps
82
- return get_black_background_video_frame(self._track.size, time_base = fps_to_time_base(self._track.fps))
97
+ return self._video_frame_generator.background.full_black(
98
+ size = self._track.size,
99
+ time_base = fps_to_time_base(self._track.fps)
100
+ )
83
101
 
84
102
  frame = self.video.get_frame_at(t)
85
103
 
@@ -96,7 +114,6 @@ class _Part:
96
114
 
97
115
  return frame
98
116
 
99
- # TODO: I'm not sure if we need this
100
117
  def get_audio_frames_at(
101
118
  self,
102
119
  t: Union[int, float, Fraction]
@@ -117,13 +134,15 @@ class _Part:
117
134
  # The complete silent frames we need
118
135
  frames = (
119
136
  [
120
- get_silent_audio_frame(
137
+ self._audio_frame_generator.silent(
121
138
  sample_rate = self._track.audio_fps,
122
139
  # TODO: Check where do we get this value from
123
140
  layout = 'stereo',
124
141
  number_of_samples = self._track.audio_samples_per_frame,
125
142
  # TODO: Check where do we get this value from
126
- format = 'fltp'
143
+ format = 'fltp',
144
+ pts = None,
145
+ time_base = None
127
146
  )
128
147
  ] * number_of_frames
129
148
  if number_of_frames > 0 else
@@ -133,20 +152,20 @@ class _Part:
133
152
  # The remaining partial silent frames we need
134
153
  if number_of_remaining_samples > 0:
135
154
  frames.append(
136
- get_silent_audio_frame(
155
+ self._audio_frame_generator.silent(
137
156
  sample_rate = self._track.audio_fps,
138
157
  # TODO: Check where do we get this value from
139
158
  layout = 'stereo',
140
159
  number_of_samples = number_of_remaining_samples,
141
160
  # TODO: Check where do we get this value from
142
- format = 'fltp'
161
+ format = 'fltp',
162
+ pts = None,
163
+ time_base = None
143
164
  )
144
165
  )
145
166
 
146
- # TODO: Return or yield (?)
147
167
  for frame in frames:
148
168
  yield frame
149
- #return frames
150
169
 
151
170
  # TODO: I don't like using t as float,
152
171
  # we need to implement fractions.Fraction
@@ -1,83 +0,0 @@
1
- """
2
- TODO: I don't like the name nor the
3
- location of this file, but it is here
4
- to encapsulate some functionality
5
- related to combining video frames.
6
- """
7
- import numpy as np
8
-
9
-
10
- def blend_alpha(
11
- bottom,
12
- top,
13
- alpha = 0.5
14
- ):
15
- return (alpha * top + (1 - alpha) * bottom).astype(np.uint8)
16
-
17
- def blend_add(
18
- bottom,
19
- top
20
- ):
21
- """
22
- Aclara la imagen combinada, como si superpusieras dos proyectores de luz.
23
- """
24
- return np.clip(bottom.astype(np.int16) + top.astype(np.int16), 0, 255).astype(np.uint8)
25
-
26
- def blend_multiply(
27
- bottom,
28
- top
29
- ):
30
- """
31
- Oscurece, como proyectar dos transparencias juntas.
32
- """
33
- return ((bottom.astype(np.float32) * top.astype(np.float32)) / 255).astype(np.uint8)
34
-
35
- def blend_screen(
36
- bottom,
37
- top
38
- ):
39
- """
40
- Hace lo contrario a Multiply, aclara la imagen.
41
- """
42
- return (255 - ((255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32)) / 255)).astype(np.uint8)
43
-
44
- def blend_overlay(
45
- bottom,
46
- top
47
- ):
48
- """
49
- Mezcla entre Multiply y Screen según el brillo de cada píxel.
50
- """
51
- b = bottom.astype(np.float32) / 255
52
- t = top.astype(np.float32) / 255
53
- mask = b < 0.5
54
- result = np.zeros_like(b)
55
- result[mask] = 2 * b[mask] * t[mask]
56
- result[~mask] = 1 - 2 * (1 - b[~mask]) * (1 - t[~mask])
57
- return (result * 255).astype(np.uint8)
58
-
59
- def blend_difference(
60
- bottom,
61
- top
62
- ):
63
- """
64
- Resalta las diferencias entre los dos frames.
65
- """
66
- return np.abs(bottom.astype(np.int16) - top.astype(np.int16)).astype(np.uint8)
67
-
68
- # TODO: This one needs a mask, thats why
69
- # it is commented
70
- # def blend_mask(
71
- # bottom,
72
- # top,
73
- # mask
74
- # ):
75
- # """
76
- # En lugar de un alpha fijo, puedes pasar una máscara (por ejemplo, un degradado o un canal alfa real)
77
-
78
- # mask: array float32 entre 0 y 1, mismo tamaño que frame.
79
- # """
80
- # return (mask * top + (1 - mask) * bottom).astype(np.uint8)
81
-
82
-
83
-