yta-video-opengl 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,293 @@
1
+ """
2
+ TODO: I don't like the name nor the
3
+ location of this file, but it is here
4
+ to encapsulate some functionality
5
+ related to combining video frames.
6
+
7
+ Module to contain methods that combine
8
+ video frames. Call them with the 2
9
+ frames you want to combine and you
10
+ will get the combined frame as return.
11
+ """
12
+ from av.audio.resampler import AudioResampler
13
+ from av.audio.frame import AudioFrame
14
+
15
+ import numpy as np
16
+
17
+
18
class VideoFrameCombinator:
    """
    Collection of static helpers that merge two video
    frames into one.

    Every method receives the 'bottom' and 'top' frames
    as numpy arrays and returns the combined frame as a
    uint8 numpy array.
    """

    @staticmethod
    def blend_alpha(
        bottom: np.ndarray,
        top: np.ndarray,
        alpha = 0.5
    ):
        """
        Classic alpha blending: linear interpolation
        between 'bottom' and 'top' weighted by 'alpha'.
        """
        blended = alpha * top + (1 - alpha) * bottom

        return blended.astype(np.uint8)

    @staticmethod
    def blend_add(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Brighten the combined image, as if two light
        projectors were overlapped (additive blending,
        clipped at 255).
        """
        # int16 avoids uint8 overflow before clipping
        total = bottom.astype(np.int16) + top.astype(np.int16)

        return np.clip(total, 0, 255).astype(np.uint8)

    @staticmethod
    def blend_multiply(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Darken the image, like projecting two
        transparencies together.
        """
        product = bottom.astype(np.float32) * top.astype(np.float32)

        return (product / 255).astype(np.uint8)

    @staticmethod
    def blend_screen(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        The opposite of Multiply: it lightens the image.
        """
        inverted = (255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32))

        return (255 - inverted / 255).astype(np.uint8)

    @staticmethod
    def blend_overlay(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Mix between Multiply and Screen depending on the
        brightness of each pixel of the bottom frame.
        """
        b = bottom.astype(np.float32) / 255
        t = top.astype(np.float32) / 255

        # Multiply-like below 0.5, Screen-like above
        blended = np.where(
            b < 0.5,
            2 * b * t,
            1 - 2 * (1 - b) * (1 - t)
        )

        return (blended * 255).astype(np.uint8)

    @staticmethod
    def blend_difference(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Highlight the differences between the two frames.
        """
        # int16 keeps the sign before taking the absolute value
        return np.abs(bottom.astype(np.int16) - top.astype(np.int16)).astype(np.uint8)

    # TODO: A 'blend_mask' method that accepts a per-pixel
    # mask (a float32 array in [0, 1], same size as the
    # frame, e.g. a gradient or a real alpha channel) is
    # pending; it would be:
    # (mask * top + (1 - mask) * bottom).astype(np.uint8)
103
+
104
class AudioFrameCombinator:
    """
    Class to wrap the functionality related
    to combine different audio frames.
    """

    @staticmethod
    def sum_tracks_frames(
        tracks_frames: list[AudioFrame],
        sample_rate: int = 44100,
        layout: str = 'stereo',
        format: str = 'fltp',
        do_normalize: bool = True
    ) -> AudioFrame:
        """
        Sum all the audio frames from the different
        tracks that are given in the 'tracks_frames'
        list (each column is a single audio frame of
        a track). This must be a list that should
        come from a converted matrix that was
        representing each track in a row and the
        different audio frames for that track on each
        column.

        This method is to sum audio frames of one
        specific 't' time moment of a video.

        The output will be the sum of all the audio
        frames and it will be normalized to avoid
        distortion if 'do_normalize' is True (it is
        recommended).

        Raises an Exception when 'tracks_frames' is
        empty.
        """
        if len(tracks_frames) == 0:
            raise Exception('The "tracks_frames" list of audio frames is empty.')

        arrays = []
        # One resampler shared by every frame, configured
        # once with the requested output format/layout/rate.
        # NOTE(review): pyav resamplers keep internal state;
        # sharing one across frames from different tracks is
        # assumed safe here — confirm with pyav docs.
        resampler: AudioResampler = AudioResampler(
            format = format,
            layout = layout,
            rate = sample_rate
        )

        for track_frame in tracks_frames:
            # Resample to the output format. The resampler
            # returns a list of frames.
            # TODO: What if the resampler creates more
            # than one single frame? I don't know what
            # to do... I'll see when it happens
            track_frame = resampler.resample(track_frame)

            if len(track_frame) > 1:
                print('[ ! ] The resampler has given more than 1 frame...')

            # Only the first resampled frame is used (see
            # warning above for the multi-frame case).
            track_frame_array = track_frame[0].to_ndarray()

            # Transform to 'float32' [-1, 1]
            # TODO: I think this is because the output
            # is 'fltp' but we have more combinations
            # so this must be refactored
            if track_frame_array.dtype == np.int16:
                # int16 PCM full scale is 32768
                track_frame_array = track_frame_array.astype(np.float32) / 32768.0
            elif track_frame_array.dtype != np.float32:
                track_frame_array = track_frame_array.astype(np.float32)

            # Mono to stereo if needed: duplicate the single
            # channel row so shapes match when stacking.
            # TODO: What if source is 'stereo' and we
            # want mono (?)
            if (
                track_frame_array.shape[0] == 1 and
                layout == 'stereo'
            ):
                track_frame_array = np.repeat(track_frame_array, 2, axis = 0)

            arrays.append(track_frame_array)

        # Pad every array with zeros (silence) at the end so
        # all of them share the longest length.
        max_len = max(a.shape[1] for a in arrays)
        stacked = []
        for a in arrays:
            # TODO: Again, this 'float32' is because output
            # is 'fltp' I think...
            buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
            buf[:, :a.shape[1]] = a
            stacked.append(buf)

        # Sum all the sounds
        mix = np.sum(stacked, axis = 0)
        if do_normalize:
            # Average instead of raw sum to avoid distortion
            # and saturation
            mix /= len(stacked)

        # Avoid clipping outside the float [-1, 1] range
        mix = np.clip(mix, -1.0, 1.0)

        out = AudioFrame.from_ndarray(
            array = mix,
            format = format,
            layout = layout
        )
        out.sample_rate = sample_rate

        return out
205
+
206
# TODO: This method below has been refactored
# to the 'sum_tracks_frames', so delete it
# when the one above is working well
def mix_audio_frames_by_index(
    tracks_frames,
    sample_rate: int,
    layout = 'stereo',
):
    """
    Combine all the columns of the given matrix of audio
    frames 'tracks_frames'. The rows are the different
    tracks and the columns are the frame at that 't'
    moment of each of those tracks.

    Returns a list with one mixed AudioFrame per column.

    The 'tracks_frames' matrix needs to be pre-processed
    to have only 1 single frame per column.
    """
    # TODO: Please, improve and clean all this
    # code, and make utils to combine and those
    # things, not here... Also the formats, make
    # them dynamic and based on the output that is
    # defined in the Timeline class.
    mixed_frames = []

    # Iterate by columns (each row is a track)
    for frames_at_index in zip(*tracks_frames):
        arrays = []
        for f in frames_at_index:
            # Resample to output expected values
            # TODO: This must be dynamic depending
            # on the track values
            resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = sample_rate)

            # Fix: the resampled frames were previously
            # discarded and the original frame was converted
            # instead; use the resampler output.
            arr = resampler.resample(f)[0].to_ndarray()

            # int16 PCM to float32 in [-1, 1]
            if arr.dtype == np.int16:
                arr = arr.astype(np.float32) / 32768.0

            # Mono to stereo (fix: this used to 'return'
            # from the whole function instead of converting
            # the array, aborting the mix).
            if arr.shape[0] == 1:
                arr = np.repeat(arr, 2, axis = 0)

            arrays.append(arr)

        # Pad with zeros so all arrays share the same length
        max_len = max(a.shape[1] for a in arrays)
        stacked = []
        for a in arrays:
            buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
            buf[:, :a.shape[1]] = a
            stacked.append(buf)

        # Average to avoid saturation
        mix = np.sum(stacked, axis = 0) / len(stacked)

        # Keep the float samples within [-1, 1]
        mix = np.clip(mix, -1.0, 1.0)

        # TODO: What about the 'format' if they
        # are all different (?)
        out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
        out.sample_rate = sample_rate
        # TODO: 'pts' and 'time_base' will be written
        # later when encoding

        mixed_frames.append(out)

    return mixed_frames
293
+
@@ -0,0 +1,278 @@
1
+ """
2
+ The video frames must be built using the
3
+ (height, width) size when giving the numpy
4
+ array that will be used for it. We will
5
+ receive the values as (width, height) but
6
+ we will invert them when needed.
7
+
8
+ The frames that come from an empty part
9
+ are flagged with the .metadata attribute
10
+ 'is_from_empty_part' so we can recognize
11
+ them and ignore when combining on the
12
+ timeline. We have that metadata in the
13
+ wrapper class we created.
14
+
15
+ TODO: Check because we have a similar
16
+ module in other project or projects.
17
+ """
18
+ from av.video.frame import VideoFrame
19
+ from av.audio.frame import AudioFrame
20
+ from av.audio.layout import AudioLayout
21
+ from typing import Union
22
+
23
+ import numpy as np
24
+
25
+
26
+ class _FrameGenerator:
27
+ """
28
+ Class to generate frames as numpy arrays.
29
+ """
30
+
31
+ def full_black(
32
+ self,
33
+ size: tuple[int, int] = (1920, 1080),
34
+ dtype: np.dtype = np.uint8
35
+ ):
36
+ """
37
+ Get a numpy array that represents a full
38
+ black frame of the given 'size' and with
39
+ the given 'dtype'.
40
+ """
41
+ # TODO: I think 'zeros' only work if dtype
42
+ # is int
43
+ return np.zeros(
44
+ shape = (size[1], size[0], 3),
45
+ dtype = dtype
46
+ )
47
+
48
+ def full_white(
49
+ self,
50
+ size: tuple[int, int] = (1920, 1080),
51
+ dtype: np.dtype = np.uint8
52
+ ):
53
+ """
54
+ Get a numpy array that represents a full
55
+ black frame of the given 'size' and with
56
+ the given 'dtype'.
57
+ """
58
+ # TODO: I think 'ones' only work if dtype
59
+ # is int
60
+ return np.ones(
61
+ shape = (size[1], size[0], 3),
62
+ dtype = dtype
63
+ )
64
+
65
class _BackgroundFrameGenerator:
    """
    Internal class to simplify the way we access to
    the generation of background frames from the
    general generator class.
    """

    def __init__(
        self
    ):
        # Shortcut to the raw numpy frame generator.
        self._frame_generator: _FrameGenerator = _FrameGenerator()

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely black
        and of the given 'size'.
        """
        array = self._frame_generator.full_black(size, dtype)

        return numpy_to_video_frame(
            frame = array,
            format = format,
            pts = pts,
            time_base = time_base
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely white
        and of the given 'size'.
        """
        array = self._frame_generator.full_white(size, dtype)

        return numpy_to_video_frame(
            frame = array,
            format = format,
            pts = pts,
            time_base = time_base
        )
117
+
118
class VideoFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav video frame.

    This class is useful when we need to generate
    the black background for empty parts within
    the tracks and in other situations.
    """

    def __init__(
        self
    ):
        # Shortcut to the background frame creation.
        self.background = _BackgroundFrameGenerator()
136
+
137
def numpy_to_video_frame(
    frame: np.ndarray,
    format: str = 'rgb24',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> VideoFrame:
    """
    Transform the given numpy 'frame' into a pyav
    video frame with the given 'format', attaching
    the 'pts' and/or 'time_base' when provided.
    """
    # TODO: What if we want alpha (?)
    video_frame = VideoFrame.from_ndarray(
        array = frame,
        format = format
    )

    if pts is not None:
        video_frame.pts = pts

    if time_base is not None:
        video_frame.time_base = time_base

    return video_frame
162
+
163
class AudioFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav audio frame.

    This class is useful when we need to generate
    the silent audio for empty parts within the
    tracks and in other situations.
    """

    def silent(
        self,
        sample_rate: int,
        layout = 'stereo',
        number_of_samples: int = 1024,
        format = 's16',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> AudioFrame:
        """
        Get an audio frame that is completely silent.
        This is useful when we want to fill the empty
        parts of our tracks.

        Raises an Exception when the 'format' has no
        known numpy dtype equivalent.
        """
        dtype = audio_format_to_dtype(format)

        if dtype is None:
            raise Exception(f'The format "{format}" is not accepted.')

        # TODO: Is this raising exception if the
        # 'layout' is not valid? I think yes (?)
        number_of_channels = len(AudioLayout(layout).channels)

        # NOTE(review): a previous version multiplied
        # 'number_of_samples' by the channel count for
        # planar ('p') formats, but that made the audio
        # last twice as long, so the buffer is kept as
        # (channels, samples) for every format.
        samples = np.zeros(
            shape = (number_of_channels, number_of_samples),
            dtype = dtype
        )

        return numpy_to_audio_frame(
            frame = samples,
            sample_rate = sample_rate,
            layout = layout,
            format = format,
            pts = pts,
            time_base = time_base
        )
223
+
224
def numpy_to_audio_frame(
    frame: np.ndarray,
    sample_rate: int,
    layout: str = 'stereo',
    # Fix: the default was ' s16' (leading space),
    # which is not a valid pyav sample format and
    # would break any call relying on the default.
    format: str = 's16',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> AudioFrame:
    """
    Transform the given numpy 'frame' into a pyav
    audio frame with the given 'sample_rate',
    'layout' and 'format', and also the 'pts'
    and/or 'time_base' if provided.
    """
    audio_frame = AudioFrame.from_ndarray(
        array = frame,
        format = format,
        layout = layout
    )

    audio_frame.sample_rate = sample_rate

    if pts is not None:
        audio_frame.pts = pts

    if time_base is not None:
        audio_frame.time_base = time_base

    return audio_frame
253
+
254
# TODO: Maybe transform into a Enum (?)
def audio_format_to_dtype(
    audio_format: str
) -> Union[np.dtype, None]:
    """
    Transform the given 'audio_format' into the
    corresponding numpy dtype value, or None when
    the 'audio_format' is not accepted.

    This method must be used when we are building
    the numpy array that will be used to build a
    pyav audio frame, because each pyav audio
    format needs a specific np.dtype.

    For example, 's16' will return 'np.int16'
    and 'fltp' will return 'np.float32'.
    """
    format_to_dtype = {
        's16': np.int16,
        'flt': np.float32,
        'fltp': np.float32
    }

    return format_to_dtype.get(audio_format)
278
+
@@ -0,0 +1,122 @@
1
+ from yta_validation.parameter import ParameterValidator
2
+ from av.video.frame import VideoFrame
3
+ from av.audio.frame import AudioFrame
4
+ from typing import Union
5
+
6
+
7
+ IS_FROM_EMPTY_PART_METADATA = 'is_from_empty_part'
8
+
9
class _FrameWrappedBase:
    """
    Class to wrap video and audio frames from
    the pyav library but to support a metadata
    field to inject some information we need
    when processing and combining them.
    """

    @property
    def is_from_empty_part(
        self
    ) -> bool:
        """
        Flag to indicate if the frame comes from
        an empty part or not, that will be done
        by checking the 'is_from_empty_part'
        attribute in the metadata.
        """
        return IS_FROM_EMPTY_PART_METADATA in self.metadata

    def __init__(
        self,
        frame,
        # Fix: the default was a shared mutable dict
        # literal; a None sentinel avoids accidental
        # cross-instance sharing.
        metadata: Union[dict, None] = None
    ):
        metadata = metadata or {}

        ParameterValidator.validate_mandatory_instance_of('frame', frame, [VideoFrame, AudioFrame])
        ParameterValidator.validate_mandatory_dict('metadata', metadata)

        # The wrapped pyav frame instance.
        self._frame: Union[VideoFrame, AudioFrame] = frame
        # Extra information injected by our processing code.
        self.metadata: dict = metadata

    def __getattr__(
        self,
        name
    ):
        # Only invoked when normal attribute lookup fails,
        # so wrapper attributes win and anything else is
        # delegated to the wrapped frame.
        return getattr(self._frame, name)

    def __setattr__(
        self,
        name,
        value
    ):
        # Attributes are always stored on the wrapper
        # itself; the wrapped frame is never mutated
        # through this class.
        super().__setattr__(name, value)

    def __repr__(
        self
    ):
        cname = self.__class__.__name__
        return f'<{cname} metadata={self.metadata} frame={self._frame!r}>'

    def set_as_from_empty_part(
        self
    ) -> None:
        """
        Add the metadata information to indicate
        that this is a frame that comes from an
        empty part.
        """
        self.metadata[IS_FROM_EMPTY_PART_METADATA] = 'True'

    def unwrap(
        self
    ):
        """
        Get the original frame instance.
        """
        return self._frame
81
+
82
class VideoFrameWrapped(_FrameWrappedBase):
    """
    Class to wrap video frames from the pyav
    library but to support a metadata field
    to inject some information we need when
    processing and combining them.
    """

    def __init__(
        self,
        frame: VideoFrame,
        # Fix: avoid a shared mutable dict as the
        # default argument value.
        metadata: Union[dict, None] = None,
        is_from_empty_part: bool = False
    ):
        ParameterValidator.validate_mandatory_instance_of('frame', frame, VideoFrame)

        # Pass a fresh dict when no metadata is given so
        # the base class validation (which requires a
        # dict) keeps working.
        super().__init__(frame, metadata if metadata is not None else {})

        if is_from_empty_part:
            self.set_as_from_empty_part()
103
class AudioFrameWrapped(_FrameWrappedBase):
    """
    Class to wrap audio frames from the pyav
    library but to support a metadata field
    to inject some information we need when
    processing and combining them.
    """

    def __init__(
        self,
        frame: AudioFrame,
        # Fix: avoid a shared mutable dict as the
        # default argument value.
        metadata: Union[dict, None] = None,
        is_from_empty_part: bool = False
    ):
        ParameterValidator.validate_mandatory_instance_of('frame', frame, AudioFrame)

        # Pass a fresh dict when no metadata is given so
        # the base class validation (which requires a
        # dict) keeps working.
        super().__init__(frame, metadata if metadata is not None else {})

        if is_from_empty_part:
            self.set_as_from_empty_part()