yta-video-opengl 0.0.16__py3-none-any.whl → 0.0.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_video_opengl/complete/frame_combinator.py +293 -0
- yta_video_opengl/complete/frame_generator.py +271 -0
- yta_video_opengl/complete/timeline.py +73 -136
- yta_video_opengl/complete/track.py +28 -9
- {yta_video_opengl-0.0.16.dist-info → yta_video_opengl-0.0.17.dist-info}/METADATA +1 -1
- {yta_video_opengl-0.0.16.dist-info → yta_video_opengl-0.0.17.dist-info}/RECORD +8 -7
- yta_video_opengl/complete/blend.py +0 -83
- {yta_video_opengl-0.0.16.dist-info → yta_video_opengl-0.0.17.dist-info}/LICENSE +0 -0
- {yta_video_opengl-0.0.16.dist-info → yta_video_opengl-0.0.17.dist-info}/WHEEL +0 -0
@@ -0,0 +1,293 @@
|
|
1
|
+
"""
|
2
|
+
TODO: I don't like the name nor the
|
3
|
+
location of this file, but it is here
|
4
|
+
to encapsulate some functionality
|
5
|
+
related to combining video frames.
|
6
|
+
|
7
|
+
Module to contain methods that combine
|
8
|
+
video frames. Call them with the 2
|
9
|
+
frames you want to combine and you
|
10
|
+
will get the combined frame as return.
|
11
|
+
"""
|
12
|
+
from av.audio.resampler import AudioResampler
|
13
|
+
from av.audio.frame import AudioFrame
|
14
|
+
|
15
|
+
import numpy as np
|
16
|
+
|
17
|
+
|
18
|
+
class VideoFrameCombinator:
    """
    Class to wrap the functionality related
    to combine different video frames.

    Every method receives the 'bottom' and the
    'top' frames as numpy arrays and returns a
    new 'np.uint8' array with the combination.
    The maths below use 255 as the maximum value,
    so 8-bit frames of the same shape are expected
    (NOTE(review): not validated here).
    """

    @staticmethod
    def blend_alpha(
        bottom: np.ndarray,
        top: np.ndarray,
        alpha: float = 0.5
    ) -> np.ndarray:
        """
        Mix both frames linearly, using 'alpha' as
        the weight of the 'top' frame and
        '1 - alpha' as the weight of the 'bottom'.

        The result is clipped to [0, 255] before
        the cast, so an 'alpha' out of [0, 1]
        saturates instead of wrapping around.
        """
        blended = alpha * top + (1 - alpha) * bottom

        return np.clip(blended, 0, 255).astype(np.uint8)

    @staticmethod
    def blend_add(
        bottom: np.ndarray,
        top: np.ndarray
    ) -> np.ndarray:
        """
        Lighten the combined image, as if two light
        projectors were overlapped. The sum is
        saturated at 255.
        """
        # 'int16' gives room for the sum before clipping
        total = bottom.astype(np.int16) + top.astype(np.int16)

        return np.clip(total, 0, 255).astype(np.uint8)

    @staticmethod
    def blend_multiply(
        bottom: np.ndarray,
        top: np.ndarray
    ) -> np.ndarray:
        """
        Darken the image, like projecting two
        transparencies together.
        """
        product = bottom.astype(np.float32) * top.astype(np.float32)

        return (product / 255).astype(np.uint8)

    @staticmethod
    def blend_screen(
        bottom: np.ndarray,
        top: np.ndarray
    ) -> np.ndarray:
        """
        The opposite of multiply: it lightens the
        image by multiplying the inverted frames
        and inverting the result again.
        """
        inverted_bottom = 255 - bottom.astype(np.float32)
        inverted_top = 255 - top.astype(np.float32)

        return (255 - (inverted_bottom * inverted_top / 255)).astype(np.uint8)

    @staticmethod
    def blend_overlay(
        bottom: np.ndarray,
        top: np.ndarray
    ) -> np.ndarray:
        """
        Mix between multiply and screen, chosen for
        each value according to the brightness of
        the 'bottom' frame (below or above 0.5 once
        normalized to [0, 1]).
        """
        b = bottom.astype(np.float32) / 255
        t = top.astype(np.float32) / 255

        result = np.where(
            b < 0.5,
            # Dark values: multiply
            2 * b * t,
            # Bright values: screen
            1 - 2 * (1 - b) * (1 - t)
        )

        return (result * 255).astype(np.uint8)

    @staticmethod
    def blend_difference(
        bottom: np.ndarray,
        top: np.ndarray
    ) -> np.ndarray:
        """
        Highlight the differences between the two
        frames by returning the absolute difference.
        """
        # 'int16' avoids the unsigned underflow of the
        # subtraction
        difference = bottom.astype(np.int16) - top.astype(np.int16)

        return np.abs(difference).astype(np.uint8)
|
88
|
+
|
89
|
+
# TODO: This one needs a mask, thats why
|
90
|
+
# it is commented
|
91
|
+
# @staticmethod
|
92
|
+
# def blend_mask(
|
93
|
+
# bottom,
|
94
|
+
# top,
|
95
|
+
# mask
|
96
|
+
# ):
|
97
|
+
# """
|
98
|
+
# En lugar de un alpha fijo, puedes pasar una máscara (por ejemplo, un degradado o un canal alfa real)
|
99
|
+
|
100
|
+
# mask: array float32 entre 0 y 1, mismo tamaño que frame.
|
101
|
+
# """
|
102
|
+
# return (mask * top + (1 - mask) * bottom).astype(np.uint8)
|
103
|
+
|
104
|
+
class AudioFrameCombinator:
    """
    Class to wrap the functionality related
    to combine different audio frames.
    """

    @staticmethod
    def _to_float_array(
        frame: AudioFrame,
        sample_rate: int,
        layout: str,
        format: str
    ):
        """
        Resample the given 'frame' to the output
        'format', 'layout' and 'sample_rate' and get
        its samples as a 'float32' numpy array, or
        None if the resampler produced no samples.
        """
        # A pyav resampler is configured from the first
        # frame it receives, so it cannot be shared by
        # frames of different tracks: one per frame
        resampler = AudioResampler(
            format = format,
            layout = layout,
            rate = sample_rate
        )

        # The resampler can return zero, one or more
        # frames, and 'resample(None)' flushes the samples
        # that could remain buffered
        resampled_frames = resampler.resample(frame) + resampler.resample(None)
        chunks = [
            resampled_frame.to_ndarray()
            for resampled_frame in resampled_frames
        ]

        if not chunks:
            return None

        array = (
            chunks[0]
            if len(chunks) == 1 else
            np.concatenate(chunks, axis = 1)
        )

        # Transform to 'float32' [-1, 1]
        if array.dtype == np.int16:
            array = array.astype(np.float32) / 32768.0
        elif array.dtype != np.float32:
            array = array.astype(np.float32)

        # Mono to stereo if needed. Only for planar
        # formats, in which each row is a channel (a
        # packed array is a single row for all the
        # channels and must not be duplicated)
        if (
            array.shape[0] == 1 and
            layout == 'stereo' and
            format.endswith('p')
        ):
            array = np.repeat(array, 2, axis = 0)

        return array

    @staticmethod
    def sum_tracks_frames(
        tracks_frames: list[AudioFrame],
        sample_rate: int = 44100,
        layout: str = 'stereo',
        format: str = 'fltp',
        do_normalize: bool = True
    ) -> AudioFrame:
        """
        Sum all the audio frames from the different
        tracks that are given in the 'tracks_frames'
        list (one single audio frame per track), all
        of them belonging to the same 't' time moment
        of a video.

        Each frame is resampled to the 'format',
        'layout' and 'sample_rate' given, the shorter
        ones are filled with zeros at the end and all
        of them are summed. The sum is divided by the
        number of frames if 'do_normalize' is True (it
        is recommended) and it is always clipped to
        [-1.0, 1.0]. The None elements are ignored.

        The mix is built as 'float32', which is what
        the default 'fltp' output needs.
        NOTE(review): integer output formats are not
        converted back before building the frame.

        An Exception is raised if the list is empty or
        if there are no samples to combine.
        """
        if len(tracks_frames) == 0:
            raise Exception('The "tracks_frames" list of audio frames is empty.')

        arrays = []
        for track_frame in tracks_frames:
            if track_frame is None:
                # A track with no audio for this moment
                # adds nothing to the mix
                continue

            track_frame_array = AudioFrameCombinator._to_float_array(
                frame = track_frame,
                sample_rate = sample_rate,
                layout = layout,
                format = format
            )

            if track_frame_array is not None:
                arrays.append(track_frame_array)

        if not arrays:
            raise Exception('The "tracks_frames" list has no audio samples to combine.')

        # Same length and fill with zeros if needed
        max_len = max(a.shape[1] for a in arrays)
        stacked = []
        for a in arrays:
            buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
            buf[:, :a.shape[1]] = a
            stacked.append(buf)

        # Sum all the sounds
        mix = np.sum(stacked, axis = 0)
        if do_normalize:
            # Avoid distortion and saturation
            mix /= len(stacked)

        # Avoid clipping
        mix = np.clip(mix, -1.0, 1.0)

        out = AudioFrame.from_ndarray(
            array = mix,
            format = format,
            layout = layout
        )
        out.sample_rate = sample_rate

        return out

    # TODO: This method below has been refactored
    # to the 'sum_tracks_frames', so delete it
    # when the one above is working well
    @staticmethod
    def mix_audio_frames_by_index(
        tracks_frames,
        sample_rate: int,
        layout = 'stereo',
    ):
        """
        Combine all the columns of the given
        matrix of audio frames 'tracks_frames'.
        The rows are the different tracks and
        the columns are the frame at that 't'
        moment of each of those tracks.

        Each column is mixed with 'sum_tracks_frames'
        (normalized and as 'fltp'), so this returns a
        list with one mixed audio frame per column.
        The columns are built with 'zip', so the
        shortest row defines how many there are.
        """
        return [
            AudioFrameCombinator.sum_tracks_frames(
                tracks_frames = list(frames_at_index),
                sample_rate = sample_rate,
                layout = layout,
                format = 'fltp',
                do_normalize = True
            )
            # Iterate by columns (each row is a track)
            for frames_at_index in zip(*tracks_frames)
        ]
|
293
|
+
|
@@ -0,0 +1,271 @@
|
|
1
|
+
"""
|
2
|
+
The video frames must be built using the
|
3
|
+
(height, width) size when giving the numpy
|
4
|
+
array that will be used for it. We will
|
5
|
+
receive the values as (width, height) but
|
6
|
+
we will invert them when needed.
|
7
|
+
|
8
|
+
TODO: Check because we have a similar
|
9
|
+
module in other project or projects.
|
10
|
+
"""
|
11
|
+
from av.video.frame import VideoFrame
|
12
|
+
from av.audio.frame import AudioFrame
|
13
|
+
from av.audio.layout import AudioLayout
|
14
|
+
from typing import Union
|
15
|
+
|
16
|
+
import numpy as np
|
17
|
+
|
18
|
+
|
19
|
+
class _FrameGenerator:
    """
    Class to generate frames as numpy arrays.

    The 'size' is received as (width, height) but
    the arrays are built as (height, width, 3).
    """

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ) -> np.ndarray:
        """
        Get a numpy array that represents a full
        black frame of the given 'size' and with
        the given 'dtype' (all the values are 0).
        """
        return np.zeros(
            shape = (size[1], size[0], 3),
            dtype = dtype
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ) -> np.ndarray:
        """
        Get a numpy array that represents a full
        white frame of the given 'size' and with
        the given 'dtype'.

        White is the maximum value of the 'dtype'
        when it is an integer one (255 for the
        default 'np.uint8') and 1 in any other case.
        """
        dtype = np.dtype(dtype)
        # Filling with ones would be almost black for
        # an integer dtype, we need its maximum value
        white = (
            np.iinfo(dtype).max
            if np.issubdtype(dtype, np.integer) else
            1
        )

        return np.full(
            shape = (size[1], size[0], 3),
            fill_value = white,
            dtype = dtype
        )
|
57
|
+
|
58
|
+
class _BackgroundFrameGenerator:
    """
    Internal class to simplify the way we
    access to the generation of background
    frames from the general generator class.

    Each method builds the numpy array with
    the internal frame generator and turns it
    into a pyav video frame.
    """

    def __init__(
        self
    ):
        # Shortcut to the FrameGenerator.
        self._frame_generator: _FrameGenerator = _FrameGenerator()

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame of the given 'size'
        that is completely black, with the 'pts'
        and 'time_base' set if they are provided.
        """
        black_array = self._frame_generator.full_black(size, dtype)

        return numpy_to_video_frame(
            frame = black_array,
            format = format,
            pts = pts,
            time_base = time_base
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame of the given 'size'
        that is completely white, with the 'pts'
        and 'time_base' set if they are provided.
        """
        white_array = self._frame_generator.full_white(size, dtype)

        return numpy_to_video_frame(
            frame = white_array,
            format = format,
            pts = pts,
            time_base = time_base
        )
|
110
|
+
|
111
|
+
class VideoFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav video frame.

    It is the entry point we use when we need
    the black background for the empty parts
    within the tracks, and in other situations.
    """

    def __init__(
        self
    ):
        # Shortcut to the background creation.
        self.background = _BackgroundFrameGenerator()
|
129
|
+
|
130
|
+
def numpy_to_video_frame(
    frame: np.ndarray,
    format: str = 'rgb24',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> VideoFrame:
    """
    Build a pyav video frame from the given numpy
    'frame', using the given 'format'. The 'pts'
    and the 'time_base' are only set in the new
    frame when they are provided (not None).
    """
    # TODO: What if we want alpha (?)
    video_frame = VideoFrame.from_ndarray(
        array = frame,
        format = format
    )

    if pts is not None:
        video_frame.pts = pts

    if time_base is not None:
        video_frame.time_base = time_base

    return video_frame
|
155
|
+
|
156
|
+
class AudioFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav audio frame.

    This class is useful when we need to
    generate the silent audio for empty parts
    within the tracks and in other situations.
    """

    def silent(
        self,
        sample_rate: int,
        layout = 'stereo',
        number_of_samples: int = 1024,
        format = 's16',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> AudioFrame:
        """
        Get an audio frame that is completely silent.
        This is useful when we want to fill the empty
        parts of our tracks.

        The frame has 'number_of_samples' samples per
        channel, and the 'pts' and 'time_base' are set
        only if provided. An Exception is raised if
        the 'format' is not accepted.
        """
        dtype = audio_format_to_dtype(format)

        if dtype is None:
            raise Exception(f'The format "{format}" is not accepted.')

        # TODO: Is this raising exception if the
        # 'layout' is not valid? I think yes (?)
        number_of_channels = len(AudioLayout(layout).channels)

        # The shape pyav expects depends on the format:
        # - planar (the name ends in 'p', like 'fltp'):
        # one row per channel, (channels, samples)
        # - packed (like 's16'): one single row with the
        # channels interleaved, (1, samples * channels)
        # The duration is the same in both cases
        shape = (
            (number_of_channels, number_of_samples)
            if format.endswith('p') else
            (1, number_of_samples * number_of_channels)
        )

        silent_numpy_array = np.zeros(
            shape = shape,
            dtype = dtype
        )

        return numpy_to_audio_frame(
            frame = silent_numpy_array,
            sample_rate = sample_rate,
            layout = layout,
            format = format,
            pts = pts,
            time_base = time_base
        )
|
216
|
+
|
217
|
+
def numpy_to_audio_frame(
    frame: np.ndarray,
    sample_rate: int,
    layout: str = 'stereo',
    format: str = 's16',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> AudioFrame:
    """
    Transform the given numpy 'frame' into a
    pyav audio frame with the given 'sample_rate',
    'layout' and 'format', and also the 'pts'
    and/or 'time_base' if provided.

    The dtype and the shape of the numpy 'frame'
    must be the ones pyav needs for the 'format'
    and 'layout' (they are not adapted here).
    """
    audio_frame = AudioFrame.from_ndarray(
        array = frame,
        format = format,
        layout = layout
    )

    audio_frame.sample_rate = sample_rate

    if pts is not None:
        audio_frame.pts = pts

    if time_base is not None:
        audio_frame.time_base = time_base

    return audio_frame
|
246
|
+
|
247
|
+
# TODO: Maybe transform into a Enum (?)
|
248
|
+
def audio_format_to_dtype(
    audio_format: str
) -> Union[np.dtype, None]:
    """
    Transform the given 'audio_format' into
    the corresponding numpy dtype value. If
    the 'audio_format' is not accepted this
    method will return None.

    This method must be used when we are
    building the numpy array that will be
    used to build a pyav audio frame because
    the pyav 'audio_format' need a specific
    np.dtype to be built.

    The packed and the planar (ended in 'p')
    variants of a format share the dtype. For
    example, 's16' will return 'np.int16'
    and 'fltp' will return 'np.float32'.
    """
    return {
        'u8': np.uint8,
        'u8p': np.uint8,
        's16': np.int16,
        's16p': np.int16,
        's32': np.int32,
        's32p': np.int32,
        'flt': np.float32,
        'fltp': np.float32,
        'dbl': np.float64,
        'dblp': np.float64
    }.get(audio_format, None)
|
271
|
+
|
@@ -13,10 +13,10 @@ video written).
|
|
13
13
|
from yta_video_opengl.complete.track import Track
|
14
14
|
from yta_video_opengl.video import Video
|
15
15
|
from yta_video_opengl.t import get_ts, fps_to_time_base, T
|
16
|
+
from yta_video_opengl.complete.frame_combinator import AudioFrameCombinator
|
16
17
|
from yta_validation.parameter import ParameterValidator
|
17
18
|
from av.video.frame import VideoFrame
|
18
19
|
from av.audio.frame import AudioFrame
|
19
|
-
from av.audio.resampler import AudioResampler
|
20
20
|
from quicktions import Fraction
|
21
21
|
from typing import Union
|
22
22
|
|
@@ -135,7 +135,7 @@ class Timeline:
|
|
135
135
|
# I want them as transparent or something
|
136
136
|
|
137
137
|
# TODO: This is just a test function
|
138
|
-
from yta_video_opengl.complete.
|
138
|
+
from yta_video_opengl.complete.frame_combinator import VideoFrameCombinator
|
139
139
|
|
140
140
|
# TODO: Combinate frames, we force them to
|
141
141
|
# rgb24 to obtain them with the same shape,
|
@@ -147,7 +147,8 @@ class Timeline:
|
|
147
147
|
# TODO: We need to ignore the frames that
|
148
148
|
# are just empty black frames and use them
|
149
149
|
# not in the combination process
|
150
|
-
|
150
|
+
# TODO: What about the 'format' (?)
|
151
|
+
output_frame = VideoFrameCombinator.blend_add(output_frame, frame.to_ndarray(format = 'rgb24'))
|
151
152
|
|
152
153
|
# TODO: How to build this VideoFrame correctly
|
153
154
|
# and what about the 'format' (?)
|
@@ -158,150 +159,39 @@ class Timeline:
|
|
158
159
|
self,
|
159
160
|
t: float
|
160
161
|
):
|
162
|
+
audio_frames = []
|
163
|
+
"""
|
164
|
+
Matrix in which the rows are the different
|
165
|
+
tracks we have, and the column includes all
|
166
|
+
the audio frames for this 't' time moment
|
167
|
+
for the track of that row. We can have more
|
168
|
+
than one frame per column per row (track)
|
169
|
+
but we need a single frame to combine all
|
170
|
+
the tracks.
|
171
|
+
"""
|
161
172
|
# TODO: What if the different audio streams
|
162
173
|
# have also different fps (?)
|
163
|
-
audio_frames = []
|
164
174
|
for track in self.tracks:
|
165
175
|
# TODO: Make this work properly
|
166
176
|
audio_frames.append(list(track.get_audio_frames_at(t)))
|
167
|
-
|
168
|
-
# TODO: Combine them
|
169
177
|
# TODO: We need to ignore the frames that
|
170
178
|
# are just empty black frames and use them
|
171
179
|
# not in the combination process
|
172
180
|
|
173
|
-
def mix_audio_frames_by_index(
|
174
|
-
tracks_frames,
|
175
|
-
layout = 'stereo'
|
176
|
-
):
|
177
|
-
"""
|
178
|
-
Combine all the columns of the given
|
179
|
-
matrix of audio frames 'tracks_frames'.
|
180
|
-
The rows are the different tracks and
|
181
|
-
the columns are the frame at that 't'
|
182
|
-
moment of each of those tracks.
|
183
|
-
|
184
|
-
The 'tracks_frames' matrix needs to be
|
185
|
-
pre-processed to have only 1 single
|
186
|
-
frame to combine, so we concatenate
|
187
|
-
all the frames if more than 1 per
|
188
|
-
column.
|
189
|
-
"""
|
190
|
-
# TODO: Please, improve and clean all this
|
191
|
-
# code is so sh*tty, and make utils to
|
192
|
-
# combine and those things, not here...
|
193
|
-
# Also the formats, make them dynamic and
|
194
|
-
# based on the output that is defined here
|
195
|
-
# in the Timeline class.
|
196
|
-
mixed_frames = []
|
197
|
-
|
198
|
-
# Iterate by columns (each row is a track)
|
199
|
-
for frames_at_index in zip(*tracks_frames):
|
200
|
-
arrays = []
|
201
|
-
for f in frames_at_index:
|
202
|
-
# Resample to output expected values
|
203
|
-
# TODO: This must be dynamic depending
|
204
|
-
# on the track values
|
205
|
-
resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = self.audio_fps)
|
206
|
-
arr = resampler.resample(f)
|
207
|
-
|
208
|
-
arr = f.to_ndarray()
|
209
|
-
|
210
|
-
# TODO: This below must change depending
|
211
|
-
# on the expected output, for us and now
|
212
|
-
# it is float32, fltp, stereo, 44_100
|
213
|
-
# Same format
|
214
|
-
if arr.dtype == np.int16:
|
215
|
-
arr = arr.astype(np.float32) / 32768.0
|
216
|
-
|
217
|
-
# Same layout (number of channels)
|
218
|
-
if arr.shape[0] == 1:
|
219
|
-
return np.repeat(arr, 2, axis = 0)
|
220
|
-
# elif arr.dtype == np.float32:
|
221
|
-
# # Ya está en [-1,1], no lo toques
|
222
|
-
# pass
|
223
|
-
|
224
|
-
arrays.append(arr)
|
225
|
-
|
226
|
-
# Alinear longitudes
|
227
|
-
max_len = max(a.shape[1] for a in arrays)
|
228
|
-
stacked = []
|
229
|
-
for a in arrays:
|
230
|
-
buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
|
231
|
-
buf[:, :a.shape[1]] = a
|
232
|
-
stacked.append(buf)
|
233
|
-
|
234
|
-
# Mezcla
|
235
|
-
mix = np.sum(stacked, axis = 0) / len(stacked)
|
236
|
-
#mix = np.sum(stacked, axis = 0)
|
237
|
-
|
238
|
-
# Limitar al rango [-1,1]
|
239
|
-
mix = np.clip(mix, -1.0, 1.0)
|
240
|
-
|
241
|
-
# Crear frame de salida
|
242
|
-
# TODO: What about the 'format' if they
|
243
|
-
# are all different (?)
|
244
|
-
out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
|
245
|
-
out.sample_rate = self.audio_fps
|
246
|
-
# TODO: This will be written later when
|
247
|
-
# encoding
|
248
|
-
# out.pts = frames_at_index[0].pts
|
249
|
-
# out.time_base = frames_at_index[0].time_base
|
250
|
-
|
251
|
-
print(mix.min(), mix.max())
|
252
|
-
|
253
|
-
mixed_frames.append(out)
|
254
|
-
|
255
|
-
return mixed_frames
|
256
|
-
|
257
|
-
def combine_audio_frames(frames):
|
258
|
-
"""
|
259
|
-
Combina varios AudioFrames consecutivos en uno solo.
|
260
|
-
- Convierte a float32
|
261
|
-
- Concatena muestras a lo largo del tiempo
|
262
|
-
- Devuelve un AudioFrame nuevo
|
263
|
-
"""
|
264
|
-
if not frames:
|
265
|
-
# TODO: This should not happen
|
266
|
-
return None
|
267
|
-
|
268
|
-
if len(frames) == 1:
|
269
|
-
return frames
|
270
|
-
|
271
|
-
# Verificamos consistencia básica
|
272
|
-
sample_rate = frames[0].sample_rate
|
273
|
-
layout = frames[0].layout.name
|
274
|
-
channels = frames[0].layout.channels
|
275
|
-
|
276
|
-
arrays = []
|
277
|
-
for f in frames:
|
278
|
-
if f.sample_rate != sample_rate or f.layout.name != layout:
|
279
|
-
raise ValueError("Los frames deben tener mismo sample_rate y layout")
|
280
|
-
|
281
|
-
# arr = f.to_ndarray() # (channels, samples)
|
282
|
-
# if arr.dtype == np.int16:
|
283
|
-
# arr = arr.astype(np.float32) / 32768.0
|
284
|
-
# elif arr.dtype != np.float32:
|
285
|
-
# arr = arr.astype(np.float32)
|
286
|
-
|
287
|
-
arrays.append(f.to_ndarray())
|
288
|
-
|
289
|
-
# Concatenamos por eje de samples
|
290
|
-
combined = np.concatenate(arrays, axis = 1)
|
291
|
-
|
292
|
-
# Creamos un frame nuevo
|
293
|
-
out = AudioFrame.from_ndarray(combined, format = frames[0].format, layout = layout)
|
294
|
-
out.sample_rate = sample_rate
|
295
|
-
|
296
|
-
return [out]
|
297
|
-
|
298
181
|
# We need only 1 single audio frame per column
|
299
|
-
collapsed = [
|
300
|
-
|
301
|
-
|
182
|
+
collapsed = [
|
183
|
+
concatenate_audio_frames(frames)
|
184
|
+
for frames in audio_frames
|
185
|
+
]
|
302
186
|
|
303
187
|
# Now, mix column by column (track by track)
|
304
|
-
|
188
|
+
# TODO: I do this to have an iterator, but
|
189
|
+
# maybe we need more than one single audio
|
190
|
+
# frame because of the size at the original
|
191
|
+
# video or something...
|
192
|
+
frames = [
|
193
|
+
AudioFrameCombinator.sum_tracks_frames(collapsed, self.audio_fps)
|
194
|
+
]
|
305
195
|
|
306
196
|
for audio_frame in frames:
|
307
197
|
yield audio_frame
|
@@ -416,4 +306,51 @@ class Timeline:
|
|
416
306
|
|
417
307
|
writer.mux_video_frame(None)
|
418
308
|
writer.mux_audio_frame(None)
|
419
|
-
writer.output.close()
|
309
|
+
writer.output.close()
|
310
|
+
|
311
|
+
|
312
|
+
# TODO: I don't know where to put this
|
313
|
+
# method because it is a bit special
|
314
|
+
# TODO: Refactor and move please
|
315
|
+
def concatenate_audio_frames(
    frames: list[AudioFrame]
) -> Union[AudioFrame, None]:
    """
    Combine several consecutive audio frames
    into a single one by concatenating their
    samples along the time axis.

    - None is returned if 'frames' is empty.
    - The frame itself is returned if there is
    only one.
    - In any other case a new audio frame is
    returned, with the sample rate, layout and
    format of the first frame. The samples are
    not converted.

    A ValueError is raised if the frames do not
    share the sample rate, layout and format.
    """
    if not frames:
        # TODO: This should not happen
        return None

    if len(frames) == 1:
        return frames[0]

    # Basic consistency check, the first frame
    # is the reference
    sample_rate = frames[0].sample_rate
    layout = frames[0].layout.name
    format = frames[0].format.name

    arrays = []
    for f in frames:
        if (
            f.sample_rate != sample_rate or
            f.layout.name != layout or
            # Different formats mean different dtypes or
            # shapes that cannot be concatenated safely
            f.format.name != format
        ):
            raise ValueError('The frames must have the same sample_rate, layout and format.')

        arrays.append(f.to_ndarray())

    # Concatenate along the samples axis
    combined = np.concatenate(arrays, axis = 1)

    # Build the new frame
    out = AudioFrame.from_ndarray(combined, format = format, layout = layout)
    out.sample_rate = sample_rate

    return out
|
@@ -1,8 +1,9 @@
|
|
1
1
|
from yta_video_opengl.complete.video_on_track import VideoOnTrack
|
2
2
|
from yta_video_opengl.video import Video
|
3
3
|
from yta_video_opengl.t import T
|
4
|
-
from yta_video_opengl.utils import
|
4
|
+
from yta_video_opengl.utils import audio_frames_and_remainder_per_video_frame
|
5
5
|
from yta_video_opengl.t import fps_to_time_base
|
6
|
+
from yta_video_opengl.complete.frame_generator import VideoFrameGenerator, AudioFrameGenerator
|
6
7
|
from yta_validation.parameter import ParameterValidator
|
7
8
|
from quicktions import Fraction
|
8
9
|
from typing import Union
|
@@ -48,6 +49,20 @@ class _Part:
|
|
48
49
|
The instance of the track this part belongs
|
49
50
|
to.
|
50
51
|
"""
|
52
|
+
# TODO: I would like to avoid this 2 instances
|
53
|
+
# here, and I think I've done it with static
|
54
|
+
# properties in other project, but as I don't
|
55
|
+
# remember how and where by now, here it is...
|
56
|
+
self._video_frame_generator: VideoFrameGenerator = VideoFrameGenerator()
|
57
|
+
"""
|
58
|
+
Useful internal tool to generate background
|
59
|
+
frames for the empty parts.
|
60
|
+
"""
|
61
|
+
self._audio_frame_generator: AudioFrameGenerator = AudioFrameGenerator()
|
62
|
+
"""
|
63
|
+
Useful internal tool to generate silent
|
64
|
+
audio frames for the empty parts.
|
65
|
+
"""
|
51
66
|
self.start: Fraction = Fraction(start)
|
52
67
|
"""
|
53
68
|
The start 't' time moment of the part.
|
@@ -79,7 +94,10 @@ class _Part:
|
|
79
94
|
#return get_black_background_video_frame(self._track.size)
|
80
95
|
# TODO: This 'time_base' maybe has to be related
|
81
96
|
# to a Timeline general 'time_base' and not the fps
|
82
|
-
return
|
97
|
+
return self._video_frame_generator.background.full_black(
|
98
|
+
size = self._track.size,
|
99
|
+
time_base = fps_to_time_base(self._track.fps)
|
100
|
+
)
|
83
101
|
|
84
102
|
frame = self.video.get_frame_at(t)
|
85
103
|
|
@@ -96,7 +114,6 @@ class _Part:
|
|
96
114
|
|
97
115
|
return frame
|
98
116
|
|
99
|
-
# TODO: I'm not sure if we need this
|
100
117
|
def get_audio_frames_at(
|
101
118
|
self,
|
102
119
|
t: Union[int, float, Fraction]
|
@@ -117,13 +134,15 @@ class _Part:
|
|
117
134
|
# The complete silent frames we need
|
118
135
|
frames = (
|
119
136
|
[
|
120
|
-
|
137
|
+
self._audio_frame_generator.silent(
|
121
138
|
sample_rate = self._track.audio_fps,
|
122
139
|
# TODO: Check where do we get this value from
|
123
140
|
layout = 'stereo',
|
124
141
|
number_of_samples = self._track.audio_samples_per_frame,
|
125
142
|
# TODO: Check where do we get this value from
|
126
|
-
format = 'fltp'
|
143
|
+
format = 'fltp',
|
144
|
+
pts = None,
|
145
|
+
time_base = None
|
127
146
|
)
|
128
147
|
] * number_of_frames
|
129
148
|
if number_of_frames > 0 else
|
@@ -133,20 +152,20 @@ class _Part:
|
|
133
152
|
# The remaining partial silent frames we need
|
134
153
|
if number_of_remaining_samples > 0:
|
135
154
|
frames.append(
|
136
|
-
|
155
|
+
self._audio_frame_generator.silent(
|
137
156
|
sample_rate = self._track.audio_fps,
|
138
157
|
# TODO: Check where do we get this value from
|
139
158
|
layout = 'stereo',
|
140
159
|
number_of_samples = number_of_remaining_samples,
|
141
160
|
# TODO: Check where do we get this value from
|
142
|
-
format = 'fltp'
|
161
|
+
format = 'fltp',
|
162
|
+
pts = None,
|
163
|
+
time_base = None
|
143
164
|
)
|
144
165
|
)
|
145
166
|
|
146
|
-
# TODO: Return or yield (?)
|
147
167
|
for frame in frames:
|
148
168
|
yield frame
|
149
|
-
#return frames
|
150
169
|
|
151
170
|
# TODO: I don't like using t as float,
|
152
171
|
# we need to implement fractions.Fraction
|
@@ -1,9 +1,10 @@
|
|
1
1
|
yta_video_opengl/__init__.py,sha256=ycAx_XYMVDfkuObSvtW6irQ0Wo-fgxEz3fjIRMe8PpY,205
|
2
2
|
yta_video_opengl/classes.py,sha256=t5-Tfc7ecvHl8JlVBp_FVzZT6ole6Ly5-FeBBH7wcxo,37742
|
3
3
|
yta_video_opengl/complete/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
yta_video_opengl/complete/
|
5
|
-
yta_video_opengl/complete/
|
6
|
-
yta_video_opengl/complete/
|
4
|
+
yta_video_opengl/complete/frame_combinator.py,sha256=uYg7907knjBlmZUZCCzkxDcj0Nown0muvL5PNVS707A,9413
|
5
|
+
yta_video_opengl/complete/frame_generator.py,sha256=mhtm22COVb_BmlLOIWsCPBz1a0ge_O9pN4UQXafCNXE,7625
|
6
|
+
yta_video_opengl/complete/timeline.py,sha256=z9I6cFUvx2U-Xe18piZ3ZxH_ypGmmX3xnOHZwKiZ_gY,12647
|
7
|
+
yta_video_opengl/complete/track.py,sha256=UiA8-vh-CRW3h29Pk3kXZArzAm0bmtireNkbuYQ5qMs,14741
|
7
8
|
yta_video_opengl/complete/video_on_track.py,sha256=KROAI0bndnfcvKlHGsSEyWg9o1xozW0PI_Rhqp0r9kw,4844
|
8
9
|
yta_video_opengl/nodes/__init__.py,sha256=TZ-ZO05PZ0_ABq675E22_PngLWOe-_w5s1cLlV3NbWM,3469
|
9
10
|
yta_video_opengl/nodes/audio/__init__.py,sha256=4nKkC70k1UgLcCSPqFWm3cKdaJM0KUmQTwGWv1xFarQ,2926
|
@@ -19,7 +20,7 @@ yta_video_opengl/tests.py,sha256=6QvJx9y4kCiq7b9-AKMetzGuJjd__pTBK5r4tJp3aso,273
|
|
19
20
|
yta_video_opengl/utils.py,sha256=yUi17EjNR4SVpvdDUwUaKl4mBCb1uyFCSGoIX3Zr2F0,15586
|
20
21
|
yta_video_opengl/video.py,sha256=JPIWDQcYlLi8eT2LOFQtS1jVu5xVmW4bz1VMtP0gMeA,8626
|
21
22
|
yta_video_opengl/writer.py,sha256=QwvjQcEkzn1WAVqVTFiI6tYIXJO67LKKUTJGO_eflFM,8893
|
22
|
-
yta_video_opengl-0.0.
|
23
|
-
yta_video_opengl-0.0.
|
24
|
-
yta_video_opengl-0.0.
|
25
|
-
yta_video_opengl-0.0.
|
23
|
+
yta_video_opengl-0.0.17.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
24
|
+
yta_video_opengl-0.0.17.dist-info/METADATA,sha256=joHFJiGnEqyDbZYS_L4igkLn6GSHOCCWQ9nxOYjZoOw,714
|
25
|
+
yta_video_opengl-0.0.17.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
26
|
+
yta_video_opengl-0.0.17.dist-info/RECORD,,
|
@@ -1,83 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
TODO: I don't like the name nor the
|
3
|
-
location of this file, but it is here
|
4
|
-
to encapsulate some functionality
|
5
|
-
related to combining video frames.
|
6
|
-
"""
|
7
|
-
import numpy as np
|
8
|
-
|
9
|
-
|
10
|
-
def blend_alpha(
|
11
|
-
bottom,
|
12
|
-
top,
|
13
|
-
alpha = 0.5
|
14
|
-
):
|
15
|
-
return (alpha * top + (1 - alpha) * bottom).astype(np.uint8)
|
16
|
-
|
17
|
-
def blend_add(
|
18
|
-
bottom,
|
19
|
-
top
|
20
|
-
):
|
21
|
-
"""
|
22
|
-
Brightens the combined image, as if two light projectors were superimposed.
|
23
|
-
"""
|
24
|
-
return np.clip(bottom.astype(np.int16) + top.astype(np.int16), 0, 255).astype(np.uint8)
|
25
|
-
|
26
|
-
def blend_multiply(
|
27
|
-
bottom,
|
28
|
-
top
|
29
|
-
):
|
30
|
-
"""
|
31
|
-
Darkens the image, like projecting two transparencies together.
|
32
|
-
"""
|
33
|
-
return ((bottom.astype(np.float32) * top.astype(np.float32)) / 255).astype(np.uint8)
|
34
|
-
|
35
|
-
def blend_screen(
|
36
|
-
bottom,
|
37
|
-
top
|
38
|
-
):
|
39
|
-
"""
|
40
|
-
Does the opposite of Multiply: it brightens the image.
|
41
|
-
"""
|
42
|
-
return (255 - ((255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32)) / 255)).astype(np.uint8)
|
43
|
-
|
44
|
-
def blend_overlay(
|
45
|
-
bottom,
|
46
|
-
top
|
47
|
-
):
|
48
|
-
"""
|
49
|
-
A mix of Multiply and Screen, depending on the brightness of each pixel.
|
50
|
-
"""
|
51
|
-
b = bottom.astype(np.float32) / 255
|
52
|
-
t = top.astype(np.float32) / 255
|
53
|
-
mask = b < 0.5
|
54
|
-
result = np.zeros_like(b)
|
55
|
-
result[mask] = 2 * b[mask] * t[mask]
|
56
|
-
result[~mask] = 1 - 2 * (1 - b[~mask]) * (1 - t[~mask])
|
57
|
-
return (result * 255).astype(np.uint8)
|
58
|
-
|
59
|
-
def blend_difference(
|
60
|
-
bottom,
|
61
|
-
top
|
62
|
-
):
|
63
|
-
"""
|
64
|
-
Highlights the differences between the two frames.
|
65
|
-
"""
|
66
|
-
return np.abs(bottom.astype(np.int16) - top.astype(np.int16)).astype(np.uint8)
|
67
|
-
|
68
|
-
# TODO: This one needs a mask, that's why
|
69
|
-
# it is commented
|
70
|
-
# def blend_mask(
|
71
|
-
# bottom,
|
72
|
-
# top,
|
73
|
-
# mask
|
74
|
-
# ):
|
75
|
-
# """
|
76
|
-
# Instead of a fixed alpha, you can pass a mask (for example, a gradient or a real alpha channel)
|
77
|
-
|
78
|
-
# mask: float32 array between 0 and 1, same size as the frame.
|
79
|
-
# """
|
80
|
-
# return (mask * top + (1 - mask) * bottom).astype(np.uint8)
|
81
|
-
|
82
|
-
|
83
|
-
|
File without changes
|
File without changes
|