yta-video-opengl 0.0.16__tar.gz → 0.0.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/PKG-INFO +1 -1
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/pyproject.toml +2 -2
- yta_video_opengl-0.0.18/src/yta_video_opengl/complete/frame_combinator.py +293 -0
- yta_video_opengl-0.0.18/src/yta_video_opengl/complete/frame_generator.py +278 -0
- yta_video_opengl-0.0.18/src/yta_video_opengl/complete/frame_wrapper.py +122 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/complete/timeline.py +192 -148
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/complete/track.py +75 -28
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/complete/video_on_track.py +1 -1
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/reader/cache/video.py +3 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/tests.py +6 -1
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/video.py +6 -2
- yta_video_opengl-0.0.16/src/yta_video_opengl/complete/blend.py +0 -83
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/LICENSE +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/README.md +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/__init__.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/classes.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/complete/__init__.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/nodes/__init__.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/nodes/audio/__init__.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/nodes/video/__init__.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/nodes/video/opengl.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/reader/__init__.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/reader/cache/__init__.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/reader/cache/audio.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/reader/cache/utils.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/t.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/utils.py +0 -0
- {yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/src/yta_video_opengl/writer.py +0 -0
{yta_video_opengl-0.0.16 → yta_video_opengl-0.0.18}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "yta-video-opengl"
-version = "0.0.16"
+version = "0.0.18"
 description = "Youtube Autonomous Video OpenGL Module"
 authors = [
     {name = "danialcala94",email = "danielalcalavalera@gmail.com"}
@@ -14,7 +14,7 @@ dependencies = [
     "av (>=0.0.1,<19.0.0)",
     "moderngl (>=0.0.1,<9.0.0)",
     "numpy (>=0.0.1,<9.0.0)",
-    "quicktions (>=0.0.1,<9.0.0)"
+    "quicktions (>=0.0.1,<9.0.0)",
 ]
 
 [tool.poetry]
```
yta_video_opengl-0.0.18/src/yta_video_opengl/complete/frame_combinator.py (new file, +293 lines):

```python
"""
TODO: I don't like the name nor the
location of this file, but it is here
to encapsulate some functionality
related to combining video frames.

Module to contain methods that combine
video frames. Call them with the 2
frames you want to combine and you
will get the combined frame as return.
"""
from av.audio.resampler import AudioResampler
from av.audio.frame import AudioFrame

import numpy as np


class VideoFrameCombinator:
    """
    Class to wrap the functionality related
    to combining different video frames.
    """

    @staticmethod
    def blend_alpha(
        bottom: np.ndarray,
        top: np.ndarray,
        alpha = 0.5
    ):
        return (alpha * top + (1 - alpha) * bottom).astype(np.uint8)

    @staticmethod
    def blend_add(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Lightens the combined image, as if you
        were overlapping two light projectors.
        """
        return np.clip(bottom.astype(np.int16) + top.astype(np.int16), 0, 255).astype(np.uint8)

    @staticmethod
    def blend_multiply(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Darkens, like projecting two
        transparencies together.
        """
        return ((bottom.astype(np.float32) * top.astype(np.float32)) / 255).astype(np.uint8)

    @staticmethod
    def blend_screen(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Does the opposite of Multiply: it
        lightens the image.
        """
        return (255 - ((255 - bottom.astype(np.float32)) * (255 - top.astype(np.float32)) / 255)).astype(np.uint8)

    @staticmethod
    def blend_overlay(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        A mix between Multiply and Screen,
        depending on the brightness of each
        pixel.
        """
        b = bottom.astype(np.float32) / 255
        t = top.astype(np.float32) / 255
        mask = b < 0.5
        result = np.zeros_like(b)
        result[mask] = 2 * b[mask] * t[mask]
        result[~mask] = 1 - 2 * (1 - b[~mask]) * (1 - t[~mask])

        return (result * 255).astype(np.uint8)

    @staticmethod
    def blend_difference(
        bottom: np.ndarray,
        top: np.ndarray
    ):
        """
        Highlights the differences between the
        two frames.
        """
        return np.abs(bottom.astype(np.int16) - top.astype(np.int16)).astype(np.uint8)

    # TODO: This one needs a mask, that's why
    # it is commented
    # @staticmethod
    # def blend_mask(
    #     bottom,
    #     top,
    #     mask
    # ):
    #     """
    #     Instead of a fixed alpha, you can pass a mask
    #     (for example, a gradient or a real alpha channel).
    #
    #     mask: float32 array between 0 and 1, same size
    #     as the frame.
    #     """
    #     return (mask * top + (1 - mask) * bottom).astype(np.uint8)
```
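As a quick illustration (not part of the package), here is a minimal usage sketch of the blend helpers above. The file names and the pyav decoding of one frame per clip are placeholder assumptions; the only requirement is two same-sized RGB arrays.

```python
# Hypothetical sketch: decode one frame from each of two clips
# ('bottom.mp4' and 'top.mp4' are placeholder file names) and blend
# them with the helpers above.
import av

from yta_video_opengl.complete.frame_combinator import VideoFrameCombinator

with av.open('bottom.mp4') as c1, av.open('top.mp4') as c2:
    # Both arrays are (height, width, 3) uint8 in RGB order and must
    # have the same shape for the blends to work
    bottom = next(c1.decode(video = 0)).to_ndarray(format = 'rgb24')
    top = next(c2.decode(video = 0)).to_ndarray(format = 'rgb24')

# Every blend method maps two uint8 arrays to one uint8 array
overlaid = VideoFrameCombinator.blend_overlay(bottom, top)
faded = VideoFrameCombinator.blend_alpha(bottom, top, alpha = 0.25)
```

Note that `blend_add` and `blend_difference` widen to int16 before clipping, so intermediate sums and differences cannot wrap around the uint8 range.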
frame_combinator.py (continued):

```python
class AudioFrameCombinator:
    """
    Class to wrap the functionality related
    to combining different audio frames.
    """

    @staticmethod
    def sum_tracks_frames(
        tracks_frames: list[AudioFrame],
        sample_rate: int = 44100,
        layout: str = 'stereo',
        format: str = 'fltp',
        do_normalize: bool = True
    ) -> AudioFrame:
        """
        Sum all the audio frames from the different
        tracks that are given in the 'tracks_frames'
        list (each element is the audio frame of one
        track). This list should come from a matrix
        that represented each track as a row and the
        audio frames of that track as the columns.

        This method sums the audio frames of one
        specific 't' time moment of a video.

        The output will be the sum of all the audio
        frames, normalized to avoid distortion if
        'do_normalize' is True (recommended).
        """
        if len(tracks_frames) == 0:
            raise Exception('The "tracks_frames" list of audio frames is empty.')

        arrays = []
        resampler: AudioResampler = AudioResampler(
            format = format,
            layout = layout,
            rate = sample_rate
        )

        for track_frame in tracks_frames:
            # Resample to output format
            # TODO: What if the resampler creates more
            # than one single frame? I don't know what
            # to do... I'll see when it happens
            track_frame = resampler.resample(track_frame)

            if len(track_frame) > 1:
                print('[ ! ] The resampler has given more than 1 frame...')

            track_frame_array = track_frame[0].to_ndarray()

            # Transform to 'float32' [-1, 1]
            # TODO: I think this is because the output
            # is 'fltp' but we have more combinations
            # so this must be refactored
            if track_frame_array.dtype == np.int16:
                track_frame_array = track_frame_array.astype(np.float32) / 32768.0
            elif track_frame_array.dtype != np.float32:
                track_frame_array = track_frame_array.astype(np.float32)

            # Mono to stereo if needed
            # TODO: What if source is 'stereo' and we
            # want mono (?)
            if (
                track_frame_array.shape[0] == 1 and
                layout == 'stereo'
            ):
                track_frame_array = np.repeat(track_frame_array, 2, axis = 0)

            arrays.append(track_frame_array)

        # Same length and fill with zeros if needed
        max_len = max(a.shape[1] for a in arrays)
        stacked = []
        for a in arrays:
            # TODO: Again, this 'float32' is because output
            # is 'fltp' I think...
            buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
            buf[:, :a.shape[1]] = a
            stacked.append(buf)

        # Sum all the sounds
        mix = np.sum(stacked, axis = 0)
        if do_normalize:
            # Avoid distortion and saturation
            mix /= len(stacked)

        # Avoid clipping
        mix = np.clip(mix, -1.0, 1.0)

        out = AudioFrame.from_ndarray(
            array = mix,
            format = format,
            layout = layout
        )
        out.sample_rate = sample_rate

        return out

# TODO: This method below has been refactored
# to the 'sum_tracks_frames', so delete it
# when the one above is working well
def mix_audio_frames_by_index(
    tracks_frames,
    sample_rate: int,
    layout = 'stereo',
):
    """
    Combine all the columns of the given
    matrix of audio frames 'tracks_frames'.
    The rows are the different tracks and
    the columns are the frame at that 't'
    moment of each of those tracks.

    The 'tracks_frames' matrix needs to be
    pre-processed so each column holds only
    one single frame to combine, so we
    concatenate the frames first if there is
    more than 1 per column.
    """
    # TODO: Please, improve and clean all this
    # code, it is so sh*tty; make utils to
    # combine and those things, not here...
    # Also the formats, make them dynamic and
    # based on the output that is defined here
    # in the Timeline class.
    mixed_frames = []

    # Iterate by columns (each row is a track)
    for frames_at_index in zip(*tracks_frames):
        arrays = []
        for f in frames_at_index:
            # Resample to output expected values
            # TODO: This must be dynamic depending
            # on the track values
            resampler = AudioResampler(format = 'fltp', layout = 'stereo', rate = sample_rate)
            # The resampler may return several frames; keep the first
            arr = resampler.resample(f)[0].to_ndarray()

            # TODO: This below must change depending
            # on the expected output, for us and now
            # it is float32, fltp, stereo, 44_100
            # Same format
            if arr.dtype == np.int16:
                arr = arr.astype(np.float32) / 32768.0

            # Same layout (number of channels)
            if arr.shape[0] == 1:
                arr = np.repeat(arr, 2, axis = 0)
            # elif arr.dtype == np.float32:
            #     # Already in [-1, 1], don't touch it
            #     pass

            arrays.append(arr)

        # Align lengths
        max_len = max(a.shape[1] for a in arrays)
        stacked = []
        for a in arrays:
            buf = np.zeros((a.shape[0], max_len), dtype = np.float32)
            buf[:, :a.shape[1]] = a
            stacked.append(buf)

        # Mix
        mix = np.sum(stacked, axis = 0) / len(stacked)
        #mix = np.sum(stacked, axis = 0)

        # Limit to the [-1, 1] range
        mix = np.clip(mix, -1.0, 1.0)

        # Create the output frame
        # TODO: What about the 'format' if they
        # are all different (?)
        out = AudioFrame.from_ndarray(mix, format = 'fltp', layout = layout)
        out.sample_rate = sample_rate
        # TODO: This will be written later when
        # encoding
        # out.pts = frames_at_index[0].pts
        # out.time_base = frames_at_index[0].time_base

        print(mix.min(), mix.max())

        mixed_frames.append(out)

    return mixed_frames
```
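A small sketch (again, not from the package) of feeding `sum_tracks_frames` the per-track audio frames for one 't' moment. The `make_frame` helper is a hypothetical stand-in for real decoded frames:

```python
# Hypothetical sketch: mix the audio frames that two tracks produce
# for the same 't' moment.
import numpy as np
from av.audio.frame import AudioFrame

from yta_video_opengl.complete.frame_combinator import AudioFrameCombinator

def make_frame(value: float = 0.0) -> AudioFrame:
    # Planar float stereo: the array shape is (channels, samples)
    frame = AudioFrame.from_ndarray(
        np.full((2, 1024), value, dtype = np.float32),
        format = 'fltp',
        layout = 'stereo'
    )
    frame.sample_rate = 44100
    return frame

mixed = AudioFrameCombinator.sum_tracks_frames(
    tracks_frames = [make_frame(0.5), make_frame(-0.25)],
    sample_rate = 44100
)
print(mixed.samples, mixed.sample_rate)  # 1024 44100
```

With `do_normalize` the mix is divided by the number of tracks, which prevents clipping but also lowers the level of a single loud track.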
yta_video_opengl-0.0.18/src/yta_video_opengl/complete/frame_generator.py (new file, +278 lines):

```python
"""
The video frames must be built using the
(height, width) size when giving the numpy
array that will be used for it. We will
receive the values as (width, height) but
we will invert them when needed.

The frames that come from an empty part
are flagged with the .metadata attribute
'is_from_empty_part' so we can recognize
them and ignore them when combining on the
timeline. We have that metadata in the
wrapper class we created.

TODO: Check because we have a similar
module in other project or projects.
"""
from av.video.frame import VideoFrame
from av.audio.frame import AudioFrame
from av.audio.layout import AudioLayout
from fractions import Fraction
from typing import Union

import numpy as np


class _FrameGenerator:
    """
    Class to generate frames as numpy arrays.
    """

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ):
        """
        Get a numpy array that represents a full
        black frame of the given 'size' and with
        the given 'dtype'.
        """
        # TODO: I think 'zeros' only works if dtype
        # is int
        return np.zeros(
            shape = (size[1], size[0], 3),
            dtype = dtype
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8
    ):
        """
        Get a numpy array that represents a full
        white frame of the given 'size' and with
        the given 'dtype'.
        """
        # White needs the maximum channel value (255),
        # not 1, on every pixel
        return np.full(
            shape = (size[1], size[0], 3),
            fill_value = 255,
            dtype = dtype
        )

class _BackgroundFrameGenerator:
    """
    Internal class to simplify the way we
    access the generation of background
    frames from the general generator class.
    """

    def __init__(
        self
    ):
        self._frame_generator: _FrameGenerator = _FrameGenerator()
        """
        Shortcut to the FrameGenerator.
        """

    def full_black(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely black
        and of the given 'size'.
        """
        return numpy_to_video_frame(
            frame = self._frame_generator.full_black(size, dtype),
            format = format,
            pts = pts,
            time_base = time_base
        )

    def full_white(
        self,
        size: tuple[int, int] = (1920, 1080),
        dtype: np.dtype = np.uint8,
        format: str = 'rgb24',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> VideoFrame:
        """
        Get a video frame that is completely white
        and of the given 'size'.
        """
        return numpy_to_video_frame(
            frame = self._frame_generator.full_white(size, dtype),
            format = format,
            pts = pts,
            time_base = time_base
        )

class VideoFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav video frame.

    This class is useful when we need to
    generate the black background for empty
    parts within the tracks and in other
    situations.
    """

    def __init__(
        self
    ):
        self.background = _BackgroundFrameGenerator()
        """
        Shortcut to the background creation.
        """

def numpy_to_video_frame(
    frame: np.ndarray,
    format: str = 'rgb24',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> VideoFrame:
    """
    Transform the given numpy 'frame' into a
    pyav video frame with the given 'format'
    and also the 'pts' and/or 'time_base' if
    provided.
    """
    frame = VideoFrame.from_ndarray(
        # TODO: What if we want alpha (?)
        array = frame,
        format = format
    )

    if pts is not None:
        frame.pts = pts

    if time_base is not None:
        frame.time_base = time_base

    return frame
```
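A quick, hypothetical usage sketch of the video side (the 60 fps time base is an arbitrary choice):

```python
# Hypothetical sketch: build the black background frame used to fill
# an empty part of a track, stamped for a 60 fps timeline.
from fractions import Fraction

from yta_video_opengl.complete.frame_generator import VideoFrameGenerator

frame = VideoFrameGenerator().background.full_black(
    size = (1920, 1080),
    pts = 0,
    time_base = Fraction(1, 60)
)
# The numpy array was (1080, 1920, 3), so pyav reports width x height
print(frame.width, frame.height, frame.pts)  # 1920 1080 0
```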
frame_generator.py (continued):

```python
class AudioFrameGenerator:
    """
    Class to wrap the functionality related to
    generating a pyav audio frame.

    This class is useful when we need to
    generate the silent audio for empty parts
    within the tracks and in other situations.
    """

    def silent(
        self,
        sample_rate: int,
        layout = 'stereo',
        number_of_samples: int = 1024,
        format = 's16',
        pts: Union[int, None] = None,
        time_base: Union['Fraction', None] = None
    ) -> AudioFrame:
        """
        Get an audio frame that is completely silent.
        This is useful when we want to fill the empty
        parts of our tracks.
        """
        dtype = audio_format_to_dtype(format)

        if dtype is None:
            raise Exception(f'The format "{format}" is not accepted.')

        # TODO: Is this raising an exception if the
        # 'layout' is not valid? I think yes (?)
        number_of_channels = len(AudioLayout(layout).channels)

        # TODO: I leave these comments below because
        # I'm not sure what is true and what is not
        # so, until it is more clear... here it is:
        # For packed (or planar) formats we apply:
        # (1, samples * channels). This is the same
        # amount of data but planar, in 1D only
        # TODO: This wasn't in the previous version
        # and it was working, we were sending the
        # same 'number_of_samples' even when 'fltp'
        # that includes the 'p'
        # TODO: This is making the audio last 2x
        # if 'p' in format:
        #     number_of_samples *= number_of_channels

        silent_numpy_array = np.zeros(
            shape = (number_of_channels, number_of_samples),
            dtype = dtype
        )

        return numpy_to_audio_frame(
            frame = silent_numpy_array,
            sample_rate = sample_rate,
            layout = layout,
            format = format,
            pts = pts,
            time_base = time_base
        )

def numpy_to_audio_frame(
    frame: np.ndarray,
    sample_rate: int,
    layout: str = 'stereo',
    format: str = 's16',
    pts: Union[int, None] = None,
    time_base: Union['Fraction', None] = None
) -> AudioFrame:
    """
    Transform the given numpy 'frame' into a
    pyav audio frame with the given 'sample_rate',
    'layout' and 'format', and also the 'pts'
    and/or 'time_base' if provided.
    """
    frame = AudioFrame.from_ndarray(
        array = frame,
        format = format,
        layout = layout
    )

    frame.sample_rate = sample_rate

    if pts is not None:
        frame.pts = pts

    if time_base is not None:
        frame.time_base = time_base

    return frame

# TODO: Maybe transform into an Enum (?)
def audio_format_to_dtype(
    audio_format: str
) -> Union[np.dtype, None]:
    """
    Transform the given 'audio_format' into
    the corresponding numpy dtype value. If
    the 'audio_format' is not accepted this
    method will return None.

    This method must be used when we are
    building the numpy array that will be
    used to build a pyav audio frame because
    the pyav 'audio_format' needs a specific
    np.dtype to be built.

    For example, 's16' will return 'np.int16'
    and 'fltp' will return 'np.float32'.
    """
    return {
        's16': np.int16,
        'flt': np.float32,
        'fltp': np.float32
    }.get(audio_format, None)
```
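And the audio counterpart, a hypothetical sketch that fills a gap with silence ('fltp' is chosen here because, as the TODO comments above note, the packed/planar shape question is still open for 's16'):

```python
# Hypothetical sketch: one silent planar-float stereo frame of 1024
# samples, ready to fill an empty part of an audio track.
from yta_video_opengl.complete.frame_generator import AudioFrameGenerator

silent = AudioFrameGenerator().silent(
    sample_rate = 44100,
    layout = 'stereo',
    number_of_samples = 1024,
    format = 'fltp'
)
print(silent.samples, silent.sample_rate)  # 1024 44100
```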