yta-video-opengl 0.0.12__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yta_video_opengl/complete/timeline.py +41 -44
- yta_video_opengl/complete/track.py +40 -32
- yta_video_opengl/complete/video_on_track.py +27 -16
- yta_video_opengl/reader/__init__.py +27 -82
- yta_video_opengl/reader/cache.py +250 -245
- yta_video_opengl/t.py +233 -0
- yta_video_opengl/tests.py +4 -2
- yta_video_opengl/utils.py +108 -86
- yta_video_opengl/video.py +90 -12
- yta_video_opengl/writer.py +13 -14
- {yta_video_opengl-0.0.12.dist-info → yta_video_opengl-0.0.14.dist-info}/METADATA +2 -1
- yta_video_opengl-0.0.14.dist-info/RECORD +21 -0
- yta_video_opengl-0.0.12.dist-info/RECORD +0 -20
- {yta_video_opengl-0.0.12.dist-info → yta_video_opengl-0.0.14.dist-info}/LICENSE +0 -0
- {yta_video_opengl-0.0.12.dist-info → yta_video_opengl-0.0.14.dist-info}/WHEEL +0 -0
yta_video_opengl/reader/cache.py
CHANGED
@@ -15,17 +15,33 @@ frame we are requesting in the moment, keeping in
|
|
15
15
|
memory all those frames to be handled fast. It
|
16
16
|
will remove the old frames if needed to use only
|
17
17
|
the 'size' we set when creating it.
|
18
|
+
|
19
|
+
A stream can have 'fps = 60' but use another
|
20
|
+
different time base that makes the pts values go 0,
|
21
|
+
256, 512... for example. The 'time_base' is the
|
22
|
+
only accurate way to obtain the pts.
|
23
|
+
|
24
|
+
Feel free to move this explanation to other
|
25
|
+
place, its about the duration.
|
26
|
+
|
27
|
+
The stream 'duration' parameter is measured
|
28
|
+
in ticks, the number of ticks that the
|
29
|
+
stream lasts. Here below is an example:
|
30
|
+
|
31
|
+
- Duration raw: 529200
|
32
|
+
- Time base: 1/44100
|
33
|
+
- Duration (seconds): 12.0
|
18
34
|
"""
|
19
|
-
from yta_video_opengl.
|
20
|
-
from yta_video_frame_time import T
|
35
|
+
from yta_video_opengl.t import T
|
21
36
|
from av.container import InputContainer
|
22
37
|
from av.video.stream import VideoStream
|
23
38
|
from av.audio.stream import AudioStream
|
24
39
|
from av.video.frame import VideoFrame
|
25
40
|
from av.audio.frame import AudioFrame
|
41
|
+
from av.packet import Packet
|
26
42
|
from yta_validation.parameter import ParameterValidator
|
27
43
|
from yta_validation import PythonValidator
|
28
|
-
from
|
44
|
+
from quicktions import Fraction
|
29
45
|
from collections import OrderedDict
|
30
46
|
from typing import Union
|
31
47
|
|
@@ -33,6 +49,10 @@ import numpy as np
|
|
33
49
|
import math
|
34
50
|
|
35
51
|
|
52
|
+
# TODO: This is not actually a Video
|
53
|
+
# cache, it is a FrameCache because we
|
54
|
+
# create one for video but another
|
55
|
+
# one for audio. Rename it please.
|
36
56
|
class VideoFrameCache:
|
37
57
|
"""
|
38
58
|
Class to manage the frames cache of a video
|
@@ -42,14 +62,14 @@ class VideoFrameCache:
|
|
42
62
|
@property
|
43
63
|
def fps(
|
44
64
|
self
|
45
|
-
) ->
|
65
|
+
) -> Union[int, Fraction, None]:
|
46
66
|
"""
|
47
|
-
The frames per second
|
67
|
+
The frames per second.
|
48
68
|
"""
|
49
69
|
return (
|
50
|
-
|
70
|
+
self.stream.average_rate
|
51
71
|
if self.stream.type == 'video' else
|
52
|
-
|
72
|
+
self.stream.rate
|
53
73
|
)
|
54
74
|
|
55
75
|
@property
|
@@ -94,6 +114,31 @@ class VideoFrameCache:
|
|
94
114
|
end.
|
95
115
|
"""
|
96
116
|
|
117
|
+
# TODO: This is new, remove this comment if
|
118
|
+
# it is ok
|
119
|
+
# TODO: This way of obtaining the duration
|
120
|
+
# in ticks must be a utils
|
121
|
+
self.frame_duration: int = (
|
122
|
+
self.stream.duration / self.stream.frames
|
123
|
+
if PythonValidator.is_instance_of(stream, VideoStream) else
|
124
|
+
# TODO: Is this below ok (?)
|
125
|
+
self.stream.frames
|
126
|
+
)
|
127
|
+
"""
|
128
|
+
The duration (in ticks) of the frame, that
|
129
|
+
is the step between the different pts.
|
130
|
+
"""
|
131
|
+
self._last_packet_accessed: Union[Packet, None] = None
|
132
|
+
"""
|
133
|
+
The last packet that has been accessed
|
134
|
+
"""
|
135
|
+
self._last_frame_read: Union[VideoFrame, AudioFrame, None] = None
|
136
|
+
"""
|
137
|
+
The last frame we have read when decoding.
|
138
|
+
Useful to avoid seeking all the time when we
|
139
|
+
don't need it.
|
140
|
+
"""
|
141
|
+
|
97
142
|
self._prepare()
|
98
143
|
|
99
144
|
def _prepare(
|
@@ -108,6 +153,7 @@ class VideoFrameCache:
|
|
108
153
|
# use the amount of frames of the biggest
|
109
154
|
# interval of frames that belongs to a key
|
110
155
|
# frame, or a value by default
|
156
|
+
# TODO: Careful if this is too big
|
111
157
|
fps = (
|
112
158
|
float(self.stream.average_rate)
|
113
159
|
if PythonValidator.is_instance_of(self.stream, VideoStream) else
|
@@ -116,7 +162,7 @@ class VideoFrameCache:
|
|
116
162
|
# Intervals, but in number of frames
|
117
163
|
intervals = np.diff(
|
118
164
|
# Intervals of time between keyframes
|
119
|
-
np.array(self.key_frames_pts) * self.
|
165
|
+
np.array(self.key_frames_pts) * self.time_base
|
120
166
|
) * fps
|
121
167
|
|
122
168
|
self.size = (
|
@@ -131,7 +177,7 @@ class VideoFrameCache:
|
|
131
177
|
|
132
178
|
self.container.seek(0)
|
133
179
|
|
134
|
-
def
|
180
|
+
def _get_nearest_keyframe_pts(
|
135
181
|
self,
|
136
182
|
pts: int
|
137
183
|
):
|
@@ -157,7 +203,6 @@ class VideoFrameCache:
|
|
157
203
|
the cache if full.
|
158
204
|
"""
|
159
205
|
if frame.pts not in self.cache:
|
160
|
-
# TODO: The 'format' must be dynamic
|
161
206
|
self.cache[frame.pts] = frame
|
162
207
|
|
163
208
|
# Clean cache if full
|
@@ -165,145 +210,161 @@ class VideoFrameCache:
|
|
165
210
|
self.cache.popitem(last = False)
|
166
211
|
|
167
212
|
return frame
|
168
|
-
|
169
|
-
def
|
213
|
+
|
214
|
+
def _seek(
|
170
215
|
self,
|
171
216
|
pts: int
|
172
|
-
)
|
217
|
+
):
|
173
218
|
"""
|
174
|
-
|
219
|
+
Seek to the given 'pts' only if it is not
|
220
|
+
the next 'pts' to the last read, and it
|
221
|
+
will also apply a pad to avoid problems
|
222
|
+
when reading audio frames.
|
175
223
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
224
|
+
TODO: Apply the padding only to audio
|
225
|
+
frame reading (?)
|
226
|
+
"""
|
227
|
+
# I found that it is recommended to
|
228
|
+
# read ~100ms before the pts we want to
|
229
|
+
# actually read so we obtain the frames
|
230
|
+
# clean (this is important in audio)
|
231
|
+
# TODO: This is maybe too much for a
|
232
|
+
# video and not needed
|
233
|
+
pts_pad = int(0.1 / self.time_base)
|
234
|
+
self.container.seek(
|
235
|
+
offset = max(0, pts - pts_pad),
|
236
|
+
stream = self.stream
|
237
|
+
)
|
180
238
|
|
181
|
-
|
182
|
-
|
239
|
+
def get_video_frame(
|
240
|
+
self,
|
241
|
+
t: Union[int, float, Fraction]
|
242
|
+
) -> VideoFrame:
|
183
243
|
"""
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
244
|
+
Get the video frame that is in the 't'
|
245
|
+
time moment provided.
|
246
|
+
"""
|
247
|
+
for frame in self.get_video_frames(t):
|
248
|
+
return frame
|
189
249
|
|
190
|
-
|
191
|
-
self
|
250
|
+
def get_video_frames(
|
251
|
+
self,
|
252
|
+
start: Union[int, float, Fraction] = 0,
|
253
|
+
end: Union[int, float, Fraction, None] = None
|
254
|
+
):
|
255
|
+
"""
|
256
|
+
Get all the frames in the range between
|
257
|
+
the provided 'start' and 'end' time in
|
258
|
+
seconds.
|
259
|
+
|
260
|
+
This method is an iterator that yields
|
261
|
+
the frame, its t and its index.
|
262
|
+
"""
|
263
|
+
start = T(start, self.time_base).truncated
|
264
|
+
end = (
|
265
|
+
T(end, self.time_base).truncated
|
266
|
+
if end is not None else
|
267
|
+
# The next frame
|
268
|
+
start + (1 / self.fps)
|
269
|
+
)
|
270
|
+
|
271
|
+
key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
|
192
272
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
273
|
+
if (
|
274
|
+
self._last_packet_accessed is None or
|
275
|
+
self._last_packet_accessed.pts != key_frame_pts
|
276
|
+
):
|
277
|
+
self._seek(key_frame_pts)
|
278
|
+
|
279
|
+
for packet in self.container.demux(self.stream):
|
280
|
+
if packet.pts is None:
|
197
281
|
continue
|
198
282
|
|
199
|
-
|
200
|
-
self._store_frame_in_cache(frame)
|
283
|
+
self._last_packet_accessed = packet
|
201
284
|
|
202
|
-
|
203
|
-
|
204
|
-
|
285
|
+
for frame in packet.decode():
|
286
|
+
if frame.pts is None:
|
287
|
+
continue
|
205
288
|
|
206
|
-
|
207
|
-
|
208
|
-
|
289
|
+
# We store all the frames in cache
|
290
|
+
self._store_frame_in_cache(frame)
|
291
|
+
|
292
|
+
current_frame_time = frame.pts * self.time_base
|
293
|
+
|
294
|
+
# We want the range [start, end)
|
295
|
+
if start <= current_frame_time < end:
|
296
|
+
yield frame
|
209
297
|
|
210
|
-
|
298
|
+
if current_frame_time >= end:
|
299
|
+
break
|
300
|
+
|
301
|
+
def get_audio_frame_from_t(
|
211
302
|
self,
|
212
|
-
|
213
|
-
)
|
303
|
+
t: Union[int, float, Fraction]
|
304
|
+
):
|
214
305
|
"""
|
215
|
-
Get the
|
216
|
-
the
|
306
|
+
Get the single audio frame that must be
|
307
|
+
played at the 't' time moment provided.
|
308
|
+
This method is useful to get the single
|
309
|
+
audio frame that we need to combine
|
310
|
+
when using it in a composition.
|
311
|
+
|
312
|
+
TODO: Are we actually using this method (?)
|
217
313
|
"""
|
218
|
-
|
219
|
-
|
314
|
+
t: T = T(t, self.time_base)
|
315
|
+
# We need the just one audio frame
|
316
|
+
for frame in self.get_audio_frames(t.truncated, t.next(1).truncated):
|
317
|
+
return frame
|
220
318
|
|
221
|
-
|
222
|
-
self.cache[pts]
|
223
|
-
if pts in self.cache else
|
224
|
-
self.get_frame_from_pts(pts)
|
225
|
-
)
|
226
|
-
|
227
|
-
def get_frame_from_t(
|
319
|
+
def get_audio_frames_from_t(
|
228
320
|
self,
|
229
|
-
t: float
|
230
|
-
)
|
321
|
+
t: Union[int, float, Fraction]
|
322
|
+
):
|
231
323
|
"""
|
232
|
-
Get the
|
233
|
-
|
324
|
+
Get all the audio frames that must be
|
325
|
+
played at the 't' time moment provided.
|
234
326
|
"""
|
235
|
-
|
236
|
-
|
237
|
-
return (
|
238
|
-
self.cache[pts]
|
239
|
-
if pts in self.cache else
|
240
|
-
self.get_frame_from_pts(pts)
|
241
|
-
)
|
327
|
+
for frame in self.get_audio_frames(t):
|
328
|
+
yield frame
|
242
329
|
|
243
|
-
def
|
330
|
+
def get_audio_frames(
|
244
331
|
self,
|
245
|
-
start: float = 0,
|
246
|
-
end: Union[float, None] = None
|
332
|
+
start: Union[int, float, Fraction] = 0,
|
333
|
+
end: Union[int, float, Fraction, None] = None
|
247
334
|
):
|
248
335
|
"""
|
249
|
-
Get all the frames in the range
|
250
|
-
the provided 'start' and 'end'
|
251
|
-
seconds.
|
336
|
+
Get all the audio frames in the range
|
337
|
+
between the provided 'start' and 'end'
|
338
|
+
time (in seconds).
|
252
339
|
|
253
340
|
This method is an iterator that yields
|
254
341
|
the frame, its t and its index.
|
255
342
|
"""
|
256
|
-
#
|
257
|
-
#
|
258
|
-
#
|
259
|
-
#
|
260
|
-
|
261
|
-
|
262
|
-
"""
|
263
|
-
Feel free to move this explanation to other
|
264
|
-
place, its about the duration.
|
265
|
-
|
266
|
-
The stream 'duration' parameter is measured
|
267
|
-
on ticks, the amount of ticks that the
|
268
|
-
stream lasts. Here below is an example:
|
269
|
-
|
270
|
-
- Duration raw: 529200
|
271
|
-
- Time base: 1/44100
|
272
|
-
- Duration (seconds): 12.0
|
273
|
-
"""
|
274
|
-
|
275
|
-
# The 'duration' is on pts ticks
|
276
|
-
duration = float(self.stream.duration * self.stream.time_base)
|
277
|
-
print(f'duration of the whole stream: {str(duration)}s, asking for [{str(start)}, {str(end)})')
|
278
|
-
# TODO: I think it would be better to
|
279
|
-
# receive and work with pts instead of
|
280
|
-
# 't' time moments...
|
281
|
-
# pts_list = [
|
282
|
-
# t_to_pts(t, self.time_base)
|
283
|
-
# for t in T.get_frame_indexes(duration, self.fps, start, end)
|
284
|
-
# ]
|
285
|
-
|
286
|
-
# if all(
|
287
|
-
# pts in self.cache
|
288
|
-
# for pts in pts_list
|
289
|
-
# ):
|
290
|
-
# for pts in pts_list:
|
291
|
-
# yield self.cache[pts]
|
292
|
-
|
293
|
-
# If not all, we ignore the cache because we
|
294
|
-
# need to decode and they are all consecutive
|
295
|
-
start = t_to_pts(start, self.time_base)
|
343
|
+
# TODO: Is this ok? We are trying to obtain
|
344
|
+
# the audio frames for a video frame, so
|
345
|
+
# should we use the 'self.time_base' to
|
346
|
+
# truncate (?)
|
347
|
+
start = T(start, self.time_base).truncated
|
296
348
|
end = (
|
297
|
-
|
349
|
+
T(end, self.time_base).truncated
|
298
350
|
if end is not None else
|
299
|
-
|
351
|
+
start + (1 / self.fps)
|
300
352
|
)
|
301
|
-
key_frame_pts = self._get_nearest_keyframe_fps(start)
|
302
353
|
|
303
|
-
|
304
|
-
|
354
|
+
key_frame_pts = self._get_nearest_keyframe_pts(start / self.time_base)
|
355
|
+
|
356
|
+
if (
|
357
|
+
self._last_packet_accessed is None or
|
358
|
+
self._last_packet_accessed.pts != key_frame_pts
|
359
|
+
):
|
360
|
+
self._seek(key_frame_pts)
|
305
361
|
|
306
362
|
for packet in self.container.demux(self.stream):
|
363
|
+
if packet.pts is None:
|
364
|
+
continue
|
365
|
+
|
366
|
+
self._last_packet_accessed = packet
|
367
|
+
|
307
368
|
for frame in packet.decode():
|
308
369
|
if frame.pts is None:
|
309
370
|
continue
|
@@ -311,29 +372,24 @@ class VideoFrameCache:
|
|
311
372
|
# We store all the frames in cache
|
312
373
|
self._store_frame_in_cache(frame)
|
313
374
|
|
314
|
-
|
315
|
-
|
316
|
-
#
|
317
|
-
|
318
|
-
print(f' Frame from [{str(frame.pts)}, {str(frame_end_pts)}] and looking for [{str(start)}, {str(end)}]')
|
375
|
+
current_frame_time = frame.pts * self.time_base
|
376
|
+
# End is not included, it's the start of the
|
377
|
+
# next frame actually
|
378
|
+
frame_end = current_frame_time + (frame.samples / self.stream.sample_rate)
|
319
379
|
|
320
380
|
# For the next comments imagine we are looking
|
321
381
|
# for the [1.0, 2.0) audio time range
|
322
382
|
# Previous frame and nothing is inside
|
323
|
-
if
|
383
|
+
if frame_end <= start:
|
324
384
|
# From 0.25 to 1.0
|
325
385
|
continue
|
326
|
-
|
386
|
+
|
327
387
|
# We finished, nothing is inside and it's after
|
328
|
-
if
|
329
|
-
end is not None and
|
330
|
-
frame.pts >= end
|
331
|
-
):
|
388
|
+
if current_frame_time >= end:
|
332
389
|
# From 2.0 to 2.75
|
333
390
|
return
|
334
391
|
|
335
|
-
#
|
336
|
-
# Audio is:
|
392
|
+
# If we need audio from 1 to 2, audio is:
|
337
393
|
# - from 0 to 0.75 (Not included, omit)
|
338
394
|
# - from 0.5 to 1.5 (Included, take 1.0 to 1.5)
|
339
395
|
# - from 0.5 to 2.5 (Included, take 1.0 to 2.0)
|
@@ -343,55 +399,46 @@ class VideoFrameCache:
|
|
343
399
|
|
344
400
|
# Here below, at least a part is inside
|
345
401
|
if (
|
346
|
-
|
347
|
-
|
402
|
+
current_frame_time < start and
|
403
|
+
frame_end > start
|
348
404
|
):
|
349
405
|
# A part at the end is included
|
350
406
|
end_time = (
|
351
407
|
# From 0.5 to 1.5 => take 1.0 to 1.5
|
352
|
-
|
353
|
-
if
|
408
|
+
frame_end
|
409
|
+
if frame_end <= end else
|
354
410
|
# From 0.5 to 2.5 => take 1.0 to 2.0
|
355
411
|
end
|
356
412
|
)
|
357
|
-
print('A part at the end is included.')
|
358
|
-
|
359
|
-
frame = trim_audio_frame_pts(
|
413
|
+
#print('A part at the end is included.')
|
414
|
+
frame = trim_audio_frame(
|
360
415
|
frame = frame,
|
361
|
-
|
362
|
-
|
416
|
+
start = start,
|
417
|
+
end = end_time,
|
363
418
|
time_base = self.time_base
|
364
419
|
)
|
365
420
|
elif (
|
366
|
-
|
367
|
-
|
421
|
+
current_frame_time >= start and
|
422
|
+
current_frame_time < end
|
368
423
|
):
|
369
424
|
end_time = (
|
370
425
|
# From 1.25 to 1.5 => take 1.25 to 1.5
|
371
|
-
|
372
|
-
if
|
426
|
+
frame_end
|
427
|
+
if frame_end <= end else
|
373
428
|
# From 1.25 to 2.5 => take 1.25 to 2.0
|
374
429
|
end
|
375
430
|
)
|
376
431
|
# A part at the beginning is included
|
377
|
-
print('A part at the begining is included.')
|
378
|
-
|
379
|
-
frame = trim_audio_frame_pts(
|
432
|
+
#print('A part at the begining is included.')
|
433
|
+
frame = trim_audio_frame(
|
380
434
|
frame = frame,
|
381
|
-
|
382
|
-
|
435
|
+
start = current_frame_time,
|
436
|
+
end = end_time,
|
383
437
|
time_base = self.time_base
|
384
438
|
)
|
385
439
|
|
386
440
|
# If the whole frame is in, pass it as it is
|
387
|
-
|
388
|
-
# TODO: Maybe send a @dataclass instead (?)
|
389
|
-
# TODO: Do I really need these 't' and 'index' (?)
|
390
|
-
yield (
|
391
|
-
frame,
|
392
|
-
pts_to_t(frame.pts, self.time_base),
|
393
|
-
pts_to_index(frame.pts, self.time_base, self.fps)
|
394
|
-
)
|
441
|
+
yield frame
|
395
442
|
|
396
443
|
def clear(
|
397
444
|
self
|
@@ -402,106 +449,64 @@ class VideoFrameCache:
|
|
402
449
|
self.cache.clear()
|
403
450
|
|
404
451
|
return self
|
405
|
-
|
406
|
-
|
407
452
|
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
def trim_audio_frame_pts(
|
416
|
-
frame: av.AudioFrame,
|
417
|
-
start_pts: int,
|
418
|
-
end_pts: int,
|
419
|
-
time_base
|
420
|
-
) -> av.AudioFrame:
|
453
|
+
def trim_audio_frame(
|
454
|
+
frame: AudioFrame,
|
455
|
+
start: Union[int, float, Fraction],
|
456
|
+
end: Union[int, float, Fraction],
|
457
|
+
time_base: Fraction
|
458
|
+
) -> AudioFrame:
|
421
459
|
"""
|
422
|
-
|
460
|
+
Trim an audio frame to obtain the part between
|
461
|
+
[start, end), that is provided in seconds.
|
423
462
|
"""
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
#frame_end_pts = frame.pts + int((n_samples / sr) / time_base)
|
429
|
-
# TODO: This could be wrong
|
430
|
-
frame_end_pts = frame.pts + int(frame.samples)
|
431
|
-
|
432
|
-
# solapamiento en PTS
|
433
|
-
cut_start_pts = max(frame.pts, start_pts)
|
434
|
-
cut_end_pts = min(frame_end_pts, end_pts)
|
435
|
-
|
436
|
-
if cut_start_pts >= cut_end_pts:
|
437
|
-
raise Exception('Oops...')
|
438
|
-
return None # no hay solapamiento
|
439
|
-
|
440
|
-
# convertir a índices de samples (en ticks → segundos → samples)
|
441
|
-
cut_start_time = (cut_start_pts - frame.pts) * time_base
|
442
|
-
cut_end_time = (cut_end_pts - frame.pts) * time_base
|
443
|
-
|
444
|
-
start_idx = int(cut_start_time * sr)
|
445
|
-
end_idx = int(cut_end_time * sr)
|
446
|
-
|
447
|
-
print(
|
448
|
-
f"cutting [{frame.pts}, {frame_end_pts}] "
|
449
|
-
f"to [{cut_start_pts}, {cut_end_pts}] "
|
450
|
-
f"({start_idx}:{end_idx} / {frame.samples})"
|
451
|
-
#f"({start_idx}:{end_idx} / {n_samples})"
|
452
|
-
)
|
453
|
-
|
454
|
-
cut_samples = samples[:, start_idx:end_idx]
|
455
|
-
|
456
|
-
# crear nuevo AudioFrame
|
457
|
-
new_frame = av.AudioFrame.from_ndarray(cut_samples, format=frame.format, layout=frame.layout)
|
458
|
-
new_frame.sample_rate = sr
|
459
|
-
|
460
|
-
# ajustar PTS → corresponde al inicio real del recorte
|
461
|
-
new_frame.pts = cut_start_pts
|
462
|
-
new_frame.time_base = time_base
|
463
|
-
|
464
|
-
return new_frame
|
463
|
+
# (channels, n_samples)
|
464
|
+
samples = frame.to_ndarray()
|
465
|
+
n_samples = samples.shape[1]
|
465
466
|
|
467
|
+
# In seconds
|
468
|
+
frame_start = frame.pts * float(time_base)
|
469
|
+
frame_end = frame_start + (n_samples / frame.sample_rate)
|
466
470
|
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
start_time: float,
|
471
|
-
end_time: float,
|
472
|
-
time_base
|
473
|
-
) -> av.AudioFrame:
|
474
|
-
"""
|
475
|
-
Recorta un AudioFrame para quedarse solo con la parte entre [start_time, end_time] en segundos.
|
476
|
-
"""
|
477
|
-
samples = frame.to_ndarray() # (channels, n_samples)
|
478
|
-
n_channels, n_samples = samples.shape
|
479
|
-
sr = frame.sample_rate
|
480
|
-
|
481
|
-
frame_start = float(frame.pts * time_base)
|
482
|
-
frame_end = frame_start + (n_samples / sr)
|
483
|
-
|
484
|
-
# calcular solapamiento en segundos
|
485
|
-
cut_start = max(frame_start, start_time)
|
486
|
-
cut_end = min(frame_end, end_time)
|
471
|
+
# Overlapping
|
472
|
+
cut_start = max(frame_start, float(start))
|
473
|
+
cut_end = min(frame_end, float(end))
|
487
474
|
|
488
475
|
if cut_start >= cut_end:
|
489
|
-
|
476
|
+
# No overlapping
|
477
|
+
return None
|
478
|
+
|
479
|
+
# To sample indexes
|
480
|
+
start_index = int(round((cut_start - frame_start) * frame.sample_rate))
|
481
|
+
end_index = int(round((cut_end - frame_start) * frame.sample_rate))
|
482
|
+
|
483
|
+
new_frame = AudioFrame.from_ndarray(
|
484
|
+
# end_index is not included: so [start, end)
|
485
|
+
array = samples[:, start_index:end_index],
|
486
|
+
format = frame.format,
|
487
|
+
layout = frame.layout
|
488
|
+
)
|
490
489
|
|
491
|
-
#
|
492
|
-
|
493
|
-
|
490
|
+
# Set attributes
|
491
|
+
new_frame.sample_rate = frame.sample_rate
|
492
|
+
new_frame.time_base = time_base
|
493
|
+
new_frame.pts = int(round(cut_start / float(time_base)))
|
494
494
|
|
495
|
-
|
496
|
-
cut_samples = samples[:, start_idx:end_idx]
|
495
|
+
return new_frame
|
497
496
|
|
498
|
-
# crear nuevo AudioFrame
|
499
|
-
new_frame = av.AudioFrame.from_ndarray(cut_samples, format = frame.format, layout = frame.layout)
|
500
|
-
new_frame.sample_rate = sr
|
501
497
|
|
502
|
-
# ajustar PTS → corresponde al inicio real del recorte
|
503
|
-
new_pts = int(cut_start / time_base)
|
504
|
-
new_frame.pts = new_pts
|
505
|
-
new_frame.time_base = time_base
|
506
498
|
|
507
|
-
|
499
|
+
"""
|
500
|
+
There is a way of editing videos being
|
501
|
+
able to arbitrarily access frames, that
|
502
|
+
is transforming the source videos to
|
503
|
+
intra-frame videos. This is a ffmpeg
|
504
|
+
command that can do it:
|
505
|
+
|
506
|
+
- `ffmpeg -i input.mp4 -c:v libx264 -x264opts keyint=1 -preset fast -crf 18 -c:a copy output_intra.mp4`
|
507
|
+
|
508
|
+
Once you have the 'output_intra.mp4',
|
509
|
+
each packet can decode its frame
|
510
|
+
depending not on the previous one, being
|
511
|
+
able to seek and jump easily.
|
512
|
+
"""
|