torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchcodec/__init__.py +27 -0
- torchcodec/_core/AVIOContextHolder.cpp +60 -0
- torchcodec/_core/AVIOContextHolder.h +64 -0
- torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
- torchcodec/_core/AVIOFileLikeContext.h +55 -0
- torchcodec/_core/AVIOTensorContext.cpp +130 -0
- torchcodec/_core/AVIOTensorContext.h +44 -0
- torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
- torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
- torchcodec/_core/CMakeLists.txt +295 -0
- torchcodec/_core/CUDACommon.cpp +330 -0
- torchcodec/_core/CUDACommon.h +51 -0
- torchcodec/_core/Cache.h +124 -0
- torchcodec/_core/CpuDeviceInterface.cpp +509 -0
- torchcodec/_core/CpuDeviceInterface.h +141 -0
- torchcodec/_core/CudaDeviceInterface.cpp +602 -0
- torchcodec/_core/CudaDeviceInterface.h +79 -0
- torchcodec/_core/DeviceInterface.cpp +117 -0
- torchcodec/_core/DeviceInterface.h +191 -0
- torchcodec/_core/Encoder.cpp +1054 -0
- torchcodec/_core/Encoder.h +192 -0
- torchcodec/_core/FFMPEGCommon.cpp +684 -0
- torchcodec/_core/FFMPEGCommon.h +314 -0
- torchcodec/_core/FilterGraph.cpp +159 -0
- torchcodec/_core/FilterGraph.h +59 -0
- torchcodec/_core/Frame.cpp +47 -0
- torchcodec/_core/Frame.h +72 -0
- torchcodec/_core/Metadata.cpp +124 -0
- torchcodec/_core/Metadata.h +92 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
- torchcodec/_core/NVDECCache.cpp +60 -0
- torchcodec/_core/NVDECCache.h +102 -0
- torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
- torchcodec/_core/SingleStreamDecoder.h +391 -0
- torchcodec/_core/StreamOptions.h +70 -0
- torchcodec/_core/Transform.cpp +128 -0
- torchcodec/_core/Transform.h +86 -0
- torchcodec/_core/ValidationUtils.cpp +35 -0
- torchcodec/_core/ValidationUtils.h +21 -0
- torchcodec/_core/__init__.py +46 -0
- torchcodec/_core/_metadata.py +262 -0
- torchcodec/_core/custom_ops.cpp +1090 -0
- torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
- torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
- torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
- torchcodec/_core/ops.py +605 -0
- torchcodec/_core/pybind_ops.cpp +50 -0
- torchcodec/_frame.py +146 -0
- torchcodec/_internally_replaced_utils.py +68 -0
- torchcodec/_samplers/__init__.py +7 -0
- torchcodec/_samplers/video_clip_sampler.py +419 -0
- torchcodec/decoders/__init__.py +12 -0
- torchcodec/decoders/_audio_decoder.py +185 -0
- torchcodec/decoders/_decoder_utils.py +113 -0
- torchcodec/decoders/_video_decoder.py +601 -0
- torchcodec/encoders/__init__.py +2 -0
- torchcodec/encoders/_audio_encoder.py +149 -0
- torchcodec/encoders/_video_encoder.py +196 -0
- torchcodec/libtorchcodec_core4.so +0 -0
- torchcodec/libtorchcodec_core5.so +0 -0
- torchcodec/libtorchcodec_core6.so +0 -0
- torchcodec/libtorchcodec_core7.so +0 -0
- torchcodec/libtorchcodec_core8.so +0 -0
- torchcodec/libtorchcodec_custom_ops4.so +0 -0
- torchcodec/libtorchcodec_custom_ops5.so +0 -0
- torchcodec/libtorchcodec_custom_ops6.so +0 -0
- torchcodec/libtorchcodec_custom_ops7.so +0 -0
- torchcodec/libtorchcodec_custom_ops8.so +0 -0
- torchcodec/libtorchcodec_pybind_ops4.so +0 -0
- torchcodec/libtorchcodec_pybind_ops5.so +0 -0
- torchcodec/libtorchcodec_pybind_ops6.so +0 -0
- torchcodec/libtorchcodec_pybind_ops7.so +0 -0
- torchcodec/libtorchcodec_pybind_ops8.so +0 -0
- torchcodec/samplers/__init__.py +2 -0
- torchcodec/samplers/_common.py +84 -0
- torchcodec/samplers/_index_based.py +287 -0
- torchcodec/samplers/_time_based.py +358 -0
- torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
- torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
- torchcodec/transforms/__init__.py +12 -0
- torchcodec/transforms/_decoder_transforms.py +375 -0
- torchcodec/version.py +2 -0
- torchcodec-0.10.0.dist-info/METADATA +286 -0
- torchcodec-0.10.0.dist-info/RECORD +88 -0
- torchcodec-0.10.0.dist-info/WHEEL +5 -0
- torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
- torchcodec-0.10.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,684 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "FFMPEGCommon.h"
|
|
8
|
+
|
|
9
|
+
#include <c10/util/Exception.h>
|
|
10
|
+
|
|
11
|
+
extern "C" {
|
|
12
|
+
#include <libavfilter/avfilter.h>
|
|
13
|
+
#include <libavfilter/buffersink.h>
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
namespace facebook::torchcodec {
|
|
17
|
+
|
|
18
|
+
// Owning RAII wrapper around an AVPacket: allocates on construction, frees on
// destruction. Throws (via TORCH_CHECK) if allocation fails.
AutoAVPacket::AutoAVPacket() : avPacket_(av_packet_alloc()) {
  TORCH_CHECK(avPacket_ != nullptr, "Couldn't allocate avPacket.");
}

AutoAVPacket::~AutoAVPacket() {
  // av_packet_free both unrefs and frees, and nulls the pointer.
  av_packet_free(&avPacket_);
}
|
|
25
|
+
|
|
26
|
+
// Non-owning view over an AutoAVPacket's AVPacket. The underlying packet
// storage is shared; this wrapper only unrefs (not frees) on destruction, so
// the same AutoAVPacket can be reused across demux iterations.
ReferenceAVPacket::ReferenceAVPacket(AutoAVPacket& shared)
    : avPacket_(shared.avPacket_) {}

ReferenceAVPacket::~ReferenceAVPacket() {
  // Drop this reference's payload; AutoAVPacket still owns the struct itself.
  av_packet_unref(avPacket_);
}

// Raw pointer access for FFmpeg APIs that take AVPacket*.
AVPacket* ReferenceAVPacket::get() {
  return avPacket_;
}

// Convenience member access (e.g. packet->pts).
AVPacket* ReferenceAVPacket::operator->() {
  return avPacket_;
}
|
|
40
|
+
|
|
41
|
+
// av_find_best_stream() took a non-const AVCodec** before FFmpeg n5.0
// (libavcodec 59.18.100); newer versions take const. This adapter casts away
// const only on the old API so callers can stay const-correct. Do not use the
// result for anything other than av_find_best_stream (the type name enforces
// that intent).
AVCodecOnlyUseForCallingAVFindBestStream
makeAVCodecOnlyUseForCallingAVFindBestStream(const AVCodec* codec) {
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 18, 100)
  return const_cast<AVCodec*>(codec);
#else
  return codec;
#endif
}
|
|
49
|
+
|
|
50
|
+
std::string getFFMPEGErrorStringFromErrorCode(int errorCode) {
|
|
51
|
+
char errorBuffer[AV_ERROR_MAX_STRING_SIZE] = {0};
|
|
52
|
+
av_strerror(errorCode, errorBuffer, AV_ERROR_MAX_STRING_SIZE);
|
|
53
|
+
return std::string(errorBuffer);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// Returns the frame's duration in stream time-base units. The field was
// renamed from pkt_duration to duration in libavutil 58 (FFmpeg 6).
int64_t getDuration(const UniqueAVFrame& avFrame) {
#if LIBAVUTIL_VERSION_MAJOR < 58
  return avFrame->pkt_duration;
#else
  return avFrame->duration;
#endif
}
|
|
63
|
+
|
|
64
|
+
// Sets the frame's duration in stream time-base units; counterpart of
// getDuration() above (same libavutil 58 field rename).
void setDuration(const UniqueAVFrame& avFrame, int64_t duration) {
#if LIBAVUTIL_VERSION_MAJOR < 58
  avFrame->pkt_duration = duration;
#else
  avFrame->duration = duration;
#endif
}
|
|
71
|
+
|
|
72
|
+
// Returns the encoder's supported sample rates as a 0-terminated array, or
// nullptr when the list is unavailable (callers then skip validation).
// FFmpeg >= 7.1 deprecated the public supported_samplerates field in favor of
// avcodec_get_supported_config().
const int* getSupportedSampleRates(const AVCodec& avCodec) {
  const int* supportedSampleRates = nullptr;
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) // FFmpeg >= 7.1
  int numSampleRates = 0;
  int ret = avcodec_get_supported_config(
      nullptr,
      &avCodec,
      AV_CODEC_CONFIG_SAMPLE_RATE,
      0,
      reinterpret_cast<const void**>(&supportedSampleRates),
      &numSampleRates);
  if (ret < 0 || supportedSampleRates == nullptr) {
    // Return nullptr to skip validation in validateSampleRate.
    return nullptr;
  }
#else
  supportedSampleRates = avCodec.supported_samplerates;
#endif
  return supportedSampleRates;
}
|
|
92
|
+
|
|
93
|
+
// Returns the encoder's supported pixel formats (AV_PIX_FMT_NONE-terminated).
// Unlike getSupportedSampleRates(), a missing list is treated as a hard error
// here: video encoding cannot proceed without knowing an acceptable format.
const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec) {
  const AVPixelFormat* supportedPixelFormats = nullptr;
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) // FFmpeg >= 7.1
  int numPixelFormats = 0;
  int ret = avcodec_get_supported_config(
      nullptr,
      &avCodec,
      AV_CODEC_CONFIG_PIX_FORMAT,
      0,
      reinterpret_cast<const void**>(&supportedPixelFormats),
      &numPixelFormats);
  if (ret < 0 || supportedPixelFormats == nullptr) {
    TORCH_CHECK(false, "Couldn't get supported pixel formats from encoder.");
  }
#else
  supportedPixelFormats = avCodec.pix_fmts;
#endif
  return supportedPixelFormats;
}
|
|
112
|
+
|
|
113
|
+
// Returns the encoder's supported sample formats (AV_SAMPLE_FMT_NONE-
// terminated), or nullptr when the list is unavailable — the caller then
// falls back to a default in findBestOutputSampleFormat.
const AVSampleFormat* getSupportedOutputSampleFormats(const AVCodec& avCodec) {
  const AVSampleFormat* supportedSampleFormats = nullptr;
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) // FFmpeg >= 7.1
  int numSampleFormats = 0;
  int ret = avcodec_get_supported_config(
      nullptr,
      &avCodec,
      AV_CODEC_CONFIG_SAMPLE_FORMAT,
      0,
      reinterpret_cast<const void**>(&supportedSampleFormats),
      &numSampleFormats);
  if (ret < 0 || supportedSampleFormats == nullptr) {
    // Return nullptr to use default output format in
    // findBestOutputSampleFormat.
    return nullptr;
  }
#else
  supportedSampleFormats = avCodec.sample_fmts;
#endif
  return supportedSampleFormats;
}
|
|
134
|
+
|
|
135
|
+
// Returns the number of audio channels in the frame.
// NOTE(review): the gate uses libavfilter's version as a proxy for the FFmpeg
// release that introduced AVChannelLayout (ch_layout) — confirm this tracks
// the intended libavutil/libavcodec versions.
int getNumChannels(const UniqueAVFrame& avFrame) {
#if LIBAVFILTER_VERSION_MAJOR > 8 || \
    (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
  return avFrame->ch_layout.nb_channels;
#else
  int numChannels = av_get_channel_layout_nb_channels(avFrame->channel_layout);
  // Handle FFmpeg 4 bug where channel_layout and numChannels are 0 or unset
  // Set values based on avFrame->channels which appears to be correct
  // to allow successful initialization of SwrContext.
  // Note this deliberately MUTATES the frame's channel_layout as a side
  // effect, despite the const reference (the pointee is non-const).
  if (numChannels == 0 && avFrame->channels > 0) {
    avFrame->channel_layout = av_get_default_channel_layout(avFrame->channels);
    numChannels = avFrame->channels;
  }
  return numChannels;
#endif
}
|
|
151
|
+
|
|
152
|
+
// Returns the number of audio channels configured on the codec context.
// Same ch_layout/channels version split as getNumChannels(UniqueAVFrame&).
int getNumChannels(const SharedAVCodecContext& avCodecContext) {
#if LIBAVFILTER_VERSION_MAJOR > 8 || \
    (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
  return avCodecContext->ch_layout.nb_channels;
#else
  return avCodecContext->channels;
#endif
}
|
|
160
|
+
|
|
161
|
+
int getNumChannels(const AVCodecParameters* codecpar) {
|
|
162
|
+
TORCH_CHECK(codecpar != nullptr, "codecpar is null")
|
|
163
|
+
#if LIBAVFILTER_VERSION_MAJOR > 8 || \
|
|
164
|
+
(LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
|
|
165
|
+
return codecpar->ch_layout.nb_channels;
|
|
166
|
+
#else
|
|
167
|
+
return codecpar->channels;
|
|
168
|
+
#endif
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Sets the codec context's channel layout to FFmpeg's default layout for
// numChannels (e.g. stereo for 2). Handles the FFmpeg 4 -> 5 API change from
// uint64_t bitmask layouts to AVChannelLayout.
void setDefaultChannelLayout(
    UniqueAVCodecContext& avCodecContext,
    int numChannels) {
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
  AVChannelLayout channel_layout;
  av_channel_layout_default(&channel_layout, numChannels);
  avCodecContext->ch_layout = channel_layout;
#else
  uint64_t channel_layout = av_get_default_channel_layout(numChannels);
  avCodecContext->channel_layout = channel_layout;
  // On the old API the channel count is a separate field and must be kept in
  // sync with the layout.
  avCodecContext->channels = numChannels;
#endif
}
|
|
184
|
+
|
|
185
|
+
// Frame counterpart of setDefaultChannelLayout(UniqueAVCodecContext&, int):
// assigns the default layout for numChannels to the AVFrame.
void setDefaultChannelLayout(UniqueAVFrame& avFrame, int numChannels) {
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
  AVChannelLayout channel_layout;
  av_channel_layout_default(&channel_layout, numChannels);
  avFrame->ch_layout = channel_layout;
#else
  uint64_t channel_layout = av_get_default_channel_layout(numChannels);
  avFrame->channel_layout = channel_layout;
  // Old API: channel count is tracked separately from the layout bitmask.
  avFrame->channels = numChannels;
#endif
}
|
|
196
|
+
|
|
197
|
+
// Validates that the encoder supports the requested channel count, throwing
// with the list of supported counts otherwise. If the encoder does not
// advertise supported layouts, validation is skipped (FFmpeg will error later
// if the count is truly unsupported). Three code paths:
//   - FFmpeg >= 7.1: query layouts via avcodec_get_supported_config()
//   - FFmpeg 5/6/7.0: read avCodec.ch_layouts
//   - FFmpeg <= 4:    read avCodec.channel_layouts (uint64_t bitmasks)
void validateNumChannels(const AVCodec& avCodec, int numChannels) {
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 13, 100) // FFmpeg >= 7.1
  std::stringstream supportedNumChannels;
  const AVChannelLayout* supportedLayouts = nullptr;
  int numLayouts = 0;
  int ret = avcodec_get_supported_config(
      nullptr,
      &avCodec,
      AV_CODEC_CONFIG_CHANNEL_LAYOUT,
      0,
      reinterpret_cast<const void**>(&supportedLayouts),
      &numLayouts);
  if (ret < 0 || supportedLayouts == nullptr) {
    // If we can't validate, we must assume it'll be fine. If not, FFmpeg will
    // eventually raise.
    return;
  }
  // Build the error message while scanning, so a match can return early.
  for (int i = 0; i < numLayouts; ++i) {
    if (i > 0) {
      supportedNumChannels << ", ";
    }
    supportedNumChannels << supportedLayouts[i].nb_channels;
    if (numChannels == supportedLayouts[i].nb_channels) {
      return;
    }
  }
#elif LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
  if (avCodec.ch_layouts == nullptr) {
    // If we can't validate, we must assume it'll be fine. If not, FFmpeg will
    // eventually raise.
    return;
  }
  // FFmpeg doc indicate that the ch_layouts array is terminated by a zeroed
  // layout, so checking for nb_channels == 0 should indicate its end.
  for (auto i = 0; avCodec.ch_layouts[i].nb_channels != 0; ++i) {
    if (numChannels == avCodec.ch_layouts[i].nb_channels) {
      return;
    }
  }
  // At this point it seems that the encoder doesn't support the requested
  // number of channels, so we error out.
  std::stringstream supportedNumChannels;
  for (auto i = 0; avCodec.ch_layouts[i].nb_channels != 0; ++i) {
    if (i > 0) {
      supportedNumChannels << ", ";
    }
    supportedNumChannels << avCodec.ch_layouts[i].nb_channels;
  }
#else // FFmpeg <= 4
  if (avCodec.channel_layouts == nullptr) {
    // can't validate, same as above.
    return;
  }
  // Old API: the array is a 0-terminated list of uint64_t layout bitmasks.
  for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) {
    if (numChannels ==
        av_get_channel_layout_nb_channels(avCodec.channel_layouts[i])) {
      return;
    }
  }
  // At this point it seems that the encoder doesn't support the requested
  // number of channels, so we error out.
  std::stringstream supportedNumChannels;
  for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) {
    if (i > 0) {
      supportedNumChannels << ", ";
    }
    supportedNumChannels << av_get_channel_layout_nb_channels(
        avCodec.channel_layouts[i]);
  }
#endif
  TORCH_CHECK(
      false,
      "Desired number of channels (",
      numChannels,
      ") is not supported by the ",
      "encoder. Supported number of channels are: ",
      supportedNumChannels.str(),
      ".");
}
|
|
276
|
+
|
|
277
|
+
namespace {
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4

// Returns:
// - the srcAVFrame's channel layout if srcAVFrame has outNumChannels
// - the default channel layout with outNumChannels otherwise.
// Preserving the source layout (rather than always defaulting) keeps e.g.
// non-standard layouts intact when the channel count is unchanged.
AVChannelLayout getOutputChannelLayout(
    int outNumChannels,
    const UniqueAVFrame& srcAVFrame) {
  AVChannelLayout outLayout;
  if (outNumChannels == getNumChannels(srcAVFrame)) {
    outLayout = srcAVFrame->ch_layout;
  } else {
    av_channel_layout_default(&outLayout, outNumChannels);
  }
  return outLayout;
}

#else

// Same as above, for the FFmpeg 4 uint64_t-bitmask layout API.
int64_t getOutputChannelLayout(
    int outNumChannels,
    const UniqueAVFrame& srcAVFrame) {
  int64_t outLayout;
  if (outNumChannels == getNumChannels(srcAVFrame)) {
    outLayout = srcAVFrame->channel_layout;
  } else {
    outLayout = av_get_default_channel_layout(outNumChannels);
  }
  return outLayout;
}
#endif
} // namespace
|
|
311
|
+
|
|
312
|
+
// Sets dstAVFrame's channel layout to getOutputChannelLayout(): see doc above.
// On FFmpeg > 4 the layout must be deep-copied with av_channel_layout_copy
// (AVChannelLayout may own heap data for custom layouts).
void setChannelLayout(
    UniqueAVFrame& dstAVFrame,
    const UniqueAVFrame& srcAVFrame,
    int outNumChannels) {
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
  AVChannelLayout outLayout =
      getOutputChannelLayout(outNumChannels, srcAVFrame);
  auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &outLayout);
  TORCH_CHECK(
      status == AVSUCCESS,
      "Couldn't copy channel layout to avFrame: ",
      getFFMPEGErrorStringFromErrorCode(status));
#else
  // Old API: plain integer assignment; channel count kept in sync manually.
  dstAVFrame->channel_layout =
      getOutputChannelLayout(outNumChannels, srcAVFrame);
  dstAVFrame->channels = outNumChannels;
#endif
}
|
|
331
|
+
|
|
332
|
+
// Allocates an audio AVFrame with buffers sized for the given sample count,
// rate, channel count, and sample format. Throws on any allocation failure.
// Returns an owning UniqueAVFrame ready to be written into.
UniqueAVFrame allocateAVFrame(
    int numSamples,
    int sampleRate,
    int numChannels,
    AVSampleFormat sampleFormat) {
  auto avFrame = UniqueAVFrame(av_frame_alloc());
  TORCH_CHECK(avFrame != nullptr, "Couldn't allocate AVFrame.");

  // All format fields must be set before av_frame_get_buffer can size the
  // data planes.
  avFrame->nb_samples = numSamples;
  avFrame->sample_rate = sampleRate;
  setDefaultChannelLayout(avFrame, numChannels);
  avFrame->format = sampleFormat;
  auto status = av_frame_get_buffer(avFrame.get(), 0);

  TORCH_CHECK(
      status == AVSUCCESS,
      "Couldn't allocate avFrame's buffers: ",
      getFFMPEGErrorStringFromErrorCode(status));

  status = av_frame_make_writable(avFrame.get());
  TORCH_CHECK(
      status == AVSUCCESS,
      "Couldn't make AVFrame writable: ",
      getFFMPEGErrorStringFromErrorCode(status));
  return avFrame;
}
|
|
358
|
+
|
|
359
|
+
// Creates and initializes a libswresample context converting from the source
// frame's format/rate/layout to the requested output parameters. Returns a
// raw owning pointer; the caller is responsible for wrapping/freeing it.
// NOTE(review): if a TORCH_CHECK below throws after allocation, the context
// is leaked — consider wrapping in UniqueSwrContext before the checks.
SwrContext* createSwrContext(
    AVSampleFormat srcSampleFormat,
    AVSampleFormat outSampleFormat,
    int srcSampleRate,
    int outSampleRate,
    const UniqueAVFrame& srcAVFrame,
    int outNumChannels) {
  SwrContext* swrContext = nullptr;
  int status = AVSUCCESS;
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
  // New API: swr_alloc_set_opts2 takes AVChannelLayout* and reports errors.
  AVChannelLayout outLayout =
      getOutputChannelLayout(outNumChannels, srcAVFrame);
  status = swr_alloc_set_opts2(
      &swrContext,
      &outLayout,
      outSampleFormat,
      outSampleRate,
      &srcAVFrame->ch_layout,
      srcSampleFormat,
      srcSampleRate,
      0,
      nullptr);

  TORCH_CHECK(
      status == AVSUCCESS,
      "Couldn't create SwrContext: ",
      getFFMPEGErrorStringFromErrorCode(status));
#else
  // Old API: layouts are int64_t bitmasks and failure is signalled by a null
  // return (checked below).
  int64_t outLayout = getOutputChannelLayout(outNumChannels, srcAVFrame);
  swrContext = swr_alloc_set_opts(
      nullptr,
      outLayout,
      outSampleFormat,
      outSampleRate,
      srcAVFrame->channel_layout,
      srcSampleFormat,
      srcSampleRate,
      0,
      nullptr);
#endif

  TORCH_CHECK(swrContext != nullptr, "Couldn't create swrContext");
  status = swr_init(swrContext);
  TORCH_CHECK(
      status == AVSUCCESS,
      "Couldn't initialize SwrContext: ",
      getFFMPEGErrorStringFromErrorCode(status),
      ". If the error says 'Invalid argument', it's likely that you are using "
      "a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
      "valid scenarios. Try to upgrade FFmpeg?");
  return swrContext;
}
|
|
411
|
+
|
|
412
|
+
// Creates the "out" buffersink filter context in filterGraph and constrains
// its accepted pixel format to outputFormat. The option-setting API differs
// by FFmpeg version, hence the two code paths. Returns a non-owning pointer:
// the filter graph owns the created context.
AVFilterContext* createAVFilterContextWithOptions(
    AVFilterGraph* filterGraph,
    const AVFilter* buffer,
    const enum AVPixelFormat outputFormat) {
  AVFilterContext* avFilterContext = nullptr;
  const char* filterName = "out";

  enum AVPixelFormat pixFmts[] = {outputFormat, AV_PIX_FMT_NONE};

  // av_opt_set_int_list was replaced by av_opt_set_array() in FFmpeg 8.
#if LIBAVUTIL_VERSION_MAJOR >= 60 // FFmpeg >= 8
  // Output options like pixel_formats must be set before filter init
  avFilterContext =
      avfilter_graph_alloc_filter(filterGraph, buffer, filterName);
  TORCH_CHECK(
      avFilterContext != nullptr, "Failed to allocate buffer filter context.");

  // When setting pix_fmts, only the first element is used, so nb_elems = 1
  // AV_PIX_FMT_NONE acts as a terminator for the array in av_opt_set_int_list
  int status = av_opt_set_array(
      avFilterContext,
      "pixel_formats",
      AV_OPT_SEARCH_CHILDREN,
      0, // start_elem
      1, // nb_elems
      AV_OPT_TYPE_PIXEL_FMT,
      pixFmts);
  TORCH_CHECK(
      status >= 0,
      "Failed to set pixel format for buffer filter: ",
      getFFMPEGErrorStringFromErrorCode(status));

  status = avfilter_init_str(avFilterContext, nullptr);
  TORCH_CHECK(
      status >= 0,
      "Failed to initialize buffer filter: ",
      getFFMPEGErrorStringFromErrorCode(status));
#else // FFmpeg <= 7
  // For older FFmpeg versions, create filter and then set options
  int status = avfilter_graph_create_filter(
      &avFilterContext, buffer, filterName, nullptr, nullptr, filterGraph);
  TORCH_CHECK(
      status >= 0,
      "Failed to create buffer filter: ",
      getFFMPEGErrorStringFromErrorCode(status));

  status = av_opt_set_int_list(
      avFilterContext,
      "pix_fmts",
      pixFmts,
      AV_PIX_FMT_NONE,
      AV_OPT_SEARCH_CHILDREN);
  TORCH_CHECK(
      status >= 0,
      "Failed to set pixel formats for buffer filter: ",
      getFFMPEGErrorStringFromErrorCode(status));
#endif

  return avFilterContext;
}
|
|
472
|
+
|
|
473
|
+
// Converts srcAVFrame's samples to the given output sample format, rate, and
// channel count using a pre-configured SwrContext. Returns a new owning frame.
// The returned frame may contain 0 samples (see comment below) — callers must
// handle that.
UniqueAVFrame convertAudioAVFrameSamples(
    const UniqueSwrContext& swrContext,
    const UniqueAVFrame& srcAVFrame,
    AVSampleFormat outSampleFormat,
    int outSampleRate,
    int outNumChannels) {
  UniqueAVFrame convertedAVFrame(av_frame_alloc());
  TORCH_CHECK(
      convertedAVFrame,
      "Could not allocate frame for sample format conversion.");

  // Carry the source pts through so downstream timestamp logic still works.
  convertedAVFrame->pts = srcAVFrame->pts;
  convertedAVFrame->format = static_cast<int>(outSampleFormat);

  convertedAVFrame->sample_rate = outSampleRate;
  int srcSampleRate = srcAVFrame->sample_rate;
  if (srcSampleRate != outSampleRate) {
    // Note that this is an upper bound on the number of output samples.
    // `swr_convert()` will likely not fill convertedAVFrame with that many
    // samples if sample rate conversion is needed. It will buffer the last few
    // ones because those require future samples. That's also why we reset
    // nb_samples after the call to `swr_convert()`.
    // We could also use `swr_get_out_samples()` to determine the number of
    // output samples, but empirically `av_rescale_rnd()` seems to provide a
    // tighter bound.
    convertedAVFrame->nb_samples = av_rescale_rnd(
        swr_get_delay(swrContext.get(), srcSampleRate) + srcAVFrame->nb_samples,
        outSampleRate,
        srcSampleRate,
        AV_ROUND_UP);
  } else {
    convertedAVFrame->nb_samples = srcAVFrame->nb_samples;
  }

  setChannelLayout(convertedAVFrame, srcAVFrame, outNumChannels);

  // nb_samples/format/layout must all be set before buffer allocation.
  auto status = av_frame_get_buffer(convertedAVFrame.get(), 0);
  TORCH_CHECK(
      status == AVSUCCESS,
      "Could not allocate frame buffers for sample format conversion: ",
      getFFMPEGErrorStringFromErrorCode(status));

  auto numConvertedSamples = swr_convert(
      swrContext.get(),
      convertedAVFrame->data,
      convertedAVFrame->nb_samples,
      static_cast<const uint8_t**>(
          const_cast<const uint8_t**>(srcAVFrame->data)),
      srcAVFrame->nb_samples);
  // numConvertedSamples can be 0 if we're downsampling by a great factor and
  // the first frame doesn't contain a lot of samples. It should be handled
  // properly by the caller.
  TORCH_CHECK(
      numConvertedSamples >= 0,
      "Error in swr_convert: ",
      getFFMPEGErrorStringFromErrorCode(numConvertedSamples));

  // See comment above about nb_samples
  convertedAVFrame->nb_samples = numConvertedSamples;

  return convertedAVFrame;
}
|
|
535
|
+
|
|
536
|
+
void setFFmpegLogLevel() {
|
|
537
|
+
auto logLevel = AV_LOG_QUIET;
|
|
538
|
+
const char* logLevelEnvPtr = std::getenv("TORCHCODEC_FFMPEG_LOG_LEVEL");
|
|
539
|
+
if (logLevelEnvPtr != nullptr) {
|
|
540
|
+
std::string logLevelEnv(logLevelEnvPtr);
|
|
541
|
+
if (logLevelEnv == "QUIET") {
|
|
542
|
+
logLevel = AV_LOG_QUIET;
|
|
543
|
+
} else if (logLevelEnv == "PANIC") {
|
|
544
|
+
logLevel = AV_LOG_PANIC;
|
|
545
|
+
} else if (logLevelEnv == "FATAL") {
|
|
546
|
+
logLevel = AV_LOG_FATAL;
|
|
547
|
+
} else if (logLevelEnv == "ERROR") {
|
|
548
|
+
logLevel = AV_LOG_ERROR;
|
|
549
|
+
} else if (logLevelEnv == "WARNING") {
|
|
550
|
+
logLevel = AV_LOG_WARNING;
|
|
551
|
+
} else if (logLevelEnv == "INFO") {
|
|
552
|
+
logLevel = AV_LOG_INFO;
|
|
553
|
+
} else if (logLevelEnv == "VERBOSE") {
|
|
554
|
+
logLevel = AV_LOG_VERBOSE;
|
|
555
|
+
} else if (logLevelEnv == "DEBUG") {
|
|
556
|
+
logLevel = AV_LOG_DEBUG;
|
|
557
|
+
} else if (logLevelEnv == "TRACE") {
|
|
558
|
+
logLevel = AV_LOG_TRACE;
|
|
559
|
+
} else {
|
|
560
|
+
TORCH_CHECK(
|
|
561
|
+
false,
|
|
562
|
+
"Invalid TORCHCODEC_FFMPEG_LOG_LEVEL: ",
|
|
563
|
+
logLevelEnv,
|
|
564
|
+
". Use e.g. 'QUIET', 'PANIC', 'VERBOSE', etc.");
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
av_log_set_level(logLevel);
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
// Thin wrapper over avio_alloc_context() that papers over the FFmpeg 7 change
// making the write callback's buffer parameter const. On older versions the
// (const-taking) callback is reinterpret_cast to the old non-const signature.
AVIOContext* avioAllocContext(
    uint8_t* buffer,
    int buffer_size,
    int write_flag,
    void* opaque,
    AVIOReadFunction read_packet,
    AVIOWriteFunction write_packet,
    AVIOSeekFunction seek) {
  return avio_alloc_context(
      buffer,
      buffer_size,
      write_flag,
      opaque,
      read_packet,
      // The buf parameter of the write function is not const before FFmpeg 7.
#if LIBAVFILTER_VERSION_MAJOR >= 10 // FFmpeg >= 7
      write_packet,
#else
      reinterpret_cast<AVIOWriteFunctionOld>(write_packet),
#endif
      seek);
}
|
|
592
|
+
|
|
593
|
+
double ptsToSeconds(int64_t pts, const AVRational& timeBase) {
|
|
594
|
+
// To perform the multiplication before the division, av_q2d is not used
|
|
595
|
+
return static_cast<double>(pts) * timeBase.num / timeBase.den;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
int64_t secondsToClosestPts(double seconds, const AVRational& timeBase) {
|
|
599
|
+
return static_cast<int64_t>(
|
|
600
|
+
std::round(seconds * timeBase.den / timeBase.num));
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
int64_t computeSafeDuration(
|
|
604
|
+
const AVRational& frameRate,
|
|
605
|
+
const AVRational& timeBase) {
|
|
606
|
+
if (frameRate.num <= 0 || frameRate.den <= 0 || timeBase.num <= 0 ||
|
|
607
|
+
timeBase.den <= 0) {
|
|
608
|
+
return 0;
|
|
609
|
+
} else {
|
|
610
|
+
return (static_cast<int64_t>(frameRate.den) * timeBase.den) /
|
|
611
|
+
(static_cast<int64_t>(timeBase.num) * frameRate.num);
|
|
612
|
+
}
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// Aggregates the parameters that determine whether a cached SwsContext can be
// reused: input geometry/format and output geometry.
SwsFrameContext::SwsFrameContext(
    int inputWidth,
    int inputHeight,
    AVPixelFormat inputFormat,
    int outputWidth,
    int outputHeight)
    : inputWidth(inputWidth),
      inputHeight(inputHeight),
      inputFormat(inputFormat),
      outputWidth(outputWidth),
      outputHeight(outputHeight) {}
|
|
626
|
+
|
|
627
|
+
bool SwsFrameContext::operator==(const SwsFrameContext& other) const {
|
|
628
|
+
return inputWidth == other.inputWidth && inputHeight == other.inputHeight &&
|
|
629
|
+
inputFormat == other.inputFormat && outputWidth == other.outputWidth &&
|
|
630
|
+
outputHeight == other.outputHeight;
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
bool SwsFrameContext::operator!=(const SwsFrameContext& other) const {
|
|
634
|
+
return !(*this == other);
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
// Creates a libswscale context for the given input/output geometry and
// format, then overrides its colorspace coefficients with those of the
// requested colorspace for both source and destination (preserving the
// ranges/brightness/contrast/saturation the context was created with).
// Throws via TORCH_CHECK on any failure.
UniqueSwsContext createSwsContext(
    const SwsFrameContext& swsFrameContext,
    AVColorSpace colorspace,
    AVPixelFormat outputFormat,
    int swsFlags) {
  // Fix: wrap the raw context immediately so it is not leaked when one of the
  // TORCH_CHECKs below throws (previously it was only wrapped on return).
  UniqueSwsContext uniqueSwsContext(sws_getContext(
      swsFrameContext.inputWidth,
      swsFrameContext.inputHeight,
      swsFrameContext.inputFormat,
      swsFrameContext.outputWidth,
      swsFrameContext.outputHeight,
      outputFormat,
      swsFlags,
      nullptr,
      nullptr,
      nullptr));
  TORCH_CHECK(uniqueSwsContext != nullptr, "sws_getContext() returned nullptr");
  SwsContext* swsContext = uniqueSwsContext.get();

  // Read back the context's current colorspace details so we can keep the
  // ranges and tuning values while swapping only the coefficient tables.
  int* invTable = nullptr;
  int* table = nullptr;
  int srcRange, dstRange, brightness, contrast, saturation;
  int ret = sws_getColorspaceDetails(
      swsContext,
      &invTable,
      &srcRange,
      &table,
      &dstRange,
      &brightness,
      &contrast,
      &saturation);
  TORCH_CHECK(ret != -1, "sws_getColorspaceDetails returned -1");

  // Use the same coefficient table for input and output conversion.
  const int* colorspaceTable = sws_getCoefficients(colorspace);
  ret = sws_setColorspaceDetails(
      swsContext,
      colorspaceTable,
      srcRange,
      colorspaceTable,
      dstRange,
      brightness,
      contrast,
      saturation);
  TORCH_CHECK(ret != -1, "sws_setColorspaceDetails returned -1");

  return uniqueSwsContext;
}
|
|
683
|
+
|
|
684
|
+
} // namespace facebook::torchcodec
|