torchcodec 0.7.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (67) hide show
  1. torchcodec/__init__.py +16 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  7. torchcodec/_core/AVIOTensorContext.h +43 -0
  8. torchcodec/_core/CMakeLists.txt +292 -0
  9. torchcodec/_core/Cache.h +138 -0
  10. torchcodec/_core/CpuDeviceInterface.cpp +266 -0
  11. torchcodec/_core/CpuDeviceInterface.h +70 -0
  12. torchcodec/_core/CudaDeviceInterface.cpp +514 -0
  13. torchcodec/_core/CudaDeviceInterface.h +37 -0
  14. torchcodec/_core/DeviceInterface.cpp +79 -0
  15. torchcodec/_core/DeviceInterface.h +67 -0
  16. torchcodec/_core/Encoder.cpp +514 -0
  17. torchcodec/_core/Encoder.h +123 -0
  18. torchcodec/_core/FFMPEGCommon.cpp +421 -0
  19. torchcodec/_core/FFMPEGCommon.h +227 -0
  20. torchcodec/_core/FilterGraph.cpp +142 -0
  21. torchcodec/_core/FilterGraph.h +45 -0
  22. torchcodec/_core/Frame.cpp +32 -0
  23. torchcodec/_core/Frame.h +118 -0
  24. torchcodec/_core/Metadata.h +72 -0
  25. torchcodec/_core/SingleStreamDecoder.cpp +1715 -0
  26. torchcodec/_core/SingleStreamDecoder.h +380 -0
  27. torchcodec/_core/StreamOptions.h +53 -0
  28. torchcodec/_core/ValidationUtils.cpp +35 -0
  29. torchcodec/_core/ValidationUtils.h +21 -0
  30. torchcodec/_core/__init__.py +40 -0
  31. torchcodec/_core/_metadata.py +317 -0
  32. torchcodec/_core/custom_ops.cpp +727 -0
  33. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +300 -0
  34. torchcodec/_core/ops.py +455 -0
  35. torchcodec/_core/pybind_ops.cpp +87 -0
  36. torchcodec/_frame.py +145 -0
  37. torchcodec/_internally_replaced_utils.py +67 -0
  38. torchcodec/_samplers/__init__.py +7 -0
  39. torchcodec/_samplers/video_clip_sampler.py +430 -0
  40. torchcodec/decoders/__init__.py +11 -0
  41. torchcodec/decoders/_audio_decoder.py +177 -0
  42. torchcodec/decoders/_decoder_utils.py +52 -0
  43. torchcodec/decoders/_video_decoder.py +464 -0
  44. torchcodec/encoders/__init__.py +1 -0
  45. torchcodec/encoders/_audio_encoder.py +150 -0
  46. torchcodec/libtorchcodec_core4.dll +0 -0
  47. torchcodec/libtorchcodec_core5.dll +0 -0
  48. torchcodec/libtorchcodec_core6.dll +0 -0
  49. torchcodec/libtorchcodec_core7.dll +0 -0
  50. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  51. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  52. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  53. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  54. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  55. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  56. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  57. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  58. torchcodec/samplers/__init__.py +2 -0
  59. torchcodec/samplers/_common.py +84 -0
  60. torchcodec/samplers/_index_based.py +287 -0
  61. torchcodec/samplers/_time_based.py +350 -0
  62. torchcodec/version.py +2 -0
  63. torchcodec-0.7.0.dist-info/METADATA +242 -0
  64. torchcodec-0.7.0.dist-info/RECORD +67 -0
  65. torchcodec-0.7.0.dist-info/WHEEL +5 -0
  66. torchcodec-0.7.0.dist-info/licenses/LICENSE +28 -0
  67. torchcodec-0.7.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,123 @@
1
+ #pragma once
2
+ #include <torch/types.h>
3
+ #include "src/torchcodec/_core/AVIOContextHolder.h"
4
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
5
+ #include "src/torchcodec/_core/StreamOptions.h"
6
+
7
+ namespace facebook::torchcodec {
8
+ class AudioEncoder { // Encodes a tensor of audio samples; design is described in the Note below.
9
+ public:
10
+ ~AudioEncoder();
11
+ // Constructor taking a `fileName` destination (presumably the output file path - TODO confirm in Encoder.cpp).
12
+ AudioEncoder(
13
+ const torch::Tensor& samples,
14
+ int sampleRate,
15
+ std::string_view fileName,
16
+ const AudioStreamOptions& audioStreamOptions);
17
+ // Constructor taking a `formatName` and an AVIOContextHolder; the holder is passed by value as a unique_ptr, so ownership moves to the encoder.
18
+ AudioEncoder(
19
+ const torch::Tensor& samples,
20
+ int sampleRate,
21
+ std::string_view formatName,
22
+ std::unique_ptr<AVIOContextHolder> avioContextHolder,
23
+ const AudioStreamOptions& audioStreamOptions);
24
+ // Runs the main encoding loop; see "Note: [Encoding loop, sample rate conversion and FIFO]" below.
25
+ void encode();
26
+ // Returns a torch::Tensor (presumably the encoded bytes collected through avioContextHolder_ - TODO confirm).
27
+ torch::Tensor encodeToTensor();
28
+
29
+ private:
30
+ void initializeEncoder(const AudioStreamOptions& audioStreamOptions); // setup helper (presumably shared by both constructors - TODO confirm)
31
+ UniqueAVFrame maybeConvertAVFrame(const UniqueAVFrame& avFrame); // per the Note below: converts num channels, format and sample rate via libswresample
32
+ void encodeFrameThroughFifo(
33
+ AutoAVPacket& autoAVPacket,
34
+ const UniqueAVFrame& avFrame,
35
+ bool flushFifo = false); // per the Note below: the FIFO is either used for every frame or not at all
36
+ void encodeFrame(AutoAVPacket& autoAVPacket, const UniqueAVFrame& avFrame);
37
+ void maybeFlushSwrBuffers(AutoAVPacket& autoAVPacket); // per the Note below: flushes swr's internal buffers and instructs to flush the FIFO if it exists
38
+ void flushBuffers(); // NOTE(review): presumably the "nullptr, to flush FFmpeg buffers" step of the diagram below - confirm
39
+ void close_avio(); // NOTE(review): snake_case, unlike the other methods of this class
40
+ // FFmpeg state; the Unique* wrapper types are declared outside this header.
41
+ UniqueEncodingAVFormatContext avFormatContext_;
42
+ UniqueAVCodecContext avCodecContext_;
43
+ int streamIndex_;
44
+ UniqueSwrContext swrContext_;
45
+ AudioStreamOptions audioStreamOptions; // NOTE(review): no trailing underscore, unlike the other data members
46
+ // Input samples (presumably the tensor handed to the constructor - TODO confirm).
47
+ const torch::Tensor samples_;
48
+ // All three default to -1 (presumably "not set yet"; assigned outside this header - TODO confirm).
49
+ int outNumChannels_ = -1;
50
+ int outSampleRate_ = -1;
51
+ int inSampleRate_ = -1;
52
+ // Per the Note below: only used when sample rate conversion is needed AND the encoder expects a fixed frame size.
53
+ UniqueAVAudioFifo avAudioFifo_;
54
+ // Custom I/O context (presumably set only by the formatName constructor - TODO confirm).
55
+ std::unique_ptr<AVIOContextHolder> avioContextHolder_;
56
+ // Bookkeeping flags/counters, both with in-class defaults.
57
+ bool encodeWasCalled_ = false;
58
+ int64_t lastEncodedAVFramePts_ = 0;
59
+ };
60
+ } // namespace facebook::torchcodec
61
+
62
+ /* clang-format off */
63
+ //
64
+ // Note: [Encoding loop, sample rate conversion and FIFO]
65
+ //
66
+ // The input samples are in a given format, sample rate, and number of channels.
67
+ // We may want to change these properties before encoding. The conversion is
68
+ // done in maybeConvertAVFrame() and we rely on libswresample. When sample rate
69
+ // conversion is needed, this means two things:
70
+ // - swr will be storing samples in its internal buffers, which we'll need to
71
+ // flush at the very end of the encoding process.
72
+ // - the converted AVFrame we get back from maybeConvertAVFrame() typically
73
+ // won't have the same number of samples as the original AVFrame. And that's
74
+ // a problem, because some encoders expect AVFrames with a specific and
75
+ // constant number of samples. If we were to send it as-is, we'd get an error
76
+ // in avcodec_send_frame(). In order to feed the encoder with AVFrames
77
+ // with the expected number of samples, we go through an intermediate FIFO
78
+ // from which we can pull the exact number of samples that we need. Note that
79
+ // this involves at least 2 additional copies.
80
+ //
81
+ // To be clear, the FIFO is only used if BOTH the following conditions are met:
82
+ // - sample rate conversion is needed (inSampleRate_ != outSampleRate_)
83
+ // - the encoder expects a specific number of samples per AVFrame (fixed frame size)
84
+ // This is not the case for all encoders, e.g. WAV doesn't care about frame size.
85
+ //
86
+ // Also, the FIFO is either:
87
+ // - used for every single frame during the encoding process, or
88
+ // - not used at all.
89
+ // There is no scenario where a given AudioEncoder instance would sometimes use a
90
+ // FIFO, sometimes not.
91
+ //
92
+ // Drawing made with https://asciiflow.com/, can be copy/pasted there if it
93
+ // needs editing:
94
+ //
95
+ // ┌─One─iteration─of─main─encoding─loop─(encode())───────────────────────────────────────────┐
96
+ // │ │
97
+ // │ Converts: │
98
+ // │ - num channels │
99
+ // │ - format │
100
+ // │ - sample rate │
101
+ // │ If sample rate is converted, stores data in swr buffers, │
102
+ // │ which will need to be flushed by maybeFlushSwrBuffers() │
103
+ // │ │
104
+ // │ ▲ │
105
+ // │ │ ┌─encodeFrameThroughFifo()──────────────┐│
106
+ // │ │ │ ││
107
+ // │ AVFrame ──────► maybeConvertAVFrame()───▲──│─┬──► NO FIFO ─►┬──▲────►encodeFrame() ││
108
+ // │ with │ │ │ │ │ ││
109
+ // │ input │ │ │ │ │ ││
110
+ // │ samples │ │ │ │ │ ││
111
+ // │ │ │ │ │ │ ││
112
+ // │ │ │ └────► FIFO ─►─┘ │ ││
113
+ // │ │ └───────────────────┼───────────────────┘│
114
+ // └──────────────────────────────────────────────┼──────────────────────┼────────────────────┘
115
+ // │ │
116
+ // AVFrame from maybeFlushSwrBuffers() ───┘ │
117
+ // Only if sample rate conversion was needed nullptr, to flush
118
+ // The call to maybeFlushSwrBuffers() will FFmpeg buffers
119
+ // also instruct to flush the FIFO, if it exists.
120
+ //
121
+ //
122
+ //
123
+ /* clang-format on */
@@ -0,0 +1,421 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
8
+
9
+ #include <c10/util/Exception.h>
10
+
11
+ namespace facebook::torchcodec {
12
+
13
+ AutoAVPacket::AutoAVPacket() : avPacket_(av_packet_alloc()) {
14
+ TORCH_CHECK(avPacket_ != nullptr, "Couldn't allocate avPacket.");
15
+ }
16
+
17
+ AutoAVPacket::~AutoAVPacket() {
18
+ av_packet_free(&avPacket_);
19
+ }
20
+
21
+ ReferenceAVPacket::ReferenceAVPacket(AutoAVPacket& shared)
22
+ : avPacket_(shared.avPacket_) {}
23
+
24
+ ReferenceAVPacket::~ReferenceAVPacket() {
25
+ av_packet_unref(avPacket_);
26
+ }
27
+
28
+ AVPacket* ReferenceAVPacket::get() {
29
+ return avPacket_;
30
+ }
31
+
32
+ AVPacket* ReferenceAVPacket::operator->() {
33
+ return avPacket_;
34
+ }
35
+
36
+ AVCodecOnlyUseForCallingAVFindBestStream
37
+ makeAVCodecOnlyUseForCallingAVFindBestStream(const AVCodec* codec) {
38
+ #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 18, 100)
39
+ return const_cast<AVCodec*>(codec);
40
+ #else
41
+ return codec;
42
+ #endif
43
+ }
44
+
45
+ std::string getFFMPEGErrorStringFromErrorCode(int errorCode) {
46
+ char errorBuffer[AV_ERROR_MAX_STRING_SIZE] = {0};
47
+ av_strerror(errorCode, errorBuffer, AV_ERROR_MAX_STRING_SIZE);
48
+ return std::string(errorBuffer);
49
+ }
50
+
51
+ int64_t getDuration(const UniqueAVFrame& avFrame) {
52
+ #if LIBAVUTIL_VERSION_MAJOR < 58
53
+ return avFrame->pkt_duration;
54
+ #else
55
+ return avFrame->duration;
56
+ #endif
57
+ }
58
+
59
+ int getNumChannels(const UniqueAVFrame& avFrame) {
60
+ #if LIBAVFILTER_VERSION_MAJOR > 8 || \
61
+ (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
62
+ return avFrame->ch_layout.nb_channels;
63
+ #else
64
+ int numChannels = av_get_channel_layout_nb_channels(avFrame->channel_layout);
65
+ // Handle FFmpeg 4 bug where channel_layout and numChannels are 0 or unset
66
+ // Set values based on avFrame->channels which appears to be correct
67
+ // to allow successful initialization of SwrContext
68
+ if (numChannels == 0 && avFrame->channels > 0) {
69
+ avFrame->channel_layout = av_get_default_channel_layout(avFrame->channels);
70
+ numChannels = avFrame->channels;
71
+ }
72
+ return numChannels;
73
+ #endif
74
+ }
75
+
76
+ int getNumChannels(const UniqueAVCodecContext& avCodecContext) {
77
+ #if LIBAVFILTER_VERSION_MAJOR > 8 || \
78
+ (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
79
+ return avCodecContext->ch_layout.nb_channels;
80
+ #else
81
+ return avCodecContext->channels;
82
+ #endif
83
+ }
84
+
85
+ void setDefaultChannelLayout(
86
+ UniqueAVCodecContext& avCodecContext,
87
+ int numChannels) {
88
+ #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
89
+ AVChannelLayout channel_layout;
90
+ av_channel_layout_default(&channel_layout, numChannels);
91
+ avCodecContext->ch_layout = channel_layout;
92
+ #else
93
+ uint64_t channel_layout = av_get_default_channel_layout(numChannels);
94
+ avCodecContext->channel_layout = channel_layout;
95
+ avCodecContext->channels = numChannels;
96
+ #endif
97
+ }
98
+
99
+ void setDefaultChannelLayout(UniqueAVFrame& avFrame, int numChannels) {
100
+ #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
101
+ AVChannelLayout channel_layout;
102
+ av_channel_layout_default(&channel_layout, numChannels);
103
+ avFrame->ch_layout = channel_layout;
104
+ #else
105
+ uint64_t channel_layout = av_get_default_channel_layout(numChannels);
106
+ avFrame->channel_layout = channel_layout;
107
+ avFrame->channels = numChannels;
108
+ #endif
109
+ }
110
+
111
+ void validateNumChannels(const AVCodec& avCodec, int numChannels) {
112
+ #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
113
+ if (avCodec.ch_layouts == nullptr) {
114
+ // If we can't validate, we must assume it'll be fine. If not, FFmpeg will
115
+ // eventually raise.
116
+ return;
117
+ }
118
+ // FFmpeg doc indicate that the ch_layouts array is terminated by a zeroed
119
+ // layout, so checking for nb_channels == 0 should indicate its end.
120
+ for (auto i = 0; avCodec.ch_layouts[i].nb_channels != 0; ++i) {
121
+ if (numChannels == avCodec.ch_layouts[i].nb_channels) {
122
+ return;
123
+ }
124
+ }
125
+ // At this point it seems that the encoder doesn't support the requested
126
+ // number of channels, so we error out.
127
+ std::stringstream supportedNumChannels;
128
+ for (auto i = 0; avCodec.ch_layouts[i].nb_channels != 0; ++i) {
129
+ if (i > 0) {
130
+ supportedNumChannels << ", ";
131
+ }
132
+ supportedNumChannels << avCodec.ch_layouts[i].nb_channels;
133
+ }
134
+ #else
135
+ if (avCodec.channel_layouts == nullptr) {
136
+ // can't validate, same as above.
137
+ return;
138
+ }
139
+ for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) {
140
+ if (numChannels ==
141
+ av_get_channel_layout_nb_channels(avCodec.channel_layouts[i])) {
142
+ return;
143
+ }
144
+ }
145
+ // At this point it seems that the encoder doesn't support the requested
146
+ // number of channels, so we error out.
147
+ std::stringstream supportedNumChannels;
148
+ for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) {
149
+ if (i > 0) {
150
+ supportedNumChannels << ", ";
151
+ }
152
+ supportedNumChannels << av_get_channel_layout_nb_channels(
153
+ avCodec.channel_layouts[i]);
154
+ }
155
+ #endif
156
+ TORCH_CHECK(
157
+ false,
158
+ "Desired number of channels (",
159
+ numChannels,
160
+ ") is not supported by the ",
161
+ "encoder. Supported number of channels are: ",
162
+ supportedNumChannels.str(),
163
+ ".");
164
+ }
165
+
166
+ namespace {
167
+ #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
168
+
169
+ // Returns:
170
+ // - the srcAVFrame's channel layout if srcAVFrame has outNumChannels
171
+ // - the default channel layout with outNumChannels otherwise.
172
+ AVChannelLayout getOutputChannelLayout(
173
+ int outNumChannels,
174
+ const UniqueAVFrame& srcAVFrame) {
175
+ AVChannelLayout outLayout;
176
+ if (outNumChannels == getNumChannels(srcAVFrame)) {
177
+ outLayout = srcAVFrame->ch_layout;
178
+ } else {
179
+ av_channel_layout_default(&outLayout, outNumChannels);
180
+ }
181
+ return outLayout;
182
+ }
183
+
184
+ #else
185
+
186
+ // Same as above
187
+ int64_t getOutputChannelLayout(
188
+ int outNumChannels,
189
+ const UniqueAVFrame& srcAVFrame) {
190
+ int64_t outLayout;
191
+ if (outNumChannels == getNumChannels(srcAVFrame)) {
192
+ outLayout = srcAVFrame->channel_layout;
193
+ } else {
194
+ outLayout = av_get_default_channel_layout(outNumChannels);
195
+ }
196
+ return outLayout;
197
+ }
198
+ #endif
199
+ } // namespace
200
+
201
+ // Sets dstAVFrame' channel layout to getOutputChannelLayout(): see doc above
202
+ void setChannelLayout(
203
+ UniqueAVFrame& dstAVFrame,
204
+ const UniqueAVFrame& srcAVFrame,
205
+ int outNumChannels) {
206
+ #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
207
+ AVChannelLayout outLayout =
208
+ getOutputChannelLayout(outNumChannels, srcAVFrame);
209
+ auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &outLayout);
210
+ TORCH_CHECK(
211
+ status == AVSUCCESS,
212
+ "Couldn't copy channel layout to avFrame: ",
213
+ getFFMPEGErrorStringFromErrorCode(status));
214
+ #else
215
+ dstAVFrame->channel_layout =
216
+ getOutputChannelLayout(outNumChannels, srcAVFrame);
217
+ dstAVFrame->channels = outNumChannels;
218
+ #endif
219
+ }
220
+
221
+ UniqueAVFrame allocateAVFrame(
222
+ int numSamples,
223
+ int sampleRate,
224
+ int numChannels,
225
+ AVSampleFormat sampleFormat) {
226
+ auto avFrame = UniqueAVFrame(av_frame_alloc());
227
+ TORCH_CHECK(avFrame != nullptr, "Couldn't allocate AVFrame.");
228
+
229
+ avFrame->nb_samples = numSamples;
230
+ avFrame->sample_rate = sampleRate;
231
+ setDefaultChannelLayout(avFrame, numChannels);
232
+ avFrame->format = sampleFormat;
233
+ auto status = av_frame_get_buffer(avFrame.get(), 0);
234
+
235
+ TORCH_CHECK(
236
+ status == AVSUCCESS,
237
+ "Couldn't allocate avFrame's buffers: ",
238
+ getFFMPEGErrorStringFromErrorCode(status));
239
+
240
+ status = av_frame_make_writable(avFrame.get());
241
+ TORCH_CHECK(
242
+ status == AVSUCCESS,
243
+ "Couldn't make AVFrame writable: ",
244
+ getFFMPEGErrorStringFromErrorCode(status));
245
+ return avFrame;
246
+ }
247
+
248
+ SwrContext* createSwrContext(
249
+ AVSampleFormat srcSampleFormat,
250
+ AVSampleFormat outSampleFormat,
251
+ int srcSampleRate,
252
+ int outSampleRate,
253
+ const UniqueAVFrame& srcAVFrame,
254
+ int outNumChannels) {
255
+ SwrContext* swrContext = nullptr;
256
+ int status = AVSUCCESS;
257
+ #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
258
+ AVChannelLayout outLayout =
259
+ getOutputChannelLayout(outNumChannels, srcAVFrame);
260
+ status = swr_alloc_set_opts2(
261
+ &swrContext,
262
+ &outLayout,
263
+ outSampleFormat,
264
+ outSampleRate,
265
+ &srcAVFrame->ch_layout,
266
+ srcSampleFormat,
267
+ srcSampleRate,
268
+ 0,
269
+ nullptr);
270
+
271
+ TORCH_CHECK(
272
+ status == AVSUCCESS,
273
+ "Couldn't create SwrContext: ",
274
+ getFFMPEGErrorStringFromErrorCode(status));
275
+ #else
276
+ int64_t outLayout = getOutputChannelLayout(outNumChannels, srcAVFrame);
277
+ swrContext = swr_alloc_set_opts(
278
+ nullptr,
279
+ outLayout,
280
+ outSampleFormat,
281
+ outSampleRate,
282
+ srcAVFrame->channel_layout,
283
+ srcSampleFormat,
284
+ srcSampleRate,
285
+ 0,
286
+ nullptr);
287
+ #endif
288
+
289
+ TORCH_CHECK(swrContext != nullptr, "Couldn't create swrContext");
290
+ status = swr_init(swrContext);
291
+ TORCH_CHECK(
292
+ status == AVSUCCESS,
293
+ "Couldn't initialize SwrContext: ",
294
+ getFFMPEGErrorStringFromErrorCode(status),
295
+ ". If the error says 'Invalid argument', it's likely that you are using "
296
+ "a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
297
+ "valid scenarios. Try to upgrade FFmpeg?");
298
+ return swrContext;
299
+ }
300
+
301
+ UniqueAVFrame convertAudioAVFrameSamples(
302
+ const UniqueSwrContext& swrContext,
303
+ const UniqueAVFrame& srcAVFrame,
304
+ AVSampleFormat outSampleFormat,
305
+ int outSampleRate,
306
+ int outNumChannels) {
307
+ UniqueAVFrame convertedAVFrame(av_frame_alloc());
308
+ TORCH_CHECK(
309
+ convertedAVFrame,
310
+ "Could not allocate frame for sample format conversion.");
311
+
312
+ convertedAVFrame->pts = srcAVFrame->pts;
313
+ convertedAVFrame->format = static_cast<int>(outSampleFormat);
314
+
315
+ convertedAVFrame->sample_rate = outSampleRate;
316
+ int srcSampleRate = srcAVFrame->sample_rate;
317
+ if (srcSampleRate != outSampleRate) {
318
+ // Note that this is an upper bound on the number of output samples.
319
+ // `swr_convert()` will likely not fill convertedAVFrame with that many
320
+ // samples if sample rate conversion is needed. It will buffer the last few
321
+ // ones because those require future samples. That's also why we reset
322
+ // nb_samples after the call to `swr_convert()`.
323
+ // We could also use `swr_get_out_samples()` to determine the number of
324
+ // output samples, but empirically `av_rescale_rnd()` seems to provide a
325
+ // tighter bound.
326
+ convertedAVFrame->nb_samples = av_rescale_rnd(
327
+ swr_get_delay(swrContext.get(), srcSampleRate) + srcAVFrame->nb_samples,
328
+ outSampleRate,
329
+ srcSampleRate,
330
+ AV_ROUND_UP);
331
+ } else {
332
+ convertedAVFrame->nb_samples = srcAVFrame->nb_samples;
333
+ }
334
+
335
+ setChannelLayout(convertedAVFrame, srcAVFrame, outNumChannels);
336
+
337
+ auto status = av_frame_get_buffer(convertedAVFrame.get(), 0);
338
+ TORCH_CHECK(
339
+ status == AVSUCCESS,
340
+ "Could not allocate frame buffers for sample format conversion: ",
341
+ getFFMPEGErrorStringFromErrorCode(status));
342
+
343
+ auto numConvertedSamples = swr_convert(
344
+ swrContext.get(),
345
+ convertedAVFrame->data,
346
+ convertedAVFrame->nb_samples,
347
+ static_cast<const uint8_t**>(
348
+ const_cast<const uint8_t**>(srcAVFrame->data)),
349
+ srcAVFrame->nb_samples);
350
+ // numConvertedSamples can be 0 if we're downsampling by a great factor and
351
+ // the first frame doesn't contain a lot of samples. It should be handled
352
+ // properly by the caller.
353
+ TORCH_CHECK(
354
+ numConvertedSamples >= 0,
355
+ "Error in swr_convert: ",
356
+ getFFMPEGErrorStringFromErrorCode(numConvertedSamples));
357
+
358
+ // See comment above about nb_samples
359
+ convertedAVFrame->nb_samples = numConvertedSamples;
360
+
361
+ return convertedAVFrame;
362
+ }
363
+
364
+ void setFFmpegLogLevel() {
365
+ auto logLevel = AV_LOG_QUIET;
366
+ const char* logLevelEnvPtr = std::getenv("TORCHCODEC_FFMPEG_LOG_LEVEL");
367
+ if (logLevelEnvPtr != nullptr) {
368
+ std::string logLevelEnv(logLevelEnvPtr);
369
+ if (logLevelEnv == "QUIET") {
370
+ logLevel = AV_LOG_QUIET;
371
+ } else if (logLevelEnv == "PANIC") {
372
+ logLevel = AV_LOG_PANIC;
373
+ } else if (logLevelEnv == "FATAL") {
374
+ logLevel = AV_LOG_FATAL;
375
+ } else if (logLevelEnv == "ERROR") {
376
+ logLevel = AV_LOG_ERROR;
377
+ } else if (logLevelEnv == "WARNING") {
378
+ logLevel = AV_LOG_WARNING;
379
+ } else if (logLevelEnv == "INFO") {
380
+ logLevel = AV_LOG_INFO;
381
+ } else if (logLevelEnv == "VERBOSE") {
382
+ logLevel = AV_LOG_VERBOSE;
383
+ } else if (logLevelEnv == "DEBUG") {
384
+ logLevel = AV_LOG_DEBUG;
385
+ } else if (logLevelEnv == "TRACE") {
386
+ logLevel = AV_LOG_TRACE;
387
+ } else {
388
+ TORCH_CHECK(
389
+ false,
390
+ "Invalid TORCHCODEC_FFMPEG_LOG_LEVEL: ",
391
+ logLevelEnv,
392
+ ". Use e.g. 'QUIET', 'PANIC', 'VERBOSE', etc.");
393
+ }
394
+ }
395
+ av_log_set_level(logLevel);
396
+ }
397
+
398
+ AVIOContext* avioAllocContext(
399
+ uint8_t* buffer,
400
+ int buffer_size,
401
+ int write_flag,
402
+ void* opaque,
403
+ AVIOReadFunction read_packet,
404
+ AVIOWriteFunction write_packet,
405
+ AVIOSeekFunction seek) {
406
+ return avio_alloc_context(
407
+ buffer,
408
+ buffer_size,
409
+ write_flag,
410
+ opaque,
411
+ read_packet,
412
+ // The buf parameter of the write function is not const before FFmpeg 7.
413
+ #if LIBAVFILTER_VERSION_MAJOR >= 10 // FFmpeg >= 7
414
+ write_packet,
415
+ #else
416
+ reinterpret_cast<AVIOWriteFunctionOld>(write_packet),
417
+ #endif
418
+ seek);
419
+ }
420
+
421
+ } // namespace facebook::torchcodec