torchcodec-0.8.0-cp313-cp313-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchcodec might be problematic.

Files changed (82)
  1. torchcodec/.dylibs/libc++.1.0.dylib +0 -0
  2. torchcodec/.dylibs/libpython3.13.dylib +0 -0
  3. torchcodec/__init__.py +16 -0
  4. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  5. torchcodec/_core/AVIOContextHolder.h +64 -0
  6. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  7. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  8. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  9. torchcodec/_core/AVIOTensorContext.h +43 -0
  10. torchcodec/_core/BetaCudaDeviceInterface.cpp +636 -0
  11. torchcodec/_core/BetaCudaDeviceInterface.h +191 -0
  12. torchcodec/_core/CMakeLists.txt +325 -0
  13. torchcodec/_core/CUDACommon.cpp +315 -0
  14. torchcodec/_core/CUDACommon.h +46 -0
  15. torchcodec/_core/Cache.h +138 -0
  16. torchcodec/_core/CpuDeviceInterface.cpp +347 -0
  17. torchcodec/_core/CpuDeviceInterface.h +132 -0
  18. torchcodec/_core/CudaDeviceInterface.cpp +357 -0
  19. torchcodec/_core/CudaDeviceInterface.h +64 -0
  20. torchcodec/_core/DeviceInterface.cpp +117 -0
  21. torchcodec/_core/DeviceInterface.h +148 -0
  22. torchcodec/_core/Encoder.cpp +807 -0
  23. torchcodec/_core/Encoder.h +173 -0
  24. torchcodec/_core/FFMPEGCommon.cpp +608 -0
  25. torchcodec/_core/FFMPEGCommon.h +245 -0
  26. torchcodec/_core/FilterGraph.cpp +149 -0
  27. torchcodec/_core/FilterGraph.h +59 -0
  28. torchcodec/_core/Frame.cpp +42 -0
  29. torchcodec/_core/Frame.h +72 -0
  30. torchcodec/_core/Metadata.h +72 -0
  31. torchcodec/_core/NVDECCache.cpp +70 -0
  32. torchcodec/_core/NVDECCache.h +104 -0
  33. torchcodec/_core/SingleStreamDecoder.cpp +1719 -0
  34. torchcodec/_core/SingleStreamDecoder.h +405 -0
  35. torchcodec/_core/StreamOptions.h +63 -0
  36. torchcodec/_core/Transform.cpp +60 -0
  37. torchcodec/_core/Transform.h +59 -0
  38. torchcodec/_core/ValidationUtils.cpp +35 -0
  39. torchcodec/_core/ValidationUtils.h +21 -0
  40. torchcodec/_core/__init__.py +41 -0
  41. torchcodec/_core/_metadata.py +317 -0
  42. torchcodec/_core/custom_ops.cpp +875 -0
  43. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +360 -0
  44. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  45. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  46. torchcodec/_core/ops.py +498 -0
  47. torchcodec/_core/pybind_ops.cpp +50 -0
  48. torchcodec/_frame.py +145 -0
  49. torchcodec/_internally_replaced_utils.py +67 -0
  50. torchcodec/_samplers/__init__.py +7 -0
  51. torchcodec/_samplers/video_clip_sampler.py +418 -0
  52. torchcodec/decoders/__init__.py +12 -0
  53. torchcodec/decoders/_audio_decoder.py +177 -0
  54. torchcodec/decoders/_decoder_utils.py +112 -0
  55. torchcodec/decoders/_video_decoder.py +500 -0
  56. torchcodec/encoders/__init__.py +1 -0
  57. torchcodec/encoders/_audio_encoder.py +150 -0
  58. torchcodec/libtorchcodec_core4.dylib +0 -0
  59. torchcodec/libtorchcodec_core5.dylib +0 -0
  60. torchcodec/libtorchcodec_core6.dylib +0 -0
  61. torchcodec/libtorchcodec_core7.dylib +0 -0
  62. torchcodec/libtorchcodec_core8.dylib +0 -0
  63. torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
  64. torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
  65. torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
  66. torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
  67. torchcodec/libtorchcodec_custom_ops8.dylib +0 -0
  68. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  69. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  73. torchcodec/samplers/__init__.py +2 -0
  74. torchcodec/samplers/_common.py +84 -0
  75. torchcodec/samplers/_index_based.py +287 -0
  76. torchcodec/samplers/_time_based.py +358 -0
  77. torchcodec/version.py +2 -0
  78. torchcodec-0.8.0.dist-info/METADATA +253 -0
  79. torchcodec-0.8.0.dist-info/RECORD +82 -0
  80. torchcodec-0.8.0.dist-info/WHEEL +5 -0
  81. torchcodec-0.8.0.dist-info/licenses/LICENSE +28 -0
  82. torchcodec-0.8.0.dist-info/top_level.txt +2 -0
torchcodec/_core/CudaDeviceInterface.cpp
@@ -0,0 +1,357 @@
+ #include <ATen/cuda/CUDAEvent.h>
+ #include <c10/cuda/CUDAStream.h>
+ #include <torch/types.h>
+ #include <mutex>
+
+ #include "src/torchcodec/_core/Cache.h"
+ #include "src/torchcodec/_core/CudaDeviceInterface.h"
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
+
+ extern "C" {
+ #include <libavutil/hwcontext_cuda.h>
+ #include <libavutil/pixdesc.h>
+ }
+
+ namespace facebook::torchcodec {
+ namespace {
+
+ static bool g_cuda = registerDeviceInterface(
+     DeviceInterfaceKey(torch::kCUDA),
+     [](const torch::Device& device) {
+       return new CudaDeviceInterface(device);
+     });
+
+ // We reuse cuda contexts across VideoDecoder instances. This is because
+ // creating a cuda context is expensive. The cache mechanism is as follows:
+ // 1. There is a cache of size MAX_CONTEXTS_PER_GPU_IN_CACHE cuda contexts for
+ //    each GPU.
+ // 2. When we destroy a SingleStreamDecoder instance we release the cuda context
+ //    to
+ //    the cache if the cache is not full.
+ // 3. When we create a SingleStreamDecoder instance we try to get a cuda context
+ //    from
+ //    the cache. If the cache is empty we create a new cuda context.
+
+ // Pytorch can only handle up to 128 GPUs.
+ // https://github.com/pytorch/pytorch/blob/e30c55ee527b40d67555464b9e402b4b7ce03737/c10/cuda/CUDAMacros.h#L44
+ const int MAX_CUDA_GPUS = 128;
+ // Set to -1 to have an infinitely sized cache. Set it to 0 to disable caching.
+ // Set to a positive number to have a cache of that size.
+ const int MAX_CONTEXTS_PER_GPU_IN_CACHE = -1;
+ PerGpuCache<AVBufferRef, Deleterp<AVBufferRef, void, av_buffer_unref>>
+     g_cached_hw_device_ctxs(MAX_CUDA_GPUS, MAX_CONTEXTS_PER_GPU_IN_CACHE);
+
+ int getFlagsAVHardwareDeviceContextCreate() {
+   // 58.26.100 introduced the concept of reusing the existing cuda context
+   // which is much faster and lower memory than creating a new cuda context.
+ #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 26, 100)
+   return AV_CUDA_USE_CURRENT_CONTEXT;
+ #else
+   return 0;
+ #endif
+ }
+
+ UniqueAVBufferRef getHardwareDeviceContext(const torch::Device& device) {
+   enum AVHWDeviceType type = av_hwdevice_find_type_by_name("cuda");
+   TORCH_CHECK(type != AV_HWDEVICE_TYPE_NONE, "Failed to find cuda device");
+   torch::DeviceIndex nonNegativeDeviceIndex = getNonNegativeDeviceIndex(device);
+
+   UniqueAVBufferRef hardwareDeviceCtx = g_cached_hw_device_ctxs.get(device);
+   if (hardwareDeviceCtx) {
+     return hardwareDeviceCtx;
+   }
+
+   // Create hardware device context
+   c10::cuda::CUDAGuard deviceGuard(device);
+   // Valid values for the argument to cudaSetDevice are 0 to maxDevices - 1:
+   // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g159587909ffa0791bbe4b40187a4c6bb
+   // So we ensure the deviceIndex is not negative.
+   // We set the device because we may be called from a different thread than
+   // the one that initialized the cuda context.
+   cudaSetDevice(nonNegativeDeviceIndex);
+   AVBufferRef* hardwareDeviceCtxRaw = nullptr;
+   std::string deviceOrdinal = std::to_string(nonNegativeDeviceIndex);
+
+   int err = av_hwdevice_ctx_create(
+       &hardwareDeviceCtxRaw,
+       type,
+       deviceOrdinal.c_str(),
+       nullptr,
+       getFlagsAVHardwareDeviceContextCreate());
+
+   if (err < 0) {
+     /* clang-format off */
+     TORCH_CHECK(
+         false,
+         "Failed to create specified HW device. This typically happens when ",
+         "your installed FFmpeg doesn't support CUDA (see ",
+         "https://github.com/pytorch/torchcodec#installing-cuda-enabled-torchcodec",
+         "). FFmpeg error: ", getFFMPEGErrorStringFromErrorCode(err));
+     /* clang-format on */
+   }
+
+   return UniqueAVBufferRef(hardwareDeviceCtxRaw);
+ }
+
+ } // namespace
+
+ CudaDeviceInterface::CudaDeviceInterface(const torch::Device& device)
+     : DeviceInterface(device) {
+   TORCH_CHECK(g_cuda, "CudaDeviceInterface was not registered!");
+   TORCH_CHECK(
+       device_.type() == torch::kCUDA, "Unsupported device: ", device_.str());
+
+   initializeCudaContextWithPytorch(device_);
+
+   hardwareDeviceCtx_ = getHardwareDeviceContext(device_);
+   nppCtx_ = getNppStreamContext(device_);
+ }
+
+ CudaDeviceInterface::~CudaDeviceInterface() {
+   if (hardwareDeviceCtx_) {
+     g_cached_hw_device_ctxs.addIfCacheHasCapacity(
+         device_, std::move(hardwareDeviceCtx_));
+   }
+   returnNppStreamContextToCache(device_, std::move(nppCtx_));
+ }
+
+ void CudaDeviceInterface::initialize(
+     const AVStream* avStream,
+     const UniqueDecodingAVFormatContext& avFormatCtx) {
+   TORCH_CHECK(avStream != nullptr, "avStream is null");
+   timeBase_ = avStream->time_base;
+
+   // TODO: Ideally, we should keep all interface implementations independent.
+   cpuInterface_ = createDeviceInterface(torch::kCPU);
+   TORCH_CHECK(
+       cpuInterface_ != nullptr, "Failed to create CPU device interface");
+   cpuInterface_->initialize(avStream, avFormatCtx);
+   cpuInterface_->initializeVideo(
+       VideoStreamOptions(),
+       {},
+       /*resizedOutputDims=*/std::nullopt);
+ }
+
+ void CudaDeviceInterface::initializeVideo(
+     const VideoStreamOptions& videoStreamOptions,
+     [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>& transforms,
+     [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims) {
+   videoStreamOptions_ = videoStreamOptions;
+ }
+
+ void CudaDeviceInterface::registerHardwareDeviceWithCodec(
+     AVCodecContext* codecContext) {
+   TORCH_CHECK(
+       hardwareDeviceCtx_, "Hardware device context has not been initialized");
+   TORCH_CHECK(codecContext != nullptr, "codecContext is null");
+   codecContext->hw_device_ctx = av_buffer_ref(hardwareDeviceCtx_.get());
+ }
+
+ UniqueAVFrame CudaDeviceInterface::maybeConvertAVFrameToNV12OrRGB24(
+     UniqueAVFrame& avFrame) {
+   // We need FFmpeg filters to handle those conversion cases which are not
+   // directly implemented in CUDA or CPU device interface (in case of a
+   // fallback).
+
+   // Input frame is on CPU, we will just pass it to CPU device interface, so
+   // skipping filters context as CPU device interface will handle everything for
+   // us.
+   if (avFrame->format != AV_PIX_FMT_CUDA) {
+     return std::move(avFrame);
+   }
+
+   auto hwFramesCtx =
+       reinterpret_cast<AVHWFramesContext*>(avFrame->hw_frames_ctx->data);
+   TORCH_CHECK(
+       hwFramesCtx != nullptr,
+       "The AVFrame does not have a hw_frames_ctx. "
+       "That's unexpected, please report this to the TorchCodec repo.");
+
+   AVPixelFormat actualFormat = hwFramesCtx->sw_format;
+
+   // If the frame is already in NV12 format, we don't need to do anything.
+   if (actualFormat == AV_PIX_FMT_NV12) {
+     return std::move(avFrame);
+   }
+
+   AVPixelFormat outputFormat;
+   std::stringstream filters;
+
+   unsigned version_int = avfilter_version();
+   if (version_int < AV_VERSION_INT(8, 0, 103)) {
+     // Color conversion support ('format=' option) was added to scale_cuda from
+     // n5.0. With the earlier version of ffmpeg we have no choice but use CPU
+     // filters. See:
+     // https://github.com/FFmpeg/FFmpeg/commit/62dc5df941f5e196164c151691e4274195523e95
+     outputFormat = AV_PIX_FMT_RGB24;
+
+     auto actualFormatName = av_get_pix_fmt_name(actualFormat);
+     TORCH_CHECK(
+         actualFormatName != nullptr,
+         "The actual format of a frame is unknown to FFmpeg. "
+         "That's unexpected, please report this to the TorchCodec repo.");
+
+     filters << "hwdownload,format=" << actualFormatName;
+   } else {
+     // Actual output color format will be set via filter options
+     outputFormat = AV_PIX_FMT_CUDA;
+
+     filters << "scale_cuda=format=nv12:interp_algo=bilinear";
+   }
+
+   enum AVPixelFormat frameFormat =
+       static_cast<enum AVPixelFormat>(avFrame->format);
+
+   auto newContext = std::make_unique<FiltersContext>(
+       avFrame->width,
+       avFrame->height,
+       frameFormat,
+       avFrame->sample_aspect_ratio,
+       avFrame->width,
+       avFrame->height,
+       outputFormat,
+       filters.str(),
+       timeBase_,
+       av_buffer_ref(avFrame->hw_frames_ctx));
+
+   if (!nv12Conversion_ || *nv12ConversionContext_ != *newContext) {
+     nv12Conversion_ =
+         std::make_unique<FilterGraph>(*newContext, videoStreamOptions_);
+     nv12ConversionContext_ = std::move(newContext);
+   }
+   auto filteredAVFrame = nv12Conversion_->convert(avFrame);
+
+   // If this check fails it means the frame wasn't
+   // reshaped to its expected dimensions by filtergraph.
+   TORCH_CHECK(
+       (filteredAVFrame->width == nv12ConversionContext_->outputWidth) &&
+           (filteredAVFrame->height == nv12ConversionContext_->outputHeight),
+       "Expected frame from filter graph of ",
+       nv12ConversionContext_->outputWidth,
+       "x",
+       nv12ConversionContext_->outputHeight,
+       ", got ",
+       filteredAVFrame->width,
+       "x",
+       filteredAVFrame->height);
+
+   return filteredAVFrame;
+ }
+
+ void CudaDeviceInterface::convertAVFrameToFrameOutput(
+     UniqueAVFrame& avFrame,
+     FrameOutput& frameOutput,
+     std::optional<torch::Tensor> preAllocatedOutputTensor) {
+   validatePreAllocatedTensorShape(preAllocatedOutputTensor, avFrame);
+
+   // All of our CUDA decoding assumes NV12 format. We handle non-NV12 formats by
+   // converting them to NV12.
+   avFrame = maybeConvertAVFrameToNV12OrRGB24(avFrame);
+
+   if (avFrame->format != AV_PIX_FMT_CUDA) {
+     // The frame's format is AV_PIX_FMT_CUDA if and only if its content is on
+     // the GPU. In this branch, the frame is on the CPU. There are two possible
+     // reasons:
+     //
+     // 1. During maybeConvertAVFrameToNV12OrRGB24(), we had a non-NV12 format
+     //    frame and we're on FFmpeg 4.4 or earlier. In such cases, we had to
+     //    use CPU filters and we just converted the frame to RGB24.
+     // 2. This is what NVDEC gave us if it wasn't able to decode a frame, for
+     //    whatever reason. Typically that happens if the video's encoder isn't
+     //    supported by NVDEC.
+     //
+     // In both cases, we have a frame on the CPU. We send the frame back to the
+     // CUDA device when we're done.
+
+     enum AVPixelFormat frameFormat =
+         static_cast<enum AVPixelFormat>(avFrame->format);
+
+     FrameOutput cpuFrameOutput;
+     if (frameFormat == AV_PIX_FMT_RGB24) {
+       // Reason 1 above. The frame is already in RGB24, we just need to convert
+       // it to a tensor.
+       cpuFrameOutput.data = rgbAVFrameToTensor(avFrame);
+     } else {
+       // Reason 2 above. We need to do a full conversion which requires an
+       // actual CPU device.
+       cpuInterface_->convertAVFrameToFrameOutput(avFrame, cpuFrameOutput);
+     }
+
+     // Finally, we need to send the frame back to the GPU. Note that the
+     // pre-allocated tensor is on the GPU, so we can't send that to the CPU
+     // device interface. We copy it over here.
+     if (preAllocatedOutputTensor.has_value()) {
+       preAllocatedOutputTensor.value().copy_(cpuFrameOutput.data);
+       frameOutput.data = preAllocatedOutputTensor.value();
+     } else {
+       frameOutput.data = cpuFrameOutput.data.to(device_);
+     }
+
+     return;
+   }
+
+   // Above we checked that the AVFrame was on GPU, but that's not enough, we
+   // also need to check that the AVFrame is in AV_PIX_FMT_NV12 format (8 bits),
+   // because this is what the NPP color conversion routines expect. This SHOULD
+   // be enforced by our call to maybeConvertAVFrameToNV12OrRGB24() above.
+   TORCH_CHECK(
+       avFrame->hw_frames_ctx != nullptr,
+       "The AVFrame does not have a hw_frames_ctx. This should never happen");
+   AVHWFramesContext* hwFramesCtx =
+       reinterpret_cast<AVHWFramesContext*>(avFrame->hw_frames_ctx->data);
+   TORCH_CHECK(
+       hwFramesCtx != nullptr,
+       "The AVFrame does not have a valid hw_frames_ctx. This should never happen");
+
+   AVPixelFormat actualFormat = hwFramesCtx->sw_format;
+   TORCH_CHECK(
+       actualFormat == AV_PIX_FMT_NV12,
+       "The AVFrame is ",
+       (av_get_pix_fmt_name(actualFormat) ? av_get_pix_fmt_name(actualFormat)
+                                          : "unknown"),
+       ", but we expected AV_PIX_FMT_NV12. "
+       "That's unexpected, please report this to the TorchCodec repo.");
+
+   // Figure out the NVDEC stream from the avFrame's hardware context.
+   // In reality, we know that this stream is hardcoded to be the default stream
+   // by FFmpeg:
+   // https://github.com/FFmpeg/FFmpeg/blob/66e40840d15b514f275ce3ce2a4bf72ec68c7311/libavutil/hwcontext_cuda.c#L387-L388
+   TORCH_CHECK(
+       hwFramesCtx->device_ctx != nullptr,
+       "The AVFrame's hw_frames_ctx does not have a device_ctx. ");
+   auto cudaDeviceCtx =
+       static_cast<AVCUDADeviceContext*>(hwFramesCtx->device_ctx->hwctx);
+   TORCH_CHECK(cudaDeviceCtx != nullptr, "The hardware context is null");
+   at::cuda::CUDAStream nvdecStream = // That's always the default stream. Sad.
+       c10::cuda::getStreamFromExternal(cudaDeviceCtx->stream, device_.index());
+
+   frameOutput.data = convertNV12FrameToRGB(
+       avFrame, device_, nppCtx_, nvdecStream, preAllocatedOutputTensor);
+ }
+
+ // inspired by https://github.com/FFmpeg/FFmpeg/commit/ad67ea9
+ // we have to do this because of an FFmpeg bug where hardware decoding is not
+ // appropriately set, so we just go off and find the matching codec for the CUDA
+ // device
+ std::optional<const AVCodec*> CudaDeviceInterface::findCodec(
+     const AVCodecID& codecId) {
+   void* i = nullptr;
+   const AVCodec* codec = nullptr;
+   while ((codec = av_codec_iterate(&i)) != nullptr) {
+     if (codec->id != codecId || !av_codec_is_decoder(codec)) {
+       continue;
+     }
+
+     const AVCodecHWConfig* config = nullptr;
+     for (int j = 0; (config = avcodec_get_hw_config(codec, j)) != nullptr;
+          ++j) {
+       if (config->device_type == AV_HWDEVICE_TYPE_CUDA) {
+         return codec;
+       }
+     }
+   }
+
+   return std::nullopt;
+ }
+
+ } // namespace facebook::torchcodec
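
The implementation above relies on static self-registration: the g_cuda boolean at the top of the file calls registerDeviceInterface() during static initialization, which is how the generic factory in DeviceInterface.cpp later knows how to build a CudaDeviceInterface for torch::kCUDA devices. The sketch below is illustrative only and is not part of the wheel; EchoDeviceInterface and the "sketch" variant are made-up names used to show the same registration idiom.

// Hypothetical example -- not shipped in torchcodec. It only illustrates the
// registration pattern used by CudaDeviceInterface.cpp above.
#include "src/torchcodec/_core/DeviceInterface.h"

namespace facebook::torchcodec {
namespace {

class EchoDeviceInterface : public DeviceInterface {
 public:
  explicit EchoDeviceInterface(const torch::Device& device)
      : DeviceInterface(device) {}

  void initialize(
      const AVStream* avStream,
      [[maybe_unused]] const UniqueDecodingAVFormatContext& avFormatCtx)
      override {
    TORCH_CHECK(avStream != nullptr, "avStream is null");
  }

  void convertAVFrameToFrameOutput(
      UniqueAVFrame& avFrame,
      FrameOutput& frameOutput,
      [[maybe_unused]] std::optional<torch::Tensor> preAllocatedOutputTensor =
          std::nullopt) override {
    // A real interface would do color conversion here; this sketch assumes the
    // frame is already RGB24 and simply wraps it as a tensor.
    frameOutput.data = rgbAVFrameToTensor(avFrame);
  }
};

// Same idiom as g_cuda above: the returned bool pins the registration at
// static-initialization time.
static bool g_echo = registerDeviceInterface(
    DeviceInterfaceKey(torch::kCPU, "sketch"),
    [](const torch::Device& device) { return new EchoDeviceInterface(device); });

} // namespace
} // namespace facebook::torchcodec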
torchcodec/_core/CudaDeviceInterface.h
@@ -0,0 +1,64 @@
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
+ // All rights reserved.
+ //
+ // This source code is licensed under the BSD-style license found in the
+ // LICENSE file in the root directory of this source tree.
+
+ #pragma once
+
+ #include "src/torchcodec/_core/CUDACommon.h"
+ #include "src/torchcodec/_core/DeviceInterface.h"
+ #include "src/torchcodec/_core/FilterGraph.h"
+
+ namespace facebook::torchcodec {
+
+ class CudaDeviceInterface : public DeviceInterface {
+  public:
+   CudaDeviceInterface(const torch::Device& device);
+
+   virtual ~CudaDeviceInterface();
+
+   std::optional<const AVCodec*> findCodec(const AVCodecID& codecId) override;
+
+   void initialize(
+       const AVStream* avStream,
+       const UniqueDecodingAVFormatContext& avFormatCtx) override;
+
+   void initializeVideo(
+       const VideoStreamOptions& videoStreamOptions,
+       [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
+           transforms,
+       [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims)
+       override;
+
+   void registerHardwareDeviceWithCodec(AVCodecContext* codecContext) override;
+
+   void convertAVFrameToFrameOutput(
+       UniqueAVFrame& avFrame,
+       FrameOutput& frameOutput,
+       std::optional<torch::Tensor> preAllocatedOutputTensor =
+           std::nullopt) override;
+
+  private:
+   // Our CUDA decoding code assumes NV12 format. In order to handle other
+   // kinds of input, we need to convert them to NV12. Our current implementation
+   // does this using filtergraph.
+   UniqueAVFrame maybeConvertAVFrameToNV12OrRGB24(UniqueAVFrame& avFrame);
+
+   // We sometimes encounter frames that cannot be decoded on the CUDA device.
+   // Rather than erroring out, we decode them on the CPU.
+   std::unique_ptr<DeviceInterface> cpuInterface_;
+
+   VideoStreamOptions videoStreamOptions_;
+   AVRational timeBase_;
+
+   UniqueAVBufferRef hardwareDeviceCtx_;
+   UniqueNppContext nppCtx_;
+
+   // This filtergraph instance is only used for NV12 format conversion in
+   // maybeConvertAVFrameToNV12().
+   std::unique_ptr<FiltersContext> nv12ConversionContext_;
+   std::unique_ptr<FilterGraph> nv12Conversion_;
+ };
+
+ } // namespace facebook::torchcodec
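
The header above declares the full lifecycle of the CUDA interface: construction, stream initialization, video-specific initialization, codec registration, and frame conversion. The following sketch is a hypothetical caller, not shipped code; the real decoder wires these calls up internally and the actual call order may differ.

// Hypothetical wiring of the CudaDeviceInterface entry points declared above.
#include "src/torchcodec/_core/DeviceInterface.h"

namespace facebook::torchcodec {

void setUpCudaDecoding(
    const AVStream* avStream,
    const UniqueDecodingAVFormatContext& avFormatCtx,
    AVCodecContext* codecContext) {
  // createDeviceInterface() resolves to CudaDeviceInterface via the registry.
  auto deviceInterface =
      createDeviceInterface(torch::Device(torch::kCUDA, 0));
  deviceInterface->initialize(avStream, avFormatCtx);
  deviceInterface->initializeVideo(
      VideoStreamOptions(), {}, /*resizedOutputDims=*/std::nullopt);
  deviceInterface->registerHardwareDeviceWithCodec(codecContext);
  // Decoded AV_PIX_FMT_CUDA frames can then be passed to
  // convertAVFrameToFrameOutput() to obtain RGB tensors on the GPU.
}

} // namespace facebook::torchcodec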
torchcodec/_core/DeviceInterface.cpp
@@ -0,0 +1,117 @@
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
+ // All rights reserved.
+ //
+ // This source code is licensed under the BSD-style license found in the
+ // LICENSE file in the root directory of this source tree.
+
+ #include "src/torchcodec/_core/DeviceInterface.h"
+ #include <map>
+ #include <mutex>
+
+ namespace facebook::torchcodec {
+
+ namespace {
+ using DeviceInterfaceMap =
+     std::map<DeviceInterfaceKey, CreateDeviceInterfaceFn>;
+ static std::mutex g_interface_mutex;
+
+ DeviceInterfaceMap& getDeviceMap() {
+   static DeviceInterfaceMap deviceMap;
+   return deviceMap;
+ }
+
+ std::string getDeviceType(const std::string& device) {
+   size_t pos = device.find(':');
+   if (pos == std::string::npos) {
+     return device;
+   }
+   return device.substr(0, pos);
+ }
+
+ } // namespace
+
+ bool registerDeviceInterface(
+     const DeviceInterfaceKey& key,
+     CreateDeviceInterfaceFn createInterface) {
+   std::scoped_lock lock(g_interface_mutex);
+   DeviceInterfaceMap& deviceMap = getDeviceMap();
+
+   TORCH_CHECK(
+       deviceMap.find(key) == deviceMap.end(),
+       "Device interface already registered for device type ",
+       key.deviceType,
+       " variant '",
+       key.variant,
+       "'");
+   deviceMap.insert({key, createInterface});
+
+   return true;
+ }
+
+ void validateDeviceInterface(
+     const std::string device,
+     const std::string variant) {
+   std::scoped_lock lock(g_interface_mutex);
+   std::string deviceType = getDeviceType(device);
+
+   DeviceInterfaceMap& deviceMap = getDeviceMap();
+
+   // Find device interface that matches device type and variant
+   torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type();
+
+   auto deviceInterface = std::find_if(
+       deviceMap.begin(),
+       deviceMap.end(),
+       [&](const std::pair<DeviceInterfaceKey, CreateDeviceInterfaceFn>& arg) {
+         return arg.first.deviceType == deviceTypeEnum &&
+             arg.first.variant == variant;
+       });
+
+   TORCH_CHECK(
+       deviceInterface != deviceMap.end(),
+       "Unsupported device: ",
+       device,
+       " (device type: ",
+       deviceType,
+       ", variant: ",
+       variant,
+       ")");
+ }
+
+ std::unique_ptr<DeviceInterface> createDeviceInterface(
+     const torch::Device& device,
+     const std::string_view variant) {
+   DeviceInterfaceKey key(device.type(), variant);
+   std::scoped_lock lock(g_interface_mutex);
+   DeviceInterfaceMap& deviceMap = getDeviceMap();
+
+   auto it = deviceMap.find(key);
+   if (it != deviceMap.end()) {
+     return std::unique_ptr<DeviceInterface>(it->second(device));
+   }
+
+   TORCH_CHECK(
+       false,
+       "No device interface found for device type: ",
+       device.type(),
+       " variant: '",
+       variant,
+       "'");
+ }
+
+ torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) {
+   TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
+
+   int height = avFrame->height;
+   int width = avFrame->width;
+   std::vector<int64_t> shape = {height, width, 3};
+   std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
+   AVFrame* avFrameClone = av_frame_clone(avFrame.get());
+   auto deleter = [avFrameClone](void*) {
+     UniqueAVFrame avFrameToDelete(avFrameClone);
+   };
+   return torch::from_blob(
+       avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
+ }
+
+ } // namespace facebook::torchcodec
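
Taken together, the three registry functions above form a small service locator: registerDeviceInterface() populates the map at static-initialization time, validateDeviceInterface() checks a user-supplied device string against the map, and createDeviceInterface() instantiates the matching implementation. The snippet below is a hypothetical caller, assuming a device string such as "cpu" or "cuda:0"; torchcodec's real call sites may differ.

// Hypothetical caller of the registry functions defined above.
#include <string>
#include "src/torchcodec/_core/DeviceInterface.h"

namespace facebook::torchcodec {

std::unique_ptr<DeviceInterface> makeInterfaceForDeviceString(
    const std::string& deviceString) {
  // Throws via TORCH_CHECK if no interface was registered for this device type.
  validateDeviceInterface(deviceString, "default");
  // torch::Device parses strings such as "cpu" or "cuda:0".
  torch::Device device(deviceString);
  return createDeviceInterface(device); // variant defaults to "default"
}

} // namespace facebook::torchcodec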
torchcodec/_core/DeviceInterface.h
@@ -0,0 +1,148 @@
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
+ // All rights reserved.
+ //
+ // This source code is licensed under the BSD-style license found in the
+ // LICENSE file in the root directory of this source tree.
+
+ #pragma once
+
+ #include <torch/types.h>
+ #include <functional>
+ #include <memory>
+ #include <stdexcept>
+ #include <string>
+ #include "FFMPEGCommon.h"
+ #include "src/torchcodec/_core/Frame.h"
+ #include "src/torchcodec/_core/StreamOptions.h"
+ #include "src/torchcodec/_core/Transform.h"
+
+ namespace facebook::torchcodec {
+
+ // Key for device interface registration with device type + variant support
+ struct DeviceInterfaceKey {
+   torch::DeviceType deviceType;
+   std::string_view variant = "default"; // e.g., "default", "beta", etc.
+
+   bool operator<(const DeviceInterfaceKey& other) const {
+     if (deviceType != other.deviceType) {
+       return deviceType < other.deviceType;
+     }
+     return variant < other.variant;
+   }
+
+   explicit DeviceInterfaceKey(torch::DeviceType type) : deviceType(type) {}
+
+   DeviceInterfaceKey(torch::DeviceType type, const std::string_view& variant)
+       : deviceType(type), variant(variant) {}
+ };
+
+ class DeviceInterface {
+  public:
+   DeviceInterface(const torch::Device& device) : device_(device) {}
+
+   virtual ~DeviceInterface(){};
+
+   torch::Device& device() {
+     return device_;
+   };
+
+   virtual std::optional<const AVCodec*> findCodec(
+       [[maybe_unused]] const AVCodecID& codecId) {
+     return std::nullopt;
+   };
+
+   // Initialize the device with parameters generic to all kinds of decoding.
+   virtual void initialize(
+       const AVStream* avStream,
+       const UniqueDecodingAVFormatContext& avFormatCtx) = 0;
+
+   // Initialize the device with parameters specific to video decoding. There is
+   // a default empty implementation.
+   virtual void initializeVideo(
+       [[maybe_unused]] const VideoStreamOptions& videoStreamOptions,
+       [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
+           transforms,
+       [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims) {}
+
+   // In order for decoding to actually happen on an FFmpeg managed hardware
+   // device, we need to register the DeviceInterface managed
+   // AVHardwareDeviceContext with the AVCodecContext. We don't need to do this
+   // on the CPU and if FFmpeg is not managing the hardware device.
+   virtual void registerHardwareDeviceWithCodec(
+       [[maybe_unused]] AVCodecContext* codecContext) {}
+
+   virtual void convertAVFrameToFrameOutput(
+       UniqueAVFrame& avFrame,
+       FrameOutput& frameOutput,
+       std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
+
+   // ------------------------------------------
+   // Extension points for custom decoding paths
+   // ------------------------------------------
+
+   // Override to return true if this device interface can decode packets
+   // directly. This means that the following two member functions can both
+   // be called:
+   //
+   // 1. sendPacket()
+   // 2. receiveFrame()
+   virtual bool canDecodePacketDirectly() const {
+     return false;
+   }
+
+   // Moral equivalent of avcodec_send_packet()
+   // Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
+   // other AVERROR on failure
+   virtual int sendPacket([[maybe_unused]] ReferenceAVPacket& avPacket) {
+     TORCH_CHECK(
+         false,
+         "Send/receive packet decoding not implemented for this device interface");
+     return AVERROR(ENOSYS);
+   }
+
+   // Send an EOF packet to flush the decoder
+   // Returns AVSUCCESS on success, or other AVERROR on failure
+   virtual int sendEOFPacket() {
+     TORCH_CHECK(
+         false, "Send EOF packet not implemented for this device interface");
+     return AVERROR(ENOSYS);
+   }
+
+   // Moral equivalent of avcodec_receive_frame()
+   // Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
+   // AVERROR_EOF if end of stream, or other AVERROR on failure
+   virtual int receiveFrame([[maybe_unused]] UniqueAVFrame& avFrame) {
+     TORCH_CHECK(
+         false,
+         "Send/receive packet decoding not implemented for this device interface");
+     return AVERROR(ENOSYS);
+   }
+
+   // Flush remaining frames from decoder
+   virtual void flush() {
+     // Default implementation is no-op for standard decoders
+     // Custom decoders can override this method
+   }
+
+  protected:
+   torch::Device device_;
+ };
+
+ using CreateDeviceInterfaceFn =
+     std::function<DeviceInterface*(const torch::Device& device)>;
+
+ bool registerDeviceInterface(
+     const DeviceInterfaceKey& key,
+     const CreateDeviceInterfaceFn createInterface);
+
+ void validateDeviceInterface(
+     const std::string device,
+     const std::string variant);
+
+ std::unique_ptr<DeviceInterface> createDeviceInterface(
+     const torch::Device& device,
+     const std::string_view variant = "default");
+
+ torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
+
+ } // namespace facebook::torchcodec
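
The extension points at the bottom of the class (canDecodePacketDirectly, sendPacket, sendEOFPacket, receiveFrame, flush) let a subclass bypass FFmpeg's avcodec_send_packet()/avcodec_receive_frame() path and decode packets itself. The sketch below shows one way a caller could drive that API; it is illustrative only, drainOnePacket is a made-up helper, and torchcodec's actual decode loop (not shown in this diff) may be structured differently.

// Hypothetical drain loop built on the extension points declared above.
#include <utility>
#include <vector>
extern "C" {
#include <libavutil/frame.h>
}
#include "src/torchcodec/_core/DeviceInterface.h"

namespace facebook::torchcodec {

std::vector<FrameOutput> drainOnePacket(
    DeviceInterface& deviceInterface,
    ReferenceAVPacket& packet) {
  std::vector<FrameOutput> frames;
  TORCH_CHECK(
      deviceInterface.canDecodePacketDirectly(),
      "This sketch only covers interfaces with a custom decoding path.");

  int status = deviceInterface.sendPacket(packet);
  TORCH_CHECK(status >= 0, "sendPacket failed");

  while (true) {
    UniqueAVFrame avFrame(av_frame_alloc());
    status = deviceInterface.receiveFrame(avFrame);
    if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
      // The decoder needs more input, or the stream is exhausted.
      break;
    }
    TORCH_CHECK(status >= 0, "receiveFrame failed");
    FrameOutput frameOutput;
    deviceInterface.convertAVFrameToFrameOutput(avFrame, frameOutput);
    frames.push_back(std::move(frameOutput));
  }
  return frames;
}

} // namespace facebook::torchcodec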