torchcodec 0.7.0__cp312-cp312-win_amd64.whl → 0.8.1__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (66) hide show
  1. torchcodec/_core/AVIOTensorContext.cpp +23 -16
  2. torchcodec/_core/AVIOTensorContext.h +2 -1
  3. torchcodec/_core/BetaCudaDeviceInterface.cpp +718 -0
  4. torchcodec/_core/BetaCudaDeviceInterface.h +193 -0
  5. torchcodec/_core/CMakeLists.txt +18 -3
  6. torchcodec/_core/CUDACommon.cpp +330 -0
  7. torchcodec/_core/CUDACommon.h +51 -0
  8. torchcodec/_core/Cache.h +6 -20
  9. torchcodec/_core/CpuDeviceInterface.cpp +195 -108
  10. torchcodec/_core/CpuDeviceInterface.h +84 -19
  11. torchcodec/_core/CudaDeviceInterface.cpp +227 -376
  12. torchcodec/_core/CudaDeviceInterface.h +38 -6
  13. torchcodec/_core/DeviceInterface.cpp +57 -19
  14. torchcodec/_core/DeviceInterface.h +97 -16
  15. torchcodec/_core/Encoder.cpp +346 -9
  16. torchcodec/_core/Encoder.h +62 -1
  17. torchcodec/_core/FFMPEGCommon.cpp +190 -3
  18. torchcodec/_core/FFMPEGCommon.h +27 -1
  19. torchcodec/_core/FilterGraph.cpp +30 -22
  20. torchcodec/_core/FilterGraph.h +15 -1
  21. torchcodec/_core/Frame.cpp +22 -7
  22. torchcodec/_core/Frame.h +15 -61
  23. torchcodec/_core/Metadata.h +2 -2
  24. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  25. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  26. torchcodec/_core/NVDECCache.cpp +60 -0
  27. torchcodec/_core/NVDECCache.h +102 -0
  28. torchcodec/_core/SingleStreamDecoder.cpp +196 -201
  29. torchcodec/_core/SingleStreamDecoder.h +42 -15
  30. torchcodec/_core/StreamOptions.h +16 -6
  31. torchcodec/_core/Transform.cpp +87 -0
  32. torchcodec/_core/Transform.h +84 -0
  33. torchcodec/_core/__init__.py +4 -0
  34. torchcodec/_core/custom_ops.cpp +257 -32
  35. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +61 -1
  36. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  37. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  38. torchcodec/_core/ops.py +147 -44
  39. torchcodec/_core/pybind_ops.cpp +22 -59
  40. torchcodec/_samplers/video_clip_sampler.py +7 -19
  41. torchcodec/decoders/__init__.py +1 -0
  42. torchcodec/decoders/_decoder_utils.py +61 -1
  43. torchcodec/decoders/_video_decoder.py +46 -20
  44. torchcodec/libtorchcodec_core4.dll +0 -0
  45. torchcodec/libtorchcodec_core5.dll +0 -0
  46. torchcodec/libtorchcodec_core6.dll +0 -0
  47. torchcodec/libtorchcodec_core7.dll +0 -0
  48. torchcodec/libtorchcodec_core8.dll +0 -0
  49. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  50. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  51. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  52. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  53. torchcodec/libtorchcodec_custom_ops8.dll +0 -0
  54. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  55. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  56. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  57. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  58. torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
  59. torchcodec/samplers/_time_based.py +8 -0
  60. torchcodec/version.py +1 -1
  61. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +29 -16
  62. torchcodec-0.8.1.dist-info/RECORD +82 -0
  63. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +1 -1
  64. torchcodec-0.7.0.dist-info/RECORD +0 -67
  65. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
  66. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@
12
12
 
13
13
  extern "C" {
14
14
  #include <libavcodec/avcodec.h>
15
+ #include <libavcodec/bsf.h>
15
16
  #include <libavfilter/avfilter.h>
16
17
  #include <libavfilter/buffersrc.h>
17
18
  #include <libavformat/avformat.h>
@@ -70,6 +71,14 @@ using UniqueEncodingAVFormatContext = std::unique_ptr<
70
71
  using UniqueAVCodecContext = std::unique_ptr<
71
72
  AVCodecContext,
72
73
  Deleterp<AVCodecContext, void, avcodec_free_context>>;
74
+ using SharedAVCodecContext = std::shared_ptr<AVCodecContext>;
75
+
76
+ // create SharedAVCodecContext with custom deleter
77
+ inline SharedAVCodecContext makeSharedAVCodecContext(AVCodecContext* ctx) {
78
+ return SharedAVCodecContext(
79
+ ctx, Deleterp<AVCodecContext, void, avcodec_free_context>{});
80
+ }
81
+
73
82
  using UniqueAVFrame =
74
83
  std::unique_ptr<AVFrame, Deleterp<AVFrame, void, av_frame_free>>;
75
84
  using UniqueAVFilterGraph = std::unique_ptr<
@@ -86,6 +95,8 @@ using UniqueSwrContext =
86
95
  std::unique_ptr<SwrContext, Deleterp<SwrContext, void, swr_free>>;
87
96
  using UniqueAVAudioFifo = std::
88
97
  unique_ptr<AVAudioFifo, Deleter<AVAudioFifo, void, av_audio_fifo_free>>;
98
+ using UniqueAVBSFContext =
99
+ std::unique_ptr<AVBSFContext, Deleterp<AVBSFContext, void, av_bsf_free>>;
89
100
  using UniqueAVBufferRef =
90
101
  std::unique_ptr<AVBufferRef, Deleterp<AVBufferRef, void, av_buffer_unref>>;
91
102
  using UniqueAVBufferSrcParameters = std::unique_ptr<
@@ -161,9 +172,14 @@ std::string getFFMPEGErrorStringFromErrorCode(int errorCode);
161
172
  // struct member representing duration has changed across the versions we
162
173
  // support.
163
174
  int64_t getDuration(const UniqueAVFrame& frame);
175
+ void setDuration(const UniqueAVFrame& frame, int64_t duration);
176
+
177
+ const int* getSupportedSampleRates(const AVCodec& avCodec);
178
+ const AVSampleFormat* getSupportedOutputSampleFormats(const AVCodec& avCodec);
179
+ const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec);
164
180
 
165
181
  int getNumChannels(const UniqueAVFrame& avFrame);
166
- int getNumChannels(const UniqueAVCodecContext& avCodecContext);
182
+ int getNumChannels(const SharedAVCodecContext& avCodecContext);
167
183
 
168
184
  void setDefaultChannelLayout(
169
185
  UniqueAVCodecContext& avCodecContext,
@@ -224,4 +240,14 @@ AVIOContext* avioAllocContext(
224
240
  AVIOWriteFunction write_packet,
225
241
  AVIOSeekFunction seek);
226
242
 
243
+ double ptsToSeconds(int64_t pts, const AVRational& timeBase);
244
+ int64_t secondsToClosestPts(double seconds, const AVRational& timeBase);
245
+ int64_t computeSafeDuration(
246
+ const AVRational& frameRate,
247
+ const AVRational& timeBase);
248
+
249
+ AVFilterContext* createBuffersinkFilter(
250
+ AVFilterGraph* filterGraph,
251
+ enum AVPixelFormat outputFormat);
252
+
227
253
  } // namespace facebook::torchcodec
@@ -5,6 +5,7 @@
5
5
  // LICENSE file in the root directory of this source tree.
6
6
 
7
7
  #include "src/torchcodec/_core/FilterGraph.h"
8
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
8
9
 
9
10
  extern "C" {
10
11
  #include <libavfilter/buffersink.h>
@@ -13,6 +14,28 @@ extern "C" {
13
14
 
14
15
  namespace facebook::torchcodec {
15
16
 
17
+ FiltersContext::FiltersContext(
18
+ int inputWidth,
19
+ int inputHeight,
20
+ AVPixelFormat inputFormat,
21
+ AVRational inputAspectRatio,
22
+ int outputWidth,
23
+ int outputHeight,
24
+ AVPixelFormat outputFormat,
25
+ const std::string& filtergraphStr,
26
+ AVRational timeBase,
27
+ AVBufferRef* hwFramesCtx)
28
+ : inputWidth(inputWidth),
29
+ inputHeight(inputHeight),
30
+ inputFormat(inputFormat),
31
+ inputAspectRatio(inputAspectRatio),
32
+ outputWidth(outputWidth),
33
+ outputHeight(outputHeight),
34
+ outputFormat(outputFormat),
35
+ filtergraphStr(filtergraphStr),
36
+ timeBase(timeBase),
37
+ hwFramesCtx(hwFramesCtx) {}
38
+
16
39
  bool operator==(const AVRational& lhs, const AVRational& rhs) {
17
40
  return lhs.num == rhs.num && lhs.den == rhs.den;
18
41
  }
@@ -41,7 +64,6 @@ FilterGraph::FilterGraph(
41
64
  }
42
65
 
43
66
  const AVFilter* buffersrc = avfilter_get_by_name("buffer");
44
- const AVFilter* buffersink = avfilter_get_by_name("buffersink");
45
67
 
46
68
  UniqueAVBufferSrcParameters srcParams(av_buffersrc_parameters_alloc());
47
69
  TORCH_CHECK(srcParams, "Failed to allocate buffersrc params");
@@ -71,26 +93,10 @@ FilterGraph::FilterGraph(
71
93
  "Failed to create filter graph : ",
72
94
  getFFMPEGErrorStringFromErrorCode(status));
73
95
 
74
- status = avfilter_graph_create_filter(
75
- &sinkContext_, buffersink, "out", nullptr, nullptr, filterGraph_.get());
96
+ sinkContext_ =
97
+ createBuffersinkFilter(filterGraph_.get(), filtersContext.outputFormat);
76
98
  TORCH_CHECK(
77
- status >= 0,
78
- "Failed to create filter graph: ",
79
- getFFMPEGErrorStringFromErrorCode(status));
80
-
81
- enum AVPixelFormat pix_fmts[] = {
82
- filtersContext.outputFormat, AV_PIX_FMT_NONE};
83
-
84
- status = av_opt_set_int_list(
85
- sinkContext_,
86
- "pix_fmts",
87
- pix_fmts,
88
- AV_PIX_FMT_NONE,
89
- AV_OPT_SEARCH_CHILDREN);
90
- TORCH_CHECK(
91
- status >= 0,
92
- "Failed to set output pixel formats: ",
93
- getFFMPEGErrorStringFromErrorCode(status));
99
+ sinkContext_ != nullptr, "Failed to create and configure buffersink");
94
100
 
95
101
  UniqueAVFilterInOut outputs(avfilter_inout_alloc());
96
102
  UniqueAVFilterInOut inputs(avfilter_inout_alloc());
@@ -117,13 +123,15 @@ FilterGraph::FilterGraph(
117
123
  TORCH_CHECK(
118
124
  status >= 0,
119
125
  "Failed to parse filter description: ",
120
- getFFMPEGErrorStringFromErrorCode(status));
126
+ getFFMPEGErrorStringFromErrorCode(status),
127
+ ", provided filters: " + filtersContext.filtergraphStr);
121
128
 
122
129
  status = avfilter_graph_config(filterGraph_.get(), nullptr);
123
130
  TORCH_CHECK(
124
131
  status >= 0,
125
132
  "Failed to configure filter graph: ",
126
- getFFMPEGErrorStringFromErrorCode(status));
133
+ getFFMPEGErrorStringFromErrorCode(status),
134
+ ", provided filters: " + filtersContext.filtergraphStr);
127
135
  }
128
136
 
129
137
  UniqueAVFrame FilterGraph::convert(const UniqueAVFrame& avFrame) {
@@ -19,11 +19,25 @@ struct FiltersContext {
19
19
  int outputWidth = 0;
20
20
  int outputHeight = 0;
21
21
  AVPixelFormat outputFormat = AV_PIX_FMT_NONE;
22
-
23
22
  std::string filtergraphStr;
24
23
  AVRational timeBase = {0, 0};
25
24
  UniqueAVBufferRef hwFramesCtx;
26
25
 
26
+ FiltersContext() = default;
27
+ FiltersContext(FiltersContext&&) = default;
28
+ FiltersContext& operator=(FiltersContext&&) = default;
29
+ FiltersContext(
30
+ int inputWidth,
31
+ int inputHeight,
32
+ AVPixelFormat inputFormat,
33
+ AVRational inputAspectRatio,
34
+ int outputWidth,
35
+ int outputHeight,
36
+ AVPixelFormat outputFormat,
37
+ const std::string& filtergraphStr,
38
+ AVRational timeBase,
39
+ AVBufferRef* hwFramesCtx = nullptr);
40
+
27
41
  bool operator==(const FiltersContext&) const;
28
42
  bool operator!=(const FiltersContext&) const;
29
43
  };
@@ -8,24 +8,39 @@
8
8
 
9
9
  namespace facebook::torchcodec {
10
10
 
11
+ FrameDims::FrameDims(int height, int width) : height(height), width(width) {
12
+ TORCH_CHECK(height > 0, "FrameDims.height must be > 0, got: ", height);
13
+ TORCH_CHECK(width > 0, "FrameDims.width must be > 0, got: ", width);
14
+ }
15
+
16
+ FrameBatchOutput::FrameBatchOutput(
17
+ int64_t numFrames,
18
+ const FrameDims& outputDims,
19
+ const torch::Device& device)
20
+ : ptsSeconds(torch::empty({numFrames}, {torch::kFloat64})),
21
+ durationSeconds(torch::empty({numFrames}, {torch::kFloat64})) {
22
+ data = allocateEmptyHWCTensor(outputDims, device, numFrames);
23
+ }
24
+
11
25
  torch::Tensor allocateEmptyHWCTensor(
12
- int height,
13
- int width,
14
- torch::Device device,
26
+ const FrameDims& frameDims,
27
+ const torch::Device& device,
15
28
  std::optional<int> numFrames) {
16
29
  auto tensorOptions = torch::TensorOptions()
17
30
  .dtype(torch::kUInt8)
18
31
  .layout(torch::kStrided)
19
32
  .device(device);
20
- TORCH_CHECK(height > 0, "height must be > 0, got: ", height);
21
- TORCH_CHECK(width > 0, "width must be > 0, got: ", width);
33
+ TORCH_CHECK(
34
+ frameDims.height > 0, "height must be > 0, got: ", frameDims.height);
35
+ TORCH_CHECK(frameDims.width > 0, "width must be > 0, got: ", frameDims.width);
22
36
  if (numFrames.has_value()) {
23
37
  auto numFramesValue = numFrames.value();
24
38
  TORCH_CHECK(
25
39
  numFramesValue >= 0, "numFrames must be >= 0, got: ", numFramesValue);
26
- return torch::empty({numFramesValue, height, width, 3}, tensorOptions);
40
+ return torch::empty(
41
+ {numFramesValue, frameDims.height, frameDims.width, 3}, tensorOptions);
27
42
  } else {
28
- return torch::empty({height, width, 3}, tensorOptions);
43
+ return torch::empty({frameDims.height, frameDims.width, 3}, tensorOptions);
29
44
  }
30
45
  }
31
46
 
torchcodec/_core/Frame.h CHANGED
@@ -13,6 +13,15 @@
13
13
 
14
14
  namespace facebook::torchcodec {
15
15
 
16
+ struct FrameDims {
17
+ int height = 0;
18
+ int width = 0;
19
+
20
+ FrameDims() = default;
21
+
22
+ FrameDims(int h, int w);
23
+ };
24
+
16
25
  // All public video decoding entry points return either a FrameOutput or a
17
26
  // FrameBatchOutput.
18
27
  // They are the equivalent of the user-facing Frame and FrameBatch classes in
@@ -34,10 +43,10 @@ struct FrameBatchOutput {
34
43
  torch::Tensor ptsSeconds; // 1D of shape (N,)
35
44
  torch::Tensor durationSeconds; // 1D of shape (N,)
36
45
 
37
- explicit FrameBatchOutput(
46
+ FrameBatchOutput(
38
47
  int64_t numFrames,
39
- const VideoStreamOptions& videoStreamOptions,
40
- const StreamMetadata& streamMetadata);
48
+ const FrameDims& outputDims,
49
+ const torch::Device& device);
41
50
  };
42
51
 
43
52
  struct AudioFramesOutput {
@@ -49,70 +58,15 @@ struct AudioFramesOutput {
49
58
  // FRAME TENSOR ALLOCATION APIs
50
59
  // --------------------------------------------------------------------------
51
60
 
52
- // Note [Frame Tensor allocation and height and width]
61
+ // Note [Frame Tensor allocation]
53
62
  //
54
63
  // We always allocate [N]HWC tensors. The low-level decoding functions all
55
64
  // assume HWC tensors, since this is what FFmpeg natively handles. It's up to
56
65
  // the high-level decoding entry-points to permute that back to CHW, by calling
57
66
  // maybePermuteHWC2CHW().
58
- //
59
- // Also, importantly, the way we figure out the the height and width of the
60
- // output frame tensor varies, and depends on the decoding entry-point. In
61
- // *decreasing order of accuracy*, we use the following sources for determining
62
- // height and width:
63
- // - getHeightAndWidthFromResizedAVFrame(). This is the height and width of the
64
- // AVframe, *post*-resizing. This is only used for single-frame decoding APIs,
65
- // on CPU, with filtergraph.
66
- // - getHeightAndWidthFromOptionsOrAVFrame(). This is the height and width from
67
- // the user-specified options if they exist, or the height and width of the
68
- // AVFrame *before* it is resized. In theory, i.e. if there are no bugs within
69
- // our code or within FFmpeg code, this should be exactly the same as
70
- // getHeightAndWidthFromResizedAVFrame(). This is used by single-frame
71
- // decoding APIs, on CPU with swscale, and on GPU.
72
- // - getHeightAndWidthFromOptionsOrMetadata(). This is the height and width from
73
- // the user-specified options if they exist, or the height and width form the
74
- // stream metadata, which itself got its value from the CodecContext, when the
75
- // stream was added. This is used by batch decoding APIs, for both GPU and
76
- // CPU.
77
- //
78
- // The source of truth for height and width really is the (resized) AVFrame: it
79
- // comes from the decoded ouptut of FFmpeg. The info from the metadata (i.e.
80
- // from the CodecContext) may not be as accurate. However, the AVFrame is only
81
- // available late in the call stack, when the frame is decoded, while the
82
- // CodecContext is available early when a stream is added. This is why we use
83
- // the CodecContext for pre-allocating batched output tensors (we could
84
- // pre-allocate those only once we decode the first frame to get the info frame
85
- // the AVFrame, but that's a more complex logic).
86
- //
87
- // Because the sources for height and width may disagree, we may end up with
88
- // conflicts: e.g. if we pre-allocate a batch output tensor based on the
89
- // metadata info, but the decoded AVFrame has a different height and width.
90
- // it is very important to check the height and width assumptions where the
91
- // tensors memory is used/filled in order to avoid segfaults.
92
-
93
- struct FrameDims {
94
- int height;
95
- int width;
96
-
97
- FrameDims(int h, int w) : height(h), width(w) {}
98
- };
99
-
100
- // There's nothing preventing you from calling this on a non-resized frame, but
101
- // please don't.
102
- FrameDims getHeightAndWidthFromResizedAVFrame(const AVFrame& resizedAVFrame);
103
-
104
- FrameDims getHeightAndWidthFromOptionsOrMetadata(
105
- const VideoStreamOptions& videoStreamOptions,
106
- const StreamMetadata& streamMetadata);
107
-
108
- FrameDims getHeightAndWidthFromOptionsOrAVFrame(
109
- const VideoStreamOptions& videoStreamOptions,
110
- const UniqueAVFrame& avFrame);
111
-
112
67
  torch::Tensor allocateEmptyHWCTensor(
113
- int height,
114
- int width,
115
- torch::Device device,
68
+ const FrameDims& frameDims,
69
+ const torch::Device& device,
116
70
  std::optional<int> numFrames = std::nullopt);
117
71
 
118
72
  } // namespace facebook::torchcodec
@@ -44,8 +44,8 @@ struct StreamMetadata {
44
44
  std::optional<int64_t> numFramesFromContent;
45
45
 
46
46
  // Video-only fields derived from the AVCodecContext.
47
- std::optional<int64_t> width;
48
- std::optional<int64_t> height;
47
+ std::optional<int> width;
48
+ std::optional<int> height;
49
49
  std::optional<AVRational> sampleAspectRatio;
50
50
 
51
51
  // Audio-only fields
@@ -0,0 +1,320 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #ifdef FBCODE_CAFFE2
8
+ // No need to do anything on fbcode. NVCUVID is available there, we can take a
9
+ // hard dependency on it.
10
+ // The FBCODE_CAFFE2 macro is defined in the upstream fbcode build of torch, so
11
+ // we can rely on it, that's what torch does too.
12
+
13
+ namespace facebook::torchcodec {
14
+ bool loadNVCUVIDLibrary() {
15
+ return true;
16
+ }
17
+ } // namespace facebook::torchcodec
18
+ #else
19
+
20
+ #include "src/torchcodec/_core/NVCUVIDRuntimeLoader.h"
21
+
22
+ #include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
23
+ #include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"
24
+
25
+ #include <torch/types.h>
26
+ #include <cstdio>
27
+ #include <mutex>
28
+
29
+ #if defined(WIN64) || defined(_WIN64)
30
+ #include <windows.h>
31
+ typedef HMODULE tHandle;
32
+ #else
33
+ #include <dlfcn.h>
34
+ typedef void* tHandle;
35
+ #endif
36
+
37
+ namespace facebook::torchcodec {
38
+
39
+ /* clang-format off */
40
+ // This file defines the logic to load the NVCUVID library **at runtime**,
41
+ // along with the corresponding NVCUVID functions that we'll need.
42
+ //
43
+ // We do this because we *do not want* to link (statically or dynamically)
44
+ // against libnvcuvid.so: it is not always available on the user's machine! If we
45
+ // were to link against libnvcuvid.so, that would mean that our
46
+ // libtorchcodec_coreN.so would try to look for it when loaded at import time.
47
+ // And if it's not on the user's machine, that causes `import torchcodec` to
48
+ // fail. Source: that's what we did, and we got user reports.
49
+ //
50
+ // So, we don't link against libnvcuvid.so. But we still want to call its
51
+ // functions. So here's how it's done, we'll use cuvidCreateVideoParser as an
52
+ // example, but it works the same for all. We are largely following the
53
+ // instructions from the NVCUVID docs:
54
+ // https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvdec-video-decoder-api-prog-guide/index.html#dynamic-loading-nvidia-components
55
+ //
56
+ // This:
57
+ // typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser*, CUVIDPARSERPARAMS*);
58
+ // defines tcuvidCreateVideoParser, which is the *type* of a *function*.
59
+ // We declare a pointer to a function of that type just below with:
60
+ // static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr;
61
+ // "dl" is for "dynamically loaded". For now dl_cuvidCreateVideoParser is
62
+ // nullptr, but later it will be a proper function [pointer] that can be called
63
+ // with dl_cuvidCreateVideoParser(...);
64
+ //
65
+ // For that to happen we need to call loadNVCUVIDLibrary(): in there, we first
66
+ // dlopen(libnvcuvid.so) which loads the .so somewhere in memory. Then we call
67
+ // dlsym(...), which binds dl_cuvidCreateVideoParser to its actual address: it
68
+ // literally sets the value of the dl_cuvidCreateVideoParser pointer to the
69
+ // address of the actual code section. If all went well, by now, we can safely
70
+ // call dl_cuvidCreateVideoParser(...);
71
+ // All of that happens at runtime *after* import time, when the first instance
72
+ // of the Beta CUDA interface is created, i.e. only when the user explicitly
73
+ // requests it.
74
+ //
75
+ // At the bottom of this file we have an `extern "C"` section with function
76
+ // definitions like:
77
+ //
78
+ // CUresult CUDAAPI cuvidCreateVideoParser(
79
+ // CUvideoparser* videoParser,
80
+ // CUVIDPARSERPARAMS* parserParams) {...}
81
+ //
82
+ // These are the actual functions that are compiled against and called by the
83
+ // Beta CUDA interface code. Crucially, these functions' signatures match exactly
84
+ // the NVCUVID functions (as defined in cuviddec.h). Inside of
85
+ // cuvidCreateVideoParser(...) we simply call the dl_cuvidCreateVideoParser
86
+ // function [pointer] that we dynamically loaded earlier.
87
+ //
88
+ // At runtime, within the Beta CUDA interface code we have a fallback mechanism
89
+ // to switch back to the CPU backend if any of the NVCUVID functions are not
90
+ // available, or if libnvcuvid.so itself couldn't be found. This is what FFmpeg
91
+ // does too.
92
+
93
+
94
+ // Function pointers types
95
+ typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser*, CUVIDPARSERPARAMS*);
96
+ typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser, CUVIDSOURCEDATAPACKET*);
97
+ typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser);
98
+ typedef CUresult CUDAAPI tcuvidGetDecoderCaps(CUVIDDECODECAPS*);
99
+ typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder*, CUVIDDECODECREATEINFO*);
100
+ typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder);
101
+ typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder, CUVIDPICPARAMS*);
102
+ typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder, int, unsigned int*, unsigned int*, CUVIDPROCPARAMS*);
103
+ typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder, unsigned int);
104
+ typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder, int, unsigned long long*, unsigned int*, CUVIDPROCPARAMS*);
105
+ typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder, unsigned long long);
106
+ /* clang-format on */
107
+
108
+ // Global function pointers - will be dynamically loaded
109
+ static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr;
110
+ static tcuvidParseVideoData* dl_cuvidParseVideoData = nullptr;
111
+ static tcuvidDestroyVideoParser* dl_cuvidDestroyVideoParser = nullptr;
112
+ static tcuvidGetDecoderCaps* dl_cuvidGetDecoderCaps = nullptr;
113
+ static tcuvidCreateDecoder* dl_cuvidCreateDecoder = nullptr;
114
+ static tcuvidDestroyDecoder* dl_cuvidDestroyDecoder = nullptr;
115
+ static tcuvidDecodePicture* dl_cuvidDecodePicture = nullptr;
116
+ static tcuvidMapVideoFrame* dl_cuvidMapVideoFrame = nullptr;
117
+ static tcuvidUnmapVideoFrame* dl_cuvidUnmapVideoFrame = nullptr;
118
+ static tcuvidMapVideoFrame64* dl_cuvidMapVideoFrame64 = nullptr;
119
+ static tcuvidUnmapVideoFrame64* dl_cuvidUnmapVideoFrame64 = nullptr;
120
+
121
+ static tHandle g_nvcuvid_handle = nullptr;
122
+ static std::mutex g_nvcuvid_mutex;
123
+
124
+ bool isLoaded() {
125
+ return (
126
+ g_nvcuvid_handle && dl_cuvidCreateVideoParser && dl_cuvidParseVideoData &&
127
+ dl_cuvidDestroyVideoParser && dl_cuvidGetDecoderCaps &&
128
+ dl_cuvidCreateDecoder && dl_cuvidDestroyDecoder &&
129
+ dl_cuvidDecodePicture && dl_cuvidMapVideoFrame &&
130
+ dl_cuvidUnmapVideoFrame && dl_cuvidMapVideoFrame64 &&
131
+ dl_cuvidUnmapVideoFrame64);
132
+ }
133
+
134
+ template <typename T>
135
+ T* bindFunction(const char* functionName) {
136
+ #if defined(WIN64) || defined(_WIN64)
137
+ return reinterpret_cast<T*>(GetProcAddress(g_nvcuvid_handle, functionName));
138
+ #else
139
+ return reinterpret_cast<T*>(dlsym(g_nvcuvid_handle, functionName));
140
+ #endif
141
+ }
142
+
143
+ bool _loadLibrary() {
144
+ // Helper that just calls dlopen or equivalent on Windows. In a separate
145
+ // function because of the #ifdef ugliness.
146
+ #if defined(WIN64) || defined(_WIN64)
147
+ #ifdef UNICODE
148
+ static LPCWSTR nvcuvidDll = L"nvcuvid.dll";
149
+ #else
150
+ static LPCSTR nvcuvidDll = "nvcuvid.dll";
151
+ #endif
152
+ g_nvcuvid_handle = LoadLibrary(nvcuvidDll);
153
+ if (g_nvcuvid_handle == nullptr) {
154
+ return false;
155
+ }
156
+ #else
157
+ g_nvcuvid_handle = dlopen("libnvcuvid.so", RTLD_NOW);
158
+ if (g_nvcuvid_handle == nullptr) {
159
+ g_nvcuvid_handle = dlopen("libnvcuvid.so.1", RTLD_NOW);
160
+ }
161
+ if (g_nvcuvid_handle == nullptr) {
162
+ return false;
163
+ }
164
+ #endif
165
+
166
+ return true;
167
+ }
168
+
169
+ bool loadNVCUVIDLibrary() {
170
+ // Loads NVCUVID library and all required function pointers.
171
+ // Returns true on success, false on failure.
172
+ std::lock_guard<std::mutex> lock(g_nvcuvid_mutex);
173
+
174
+ if (isLoaded()) {
175
+ return true;
176
+ }
177
+
178
+ if (!_loadLibrary()) {
179
+ return false;
180
+ }
181
+
182
+ // Load all function pointers. They'll be set to nullptr if not found.
183
+ dl_cuvidCreateVideoParser =
184
+ bindFunction<tcuvidCreateVideoParser>("cuvidCreateVideoParser");
185
+ dl_cuvidParseVideoData =
186
+ bindFunction<tcuvidParseVideoData>("cuvidParseVideoData");
187
+ dl_cuvidDestroyVideoParser =
188
+ bindFunction<tcuvidDestroyVideoParser>("cuvidDestroyVideoParser");
189
+ dl_cuvidGetDecoderCaps =
190
+ bindFunction<tcuvidGetDecoderCaps>("cuvidGetDecoderCaps");
191
+ dl_cuvidCreateDecoder =
192
+ bindFunction<tcuvidCreateDecoder>("cuvidCreateDecoder");
193
+ dl_cuvidDestroyDecoder =
194
+ bindFunction<tcuvidDestroyDecoder>("cuvidDestroyDecoder");
195
+ dl_cuvidDecodePicture =
196
+ bindFunction<tcuvidDecodePicture>("cuvidDecodePicture");
197
+ dl_cuvidMapVideoFrame =
198
+ bindFunction<tcuvidMapVideoFrame>("cuvidMapVideoFrame");
199
+ dl_cuvidUnmapVideoFrame =
200
+ bindFunction<tcuvidUnmapVideoFrame>("cuvidUnmapVideoFrame");
201
+ dl_cuvidMapVideoFrame64 =
202
+ bindFunction<tcuvidMapVideoFrame64>("cuvidMapVideoFrame64");
203
+ dl_cuvidUnmapVideoFrame64 =
204
+ bindFunction<tcuvidUnmapVideoFrame64>("cuvidUnmapVideoFrame64");
205
+
206
+ return isLoaded();
207
+ }
208
+
209
+ } // namespace facebook::torchcodec
210
+
211
+ extern "C" {
212
+
213
+ CUresult CUDAAPI cuvidCreateVideoParser(
214
+ CUvideoparser* videoParser,
215
+ CUVIDPARSERPARAMS* parserParams) {
216
+ TORCH_CHECK(
217
+ facebook::torchcodec::dl_cuvidCreateVideoParser,
218
+ "cuvidCreateVideoParser called but NVCUVID not loaded!");
219
+ return facebook::torchcodec::dl_cuvidCreateVideoParser(
220
+ videoParser, parserParams);
221
+ }
222
+
223
+ CUresult CUDAAPI cuvidParseVideoData(
224
+ CUvideoparser videoParser,
225
+ CUVIDSOURCEDATAPACKET* cuvidPacket) {
226
+ TORCH_CHECK(
227
+ facebook::torchcodec::dl_cuvidParseVideoData,
228
+ "cuvidParseVideoData called but NVCUVID not loaded!");
229
+ return facebook::torchcodec::dl_cuvidParseVideoData(videoParser, cuvidPacket);
230
+ }
231
+
232
+ CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser videoParser) {
233
+ TORCH_CHECK(
234
+ facebook::torchcodec::dl_cuvidDestroyVideoParser,
235
+ "cuvidDestroyVideoParser called but NVCUVID not loaded!");
236
+ return facebook::torchcodec::dl_cuvidDestroyVideoParser(videoParser);
237
+ }
238
+
239
+ CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS* caps) {
240
+ TORCH_CHECK(
241
+ facebook::torchcodec::dl_cuvidGetDecoderCaps,
242
+ "cuvidGetDecoderCaps called but NVCUVID not loaded!");
243
+ return facebook::torchcodec::dl_cuvidGetDecoderCaps(caps);
244
+ }
245
+
246
+ CUresult CUDAAPI cuvidCreateDecoder(
247
+ CUvideodecoder* decoder,
248
+ CUVIDDECODECREATEINFO* decoderParams) {
249
+ TORCH_CHECK(
250
+ facebook::torchcodec::dl_cuvidCreateDecoder,
251
+ "cuvidCreateDecoder called but NVCUVID not loaded!");
252
+ return facebook::torchcodec::dl_cuvidCreateDecoder(decoder, decoderParams);
253
+ }
254
+
255
+ CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder decoder) {
256
+ TORCH_CHECK(
257
+ facebook::torchcodec::dl_cuvidDestroyDecoder,
258
+ "cuvidDestroyDecoder called but NVCUVID not loaded!");
259
+ return facebook::torchcodec::dl_cuvidDestroyDecoder(decoder);
260
+ }
261
+
262
+ CUresult CUDAAPI
263
+ cuvidDecodePicture(CUvideodecoder decoder, CUVIDPICPARAMS* picParams) {
264
+ TORCH_CHECK(
265
+ facebook::torchcodec::dl_cuvidDecodePicture,
266
+ "cuvidDecodePicture called but NVCUVID not loaded!");
267
+ return facebook::torchcodec::dl_cuvidDecodePicture(decoder, picParams);
268
+ }
269
+
270
+ #if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
271
+ // We need to protect the definition of the 32bit versions under the above
272
+ // conditions (see cuviddec.h). Defining them unconditionally would cause
273
+ // conflicting-declaration compilation errors when cuviddec.h redefines those to the 64bit
274
+ // versions.
275
+ CUresult CUDAAPI cuvidMapVideoFrame(
276
+ CUvideodecoder decoder,
277
+ int pixIndex,
278
+ unsigned int* framePtr,
279
+ unsigned int* pitch,
280
+ CUVIDPROCPARAMS* procParams) {
281
+ TORCH_CHECK(
282
+ facebook::torchcodec::dl_cuvidMapVideoFrame,
283
+ "cuvidMapVideoFrame called but NVCUVID not loaded!");
284
+ return facebook::torchcodec::dl_cuvidMapVideoFrame(
285
+ decoder, pixIndex, framePtr, pitch, procParams);
286
+ }
287
+
288
+ CUresult CUDAAPI
289
+ cuvidUnmapVideoFrame(CUvideodecoder decoder, unsigned int framePtr) {
290
+ TORCH_CHECK(
291
+ facebook::torchcodec::dl_cuvidUnmapVideoFrame,
292
+ "cuvidUnmapVideoFrame called but NVCUVID not loaded!");
293
+ return facebook::torchcodec::dl_cuvidUnmapVideoFrame(decoder, framePtr);
294
+ }
295
+ #endif
296
+
297
+ CUresult CUDAAPI cuvidMapVideoFrame64(
298
+ CUvideodecoder decoder,
299
+ int pixIndex,
300
+ unsigned long long* framePtr,
301
+ unsigned int* pitch,
302
+ CUVIDPROCPARAMS* procParams) {
303
+ TORCH_CHECK(
304
+ facebook::torchcodec::dl_cuvidMapVideoFrame64,
305
+ "cuvidMapVideoFrame64 called but NVCUVID not loaded!");
306
+ return facebook::torchcodec::dl_cuvidMapVideoFrame64(
307
+ decoder, pixIndex, framePtr, pitch, procParams);
308
+ }
309
+
310
+ CUresult CUDAAPI
311
+ cuvidUnmapVideoFrame64(CUvideodecoder decoder, unsigned long long framePtr) {
312
+ TORCH_CHECK(
313
+ facebook::torchcodec::dl_cuvidUnmapVideoFrame64,
314
+ "cuvidUnmapVideoFrame64 called but NVCUVID not loaded!");
315
+ return facebook::torchcodec::dl_cuvidUnmapVideoFrame64(decoder, framePtr);
316
+ }
317
+
318
+ } // extern "C"
319
+
320
+ #endif // FBCODE_CAFFE2
@@ -0,0 +1,14 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ namespace facebook::torchcodec {
10
+
11
+ // See note in corresponding cpp file
12
+ bool loadNVCUVIDLibrary();
13
+
14
+ } // namespace facebook::torchcodec