torchcodec 0.7.0__cp313-cp313-win_amd64.whl → 0.8.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (66)
  1. torchcodec/_core/AVIOTensorContext.cpp +23 -16
  2. torchcodec/_core/AVIOTensorContext.h +2 -1
  3. torchcodec/_core/BetaCudaDeviceInterface.cpp +718 -0
  4. torchcodec/_core/BetaCudaDeviceInterface.h +193 -0
  5. torchcodec/_core/CMakeLists.txt +18 -3
  6. torchcodec/_core/CUDACommon.cpp +330 -0
  7. torchcodec/_core/CUDACommon.h +51 -0
  8. torchcodec/_core/Cache.h +6 -20
  9. torchcodec/_core/CpuDeviceInterface.cpp +195 -108
  10. torchcodec/_core/CpuDeviceInterface.h +84 -19
  11. torchcodec/_core/CudaDeviceInterface.cpp +227 -376
  12. torchcodec/_core/CudaDeviceInterface.h +38 -6
  13. torchcodec/_core/DeviceInterface.cpp +57 -19
  14. torchcodec/_core/DeviceInterface.h +97 -16
  15. torchcodec/_core/Encoder.cpp +346 -9
  16. torchcodec/_core/Encoder.h +62 -1
  17. torchcodec/_core/FFMPEGCommon.cpp +190 -3
  18. torchcodec/_core/FFMPEGCommon.h +27 -1
  19. torchcodec/_core/FilterGraph.cpp +30 -22
  20. torchcodec/_core/FilterGraph.h +15 -1
  21. torchcodec/_core/Frame.cpp +22 -7
  22. torchcodec/_core/Frame.h +15 -61
  23. torchcodec/_core/Metadata.h +2 -2
  24. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  25. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  26. torchcodec/_core/NVDECCache.cpp +60 -0
  27. torchcodec/_core/NVDECCache.h +102 -0
  28. torchcodec/_core/SingleStreamDecoder.cpp +196 -201
  29. torchcodec/_core/SingleStreamDecoder.h +42 -15
  30. torchcodec/_core/StreamOptions.h +16 -6
  31. torchcodec/_core/Transform.cpp +87 -0
  32. torchcodec/_core/Transform.h +84 -0
  33. torchcodec/_core/__init__.py +4 -0
  34. torchcodec/_core/custom_ops.cpp +257 -32
  35. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +61 -1
  36. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  37. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  38. torchcodec/_core/ops.py +147 -44
  39. torchcodec/_core/pybind_ops.cpp +22 -59
  40. torchcodec/_samplers/video_clip_sampler.py +7 -19
  41. torchcodec/decoders/__init__.py +1 -0
  42. torchcodec/decoders/_decoder_utils.py +61 -1
  43. torchcodec/decoders/_video_decoder.py +46 -20
  44. torchcodec/libtorchcodec_core4.dll +0 -0
  45. torchcodec/libtorchcodec_core5.dll +0 -0
  46. torchcodec/libtorchcodec_core6.dll +0 -0
  47. torchcodec/libtorchcodec_core7.dll +0 -0
  48. torchcodec/libtorchcodec_core8.dll +0 -0
  49. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  50. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  51. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  52. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  53. torchcodec/libtorchcodec_custom_ops8.dll +0 -0
  54. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  55. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  56. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  57. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  58. torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
  59. torchcodec/samplers/_time_based.py +8 -0
  60. torchcodec/version.py +1 -1
  61. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +29 -16
  62. torchcodec-0.8.1.dist-info/RECORD +82 -0
  63. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +1 -1
  64. torchcodec-0.7.0.dist-info/RECORD +0 -67
  65. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
  66. {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0
@@ -6,8 +6,9 @@
6
6
 
7
7
  #pragma once
8
8
 
9
- #include <npp.h>
9
+ #include "src/torchcodec/_core/CUDACommon.h"
10
10
  #include "src/torchcodec/_core/DeviceInterface.h"
11
+ #include "src/torchcodec/_core/FilterGraph.h"
11
12
 
12
13
  namespace facebook::torchcodec {
13
14
 
@@ -19,19 +20,50 @@ class CudaDeviceInterface : public DeviceInterface {
19
20
 
20
21
  std::optional<const AVCodec*> findCodec(const AVCodecID& codecId) override;
21
22
 
22
- void initializeContext(AVCodecContext* codecContext) override;
23
+ void initialize(
24
+ const AVStream* avStream,
25
+ const UniqueDecodingAVFormatContext& avFormatCtx,
26
+ const SharedAVCodecContext& codecContext) override;
23
27
 
24
- void convertAVFrameToFrameOutput(
28
+ void initializeVideo(
25
29
  const VideoStreamOptions& videoStreamOptions,
26
- const AVRational& timeBase,
30
+ [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
31
+ transforms,
32
+ [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims)
33
+ override;
34
+
35
+ void registerHardwareDeviceWithCodec(AVCodecContext* codecContext) override;
36
+
37
+ void convertAVFrameToFrameOutput(
27
38
  UniqueAVFrame& avFrame,
28
39
  FrameOutput& frameOutput,
29
40
  std::optional<torch::Tensor> preAllocatedOutputTensor =
30
41
  std::nullopt) override;
31
42
 
43
+ std::string getDetails() override;
44
+
32
45
  private:
33
- UniqueAVBufferRef ctx_;
34
- std::unique_ptr<NppStreamContext> nppCtx_;
46
+ // Our CUDA decoding code assumes NV12 format. In order to handle other
47
+ // kinds of input, we need to convert them to NV12. Our current implementation
48
+ // does this using filtergraph.
49
+ UniqueAVFrame maybeConvertAVFrameToNV12OrRGB24(UniqueAVFrame& avFrame);
50
+
51
+ // We sometimes encounter frames that cannot be decoded on the CUDA device.
52
+ // Rather than erroring out, we decode them on the CPU.
53
+ std::unique_ptr<DeviceInterface> cpuInterface_;
54
+
55
+ VideoStreamOptions videoStreamOptions_;
56
+ AVRational timeBase_;
57
+
58
+ UniqueAVBufferRef hardwareDeviceCtx_;
59
+ UniqueNppContext nppCtx_;
60
+
61
+ // This filtergraph instance is only used for NV12 format conversion in
62
+ // maybeConvertAVFrameToNV12OrRGB24().
63
+ std::unique_ptr<FiltersContext> nv12ConversionContext_;
64
+ std::unique_ptr<FilterGraph> nv12Conversion_;
65
+
66
+ bool usingCPUFallback_ = false;
35
67
  };
36
68
 
37
69
  } // namespace facebook::torchcodec
@@ -11,7 +11,8 @@
11
11
  namespace facebook::torchcodec {
12
12
 
13
13
  namespace {
14
- using DeviceInterfaceMap = std::map<torch::DeviceType, CreateDeviceInterfaceFn>;
14
+ using DeviceInterfaceMap =
15
+ std::map<DeviceInterfaceKey, CreateDeviceInterfaceFn>;
15
16
  static std::mutex g_interface_mutex;
16
17
 
17
18
  DeviceInterfaceMap& getDeviceMap() {
@@ -30,50 +31,87 @@ std::string getDeviceType(const std::string& device) {
30
31
  } // namespace
31
32
 
32
33
  bool registerDeviceInterface(
33
- torch::DeviceType deviceType,
34
+ const DeviceInterfaceKey& key,
34
35
  CreateDeviceInterfaceFn createInterface) {
35
36
  std::scoped_lock lock(g_interface_mutex);
36
37
  DeviceInterfaceMap& deviceMap = getDeviceMap();
37
38
 
38
39
  TORCH_CHECK(
39
- deviceMap.find(deviceType) == deviceMap.end(),
40
- "Device interface already registered for ",
41
- deviceType);
42
- deviceMap.insert({deviceType, createInterface});
40
+ deviceMap.find(key) == deviceMap.end(),
41
+ "Device interface already registered for device type ",
42
+ key.deviceType,
43
+ " variant '",
44
+ key.variant,
45
+ "'");
46
+ deviceMap.insert({key, createInterface});
43
47
 
44
48
  return true;
45
49
  }
46
50
 
47
- torch::Device createTorchDevice(const std::string device) {
51
+ void validateDeviceInterface(
52
+ const std::string device,
53
+ const std::string variant) {
48
54
  std::scoped_lock lock(g_interface_mutex);
49
55
  std::string deviceType = getDeviceType(device);
56
+
50
57
  DeviceInterfaceMap& deviceMap = getDeviceMap();
51
58
 
59
+ // Find device interface that matches device type and variant
60
+ torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type();
61
+
52
62
  auto deviceInterface = std::find_if(
53
63
  deviceMap.begin(),
54
64
  deviceMap.end(),
55
- [&](const std::pair<torch::DeviceType, CreateDeviceInterfaceFn>& arg) {
56
- return device.rfind(
57
- torch::DeviceTypeName(arg.first, /*lcase*/ true), 0) == 0;
65
+ [&](const std::pair<DeviceInterfaceKey, CreateDeviceInterfaceFn>& arg) {
66
+ return arg.first.deviceType == deviceTypeEnum &&
67
+ arg.first.variant == variant;
58
68
  });
59
- TORCH_CHECK(
60
- deviceInterface != deviceMap.end(), "Unsupported device: ", device);
61
69
 
62
- return torch::Device(device);
70
+ TORCH_CHECK(
71
+ deviceInterface != deviceMap.end(),
72
+ "Unsupported device: ",
73
+ device,
74
+ " (device type: ",
75
+ deviceType,
76
+ ", variant: ",
77
+ variant,
78
+ ")");
63
79
  }
64
80
 
65
81
  std::unique_ptr<DeviceInterface> createDeviceInterface(
66
- const torch::Device& device) {
67
- auto deviceType = device.type();
82
+ const torch::Device& device,
83
+ const std::string_view variant) {
84
+ DeviceInterfaceKey key(device.type(), variant);
68
85
  std::scoped_lock lock(g_interface_mutex);
69
86
  DeviceInterfaceMap& deviceMap = getDeviceMap();
70
87
 
88
+ auto it = deviceMap.find(key);
89
+ if (it != deviceMap.end()) {
90
+ return std::unique_ptr<DeviceInterface>(it->second(device));
91
+ }
92
+
71
93
  TORCH_CHECK(
72
- deviceMap.find(deviceType) != deviceMap.end(),
73
- "Unsupported device: ",
74
- device);
94
+ false,
95
+ "No device interface found for device type: ",
96
+ device.type(),
97
+ " variant: '",
98
+ variant,
99
+ "'");
100
+ }
75
101
 
76
- return std::unique_ptr<DeviceInterface>(deviceMap[deviceType](device));
102
+ torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) {
103
+ TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
104
+
105
+ int height = avFrame->height;
106
+ int width = avFrame->width;
107
+ std::vector<int64_t> shape = {height, width, 3};
108
+ std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
109
+ AVFrame* avFrameClone = av_frame_clone(avFrame.get());
110
+ auto deleter = [avFrameClone](void*) {
111
+ UniqueAVFrame avFrameToDelete(avFrameClone);
112
+ };
113
+ return torch::from_blob(
114
+ avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
77
115
  }
78
116
 
79
117
  } // namespace facebook::torchcodec
@@ -14,16 +14,27 @@
14
14
  #include "FFMPEGCommon.h"
15
15
  #include "src/torchcodec/_core/Frame.h"
16
16
  #include "src/torchcodec/_core/StreamOptions.h"
17
+ #include "src/torchcodec/_core/Transform.h"
17
18
 
18
19
  namespace facebook::torchcodec {
19
20
 
20
- // Note that all these device functions should only be called if the device is
21
- // not a CPU device. CPU device functions are already implemented in the
22
- // SingleStreamDecoder implementation.
23
- // These functions should only be called from within an if block like this:
24
- // if (device.type() != torch::kCPU) {
25
- // deviceFunction(device, ...);
26
- // }
21
+ // Key for device interface registration with device type + variant support
22
+ struct DeviceInterfaceKey {
23
+ torch::DeviceType deviceType;
24
+ std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
25
+
26
+ bool operator<(const DeviceInterfaceKey& other) const {
27
+ if (deviceType != other.deviceType) {
28
+ return deviceType < other.deviceType;
29
+ }
30
+ return variant < other.variant;
31
+ }
32
+
33
+ explicit DeviceInterfaceKey(torch::DeviceType type) : deviceType(type) {}
34
+
35
+ DeviceInterfaceKey(torch::DeviceType type, const std::string_view& variant)
36
+ : deviceType(type), variant(variant) {}
37
+ };
27
38
 
28
39
  class DeviceInterface {
29
40
  public:
@@ -35,33 +46,103 @@ class DeviceInterface {
35
46
  return device_;
36
47
  };
37
48
 
38
- virtual std::optional<const AVCodec*> findCodec(const AVCodecID& codecId) = 0;
49
+ virtual std::optional<const AVCodec*> findCodec(
50
+ [[maybe_unused]] const AVCodecID& codecId) {
51
+ return std::nullopt;
52
+ };
39
53
 
40
- // Initialize the hardware device that is specified in `device`. Some builds
41
- // support CUDA and others only support CPU.
42
- virtual void initializeContext(AVCodecContext* codecContext) = 0;
54
+ // Initialize the device with parameters generic to all kinds of decoding.
55
+ virtual void initialize(
56
+ const AVStream* avStream,
57
+ const UniqueDecodingAVFormatContext& avFormatCtx,
58
+ const SharedAVCodecContext& codecContext) = 0;
59
+
60
+ // Initialize the device with parameters specific to video decoding. There is
61
+ // a default empty implementation.
62
+ virtual void initializeVideo(
63
+ [[maybe_unused]] const VideoStreamOptions& videoStreamOptions,
64
+ [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
65
+ transforms,
66
+ [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims) {}
67
+
68
+ // In order for decoding to actually happen on an FFmpeg managed hardware
69
+ // device, we need to register the DeviceInterface managed
70
+ // AVHardwareDeviceContext with the AVCodecContext. We don't need to do this
71
+ // on the CPU, or when FFmpeg is not managing the hardware device.
72
+ virtual void registerHardwareDeviceWithCodec(
73
+ [[maybe_unused]] AVCodecContext* codecContext) {}
43
74
 
44
75
  virtual void convertAVFrameToFrameOutput(
45
- const VideoStreamOptions& videoStreamOptions,
46
- const AVRational& timeBase,
47
76
  UniqueAVFrame& avFrame,
48
77
  FrameOutput& frameOutput,
49
78
  std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
50
79
 
80
+ // ------------------------------------------
81
+ // Extension points for custom decoding paths
82
+ // ------------------------------------------
83
+
84
+ // Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
85
+ // other AVERROR on failure
86
+ // Default implementation uses FFmpeg directly
87
+ virtual int sendPacket(ReferenceAVPacket& avPacket) {
88
+ TORCH_CHECK(
89
+ codecContext_ != nullptr,
90
+ "Codec context not available for default packet sending");
91
+ return avcodec_send_packet(codecContext_.get(), avPacket.get());
92
+ }
93
+
94
+ // Send an EOF packet to flush the decoder
95
+ // Returns AVSUCCESS on success, or other AVERROR on failure
96
+ // Default implementation uses FFmpeg directly
97
+ virtual int sendEOFPacket() {
98
+ TORCH_CHECK(
99
+ codecContext_ != nullptr,
100
+ "Codec context not available for default EOF packet sending");
101
+ return avcodec_send_packet(codecContext_.get(), nullptr);
102
+ }
103
+
104
+ // Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
105
+ // AVERROR_EOF if end of stream, or other AVERROR on failure
106
+ // Default implementation uses FFmpeg directly
107
+ virtual int receiveFrame(UniqueAVFrame& avFrame) {
108
+ TORCH_CHECK(
109
+ codecContext_ != nullptr,
110
+ "Codec context not available for default frame receiving");
111
+ return avcodec_receive_frame(codecContext_.get(), avFrame.get());
112
+ }
113
+
114
+ // Flush remaining frames from decoder
115
+ virtual void flush() {
116
+ TORCH_CHECK(
117
+ codecContext_ != nullptr,
118
+ "Codec context not available for default flushing");
119
+ avcodec_flush_buffers(codecContext_.get());
120
+ }
121
+
122
+ virtual std::string getDetails() {
123
+ return "";
124
+ }
125
+
51
126
  protected:
52
127
  torch::Device device_;
128
+ SharedAVCodecContext codecContext_;
53
129
  };
54
130
 
55
131
  using CreateDeviceInterfaceFn =
56
132
  std::function<DeviceInterface*(const torch::Device& device)>;
57
133
 
58
134
  bool registerDeviceInterface(
59
- torch::DeviceType deviceType,
135
+ const DeviceInterfaceKey& key,
60
136
  const CreateDeviceInterfaceFn createInterface);
61
137
 
62
- torch::Device createTorchDevice(const std::string device);
138
+ void validateDeviceInterface(
139
+ const std::string device,
140
+ const std::string variant);
63
141
 
64
142
  std::unique_ptr<DeviceInterface> createDeviceInterface(
65
- const torch::Device& device);
143
+ const torch::Device& device,
144
+ const std::string_view variant = "ffmpeg");
145
+
146
+ torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
66
147
 
67
148
  } // namespace facebook::torchcodec