PyPI - torchcodec - Versions diffs - 0.7.0__cp313-cp313-win_amd64.whl → 0.8.1__cp313-cp313-win_amd64.whl - Mend

torchcodec 0.7.0__cp313-cp313-win_amd64.whl → 0.8.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchcodec might be problematic. Click here for more details.

Files changed (66)

torchcodec/_core/AVIOTensorContext.cpp +23 -16
torchcodec/_core/AVIOTensorContext.h +2 -1
torchcodec/_core/BetaCudaDeviceInterface.cpp +718 -0
torchcodec/_core/BetaCudaDeviceInterface.h +193 -0
torchcodec/_core/CMakeLists.txt +18 -3
torchcodec/_core/CUDACommon.cpp +330 -0
torchcodec/_core/CUDACommon.h +51 -0
torchcodec/_core/Cache.h +6 -20
torchcodec/_core/CpuDeviceInterface.cpp +195 -108
torchcodec/_core/CpuDeviceInterface.h +84 -19
torchcodec/_core/CudaDeviceInterface.cpp +227 -376
torchcodec/_core/CudaDeviceInterface.h +38 -6
torchcodec/_core/DeviceInterface.cpp +57 -19
torchcodec/_core/DeviceInterface.h +97 -16
torchcodec/_core/Encoder.cpp +346 -9
torchcodec/_core/Encoder.h +62 -1
torchcodec/_core/FFMPEGCommon.cpp +190 -3
torchcodec/_core/FFMPEGCommon.h +27 -1
torchcodec/_core/FilterGraph.cpp +30 -22
torchcodec/_core/FilterGraph.h +15 -1
torchcodec/_core/Frame.cpp +22 -7
torchcodec/_core/Frame.h +15 -61
torchcodec/_core/Metadata.h +2 -2
torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
torchcodec/_core/NVDECCache.cpp +60 -0
torchcodec/_core/NVDECCache.h +102 -0
torchcodec/_core/SingleStreamDecoder.cpp +196 -201
torchcodec/_core/SingleStreamDecoder.h +42 -15
torchcodec/_core/StreamOptions.h +16 -6
torchcodec/_core/Transform.cpp +87 -0
torchcodec/_core/Transform.h +84 -0
torchcodec/_core/__init__.py +4 -0
torchcodec/_core/custom_ops.cpp +257 -32
torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +61 -1
torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
torchcodec/_core/ops.py +147 -44
torchcodec/_core/pybind_ops.cpp +22 -59
torchcodec/_samplers/video_clip_sampler.py +7 -19
torchcodec/decoders/__init__.py +1 -0
torchcodec/decoders/_decoder_utils.py +61 -1
torchcodec/decoders/_video_decoder.py +46 -20
torchcodec/libtorchcodec_core4.dll +0 -0
torchcodec/libtorchcodec_core5.dll +0 -0
torchcodec/libtorchcodec_core6.dll +0 -0
torchcodec/libtorchcodec_core7.dll +0 -0
torchcodec/libtorchcodec_core8.dll +0 -0
torchcodec/libtorchcodec_custom_ops4.dll +0 -0
torchcodec/libtorchcodec_custom_ops5.dll +0 -0
torchcodec/libtorchcodec_custom_ops6.dll +0 -0
torchcodec/libtorchcodec_custom_ops7.dll +0 -0
torchcodec/libtorchcodec_custom_ops8.dll +0 -0
torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
torchcodec/samplers/_time_based.py +8 -0
torchcodec/version.py +1 -1
{torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +29 -16
torchcodec-0.8.1.dist-info/RECORD +82 -0
{torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +1 -1
torchcodec-0.7.0.dist-info/RECORD +0 -67
{torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
{torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0

torchcodec/_core/CudaDeviceInterface.h (changed)

@@ -6,8 +6,9 @@
 #pragma once
-#include <npp.h>
+#include "src/torchcodec/_core/CUDACommon.h"
 #include "src/torchcodec/_core/DeviceInterface.h"
+#include "src/torchcodec/_core/FilterGraph.h"
 namespace facebook::torchcodec {
@@ -19,19 +20,50 @@ class CudaDeviceInterface : public DeviceInterface {
   std::optional<const AVCodec*> findCodec(const AVCodecID& codecId) override;
-  void initializeContext(AVCodecContext* codecContext) override;
+  void initialize(
+      const AVStream* avStream,
+      const UniqueDecodingAVFormatContext& avFormatCtx,
+      const SharedAVCodecContext& codecContext) override;
-  void convertAVFrameToFrameOutput(
+  void initializeVideo(
       const VideoStreamOptions& videoStreamOptions,
-      const AVRational& timeBase,
+      [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
+          transforms,
+      [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims)
+      override;
+  void registerHardwareDeviceWithCodec(AVCodecContext* codecContext) override;
+  void convertAVFrameToFrameOutput(
       UniqueAVFrame& avFrame,
       FrameOutput& frameOutput,
       std::optional<torch::Tensor> preAllocatedOutputTensor =
           std::nullopt) override;
+  std::string getDetails() override;
  private:
-  UniqueAVBufferRef ctx_;
-  std::unique_ptr<NppStreamContext> nppCtx_;
+  // Our CUDA decoding code assumes NV12 format. In order to handle other
+  // kinds of input, we need to convert them to NV12. Our current implementation
+  // does this using filtergraph.
+  UniqueAVFrame maybeConvertAVFrameToNV12OrRGB24(UniqueAVFrame& avFrame);
+  // We sometimes encounter frames that cannot be decoded on the CUDA device.
+  // Rather than erroring out, we decode them on the CPU.
+  std::unique_ptr<DeviceInterface> cpuInterface_;
+  VideoStreamOptions videoStreamOptions_;
+  AVRational timeBase_;
+  UniqueAVBufferRef hardwareDeviceCtx_;
+  UniqueNppContext nppCtx_;
+  // This filtergraph instance is only used for NV12 format conversion in
+  // maybeConvertAVFrameToNV12().
+  std::unique_ptr<FiltersContext> nv12ConversionContext_;
+  std::unique_ptr<FilterGraph> nv12Conversion_;
+  bool usingCPUFallback_ = false;
 };
 } // namespace facebook::torchcodec

torchcodec/_core/DeviceInterface.cpp (changed)

@@ -11,7 +11,8 @@
 namespace facebook::torchcodec {
 namespace {
-using DeviceInterfaceMap = std::map<torch::DeviceType, CreateDeviceInterfaceFn>;
+using DeviceInterfaceMap =
+    std::map<DeviceInterfaceKey, CreateDeviceInterfaceFn>;
 static std::mutex g_interface_mutex;
 DeviceInterfaceMap& getDeviceMap() {
@@ -30,50 +31,87 @@ std::string getDeviceType(const std::string& device) {
 } // namespace
 bool registerDeviceInterface(
-    torch::DeviceType deviceType,
+    const DeviceInterfaceKey& key,
     CreateDeviceInterfaceFn createInterface) {
   std::scoped_lock lock(g_interface_mutex);
   DeviceInterfaceMap& deviceMap = getDeviceMap();
   TORCH_CHECK(
-      deviceMap.find(deviceType) == deviceMap.end(),
-      "Device interface already registered for ",
-      deviceType);
-  deviceMap.insert({deviceType, createInterface});
+      deviceMap.find(key) == deviceMap.end(),
+      "Device interface already registered for device type ",
+      key.deviceType,
+      " variant '",
+      key.variant,
+      "'");
+  deviceMap.insert({key, createInterface});
   return true;
 }
-torch::Device createTorchDevice(const std::string device) {
+void validateDeviceInterface(
+    const std::string device,
+    const std::string variant) {
   std::scoped_lock lock(g_interface_mutex);
   std::string deviceType = getDeviceType(device);
   DeviceInterfaceMap& deviceMap = getDeviceMap();
+  // Find device interface that matches device type and variant
+  torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type();
   auto deviceInterface = std::find_if(
       deviceMap.begin(),
       deviceMap.end(),
-      [&](const std::pair<torch::DeviceType, CreateDeviceInterfaceFn>& arg) {
-        return device.rfind(
-                   torch::DeviceTypeName(arg.first, /*lcase*/ true), 0) == 0;
+      [&](const std::pair<DeviceInterfaceKey, CreateDeviceInterfaceFn>& arg) {
+        return arg.first.deviceType == deviceTypeEnum &&
+            arg.first.variant == variant;
       });
-  TORCH_CHECK(
-      deviceInterface != deviceMap.end(), "Unsupported device: ", device);
-  return torch::Device(device);
+  TORCH_CHECK(
+      deviceInterface != deviceMap.end(),
+      "Unsupported device: ",
+      device,
+      " (device type: ",
+      deviceType,
+      ", variant: ",
+      variant,
+      ")");
 }
 std::unique_ptr<DeviceInterface> createDeviceInterface(
-    const torch::Device& device) {
-  auto deviceType = device.type();
+    const torch::Device& device,
+    const std::string_view variant) {
+  DeviceInterfaceKey key(device.type(), variant);
   std::scoped_lock lock(g_interface_mutex);
   DeviceInterfaceMap& deviceMap = getDeviceMap();
+  auto it = deviceMap.find(key);
+  if (it != deviceMap.end()) {
+    return std::unique_ptr<DeviceInterface>(it->second(device));
+  }
   TORCH_CHECK(
-      deviceMap.find(deviceType) != deviceMap.end(),
-      "Unsupported device: ",
-      device);
+      false,
+      "No device interface found for device type: ",
+      device.type(),
+      " variant: '",
+      variant,
+      "'");
+}
-  return std::unique_ptr<DeviceInterface>(deviceMap[deviceType](device));
+torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) {
+  TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
+  int height = avFrame->height;
+  int width = avFrame->width;
+  std::vector<int64_t> shape = {height, width, 3};
+  std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
+  AVFrame* avFrameClone = av_frame_clone(avFrame.get());
+  auto deleter = [avFrameClone](void*) {
+    UniqueAVFrame avFrameToDelete(avFrameClone);
+  };
+  return torch::from_blob(
+      avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
 }
 } // namespace facebook::torchcodec

torchcodec/_core/DeviceInterface.h (changed)

@@ -14,16 +14,27 @@
 #include "FFMPEGCommon.h"
 #include "src/torchcodec/_core/Frame.h"
 #include "src/torchcodec/_core/StreamOptions.h"
+#include "src/torchcodec/_core/Transform.h"
 namespace facebook::torchcodec {
-// Note that all these device functions should only be called if the device is
-// not a CPU device. CPU device functions are already implemented in the
-// SingleStreamDecoder implementation.
-// These functions should only be called from within an if block like this:
-// if (device.type() != torch::kCPU) {
-//   deviceFunction(device, ...);
-// }
+// Key for device interface registration with device type + variant support
+struct DeviceInterfaceKey {
+  torch::DeviceType deviceType;
+  std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
+  bool operator<(const DeviceInterfaceKey& other) const {
+    if (deviceType != other.deviceType) {
+      return deviceType < other.deviceType;
+    }
+    return variant < other.variant;
+  }
+  explicit DeviceInterfaceKey(torch::DeviceType type) : deviceType(type) {}
+  DeviceInterfaceKey(torch::DeviceType type, const std::string_view& variant)
+      : deviceType(type), variant(variant) {}
+};
 class DeviceInterface {
  public:
@@ -35,33 +46,103 @@ class DeviceInterface {
     return device_;
   };
-  virtual std::optional<const AVCodec*> findCodec(const AVCodecID& codecId) = 0;
+  virtual std::optional<const AVCodec*> findCodec(
+      [[maybe_unused]] const AVCodecID& codecId) {
+    return std::nullopt;
+  };
-  // Initialize the hardware device that is specified in `device`. Some builds
-  // support CUDA and others only support CPU.
-  virtual void initializeContext(AVCodecContext* codecContext) = 0;
+  // Initialize the device with parameters generic to all kinds of decoding.
+  virtual void initialize(
+      const AVStream* avStream,
+      const UniqueDecodingAVFormatContext& avFormatCtx,
+      const SharedAVCodecContext& codecContext) = 0;
+  // Initialize the device with parameters specific to video decoding. There is
+  // a default empty implementation.
+  virtual void initializeVideo(
+      [[maybe_unused]] const VideoStreamOptions& videoStreamOptions,
+      [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
+          transforms,
+      [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims) {}
+  // In order for decoding to actually happen on an FFmpeg managed hardware
+  // device, we need to register the DeviceInterface managed
+  // AVHardwareDeviceContext with the AVCodecContext. We don't need to do this
+  // on the CPU and if FFmpeg is not managing the hardware device.
+  virtual void registerHardwareDeviceWithCodec(
+      [[maybe_unused]] AVCodecContext* codecContext) {}
   virtual void convertAVFrameToFrameOutput(
-      const VideoStreamOptions& videoStreamOptions,
-      const AVRational& timeBase,
       UniqueAVFrame& avFrame,
       FrameOutput& frameOutput,
       std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
+  // ------------------------------------------
+  // Extension points for custom decoding paths
+  // ------------------------------------------
+  // Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
+  // other AVERROR on failure
+  // Default implementation uses FFmpeg directly
+  virtual int sendPacket(ReferenceAVPacket& avPacket) {
+    TORCH_CHECK(
+        codecContext_ != nullptr,
+        "Codec context not available for default packet sending");
+    return avcodec_send_packet(codecContext_.get(), avPacket.get());
+  }
+  // Send an EOF packet to flush the decoder
+  // Returns AVSUCCESS on success, or other AVERROR on failure
+  // Default implementation uses FFmpeg directly
+  virtual int sendEOFPacket() {
+    TORCH_CHECK(
+        codecContext_ != nullptr,
+        "Codec context not available for default EOF packet sending");
+    return avcodec_send_packet(codecContext_.get(), nullptr);
+  }
+  // Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
+  // AVERROR_EOF if end of stream, or other AVERROR on failure
+  // Default implementation uses FFmpeg directly
+  virtual int receiveFrame(UniqueAVFrame& avFrame) {
+    TORCH_CHECK(
+        codecContext_ != nullptr,
+        "Codec context not available for default frame receiving");
+    return avcodec_receive_frame(codecContext_.get(), avFrame.get());
+  }
+  // Flush remaining frames from decoder
+  virtual void flush() {
+    TORCH_CHECK(
+        codecContext_ != nullptr,
+        "Codec context not available for default flushing");
+    avcodec_flush_buffers(codecContext_.get());
+  }
+  virtual std::string getDetails() {
+    return "";
+  }
  protected:
   torch::Device device_;
+  SharedAVCodecContext codecContext_;
 };
 using CreateDeviceInterfaceFn =
     std::function<DeviceInterface*(const torch::Device& device)>;
 bool registerDeviceInterface(
-    torch::DeviceType deviceType,
+    const DeviceInterfaceKey& key,
     const CreateDeviceInterfaceFn createInterface);
-torch::Device createTorchDevice(const std::string device);
+void validateDeviceInterface(
+    const std::string device,
+    const std::string variant);
 std::unique_ptr<DeviceInterface> createDeviceInterface(
-    const torch::Device& device);
+    const torch::Device& device,
+    const std::string_view variant = "ffmpeg");
+torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
 } // namespace facebook::torchcodec