PyPI - torchcodec - Versions diffs - 0.4.0__cp313-cp313-macosx_11_0_arm64.whl → 0.5__cp313-cp313-macosx_11_0_arm64.whl - Mend

torchcodec 0.4.0__cp313-cp313-macosx_11_0_arm64.whl → 0.5__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchcodec might be problematic. Click here for more details.

Files changed (52) hide show

torchcodec/.dylibs/libc++.1.0.dylib +0 -0
torchcodec/.dylibs/libpython3.13.dylib +0 -0
torchcodec/_core/AVIOContextHolder.h +3 -2
torchcodec/_core/AVIOTensorContext.cpp +121 -0
torchcodec/_core/AVIOTensorContext.h +43 -0
torchcodec/_core/CMakeLists.txt +27 -20
torchcodec/_core/CpuDeviceInterface.cpp +30 -34
torchcodec/_core/CudaDeviceInterface.cpp +2 -10
torchcodec/_core/Encoder.cpp +233 -93
torchcodec/_core/Encoder.h +86 -20
torchcodec/_core/FFMPEGCommon.cpp +132 -55
torchcodec/_core/FFMPEGCommon.h +12 -3
torchcodec/_core/Metadata.h +12 -10
torchcodec/_core/SingleStreamDecoder.cpp +256 -165
torchcodec/_core/SingleStreamDecoder.h +30 -5
torchcodec/_core/StreamOptions.h +4 -1
torchcodec/_core/_metadata.py +56 -19
torchcodec/_core/custom_ops.cpp +109 -60
torchcodec/_core/ops.py +25 -11
torchcodec/_core/pybind_ops.cpp +5 -1
torchcodec/_frame.py +2 -2
torchcodec/_internally_replaced_utils.py +11 -0
torchcodec/_samplers/video_clip_sampler.py +11 -11
torchcodec/decoders/_audio_decoder.py +3 -2
torchcodec/decoders/_video_decoder.py +7 -2
torchcodec/encoders/__init__.py +1 -0
torchcodec/encoders/_audio_encoder.py +110 -0
torchcodec/libtorchcodec_core4.dylib +0 -0
torchcodec/libtorchcodec_core5.dylib +0 -0
torchcodec/libtorchcodec_core6.dylib +0 -0
torchcodec/libtorchcodec_core7.dylib +0 -0
torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
torchcodec/libtorchcodec_pybind_ops4.so +0 -0
torchcodec/libtorchcodec_pybind_ops5.so +0 -0
torchcodec/libtorchcodec_pybind_ops6.so +0 -0
torchcodec/libtorchcodec_pybind_ops7.so +0 -0
torchcodec/version.py +1 -1
{torchcodec-0.4.0.dist-info → torchcodec-0.5.dist-info}/METADATA +10 -8
torchcodec-0.5.dist-info/RECORD +64 -0
torchcodec/_core/AVIOBytesContext.cpp +0 -137
torchcodec/_core/AVIOBytesContext.h +0 -54
torchcodec/libtorchcodec_decoder4.dylib +0 -0
torchcodec/libtorchcodec_decoder5.dylib +0 -0
torchcodec/libtorchcodec_decoder6.dylib +0 -0
torchcodec/libtorchcodec_decoder7.dylib +0 -0
torchcodec-0.4.0.dist-info/RECORD +0 -62
{torchcodec-0.4.0.dist-info → torchcodec-0.5.dist-info}/WHEEL +0 -0
{torchcodec-0.4.0.dist-info → torchcodec-0.5.dist-info}/licenses/LICENSE +0 -0
{torchcodec-0.4.0.dist-info → torchcodec-0.5.dist-info}/top_level.txt +0 -0

torchcodec/.dylibs/libc++.1.0.dylib CHANGED Viewed

Binary file

torchcodec/.dylibs/libpython3.13.dylib CHANGED Viewed

Binary file

torchcodec/_core/AVIOContextHolder.h CHANGED Viewed

@@ -27,8 +27,9 @@ namespace facebook::torchcodec {
 //           tracks the custom behavior of reading, seeking and writing. It is
 //           provided upon AVIOContext creation and to the read, seek and
 //           write callback functions.
-//      While it's not required, it is natural for the derived classes to make
-//      all of the above members. Base classes need to call
+//      The callback functions do not need to be members of the derived class,
+//      but the derived class must have access to them. The context object must
+//      be a member of the derived class. Derived classes need to call
 //      createAVIOContext(), ideally in their constructor.
 //  3. A generic handle for those that just need to manage having access to an
 //     AVIOContext, but aren't necessarily concerned with how it was customized:

torchcodec/_core/AVIOTensorContext.cpp ADDED Viewed

@@ -0,0 +1,121 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "src/torchcodec/_core/AVIOTensorContext.h"
+#include <torch/types.h>
+namespace facebook::torchcodec {
+namespace {
+constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
+constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
+// The signature of this function is defined by FFMPEG.
+int read(void* opaque, uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  TORCH_CHECK(
+      tensorContext->current <= tensorContext->data.numel(),
+      "Tried to read outside of the buffer: current=",
+      tensorContext->current,
+      ", size=",
+      tensorContext->data.numel());
+  int64_t numBytesRead = std::min(
+      static_cast<int64_t>(buf_size),
+      tensorContext->data.numel() - tensorContext->current);
+  TORCH_CHECK(
+      numBytesRead >= 0,
+      "Tried to read negative bytes: numBytesRead=",
+      numBytesRead,
+      ", size=",
+      tensorContext->data.numel(),
+      ", current=",
+      tensorContext->current);
+  if (numBytesRead == 0) {
+    return AVERROR_EOF;
+  }
+  std::memcpy(
+      buf,
+      tensorContext->data.data_ptr<uint8_t>() + tensorContext->current,
+      numBytesRead);
+  tensorContext->current += numBytesRead;
+  return numBytesRead;
+}
+// The signature of this function is defined by FFMPEG.
+int write(void* opaque, const uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  int64_t bufSize = static_cast<int64_t>(buf_size);
+  if (tensorContext->current + bufSize > tensorContext->data.numel()) {
+    TORCH_CHECK(
+        tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
+        "We tried to allocate an output encoded tensor larger than ",
+        MAX_TENSOR_SIZE,
+        " bytes. If you think this should be supported, please report.");
+    // We double the size of the output tensor. Calling cat() may not be the
+    // most efficient, but it's simple.
+    tensorContext->data =
+        torch::cat({tensorContext->data, tensorContext->data});
+  }
+  TORCH_CHECK(
+      tensorContext->current + bufSize <= tensorContext->data.numel(),
+      "Re-allocation of the output tensor didn't work. ",
+      "This should not happen, please report on TorchCodec bug tracker");
+  uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
+  std::memcpy(outputTensorData + tensorContext->current, buf, bufSize);
+  tensorContext->current += bufSize;
+  return buf_size;
+}
+// The signature of this function is defined by FFMPEG.
+int64_t seek(void* opaque, int64_t offset, int whence) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  int64_t ret = -1;
+  switch (whence) {
+    case AVSEEK_SIZE:
+      ret = tensorContext->data.numel();
+      break;
+    case SEEK_SET:
+      tensorContext->current = offset;
+      ret = offset;
+      break;
+    default:
+      break;
+  }
+  return ret;
+}
+} // namespace
+AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
+    : tensorContext_{data, 0} {
+  TORCH_CHECK(data.numel() > 0, "data must not be empty");
+  TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
+  TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
+  createAVIOContext(&read, nullptr, &seek, &tensorContext_);
+}
+AVIOToTensorContext::AVIOToTensorContext()
+    : tensorContext_{torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}), 0} {
+  createAVIOContext(nullptr, &write, &seek, &tensorContext_);
+}
+torch::Tensor AVIOToTensorContext::getOutputTensor() {
+  return tensorContext_.data.narrow(
+      /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.current);
+}
+} // namespace facebook::torchcodec

torchcodec/_core/AVIOTensorContext.h ADDED Viewed

@@ -0,0 +1,43 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <torch/types.h>
+#include "src/torchcodec/_core/AVIOContextHolder.h"
+namespace facebook::torchcodec {
+namespace detail {
+struct TensorContext {
+  torch::Tensor data;
+  int64_t current;
+};
+} // namespace detail
+// For Decoding: enables users to pass in the entire video or audio as bytes.
+// Our read and seek functions then traverse the bytes in memory.
+class AVIOFromTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOFromTensorContext(torch::Tensor data);
+ private:
+  detail::TensorContext tensorContext_;
+};
+// For Encoding: used to encode into an output uint8 (bytes) tensor.
+class AVIOToTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOToTensorContext();
+  torch::Tensor getOutputTensor();
+ private:
+  detail::TensorContext tensorContext_;
+};
+} // namespace facebook::torchcodec

torchcodec/_core/CMakeLists.txt CHANGED Viewed

@@ -48,65 +48,63 @@ function(make_torchcodec_libraries
     # We create three shared libraries per version of FFmpeg, where the version
     # is denoted by N:
     #
-    # 1. libtorchcodec_decoderN.{ext}: Base library which contains the
+    # 1. libtorchcodec_coreN.{ext}: Base library which contains the
     #    implementation of VideoDecoder and everything VideoDecoder needs. On
     #    Linux, {ext} is so. On Mac, it is dylib.
     #
     # 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom
-    #    ops. Depends on libtorchcodec_decoderN.{ext}. On Linux, {ext} is so.
+    #    ops. Depends on libtorchcodec_coreN.{ext}. On Linux, {ext} is so.
     #    On Mac, it is dylib.
     #
     # 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We
     #    keep these separate from the PyTorch custom ops because we have to
     #    load these libraries separately on the Python side. Depends on
-    #    libtorchcodec_decoderN.{ext}. On BOTH Linux and Mac {ext} is so.
+    #    libtorchcodec_coreN.{ext}. On BOTH Linux and Mac {ext} is so.
-    # 1. Create libtorchcodec_decoderN.{ext}.
-    set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
-    set(decoder_sources
+    # 1. Create libtorchcodec_coreN.{ext}.
+    set(core_library_name "libtorchcodec_core${ffmpeg_major_version}")
+    set(core_sources
         AVIOContextHolder.cpp
-        AVIOBytesContext.cpp
+        AVIOTensorContext.cpp
         FFMPEGCommon.cpp
         Frame.cpp
         DeviceInterface.cpp
         CpuDeviceInterface.cpp
         SingleStreamDecoder.cpp
-        # TODO: lib name should probably not be "*_decoder*" now that it also
-        # contains an encoder
         Encoder.cpp
     )
     if(ENABLE_CUDA)
-	    list(APPEND decoder_sources CudaDeviceInterface.cpp)
+	    list(APPEND core_sources CudaDeviceInterface.cpp)
     endif()
-    set(decoder_library_dependencies
+    set(core_library_dependencies
         ${ffmpeg_target}
         ${TORCH_LIBRARIES}
     )
     if(ENABLE_CUDA)
-        list(APPEND decoder_library_dependencies
+        list(APPEND core_library_dependencies
             ${CUDA_nppi_LIBRARY}
             ${CUDA_nppicc_LIBRARY}
         )
     endif()
     make_torchcodec_sublibrary(
-        "${decoder_library_name}"
+        "${core_library_name}"
         SHARED
-        "${decoder_sources}"
-        "${decoder_library_dependencies}"
+        "${core_sources}"
+        "${core_library_dependencies}"
     )
     # 2. Create libtorchcodec_custom_opsN.{ext}.
     set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
     set(custom_ops_sources
-        AVIOBytesContext.cpp
+        AVIOTensorContext.cpp
         custom_ops.cpp
     )
     set(custom_ops_dependencies
-        ${decoder_library_name}
+        ${core_library_name}
         ${Python3_LIBRARIES}
     )
     make_torchcodec_sublibrary(
@@ -123,7 +121,7 @@ function(make_torchcodec_libraries
         pybind_ops.cpp
     )
     set(pybind_ops_dependencies
-       ${decoder_library_name}
+       ${core_library_name}
        pybind11::module # This library dependency makes sure we have the right
                         # Python libraries included as well as all of the right
                         # settings so that we can successfully load the shared
@@ -151,6 +149,15 @@ function(make_torchcodec_libraries
         PUBLIC
       "-fvisibility=hidden"
     )
+    # The value we use here must match the value we return from
+    # _get_pybind_ops_module_name() on the Python side. If the values do not
+    # match, then we will be unable to import the C++ shared library as a
+    # Python module at runtime.
+    target_compile_definitions(
+        ${pybind_ops_library_name}
+        PRIVATE
+        PYBIND_OPS_MODULE_NAME=core_pybind_ops
+    )
     # If we don't make sure this flag is set, we run into segfaults at import
     # time on Mac. See:
     #    https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764
@@ -163,7 +170,7 @@ function(make_torchcodec_libraries
     # Install all libraries.
     set(
         all_libraries
-        ${decoder_library_name}
+        ${core_library_name}
         ${custom_ops_library_name}
         ${pybind_ops_library_name}
     )
@@ -240,7 +247,7 @@ else()
     # Expose these values upwards so that the test compilation does not need
     # to re-figure it out. FIXME: it's not great that we just copy-paste the
     # library names.
-    set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE)
+    set(libtorchcodec_library_name "libtorchcodec_core${ffmpeg_major_version}" PARENT_SCOPE)
     set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE)
     set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
 endif()

torchcodec/_core/CpuDeviceInterface.cpp CHANGED Viewed

@@ -37,9 +37,8 @@ bool CpuDeviceInterface::DecodedFrameContext::operator!=(
 CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
     : DeviceInterface(device) {
   TORCH_CHECK(g_cpu, "CpuDeviceInterface was not registered!");
-  if (device_.type() != torch::kCPU) {
-    throw std::runtime_error("Unsupported device: " + device_.str());
-  }
+  TORCH_CHECK(
+      device_.type() == torch::kCPU, "Unsupported device: ", device_.str());
 }
 // Note [preAllocatedOutputTensor with swscale and filtergraph]:
@@ -161,9 +160,10 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
       frameOutput.data = outputTensor;
     }
   } else {
-    throw std::runtime_error(
-        "Invalid color conversion library: " +
-        std::to_string(static_cast<int>(colorConversionLibrary)));
+    TORCH_CHECK(
+        false,
+        "Invalid color conversion library: ",
+        static_cast<int>(colorConversionLibrary));
   }
 }
@@ -189,9 +189,8 @@ torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
     const UniqueAVFrame& avFrame) {
   int status = av_buffersrc_write_frame(
       filterGraphContext_.sourceContext, avFrame.get());
-  if (status < AVSUCCESS) {
-    throw std::runtime_error("Failed to add frame to buffer source context");
-  }
+  TORCH_CHECK(
+      status >= AVSUCCESS, "Failed to add frame to buffer source context");
   UniqueAVFrame filteredAVFrame(av_frame_alloc());
   status = av_buffersink_get_frame(
@@ -241,11 +240,12 @@ void CpuDeviceInterface::createFilterGraph(
       filterArgs.str().c_str(),
       nullptr,
       filterGraphContext_.filterGraph.get());
-  if (status < 0) {
-    throw std::runtime_error(
-        std::string("Failed to create filter graph: ") + filterArgs.str() +
-        ": " + getFFMPEGErrorStringFromErrorCode(status));
-  }
+  TORCH_CHECK(
+      status >= 0,
+      "Failed to create filter graph: ",
+      filterArgs.str(),
+      ": ",
+      getFFMPEGErrorStringFromErrorCode(status));
   status = avfilter_graph_create_filter(
       &filterGraphContext_.sinkContext,
@@ -254,11 +254,10 @@ void CpuDeviceInterface::createFilterGraph(
       nullptr,
       nullptr,
       filterGraphContext_.filterGraph.get());
-  if (status < 0) {
-    throw std::runtime_error(
-        "Failed to create filter graph: " +
-        getFFMPEGErrorStringFromErrorCode(status));
-  }
+  TORCH_CHECK(
+      status >= 0,
+      "Failed to create filter graph: ",
+      getFFMPEGErrorStringFromErrorCode(status));
   enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE};
@@ -268,11 +267,10 @@ void CpuDeviceInterface::createFilterGraph(
       pix_fmts,
       AV_PIX_FMT_NONE,
       AV_OPT_SEARCH_CHILDREN);
-  if (status < 0) {
-    throw std::runtime_error(
-        "Failed to set output pixel formats: " +
-        getFFMPEGErrorStringFromErrorCode(status));
-  }
+  TORCH_CHECK(
+      status >= 0,
+      "Failed to set output pixel formats: ",
+      getFFMPEGErrorStringFromErrorCode(status));
   UniqueAVFilterInOut outputs(avfilter_inout_alloc());
   UniqueAVFilterInOut inputs(avfilter_inout_alloc());
@@ -301,19 +299,17 @@ void CpuDeviceInterface::createFilterGraph(
       nullptr);
   outputs.reset(outputsTmp);
   inputs.reset(inputsTmp);
-  if (status < 0) {
-    throw std::runtime_error(
-        "Failed to parse filter description: " +
-        getFFMPEGErrorStringFromErrorCode(status));
-  }
+  TORCH_CHECK(
+      status >= 0,
+      "Failed to parse filter description: ",
+      getFFMPEGErrorStringFromErrorCode(status));
   status =
       avfilter_graph_config(filterGraphContext_.filterGraph.get(), nullptr);
-  if (status < 0) {
-    throw std::runtime_error(
-        "Failed to configure filter graph: " +
-        getFFMPEGErrorStringFromErrorCode(status));
-  }
+  TORCH_CHECK(
+      status >= 0,
+      "Failed to configure filter graph: ",
+      getFFMPEGErrorStringFromErrorCode(status));
 }
 void CpuDeviceInterface::createSwsContext(

torchcodec/_core/CudaDeviceInterface.cpp CHANGED Viewed

@@ -166,9 +166,8 @@ AVBufferRef* getCudaContext(const torch::Device& device) {
 CudaDeviceInterface::CudaDeviceInterface(const torch::Device& device)
     : DeviceInterface(device) {
   TORCH_CHECK(g_cuda, "CudaDeviceInterface was not registered!");
-  if (device_.type() != torch::kCUDA) {
-    throw std::runtime_error("Unsupported device: " + device_.str());
-  }
+  TORCH_CHECK(
+      device_.type() == torch::kCUDA, "Unsupported device: ", device_.str());
 }
 CudaDeviceInterface::~CudaDeviceInterface() {
@@ -228,7 +227,6 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
   NppiSize oSizeROI = {width, height};
   Npp8u* input[2] = {avFrame->data[0], avFrame->data[1]};
-  auto start = std::chrono::high_resolution_clock::now();
   NppStatus status;
   if (avFrame->colorspace == AVColorSpace::AVCOL_SPC_BT709) {
     status = nppiNV12ToRGB_709CSC_8u_P2C3R(
@@ -254,12 +252,6 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
       c10::cuda::getStreamFromExternal(nppGetStream(), device_.index());
   nppDoneEvent.record(nppStreamWrapper);
   nppDoneEvent.block(at::cuda::getCurrentCUDAStream());
-  auto end = std::chrono::high_resolution_clock::now();
-  std::chrono::duration<double, std::micro> duration = end - start;
-  VLOG(9) << "NPP Conversion of frame height=" << height << " width=" << width
-          << " took: " << duration.count() << "us" << std::endl;
 }
 // inspired by https://github.com/FFmpeg/FFmpeg/commit/ad67ea9