PyPI - torchcodec - Versions diffs - 0.3.0__cp311-cp311-macosx_11_0_arm64.whl → 0.5__cp311-cp311-macosx_11_0_arm64.whl - Mend

torchcodec 0.3.0__cp311-cp311-macosx_11_0_arm64.whl → 0.5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchcodec might be problematic. Click here for more details.

Files changed (61) hide show

torchcodec/.dylibs/libc++.1.0.dylib +0 -0
torchcodec/.dylibs/libpython3.11.dylib +0 -0
torchcodec/_core/AVIOContextHolder.cpp +8 -3
torchcodec/_core/AVIOContextHolder.h +7 -9
torchcodec/_core/AVIOFileLikeContext.cpp +1 -1
torchcodec/_core/AVIOTensorContext.cpp +121 -0
torchcodec/_core/AVIOTensorContext.h +43 -0
torchcodec/_core/CMakeLists.txt +38 -22
torchcodec/_core/CpuDeviceInterface.cpp +360 -0
torchcodec/_core/CpuDeviceInterface.h +80 -0
torchcodec/_core/CudaDeviceInterface.cpp +5 -12
torchcodec/_core/CudaDeviceInterface.h +1 -0
torchcodec/_core/DeviceInterface.cpp +20 -29
torchcodec/_core/DeviceInterface.h +1 -0
torchcodec/_core/Encoder.cpp +297 -110
torchcodec/_core/Encoder.h +96 -14
torchcodec/_core/FFMPEGCommon.cpp +195 -46
torchcodec/_core/FFMPEGCommon.h +44 -12
torchcodec/_core/Frame.cpp +32 -0
torchcodec/_core/Frame.h +71 -0
torchcodec/_core/Metadata.h +12 -10
torchcodec/_core/SingleStreamDecoder.cpp +335 -567
torchcodec/_core/SingleStreamDecoder.h +30 -112
torchcodec/_core/StreamOptions.h +4 -0
torchcodec/_core/__init__.py +2 -2
torchcodec/_core/_metadata.py +59 -19
torchcodec/_core/custom_ops.cpp +137 -91
torchcodec/_core/ops.py +38 -20
torchcodec/_core/pybind_ops.cpp +5 -1
torchcodec/_frame.py +2 -2
torchcodec/_internally_replaced_utils.py +11 -0
torchcodec/_samplers/video_clip_sampler.py +11 -11
torchcodec/decoders/_audio_decoder.py +11 -4
torchcodec/decoders/_video_decoder.py +7 -2
torchcodec/encoders/__init__.py +1 -0
torchcodec/encoders/_audio_encoder.py +110 -0
torchcodec/libtorchcodec_core4.dylib +0 -0
torchcodec/libtorchcodec_core5.dylib +0 -0
torchcodec/libtorchcodec_core6.dylib +0 -0
torchcodec/libtorchcodec_core7.dylib +0 -0
torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
torchcodec/libtorchcodec_pybind_ops4.so +0 -0
torchcodec/libtorchcodec_pybind_ops5.so +0 -0
torchcodec/libtorchcodec_pybind_ops6.so +0 -0
torchcodec/libtorchcodec_pybind_ops7.so +0 -0
torchcodec/version.py +1 -1
{torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info}/METADATA +13 -24
torchcodec-0.5.dist-info/RECORD +64 -0
{torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info}/WHEEL +1 -1
torchcodec/_core/AVIOBytesContext.cpp +0 -70
torchcodec/_core/AVIOBytesContext.h +0 -32
torchcodec/libtorchcodec_decoder4.dylib +0 -0
torchcodec/libtorchcodec_decoder5.dylib +0 -0
torchcodec/libtorchcodec_decoder6.dylib +0 -0
torchcodec/libtorchcodec_decoder7.dylib +0 -0
torchcodec-0.3.0.dist-info/RECORD +0 -59
{torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info/licenses}/LICENSE +0 -0
{torchcodec-0.3.0.dist-info → torchcodec-0.5.dist-info}/top_level.txt +0 -0

torchcodec/.dylibs/libc++.1.0.dylib CHANGED Viewed

Binary file

torchcodec/.dylibs/libpython3.11.dylib CHANGED Viewed

Binary file

torchcodec/_core/AVIOContextHolder.cpp CHANGED Viewed

@@ -11,6 +11,7 @@ namespace facebook::torchcodec {
 void AVIOContextHolder::createAVIOContext(
     AVIOReadFunction read,
+    AVIOWriteFunction write,
     AVIOSeekFunction seek,
     void* heldData,
     int bufferSize) {
@@ -22,13 +23,17 @@ void AVIOContextHolder::createAVIOContext(
       buffer != nullptr,
       "Failed to allocate buffer of size " + std::to_string(bufferSize));
-  avioContext_.reset(avio_alloc_context(
+  TORCH_CHECK(
+      (seek != nullptr) && ((write != nullptr) ^ (read != nullptr)),
+      "seek method must be defined, and either write or read must be defined. "
+      "But not both!")
+  avioContext_.reset(avioAllocContext(
       buffer,
       bufferSize,
-      0,
+      /*write_flag=*/write != nullptr,
       heldData,
       read,
-      nullptr, // write function; not supported yet
+      write,
       seek));
   if (!avioContext_) {

torchcodec/_core/AVIOContextHolder.h CHANGED Viewed

@@ -19,16 +19,17 @@ namespace facebook::torchcodec {
 //      freed.
 //   2. It is a base class for AVIOContext specializations. When specializing an
 //      AVIOContext, we need to provide four things:
-//        1. A read callback function.
-//        2. A seek callback function.
-//        3. A write callback function. (Not supported yet; it's for encoding.)
+//        1. A read callback function, for decoding.
+//        2. A seek callback function, for decoding and encoding.
+//        3. A write callback function, for encoding.
 //        4. A pointer to some context object that has the same lifetime as the
 //           AVIOContext itself. This context object holds the custom state that
 //           tracks the custom behavior of reading, seeking and writing. It is
 //           provided upon AVIOContext creation and to the read, seek and
 //           write callback functions.
-//      While it's not required, it is natural for the derived classes to make
-//      all of the above members. Base classes need to call
+//      The callback functions do not need to be members of the derived class,
+//      but the derived class must have access to them. The context object must
+//      be a member of the derived class. Derived classes need to call
 //      createAVIOContext(), ideally in their constructor.
 //  3. A generic handle for those that just need to manage having access to an
 //     AVIOContext, but aren't necessarily concerned with how it was customized:
@@ -44,13 +45,10 @@ class AVIOContextHolder {
   // enforced by having pure virtual methods, but we don't have any.)
   AVIOContextHolder() = default;
-  // These signatures are defined by FFmpeg.
-  using AVIOReadFunction = int (*)(void*, uint8_t*, int);
-  using AVIOSeekFunction = int64_t (*)(void*, int64_t, int);
   // Deriving classes should call this function in their constructor.
   void createAVIOContext(
       AVIOReadFunction read,
+      AVIOWriteFunction write,
       AVIOSeekFunction seek,
       void* heldData,
       int bufferSize = defaultBufferSize);

torchcodec/_core/AVIOFileLikeContext.cpp CHANGED Viewed

@@ -23,7 +23,7 @@ AVIOFileLikeContext::AVIOFileLikeContext(py::object fileLike)
         py::hasattr(fileLike, "seek"),
         "File like object must implement a seek method.");
   }
-  createAVIOContext(&read, &seek, &fileLike_);
+  createAVIOContext(&read, nullptr, &seek, &fileLike_);
 }
 int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {

torchcodec/_core/AVIOTensorContext.cpp ADDED Viewed

@@ -0,0 +1,121 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "src/torchcodec/_core/AVIOTensorContext.h"
+#include <torch/types.h>
+namespace facebook::torchcodec {
+namespace {
+constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
+constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
+// The signature of this function is defined by FFMPEG.
+int read(void* opaque, uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  TORCH_CHECK(
+      tensorContext->current <= tensorContext->data.numel(),
+      "Tried to read outside of the buffer: current=",
+      tensorContext->current,
+      ", size=",
+      tensorContext->data.numel());
+  int64_t numBytesRead = std::min(
+      static_cast<int64_t>(buf_size),
+      tensorContext->data.numel() - tensorContext->current);
+  TORCH_CHECK(
+      numBytesRead >= 0,
+      "Tried to read negative bytes: numBytesRead=",
+      numBytesRead,
+      ", size=",
+      tensorContext->data.numel(),
+      ", current=",
+      tensorContext->current);
+  if (numBytesRead == 0) {
+    return AVERROR_EOF;
+  }
+  std::memcpy(
+      buf,
+      tensorContext->data.data_ptr<uint8_t>() + tensorContext->current,
+      numBytesRead);
+  tensorContext->current += numBytesRead;
+  return numBytesRead;
+}
+// The signature of this function is defined by FFMPEG.
+int write(void* opaque, const uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  int64_t bufSize = static_cast<int64_t>(buf_size);
+  if (tensorContext->current + bufSize > tensorContext->data.numel()) {
+    TORCH_CHECK(
+        tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
+        "We tried to allocate an output encoded tensor larger than ",
+        MAX_TENSOR_SIZE,
+        " bytes. If you think this should be supported, please report.");
+    // We double the size of the output tensor. Calling cat() may not be the
+    // most efficient, but it's simple.
+    tensorContext->data =
+        torch::cat({tensorContext->data, tensorContext->data});
+  }
+  TORCH_CHECK(
+      tensorContext->current + bufSize <= tensorContext->data.numel(),
+      "Re-allocation of the output tensor didn't work. ",
+      "This should not happen, please report on TorchCodec bug tracker");
+  uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
+  std::memcpy(outputTensorData + tensorContext->current, buf, bufSize);
+  tensorContext->current += bufSize;
+  return buf_size;
+}
+// The signature of this function is defined by FFMPEG.
+int64_t seek(void* opaque, int64_t offset, int whence) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  int64_t ret = -1;
+  switch (whence) {
+    case AVSEEK_SIZE:
+      ret = tensorContext->data.numel();
+      break;
+    case SEEK_SET:
+      tensorContext->current = offset;
+      ret = offset;
+      break;
+    default:
+      break;
+  }
+  return ret;
+}
+} // namespace
+AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
+    : tensorContext_{data, 0} {
+  TORCH_CHECK(data.numel() > 0, "data must not be empty");
+  TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
+  TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
+  createAVIOContext(&read, nullptr, &seek, &tensorContext_);
+}
+AVIOToTensorContext::AVIOToTensorContext()
+    : tensorContext_{torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}), 0} {
+  createAVIOContext(nullptr, &write, &seek, &tensorContext_);
+}
+torch::Tensor AVIOToTensorContext::getOutputTensor() {
+  return tensorContext_.data.narrow(
+      /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.current);
+}
+} // namespace facebook::torchcodec

torchcodec/_core/AVIOTensorContext.h ADDED Viewed

@@ -0,0 +1,43 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <torch/types.h>
+#include "src/torchcodec/_core/AVIOContextHolder.h"
+namespace facebook::torchcodec {
+namespace detail {
+struct TensorContext {
+  torch::Tensor data;
+  int64_t current;
+};
+} // namespace detail
+// For Decoding: enables users to pass in the entire video or audio as bytes.
+// Our read and seek functions then traverse the bytes in memory.
+class AVIOFromTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOFromTensorContext(torch::Tensor data);
+ private:
+  detail::TensorContext tensorContext_;
+};
+// For Encoding: used to encode into an output uint8 (bytes) tensor.
+class AVIOToTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOToTensorContext();
+  torch::Tensor getOutputTensor();
+ private:
+  detail::TensorContext tensorContext_;
+};
+} // namespace facebook::torchcodec

torchcodec/_core/CMakeLists.txt CHANGED Viewed

@@ -8,7 +8,13 @@ find_package(pybind11 REQUIRED)
 find_package(Torch REQUIRED)
 find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
+if(DEFINED TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR AND TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR)
+    set(TORCHCODEC_WERROR_OPTION "")
+else()
+    set(TORCHCODEC_WERROR_OPTION "-Werror")
+endif()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic ${TORCHCODEC_WERROR_OPTION} ${TORCH_CXX_FLAGS}")
 function(make_torchcodec_sublibrary
     library_name
@@ -42,62 +48,63 @@ function(make_torchcodec_libraries
     # We create three shared libraries per version of FFmpeg, where the version
     # is denoted by N:
     #
-    # 1. libtorchcodec_decoderN.{ext}: Base library which contains the
+    # 1. libtorchcodec_coreN.{ext}: Base library which contains the
     #    implementation of VideoDecoder and everything VideoDecoder needs. On
     #    Linux, {ext} is so. On Mac, it is dylib.
     #
     # 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom
-    #    ops. Depends on libtorchcodec_decoderN.{ext}. On Linux, {ext} is so.
+    #    ops. Depends on libtorchcodec_coreN.{ext}. On Linux, {ext} is so.
     #    On Mac, it is dylib.
     #
     # 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We
     #    keep these separate from the PyTorch custom ops because we have to
     #    load these libraries separately on the Python side. Depends on
-    #    libtorchcodec_decoderN.{ext}. On BOTH Linux and Mac {ext} is so.
+    #    libtorchcodec_coreN.{ext}. On BOTH Linux and Mac {ext} is so.
-    # 1. Create libtorchcodec_decoderN.{ext}.
-    set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
-    set(decoder_sources
+    # 1. Create libtorchcodec_coreN.{ext}.
+    set(core_library_name "libtorchcodec_core${ffmpeg_major_version}")
+    set(core_sources
         AVIOContextHolder.cpp
+        AVIOTensorContext.cpp
         FFMPEGCommon.cpp
-	DeviceInterface.cpp
+        Frame.cpp
+        DeviceInterface.cpp
+        CpuDeviceInterface.cpp
         SingleStreamDecoder.cpp
-        # TODO: lib name should probably not be "*_decoder*" now that it also
-        # contains an encoder
         Encoder.cpp
     )
     if(ENABLE_CUDA)
-	    list(APPEND decoder_sources CudaDeviceInterface.cpp)
+	    list(APPEND core_sources CudaDeviceInterface.cpp)
     endif()
-    set(decoder_library_dependencies
+    set(core_library_dependencies
         ${ffmpeg_target}
         ${TORCH_LIBRARIES}
     )
     if(ENABLE_CUDA)
-        list(APPEND decoder_library_dependencies
+        list(APPEND core_library_dependencies
             ${CUDA_nppi_LIBRARY}
             ${CUDA_nppicc_LIBRARY}
         )
     endif()
     make_torchcodec_sublibrary(
-        "${decoder_library_name}"
+        "${core_library_name}"
         SHARED
-        "${decoder_sources}"
-        "${decoder_library_dependencies}"
+        "${core_sources}"
+        "${core_library_dependencies}"
     )
     # 2. Create libtorchcodec_custom_opsN.{ext}.
     set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
     set(custom_ops_sources
-        AVIOBytesContext.cpp
+        AVIOTensorContext.cpp
         custom_ops.cpp
     )
     set(custom_ops_dependencies
-        ${decoder_library_name}
+        ${core_library_name}
         ${Python3_LIBRARIES}
     )
     make_torchcodec_sublibrary(
@@ -114,7 +121,7 @@ function(make_torchcodec_libraries
         pybind_ops.cpp
     )
     set(pybind_ops_dependencies
-       ${decoder_library_name}
+       ${core_library_name}
        pybind11::module # This library dependency makes sure we have the right
                         # Python libraries included as well as all of the right
                         # settings so that we can successfully load the shared
@@ -142,19 +149,28 @@ function(make_torchcodec_libraries
         PUBLIC
       "-fvisibility=hidden"
     )
+    # The value we use here must match the value we return from
+    # _get_pybind_ops_module_name() on the Python side. If the values do not
+    # match, then we will be unable to import the C++ shared library as a
+    # Python module at runtime.
+    target_compile_definitions(
+        ${pybind_ops_library_name}
+        PRIVATE
+        PYBIND_OPS_MODULE_NAME=core_pybind_ops
+    )
     # If we don't make sure this flag is set, we run into segfaults at import
     # time on Mac. See:
     #    https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764
     target_link_options(
         ${pybind_ops_library_name}
         PUBLIC
-        "-undefined dynamic_lookup"
+        "LINKER:-undefined,dynamic_lookup"
     )
     # Install all libraries.
     set(
         all_libraries
-        ${decoder_library_name}
+        ${core_library_name}
         ${custom_ops_library_name}
         ${pybind_ops_library_name}
     )
@@ -231,7 +247,7 @@ else()
     # Expose these values upwards so that the test compilation does not need
     # to re-figure it out. FIXME: it's not great that we just copy-paste the
     # library names.
-    set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE)
+    set(libtorchcodec_library_name "libtorchcodec_core${ffmpeg_major_version}" PARENT_SCOPE)
     set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE)
     set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
 endif()