PyPI - torchcodec - Versions diffs - 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl - Mend

torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

torchcodec/__init__.py +27 -0
torchcodec/_core/AVIOContextHolder.cpp +60 -0
torchcodec/_core/AVIOContextHolder.h +64 -0
torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
torchcodec/_core/AVIOFileLikeContext.h +55 -0
torchcodec/_core/AVIOTensorContext.cpp +130 -0
torchcodec/_core/AVIOTensorContext.h +44 -0
torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
torchcodec/_core/CMakeLists.txt +295 -0
torchcodec/_core/CUDACommon.cpp +330 -0
torchcodec/_core/CUDACommon.h +51 -0
torchcodec/_core/Cache.h +124 -0
torchcodec/_core/CpuDeviceInterface.cpp +509 -0
torchcodec/_core/CpuDeviceInterface.h +141 -0
torchcodec/_core/CudaDeviceInterface.cpp +602 -0
torchcodec/_core/CudaDeviceInterface.h +79 -0
torchcodec/_core/DeviceInterface.cpp +117 -0
torchcodec/_core/DeviceInterface.h +191 -0
torchcodec/_core/Encoder.cpp +1054 -0
torchcodec/_core/Encoder.h +192 -0
torchcodec/_core/FFMPEGCommon.cpp +684 -0
torchcodec/_core/FFMPEGCommon.h +314 -0
torchcodec/_core/FilterGraph.cpp +159 -0
torchcodec/_core/FilterGraph.h +59 -0
torchcodec/_core/Frame.cpp +47 -0
torchcodec/_core/Frame.h +72 -0
torchcodec/_core/Metadata.cpp +124 -0
torchcodec/_core/Metadata.h +92 -0
torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
torchcodec/_core/NVDECCache.cpp +60 -0
torchcodec/_core/NVDECCache.h +102 -0
torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
torchcodec/_core/SingleStreamDecoder.h +391 -0
torchcodec/_core/StreamOptions.h +70 -0
torchcodec/_core/Transform.cpp +128 -0
torchcodec/_core/Transform.h +86 -0
torchcodec/_core/ValidationUtils.cpp +35 -0
torchcodec/_core/ValidationUtils.h +21 -0
torchcodec/_core/__init__.py +46 -0
torchcodec/_core/_metadata.py +262 -0
torchcodec/_core/custom_ops.cpp +1090 -0
torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
torchcodec/_core/ops.py +605 -0
torchcodec/_core/pybind_ops.cpp +50 -0
torchcodec/_frame.py +146 -0
torchcodec/_internally_replaced_utils.py +68 -0
torchcodec/_samplers/__init__.py +7 -0
torchcodec/_samplers/video_clip_sampler.py +419 -0
torchcodec/decoders/__init__.py +12 -0
torchcodec/decoders/_audio_decoder.py +185 -0
torchcodec/decoders/_decoder_utils.py +113 -0
torchcodec/decoders/_video_decoder.py +601 -0
torchcodec/encoders/__init__.py +2 -0
torchcodec/encoders/_audio_encoder.py +149 -0
torchcodec/encoders/_video_encoder.py +196 -0
torchcodec/libtorchcodec_core4.so +0 -0
torchcodec/libtorchcodec_core5.so +0 -0
torchcodec/libtorchcodec_core6.so +0 -0
torchcodec/libtorchcodec_core7.so +0 -0
torchcodec/libtorchcodec_core8.so +0 -0
torchcodec/libtorchcodec_custom_ops4.so +0 -0
torchcodec/libtorchcodec_custom_ops5.so +0 -0
torchcodec/libtorchcodec_custom_ops6.so +0 -0
torchcodec/libtorchcodec_custom_ops7.so +0 -0
torchcodec/libtorchcodec_custom_ops8.so +0 -0
torchcodec/libtorchcodec_pybind_ops4.so +0 -0
torchcodec/libtorchcodec_pybind_ops5.so +0 -0
torchcodec/libtorchcodec_pybind_ops6.so +0 -0
torchcodec/libtorchcodec_pybind_ops7.so +0 -0
torchcodec/libtorchcodec_pybind_ops8.so +0 -0
torchcodec/samplers/__init__.py +2 -0
torchcodec/samplers/_common.py +84 -0
torchcodec/samplers/_index_based.py +287 -0
torchcodec/samplers/_time_based.py +358 -0
torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
torchcodec/transforms/__init__.py +12 -0
torchcodec/transforms/_decoder_transforms.py +375 -0
torchcodec/version.py +2 -0
torchcodec-0.10.0.dist-info/METADATA +286 -0
torchcodec-0.10.0.dist-info/RECORD +88 -0
torchcodec-0.10.0.dist-info/WHEEL +5 -0
torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
torchcodec-0.10.0.dist-info/top_level.txt +2 -0

torchcodec/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from pathlib import Path
+# Note: usort wants to put Frame and FrameBatch after decoders and samplers,
+# but that results in circular import.
+from ._frame import AudioSamples, Frame, FrameBatch  # usort:skip # noqa
+from . import decoders, encoders, samplers, transforms  # noqa
+try:
+    # Note that version.py is generated during install.
+    from .version import __version__  # noqa: F401
+except Exception:
+    pass
+# cmake_prefix_path is needed for downstream cmake-based builds that use
+# torchcodec as a dependency to tell cmake where torchcodec is installed and where to find its
+# CMake configuration files.
+# Pytorch itself has a similar mechanism which we use in our setup.py!
+cmake_prefix_path = Path(__file__).parent / "share" / "cmake"
+# Similarly, these are exposed for downstream builds that use torchcodec as a
+# dependency.
+from ._core import core_library_path, ffmpeg_major_version  # usort:skip

torchcodec/_core/AVIOContextHolder.cpp ADDED Viewed

@@ -0,0 +1,60 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "AVIOContextHolder.h"
+#include <torch/types.h>
+namespace facebook::torchcodec {
+// Allocates the I/O buffer and the AVIOContext that uses it, wiring in the
+// given callbacks. `seek` is always required; `write` is required when
+// isForWriting is true and `read` otherwise. `heldData` is the opaque pointer
+// forwarded to avioAllocContext alongside the callbacks. Raises through
+// TORCH_CHECK on an invalid argument or a failed allocation.
+void AVIOContextHolder::createAVIOContext(
+    AVIOReadFunction read,
+    AVIOWriteFunction write,
+    AVIOSeekFunction seek,
+    void* heldData,
+    bool isForWriting,
+    int bufferSize) {
+  // Validate every argument before allocating anything: TORCH_CHECK throws,
+  // and a failure raised after av_malloc() would leak the buffer.
+  TORCH_CHECK(
+      bufferSize > 0,
+      "Buffer size must be greater than 0; is " + std::to_string(bufferSize));
+  TORCH_CHECK(seek != nullptr, "seek method must be defined");
+  if (isForWriting) {
+    TORCH_CHECK(write != nullptr, "write method must be defined for writing");
+  } else {
+    TORCH_CHECK(read != nullptr, "read method must be defined for reading");
+  }
+  auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
+  TORCH_CHECK(
+      buffer != nullptr,
+      "Failed to allocate buffer of size " + std::to_string(bufferSize));
+  avioContext_.reset(avioAllocContext(
+      buffer,
+      bufferSize,
+      /*write_flag=*/isForWriting,
+      heldData,
+      read,
+      write,
+      seek));
+  if (!avioContext_) {
+    // No context took the buffer, so release it here before reporting.
+    av_freep(&buffer);
+    TORCH_CHECK(false, "Failed to allocate AVIOContext");
+  }
+}
+// Frees the buffer attached to the AVIOContext. Nothing to do when no
+// context was ever created.
+AVIOContextHolder::~AVIOContextHolder() {
+  if (!avioContext_) {
+    return;
+  }
+  av_freep(&avioContext_->buffer);
+}
+// Returns the raw AVIOContext pointer obtained from avioContext_.get(); the
+// holder keeps managing the context and its buffer.
+AVIOContext* AVIOContextHolder::getAVIOContext() {
+  return avioContext_.get();
+}
+} // namespace facebook::torchcodec

torchcodec/_core/AVIOContextHolder.h ADDED Viewed

@@ -0,0 +1,64 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include "FFMPEGCommon.h"
+namespace facebook::torchcodec {
+// The AVIOContextHolder serves several purposes:
+//
+//   1. It is a smart pointer for the AVIOContext. It has the logic to create
+//      a new AVIOContext and will appropriately free the AVIOContext when it
+//      goes out of scope. Note that this requires more than just having a
+//      UniqueAVIOContext, as the AVIOContext points to a buffer which must be
+//      freed.
+//   2. It is a base class for AVIOContext specializations. When specializing a
+//      AVIOContext, we need to provide four things:
+//        1. A read callback function, for decoding.
+//        2. A seek callback function, for decoding and encoding.
+//        3. A write callback function, for encoding.
+//        4. A pointer to some context object that has the same lifetime as the
+//           AVIOContext itself. This context object holds the custom state that
+//           tracks the custom behavior of reading, seeking and writing. It is
+//           provided upon AVIOContext creation and to the read, seek and
+//           write callback functions.
+//      The callback functions do not need to be members of the derived class,
+//      but the derived class must have access to them. The context object must
+//      be a member of the derived class. Derived classes need to call
+//      createAVIOContext(), ideally in their constructor.
+//  3. A generic handle for those that just need to manage having access to an
+//     AVIOContext, but aren't necessarily concerned with how it was customized:
+//     typically, the SingleStreamDecoder.
+class AVIOContextHolder {
+ public:
+  // Frees the buffer attached to the AVIOContext, if a context was created.
+  virtual ~AVIOContextHolder();
+  // Returns the AVIOContext held by this object.
+  AVIOContext* getAVIOContext();
+ protected:
+  // Make constructor protected to prevent anyone from constructing
+  // an AVIOContextHolder without deriving it. (Ordinarily this would be
+  // enforced by having a pure virtual method, but we don't have any.)
+  AVIOContextHolder() = default;
+  // Deriving classes should call this function in their constructor.
+  // `seek` must always be provided; `write` must be provided when
+  // isForWriting is true and `read` when it is false. `heldData` is the
+  // context object handed to the callbacks. `bufferSize` must be greater
+  // than 0.
+  void createAVIOContext(
+      AVIOReadFunction read,
+      AVIOWriteFunction write,
+      AVIOSeekFunction seek,
+      void* heldData,
+      bool isForWriting,
+      int bufferSize = defaultBufferSize);
+ private:
+  UniqueAVIOContext avioContext_;
+  // Defaults to 64 KB
+  static const int defaultBufferSize = 64 * 1024;
+};
+} // namespace facebook::torchcodec

torchcodec/_core/AVIOFileLikeContext.cpp ADDED Viewed

@@ -0,0 +1,98 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "AVIOFileLikeContext.h"
+#include <torch/types.h>
+namespace facebook::torchcodec {
+// Keeps a reference to the given Python file-like object and creates an
+// AVIOContext whose callbacks forward to it. Before doing so, checks that the
+// object has the method the chosen direction needs ("write" when
+// isForWriting, "read" otherwise) as well as "seek".
+AVIOFileLikeContext::AVIOFileLikeContext(
+    const py::object& fileLike,
+    bool isForWriting)
+    : fileLike_{UniquePyObject(new py::object(fileLike))} {
+  {
+    // TODO: Is it necessary to acquire the GIL here? Is it maybe even
+    // harmful? At the moment, this is only called from within a pybind
+    // function, and pybind guarantees we have the GIL.
+    py::gil_scoped_acquire gil;
+    if (isForWriting) {
+      TORCH_CHECK(
+          py::hasattr(fileLike, "write"),
+          "File like object must implement a write method for writing.");
+    } else {
+      TORCH_CHECK(
+          py::hasattr(fileLike, "read"),
+          "File like object must implement a read method for reading.");
+    }
+    TORCH_CHECK(
+        py::hasattr(fileLike, "seek"),
+        "File like object must implement a seek method.");
+  }
+  // The address of fileLike_ is the opaque pointer that the static read,
+  // write and seek callbacks cast back to UniquePyObject*.
+  createAVIOContext(&read, &write, &seek, &fileLike_, isForWriting);
+}
+// Fills buf with up to buf_size bytes obtained from the Python object's
+// read() method, calling it repeatedly until the buffer is full or an empty
+// result comes back. Returns the number of bytes copied, or AVERROR_EOF when
+// nothing was copied.
+int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {
+  auto pyFile = static_cast<UniquePyObject*>(opaque);
+  // Take the GIL once for the whole call. This is likely more efficient than
+  // releasing and re-acquiring it on every loop iteration.
+  py::gil_scoped_acquire gil;
+  uint8_t* dest = buf;
+  int remaining = buf_size;
+  while (remaining > 0) {
+    // The Python method returns the actual bytes, which we hold through the
+    // py::bytes wrapper. That wrapper does not give us the underlying data
+    // pointer needed for the memcpy below, so we go through a
+    // std::string_view. Because it is a view and not a copy, it should be
+    // cheap.
+    auto chunk = static_cast<py::bytes>((*pyFile)->attr("read")(remaining));
+    auto chunkView = static_cast<std::string_view>(chunk);
+    const int chunkSize = static_cast<int>(chunkView.size());
+    if (chunkSize == 0) {
+      // An empty result ends the loop.
+      break;
+    }
+    TORCH_CHECK(
+        chunkSize <= remaining,
+        "Requested up to ",
+        remaining,
+        " bytes but, received ",
+        chunkSize,
+        " bytes. The given object does not conform to read protocol of file object.");
+    std::memcpy(dest, chunkView.data(), chunkSize);
+    dest += chunkSize;
+    remaining -= chunkSize;
+  }
+  const int totalNumRead = buf_size - remaining;
+  if (totalNumRead == 0) {
+    return AVERROR_EOF;
+  }
+  return totalNumRead;
+}
+// Forwards a seek request to the Python object's seek() method and returns
+// the value that method reports. Size queries (AVSEEK_SIZE) are answered with
+// AVERROR(EIO).
+int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) {
+  auto pyFile = static_cast<UniquePyObject*>(opaque);
+  // We do not know the file size, so a size query fails before any Python
+  // code is involved.
+  if (whence == AVSEEK_SIZE) {
+    return AVERROR(EIO);
+  }
+  py::gil_scoped_acquire gil;
+  py::object newPosition = (*pyFile)->attr("seek")(offset, whence);
+  return py::cast<int64_t>(newPosition);
+}
+// Passes buf_size bytes starting at buf to the Python object's write() method
+// and returns the count that method reports.
+int AVIOFileLikeContext::write(void* opaque, const uint8_t* buf, int buf_size) {
+  auto pyFile = static_cast<UniquePyObject*>(opaque);
+  py::gil_scoped_acquire gil;
+  // Build a Python bytes object from the raw buffer.
+  const char* rawData = reinterpret_cast<const char*>(buf);
+  py::bytes payload(rawData, buf_size);
+  py::object numWritten = (*pyFile)->attr("write")(payload);
+  return py::cast<int>(numWritten);
+}
+} // namespace facebook::torchcodec

torchcodec/_core/AVIOFileLikeContext.h ADDED Viewed

@@ -0,0 +1,55 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include "AVIOContextHolder.h"
+namespace py = pybind11;
+namespace facebook::torchcodec {
+// Enables users to pass in a Python file-like object. We then forward all
+// read, write and seek calls back up to the methods on the Python object.
+class AVIOFileLikeContext : public AVIOContextHolder {
+ public:
+  // `fileLike` must have a "seek" method, plus "write" when isForWriting is
+  // true or "read" when it is false.
+  explicit AVIOFileLikeContext(const py::object& fileLike, bool isForWriting);
+ private:
+  // Callbacks handed to createAVIOContext(). `opaque` is the address of
+  // fileLike_.
+  static int read(void* opaque, uint8_t* buf, int buf_size);
+  static int64_t seek(void* opaque, int64_t offset, int whence);
+  static int write(void* opaque, const uint8_t* buf, int buf_size);
+  // Note that we dynamically allocate the Python object because we need to
+  // strictly control when its destructor is called. We must hold the GIL
+  // when its destructor gets called, as it needs to update the reference
+  // count. It's easiest to control that when it's dynamic memory. Otherwise,
+  // we'd have to ensure whatever enclosing scope holds the object has the GIL,
+  // and that's, at least, hard. For all of the common pitfalls, see:
+  //
+  //   https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors
+  //
+  // We maintain a reference to the file-like object because the file-like
+  // object that was created on the Python side must live as long as our
+  // potential use. That is, even if there are no more references to the object
+  // on the Python side, we require that the object is still live.
+  struct PyObjectDeleter {
+    inline void operator()(py::object* obj) const {
+      if (obj) {
+        // Hold the GIL while the py::object is destroyed.
+        py::gil_scoped_acquire gil;
+        delete obj;
+      }
+    }
+  };
+  using UniquePyObject = std::unique_ptr<py::object, PyObjectDeleter>;
+  UniquePyObject fileLike_;
+};
+} // namespace facebook::torchcodec

torchcodec/_core/AVIOTensorContext.cpp ADDED Viewed

@@ -0,0 +1,130 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "AVIOTensorContext.h"
+#include <torch/types.h>
+namespace facebook::torchcodec {
+namespace {
+// Number of bytes AVIOToTensorContext allocates for its output tensor at
+// construction.
+constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
+// Upper bound that write() enforces when it grows the output tensor.
+constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
+// The signature of this function is defined by FFMPEG.
+//
+// Copies up to buf_size bytes from the tensor, starting at the current
+// position, into buf and advances the position. Returns the number of bytes
+// copied, or AVERROR_EOF when there is nothing left to copy.
+int read(void* opaque, uint8_t* buf, int buf_size) {
+  auto* ctx = static_cast<detail::TensorContext*>(opaque);
+  const int64_t totalSize = ctx->data.numel();
+  TORCH_CHECK(
+      ctx->current_pos <= totalSize,
+      "Tried to read outside of the buffer: current_pos=",
+      ctx->current_pos,
+      ", size=",
+      totalSize);
+  // Serve as much as was asked for, capped by what is left after the cursor.
+  const int64_t bytesLeft = totalSize - ctx->current_pos;
+  const int64_t numBytesRead =
+      std::min(static_cast<int64_t>(buf_size), bytesLeft);
+  TORCH_CHECK(
+      numBytesRead >= 0,
+      "Tried to read negative bytes: numBytesRead=",
+      numBytesRead,
+      ", size=",
+      totalSize,
+      ", current_pos=",
+      ctx->current_pos);
+  if (numBytesRead == 0) {
+    return AVERROR_EOF;
+  }
+  const uint8_t* src = ctx->data.data_ptr<uint8_t>() + ctx->current_pos;
+  std::memcpy(buf, src, numBytesRead);
+  ctx->current_pos += numBytesRead;
+  return numBytesRead;
+}
+// The signature of this function is defined by FFMPEG.
+//
+// Copies buf_size bytes from buf into the output tensor at the current
+// position, growing the tensor first when the bytes would not fit, then
+// advances the position and returns buf_size.
+int write(void* opaque, const uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  int64_t bufSize = static_cast<int64_t>(buf_size);
+  if (tensorContext->current_pos + bufSize > tensorContext->data.numel()) {
+    TORCH_CHECK(
+        tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
+        "We tried to allocate an output encoded tensor larger than ",
+        MAX_TENSOR_SIZE,
+        " bytes. If you think this should be supported, please report.");
+    // We double the size of the output tensor. Calling cat() may not be the
+    // most efficient, but it's simple. Concatenating the tensor with itself
+    // keeps the bytes already written at the front.
+    tensorContext->data =
+        torch::cat({tensorContext->data, tensorContext->data});
+  }
+  // The tensor is doubled at most once per call, so confirm the write fits.
+  TORCH_CHECK(
+      tensorContext->current_pos + bufSize <= tensorContext->data.numel(),
+      "Re-allocation of the output tensor didn't work. ",
+      "This should not happen, please report on TorchCodec bug tracker");
+  // Fetch the data pointer only now: the tensor may have been replaced above.
+  uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
+  std::memcpy(outputTensorData + tensorContext->current_pos, buf, bufSize);
+  tensorContext->current_pos += bufSize;
+  // Track the maximum position written so getOutputTensor's narrow() does not
+  // truncate the file if final seek was backwards
+  tensorContext->max_pos =
+      std::max(tensorContext->current_pos, tensorContext->max_pos);
+  return buf_size;
+}
+// The signature of this function is defined by FFMPEG.
+//
+// Supports two operations: AVSEEK_SIZE reports the number of elements in the
+// backing tensor, and SEEK_SET moves the cursor to an absolute byte offset
+// and returns that offset. Any other `whence`, or a negative SEEK_SET offset,
+// returns -1 and leaves the cursor untouched.
+int64_t seek(void* opaque, int64_t offset, int whence) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  int64_t ret = -1;
+  switch (whence) {
+    case AVSEEK_SIZE:
+      ret = tensorContext->data.numel();
+      break;
+    case SEEK_SET:
+      // read() and write() add current_pos directly to the tensor's data
+      // pointer, so a negative position would make them touch memory before
+      // the start of the buffer. Reject it without moving the cursor; the
+      // caller still sees a negative (error) return value.
+      if (offset < 0) {
+        break;
+      }
+      tensorContext->current_pos = offset;
+      ret = offset;
+      break;
+    default:
+      break;
+  }
+  return ret;
+}
+} // namespace
+// Creates an AVIOContext for reading from `data`, which must be a non-empty,
+// contiguous uint8 tensor. Both positions in the tensor context start at 0.
+AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
+    : tensorContext_{data, 0, 0} {
+  TORCH_CHECK(data.numel() > 0, "data must not be empty");
+  TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
+  TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
+  // No write callback: this context is only used for reading.
+  createAVIOContext(
+      &read, nullptr, &seek, &tensorContext_, /*isForWriting=*/false);
+}
+// Creates an AVIOContext for writing into a newly allocated uint8 tensor of
+// INITIAL_TENSOR_SIZE elements. Both positions in the tensor context start
+// at 0.
+AVIOToTensorContext::AVIOToTensorContext()
+    : tensorContext_{
+          torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}),
+          0,
+          0} {
+  // No read callback: this context is only used for writing.
+  createAVIOContext(
+      nullptr, &write, &seek, &tensorContext_, /*isForWriting=*/true);
+}
+// Returns the first max_pos elements of the output tensor, i.e. everything up
+// to the furthest position reached by a write.
+torch::Tensor AVIOToTensorContext::getOutputTensor() {
+  return tensorContext_.data.narrow(
+      /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.max_pos);
+}
+} // namespace facebook::torchcodec

torchcodec/_core/AVIOTensorContext.h ADDED Viewed

@@ -0,0 +1,44 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <torch/types.h>
+#include "AVIOContextHolder.h"
+namespace facebook::torchcodec {
+namespace detail {
+// State handed to the tensor read/write/seek callbacks as their opaque
+// pointer.
+struct TensorContext {
+  // Tensor that is read from (decoding) or written into (encoding).
+  torch::Tensor data;
+  // Offset into `data` at which the next read or write starts.
+  int64_t current_pos;
+  // Largest value current_pos has had right after a write.
+  int64_t max_pos;
+};
+} // namespace detail
+// For Decoding: enables users to pass in the entire video or audio as bytes.
+// Our read and seek functions then traverse the bytes in memory.
+class AVIOFromTensorContext : public AVIOContextHolder {
+ public:
+  // `data` must be a non-empty, contiguous uint8 tensor.
+  explicit AVIOFromTensorContext(torch::Tensor data);
+ private:
+  // Holds the input tensor and the current read position.
+  detail::TensorContext tensorContext_;
+};
+// For Encoding: used to encode into an output uint8 (bytes) tensor.
+class AVIOToTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOToTensorContext();
+  // Returns the portion of the output tensor up to the furthest position
+  // written.
+  torch::Tensor getOutputTensor();
+ private:
+  // Holds the output tensor, the current write position and the furthest
+  // position written.
+  detail::TensorContext tensorContext_;
+};
+} // namespace facebook::torchcodec