PyPI - torchcodec - Versions diffs - 0.7.0__cp39-cp39-win_amd64.whl - Mend

torchcodec 0.7.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

torchcodec/__init__.py +16 -0
torchcodec/_core/AVIOContextHolder.cpp +60 -0
torchcodec/_core/AVIOContextHolder.h +64 -0
torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
torchcodec/_core/AVIOFileLikeContext.h +55 -0
torchcodec/_core/AVIOTensorContext.cpp +123 -0
torchcodec/_core/AVIOTensorContext.h +43 -0
torchcodec/_core/CMakeLists.txt +292 -0
torchcodec/_core/Cache.h +138 -0
torchcodec/_core/CpuDeviceInterface.cpp +266 -0
torchcodec/_core/CpuDeviceInterface.h +70 -0
torchcodec/_core/CudaDeviceInterface.cpp +514 -0
torchcodec/_core/CudaDeviceInterface.h +37 -0
torchcodec/_core/DeviceInterface.cpp +79 -0
torchcodec/_core/DeviceInterface.h +67 -0
torchcodec/_core/Encoder.cpp +514 -0
torchcodec/_core/Encoder.h +123 -0
torchcodec/_core/FFMPEGCommon.cpp +421 -0
torchcodec/_core/FFMPEGCommon.h +227 -0
torchcodec/_core/FilterGraph.cpp +142 -0
torchcodec/_core/FilterGraph.h +45 -0
torchcodec/_core/Frame.cpp +32 -0
torchcodec/_core/Frame.h +118 -0
torchcodec/_core/Metadata.h +72 -0
torchcodec/_core/SingleStreamDecoder.cpp +1715 -0
torchcodec/_core/SingleStreamDecoder.h +380 -0
torchcodec/_core/StreamOptions.h +53 -0
torchcodec/_core/ValidationUtils.cpp +35 -0
torchcodec/_core/ValidationUtils.h +21 -0
torchcodec/_core/__init__.py +40 -0
torchcodec/_core/_metadata.py +317 -0
torchcodec/_core/custom_ops.cpp +727 -0
torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +300 -0
torchcodec/_core/ops.py +455 -0
torchcodec/_core/pybind_ops.cpp +87 -0
torchcodec/_frame.py +145 -0
torchcodec/_internally_replaced_utils.py +67 -0
torchcodec/_samplers/__init__.py +7 -0
torchcodec/_samplers/video_clip_sampler.py +430 -0
torchcodec/decoders/__init__.py +11 -0
torchcodec/decoders/_audio_decoder.py +177 -0
torchcodec/decoders/_decoder_utils.py +52 -0
torchcodec/decoders/_video_decoder.py +464 -0
torchcodec/encoders/__init__.py +1 -0
torchcodec/encoders/_audio_encoder.py +150 -0
torchcodec/libtorchcodec_core4.dll +0 -0
torchcodec/libtorchcodec_core5.dll +0 -0
torchcodec/libtorchcodec_core6.dll +0 -0
torchcodec/libtorchcodec_core7.dll +0 -0
torchcodec/libtorchcodec_custom_ops4.dll +0 -0
torchcodec/libtorchcodec_custom_ops5.dll +0 -0
torchcodec/libtorchcodec_custom_ops6.dll +0 -0
torchcodec/libtorchcodec_custom_ops7.dll +0 -0
torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
torchcodec/samplers/__init__.py +2 -0
torchcodec/samplers/_common.py +84 -0
torchcodec/samplers/_index_based.py +287 -0
torchcodec/samplers/_time_based.py +350 -0
torchcodec/version.py +2 -0
torchcodec-0.7.0.dist-info/METADATA +242 -0
torchcodec-0.7.0.dist-info/RECORD +67 -0
torchcodec-0.7.0.dist-info/WHEEL +5 -0
torchcodec-0.7.0.dist-info/licenses/LICENSE +28 -0
torchcodec-0.7.0.dist-info/top_level.txt +2 -0

torchcodec/_core/CMakeLists.txt ADDED Viewed

@@ -0,0 +1,292 @@
+# Build script for the TorchCodec native libraries. This first section sets
+# the language standard, locates the build dependencies and assembles the
+# global compiler flags used by every target defined further down.
+cmake_minimum_required(VERSION 3.18)
+project(TorchCodec)
+# Compile everything as C++17 and refuse to fall back to an older standard.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# NOTE(review): presumably makes pybind11 locate Python through CMake's
+# FindPython machinery - confirm against the pybind11 documentation.
+set(PYBIND11_FINDPYTHON ON)
+find_package(pybind11 REQUIRED)
+find_package(Torch REQUIRED)
+# PYTHON_VERSION is expected to be provided by the caller of this script; the
+# exact version is requested so that the Development component matches it.
+find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
+# Warnings are treated as errors unless TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR
+# is defined and truthy.
+if(DEFINED TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR AND TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR)
+    set(TORCHCODEC_WERROR_OPTION "")
+else()
+    if (WIN32)
+        # TORCHCODEC_WERROR_OPTION is intentionally left unset here, so it
+        # expands to nothing when the flags are assembled below.
+        # TODO set warnings as errors on Windows as well.
+        # set(TORCHCODEC_WERROR_OPTION "/WX")
+    else()
+        set(TORCHCODEC_WERROR_OPTION "-Werror")
+    endif()
+endif()
+if (WIN32)
+  # Avoid warnings about non-ASCII characters in source files.
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4819")
+  # Important for when we add Windows CUDA: exporting all symbols is limited to
+  # 65535 symbols, which (apparently) will not work for CUDA.
+  # https://github.com/pytorch/pytorch/pull/3650
+  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
+endif()
+# Append the warning level, the optional warnings-as-errors switch and the
+# flags exported by the Torch package to the global C++ flags.
+if (WIN32)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 ${TORCHCODEC_WERROR_OPTION} ${TORCH_CXX_FLAGS}")
+else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic ${TORCHCODEC_WERROR_OPTION} ${TORCH_CXX_FLAGS}")
+endif()
+# Defines a single TorchCodec library target.
+#
+#   library_name          Name of the target to create.
+#   type                  Library type forwarded to add_library() (the callers
+#                         in this file pass SHARED or MODULE).
+#   sources               List of source files compiled into the target.
+#   library_dependencies  List of libraries/targets linked PUBLIC-ly.
+function(make_torchcodec_sublibrary library_name type sources library_dependencies)
+    add_library(${library_name} ${type} ${sources})
+    set_target_properties(
+        ${library_name}
+        PROPERTIES
+            CXX_STANDARD 17
+            # The target names passed in already start with "lib", so suppress
+            # the prefix CMake would otherwise add.
+            PREFIX ""
+    )
+    target_include_directories(
+        ${library_name}
+        PRIVATE
+        ./../../../
+        "${TORCH_INSTALL_PREFIX}/include"
+        ${Python3_INCLUDE_DIRS}
+    )
+    target_link_libraries(${library_name} PUBLIC ${library_dependencies})
+endfunction()
+# Defines and installs the three TorchCodec libraries for one FFmpeg version.
+#
+#   ffmpeg_major_version  FFmpeg major version N, used as the suffix of every
+#                         library name created here.
+#   ffmpeg_target         Target providing the FFmpeg libraries that the core
+#                         library links against.
+function(make_torchcodec_libraries
+    ffmpeg_major_version
+    ffmpeg_target)
+    # We create three shared libraries per version of FFmpeg, where the version
+    # is denoted by N:
+    #
+    # 1. libtorchcodec_coreN.{ext}: Base library which contains the
+    #    implementation of VideoDecoder and everything VideoDecoder needs. On
+    #    Linux, {ext} is so. On Mac, it is dylib. On Windows it's dll.
+    #
+    # 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom
+    #    ops. Depends on libtorchcodec_coreN.{ext}. On Linux, {ext} is so.
+    #    On Mac, it is dylib. On Windows it's dll.
+    #
+    # 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We
+    #    keep these separate from the PyTorch custom ops because we have to
+    #    load these libraries separately on the Python side. Depends on
+    #    libtorchcodec_coreN.{ext}. On BOTH Linux and Mac {ext} is so. On
+    #    Windows, it's pyd.
+    # 1. Create libtorchcodec_coreN.{ext}.
+    set(core_library_name "libtorchcodec_core${ffmpeg_major_version}")
+    set(core_sources
+        AVIOContextHolder.cpp
+        AVIOTensorContext.cpp
+        FFMPEGCommon.cpp
+        FilterGraph.cpp
+        Frame.cpp
+        DeviceInterface.cpp
+        CpuDeviceInterface.cpp
+        SingleStreamDecoder.cpp
+        Encoder.cpp
+        ValidationUtils.cpp
+    )
+    # The CUDA device interface is only compiled when ENABLE_CUDA is set.
+    if(ENABLE_CUDA)
+	    list(APPEND core_sources CudaDeviceInterface.cpp)
+    endif()
+    set(core_library_dependencies
+        ${ffmpeg_target}
+        ${TORCH_LIBRARIES}
+    )
+    # CUDA builds additionally link the NPP libraries.
+    if(ENABLE_CUDA)
+        list(APPEND core_library_dependencies
+            ${CUDA_nppi_LIBRARY}
+            ${CUDA_nppicc_LIBRARY}
+        )
+    endif()
+    make_torchcodec_sublibrary(
+        "${core_library_name}"
+        SHARED
+        "${core_sources}"
+        "${core_library_dependencies}"
+    )
+    # 2. Create libtorchcodec_custom_opsN.{ext}.
+    set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
+    # NOTE(review): AVIOTensorContext.cpp is also listed in core_sources above,
+    # so it is compiled into both libraries - confirm this is intentional.
+    set(custom_ops_sources
+        AVIOTensorContext.cpp
+        custom_ops.cpp
+    )
+    set(custom_ops_dependencies
+        ${core_library_name}
+        ${Python3_LIBRARIES}
+    )
+    make_torchcodec_sublibrary(
+        "${custom_ops_library_name}"
+        SHARED
+        "${custom_ops_sources}"
+        "${custom_ops_dependencies}"
+    )
+    # 3. Create libtorchcodec_pybind_opsN.{ext}.
+    set(pybind_ops_library_name "libtorchcodec_pybind_ops${ffmpeg_major_version}")
+    set(pybind_ops_sources
+        AVIOFileLikeContext.cpp
+        pybind_ops.cpp
+    )
+    set(pybind_ops_dependencies
+       ${core_library_name}
+       pybind11::module # This library dependency makes sure we have the right
+                        # Python libraries included as well as all of the right
+                        # settings so that we can successfully load the shared
+                        # library as a Python module on Mac. If we instead use
+                        # ${Python3_LIBRARIES}, it works on Linux but not on
+                        # Mac.
+    )
+    make_torchcodec_sublibrary(
+        "${pybind_ops_library_name}"
+        MODULE # Note that this is not SHARED; otherwise we build the wrong kind
+               # of library on Mac. On Mac, SHARED becomes .dylib and MODULE becomes
+               # a .so. We want pybind11 libraries to become .so. If this is
+               # changed to SHARED, we will be able to successfully compile a
+               # .dylib, but we will not be able to successfully import that as
+               # a Python module on Mac.
+        "${pybind_ops_sources}"
+        "${pybind_ops_dependencies}"
+    )
+    if(WIN32)
+      # On Windows, we need to set the suffix to .pyd so that Python can
+      # import the shared library as a module. Just setting the MODULE type
+      # isn't enough.
+      set_target_properties(${pybind_ops_library_name} PROPERTIES SUFFIX ".pyd")
+    endif()
+    # pybind11 limits the visibility of symbols in the shared library to prevent
+    # stray initialization of py::objects. The rest of the object code must
+    # match. See:
+    #   https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes
+    if(NOT WIN32)
+        target_compile_options(
+            ${pybind_ops_library_name}
+            PUBLIC
+            "-fvisibility=hidden"
+        )
+    endif()
+    # The value we use here must match the value we return from
+    # _get_pybind_ops_module_name() on the Python side. If the values do not
+    # match, then we will be unable to import the C++ shared library as a
+    # Python module at runtime.
+    target_compile_definitions(
+        ${pybind_ops_library_name}
+        PRIVATE
+        PYBIND_OPS_MODULE_NAME=core_pybind_ops
+    )
+    if(APPLE)
+        # If we don't make sure this flag is set, we run into segfaults at import
+        # time on Mac. See:
+        # https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764
+        target_link_options(
+            ${pybind_ops_library_name}
+            PUBLIC
+            "LINKER:-undefined,dynamic_lookup"
+        )
+    endif()
+    # Install all libraries.
+    set(
+        all_libraries
+        ${core_library_name}
+        ${custom_ops_library_name}
+        ${pybind_ops_library_name}
+    )
+    # The install step is invoked within CMakeBuild.build_library() in
+    # setup.py and just copies the built files from the temp
+    # cmake/setuptools build folder into the CMAKE_INSTALL_PREFIX folder. We
+    # still need to manually pass "DESTINATION ..." for cmake to copy those
+    # files in CMAKE_INSTALL_PREFIX instead of CMAKE_INSTALL_PREFIX/lib.
+    install(
+        TARGETS ${all_libraries}
+        LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}
+        RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}  # For Windows
+    )
+endfunction()
+# Select which FFmpeg to build against:
+# - If the BUILD_AGAINST_ALL_FFMPEG_FROM_S3 environment variable is defined,
+#   build one set of libraries per supported FFmpeg major version against the
+#   pre-built FFmpeg libraries exposed by the included script.
+# - Otherwise, build a single set of libraries against the FFmpeg found through
+#   pkg-config.
+if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
+    message(
+        STATUS
+        "Building and dynamically linking libtorchcodec against our pre-built
+        non-GPL FFmpeg libraries. These libraries are only used at build time,
+        you still need a different FFmpeg to be installed for run time!"
+    )
+    # This will expose the ffmpeg4, ffmpeg5, ffmpeg6, and ffmpeg7 targets
+    include(
+        ${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
+    )
+    make_torchcodec_libraries(7 ffmpeg7)
+    make_torchcodec_libraries(6 ffmpeg6)
+    make_torchcodec_libraries(4 ffmpeg4)
+    make_torchcodec_libraries(5 ffmpeg5)
+else()
+    message(
+        STATUS
+        "Building and dynamically linking libtorchcodec against the installed
+        FFmpeg libraries. This require pkg-config to be installed. If you have
+        installed FFmpeg from conda, make sure pkg-config is installed from
+        conda as well."
+    )
+    find_package(PkgConfig REQUIRED)
+    pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET
+        libavdevice
+        libavfilter
+        libavformat
+        libavcodec
+        libavutil
+        libswresample
+        libswscale
+    )
+    # Split libavcodec's version string by '.' and convert it to a list. The
+    # version is quoted so that an empty value cannot make the argument
+    # disappear from the command.
+    string(REPLACE "." ";" libavcodec_version_list "${LIBAV_libavcodec_VERSION}")
+    # Get the first element of the list, which is the major version
+    list(GET libavcodec_version_list 0 libavcodec_major_version)
+    # Map the libavcodec major version to the FFmpeg major version. The left
+    # operand is quoted so that an empty value reaches the FATAL_ERROR branch
+    # below instead of producing a malformed if() expression.
+    if ("${libavcodec_major_version}" STREQUAL "58")
+        set(ffmpeg_major_version "4")
+    elseif ("${libavcodec_major_version}" STREQUAL "59")
+        set(ffmpeg_major_version "5")
+    elseif ("${libavcodec_major_version}" STREQUAL "60")
+        set(ffmpeg_major_version "6")
+    elseif ("${libavcodec_major_version}" STREQUAL "61")
+        set(ffmpeg_major_version "7")
+    else()
+        message(
+            FATAL_ERROR
+            "Unsupported libavcodec version: ${libavcodec_major_version}"
+        )
+    endif()
+    make_torchcodec_libraries(${ffmpeg_major_version} PkgConfig::LIBAV)
+    # Expose these values upwards so that the test compilation does not need
+    # to re-figure it out. FIXME: it's not great that we just copy-paste the
+    # library names.
+    set(libtorchcodec_library_name "libtorchcodec_core${ffmpeg_major_version}" PARENT_SCOPE)
+    set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE)
+    set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
+endif()

torchcodec/_core/Cache.h ADDED Viewed

@@ -0,0 +1,138 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <torch/types.h>
+#include <memory>
+#include <mutex>
+namespace facebook::torchcodec {
+// This header defines simple cache class primitives to store reusable objects
+// across TorchCodec stream instances. Intended usage is to store hardware
+// contexts creation of which is expensive. The cache mechanism is as follows:
+// 1. 'PerGpuCache' provides a dynamic cache with the specified maximum capacity
+//    for the given number of GPUs.
+// 2. When a stream object (e.g. SingleStreamDecoder) is destroyed, its
+//    cacheable object must be released to the cache. The cache accepts the
+//    object only if it is not full.
+// 3. When a stream object (e.g. SingleStreamDecoder) is created, a cacheable
+//    object must first be queried from the cache. If the cache is empty, a new
+//    object must be created.
+// Mutex-protected pool of uniquely-owned objects of type T (deleted with D).
+// Objects are handed back in last-in, first-out order.
+template <typename T, typename D = std::default_delete<T>>
+class Cache {
+ public:
+  using element_type = std::unique_ptr<T, D>;
+  // 'capacity' is the maximum number of objects held at once; a negative
+  // value disables the limit.
+  explicit Cache(int capacity) : capacity_(capacity) {}
+  // Takes ownership of 'obj' when there is room for it. Returns true when the
+  // object was stored, false when the cache is already at capacity.
+  bool addIfCacheHasCapacity(element_type&& obj);
+  // Removes and returns the most recently stored object, or a null pointer
+  // when the cache is empty. The cache keeps no reference to the returned
+  // object.
+  element_type get();
+ private:
+  int capacity_;
+  std::mutex mutex_;
+  std::vector<element_type> cache_;
+};
+template <typename T, typename D>
+bool Cache<T, D>::addIfCacheHasCapacity(element_type&& obj) {
+  std::lock_guard<std::mutex> guard(mutex_);
+  const bool unlimited = capacity_ < 0;
+  if (!unlimited && cache_.size() >= static_cast<size_t>(capacity_)) {
+    // No room left: the caller keeps ownership of 'obj'.
+    return false;
+  }
+  cache_.emplace_back(std::move(obj));
+  return true;
+}
+template <typename T, typename D>
+typename Cache<T, D>::element_type Cache<T, D>::get() {
+  std::lock_guard<std::mutex> guard(mutex_);
+  if (!cache_.empty()) {
+    element_type newest(std::move(cache_.back()));
+    cache_.pop_back();
+    return newest;
+  }
+  return element_type(nullptr);
+}
+// A fixed-size array of 'Cache' instances, one per GPU index.
+template <typename T, typename D = std::default_delete<T>>
+class PerGpuCache {
+ public:
+  using element_type = typename Cache<T, D>::element_type;
+  // Creates 'maxGpus' caches, each holding at most 'capacity' items. A
+  // negative 'capacity' makes each cache unlimited. 'maxGpus' must be
+  // positive.
+  PerGpuCache(int maxGpus, int capacity) {
+    TORCH_CHECK(maxGpus > 0, "maxGpus for PerGpuCache must be >0");
+    int remaining = maxGpus;
+    while (remaining > 0) {
+      cache_.push_back(std::make_unique<Cache<T, D>>(capacity));
+      --remaining;
+    }
+  }
+  // Stores 'obj' in the cache that belongs to 'device' if that cache has
+  // room. Returns true when the object was stored and false otherwise.
+  bool addIfCacheHasCapacity(const torch::Device& device, element_type&& obj);
+  // Takes an object out of the cache that belongs to 'device'. The cache
+  // keeps no reference to the returned object.
+  element_type get(const torch::Device& device);
+ private:
+  // 'Cache' contains a mutex, which makes it neither movable nor copyable,
+  // so each instance is held through a std::unique_ptr.
+  std::vector<std::unique_ptr<Cache<T, D>>> cache_;
+};
+// Note: this function is inline for convenience, not performance. Because the
+// rest of this file is template functions, they must all be defined in this
+// header. This function is not a template function, and should, in principle,
+// be defined in a .cpp file to preserve the One Definition Rule. That's
+// annoying for such a small amount of code, so we just inline it. If this file
+// grows, and there are more such functions, we should break them out into a
+// .cpp file.
+//
+// Returns the index of 'device', with any negative index mapped to 0. The
+// result is therefore always >= 0; callers remain responsible for checking
+// the upper bound.
+inline torch::DeviceIndex getNonNegativeDeviceIndex(
+    const torch::Device& device) {
+  const torch::DeviceIndex deviceIndex = device.index();
+  // For single GPU machines libtorch returns -1 for the device index. So for
+  // that case we set the device index to 0. That's used in per-gpu cache
+  // implementation and during initialization of CUDA and FFmpeg contexts
+  // which require non negative indices.
+  return deviceIndex < 0 ? static_cast<torch::DeviceIndex>(0) : deviceIndex;
+}
+// Forwards 'obj' to the cache that belongs to 'device'. Fails a TORCH_CHECK
+// when the (non-negative) device index has no corresponding cache.
+template <typename T, typename D>
+bool PerGpuCache<T, D>::addIfCacheHasCapacity(
+    const torch::Device& device,
+    element_type&& obj) {
+  const torch::DeviceIndex idx = getNonNegativeDeviceIndex(device);
+  const bool idxInRange = static_cast<size_t>(idx) < cache_.size();
+  TORCH_CHECK(idxInRange, "Device index out of range");
+  auto& deviceCache = *cache_[idx];
+  return deviceCache.addIfCacheHasCapacity(std::move(obj));
+}
+// Takes an object from the cache that belongs to 'device'. Fails a
+// TORCH_CHECK when the (non-negative) device index has no corresponding cache.
+template <typename T, typename D>
+typename PerGpuCache<T, D>::element_type PerGpuCache<T, D>::get(
+    const torch::Device& device) {
+  const torch::DeviceIndex idx = getNonNegativeDeviceIndex(device);
+  const bool idxInRange = static_cast<size_t>(idx) < cache_.size();
+  TORCH_CHECK(idxInRange, "Device index out of range");
+  auto& deviceCache = *cache_[idx];
+  return deviceCache.get();
+}
+} // namespace facebook::torchcodec

torchcodec/_core/CpuDeviceInterface.cpp ADDED Viewed

@@ -0,0 +1,266 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#include "src/torchcodec/_core/CpuDeviceInterface.h"
+namespace facebook::torchcodec {
+namespace {
+// Registers a factory for the CPU device interface during static
+// initialization. The result is kept so that the CpuDeviceInterface
+// constructor can check that registration took place.
+bool g_cpu =
+    registerDeviceInterface(torch::kCPU, [](const torch::Device& cpuDevice) {
+      return new CpuDeviceInterface(cpuDevice);
+    });
+} // namespace
+// Two contexts are equal when their input geometry, input pixel format and
+// output geometry all match.
+bool CpuDeviceInterface::SwsFrameContext::operator==(
+    const CpuDeviceInterface::SwsFrameContext& other) const {
+  if (inputWidth != other.inputWidth || inputHeight != other.inputHeight) {
+    return false;
+  }
+  if (inputFormat != other.inputFormat) {
+    return false;
+  }
+  return outputWidth == other.outputWidth &&
+      outputHeight == other.outputHeight;
+}
+// Negation of operator==.
+bool CpuDeviceInterface::SwsFrameContext::operator!=(
+    const CpuDeviceInterface::SwsFrameContext& other) const {
+  const bool same = operator==(other);
+  return !same;
+}
+// Constructs the CPU device interface for 'device'. Fails a TORCH_CHECK if the
+// CPU interface was not registered or if the device is not a CPU device.
+CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
+    : DeviceInterface(device) {
+  // g_cpu holds the result of the file-level registerDeviceInterface() call.
+  TORCH_CHECK(g_cpu, "CpuDeviceInterface was not registered!");
+  TORCH_CHECK(
+      device_.type() == torch::kCPU, "Unsupported device: ", device_.str());
+}
+// Note [preAllocatedOutputTensor with swscale and filtergraph]:
+// Callers may pass a pre-allocated tensor, where the output.data tensor will
+// be stored. This parameter is honored in any case, but it only leads to a
+// speed-up when swscale is used. With swscale, we can tell ffmpeg to place the
+// decoded frame directly into `preAllocatedOutputTensor.data_ptr()`. We haven't
+// yet found a way to do that with filtergraph.
+// TODO: Figure out whether that's possible!
+// Dimension order of the preAllocatedOutputTensor must be HWC, regardless of
+// `dimension_order` parameter. It's up to callers to re-shape it if needed.
+//
+// Converts the decoded `avFrame` to an HxWx3 tensor and stores it in
+// `frameOutput.data`.
+//   videoStreamOptions: source of the requested output dimensions and of the
+//     optional color conversion library override.
+//   timeBase: forwarded to the filtergraph configuration.
+//   avFrame: the decoded frame to convert.
+//   frameOutput: receives the resulting tensor in its `data` field.
+//   preAllocatedOutputTensor: optional HxWx3 destination tensor; see the note
+//     above.
+// Fails a TORCH_CHECK when the pre-allocated tensor or the converted frame
+// does not have the expected shape, or when the color conversion library is
+// not recognized.
+void CpuDeviceInterface::convertAVFrameToFrameOutput(
+    const VideoStreamOptions& videoStreamOptions,
+    const AVRational& timeBase,
+    UniqueAVFrame& avFrame,
+    FrameOutput& frameOutput,
+    std::optional<torch::Tensor> preAllocatedOutputTensor) {
+  auto frameDims =
+      getHeightAndWidthFromOptionsOrAVFrame(videoStreamOptions, avFrame);
+  int expectedOutputHeight = frameDims.height;
+  int expectedOutputWidth = frameDims.width;
+  if (preAllocatedOutputTensor.has_value()) {
+    auto shape = preAllocatedOutputTensor.value().sizes();
+    TORCH_CHECK(
+        (shape.size() == 3) && (shape[0] == expectedOutputHeight) &&
+            (shape[1] == expectedOutputWidth) && (shape[2] == 3),
+        "Expected pre-allocated tensor of shape ",
+        expectedOutputHeight,
+        "x",
+        expectedOutputWidth,
+        "x3, got ",
+        shape);
+  }
+  torch::Tensor outputTensor;
+  enum AVPixelFormat frameFormat =
+      static_cast<enum AVPixelFormat>(avFrame->format);
+  // By default, we want to use swscale for color conversion because it is
+  // faster. However, it has width requirements, so we may need to fall back
+  // to filtergraph. We also need to respect what was requested from the
+  // options; we respect the options unconditionally, so it's possible for
+  // swscale's width requirements to be violated. We don't expose the ability to
+  // choose color conversion library publicly; we only use this ability
+  // internally.
+  // swscale requires widths to be multiples of 32:
+  // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
+  // so we fall back to filtergraph if the width is not a multiple of 32.
+  auto defaultLibrary = (expectedOutputWidth % 32 == 0)
+      ? ColorConversionLibrary::SWSCALE
+      : ColorConversionLibrary::FILTERGRAPH;
+  ColorConversionLibrary colorConversionLibrary =
+      videoStreamOptions.colorConversionLibrary.value_or(defaultLibrary);
+  if (colorConversionLibrary == ColorConversionLibrary::SWSCALE) {
+    // We need to compare the current frame context with our previous frame
+    // context. If they are different, then we need to re-create our colorspace
+    // conversion objects. We create our colorspace conversion objects late so
+    // that we don't have to depend on the unreliable metadata in the header.
+    // And we sometimes re-create them because it's possible for frame
+    // resolution to change mid-stream. Finally, we want to reuse the colorspace
+    // conversion objects as much as possible for performance reasons.
+    SwsFrameContext swsFrameContext;
+    swsFrameContext.inputWidth = avFrame->width;
+    swsFrameContext.inputHeight = avFrame->height;
+    swsFrameContext.inputFormat = frameFormat;
+    swsFrameContext.outputWidth = expectedOutputWidth;
+    swsFrameContext.outputHeight = expectedOutputHeight;
+    // Only allocate a fresh tensor when the caller did not supply one.
+    // std::optional::value_or() would evaluate its argument eagerly and
+    // allocate a tensor that is thrown away whenever a pre-allocated tensor is
+    // present.
+    outputTensor = preAllocatedOutputTensor.has_value()
+        ? preAllocatedOutputTensor.value()
+        : allocateEmptyHWCTensor(
+              expectedOutputHeight, expectedOutputWidth, torch::kCPU);
+    if (!swsContext_ || prevSwsFrameContext_ != swsFrameContext) {
+      createSwsContext(swsFrameContext, avFrame->colorspace);
+      prevSwsFrameContext_ = swsFrameContext;
+    }
+    int resultHeight =
+        convertAVFrameToTensorUsingSwsScale(avFrame, outputTensor);
+    // If this check failed, it would mean that the frame wasn't reshaped to
+    // the expected height.
+    // TODO: Can we do the same check for width?
+    TORCH_CHECK(
+        resultHeight == expectedOutputHeight,
+        "resultHeight != expectedOutputHeight: ",
+        resultHeight,
+        " != ",
+        expectedOutputHeight);
+    frameOutput.data = outputTensor;
+  } else if (colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) {
+    // See the comment above in the swscale branch: filterGraphContext_ is
+    // likewise created late and re-created only when the frame context
+    // changes.
+    FiltersContext filtersContext;
+    filtersContext.inputWidth = avFrame->width;
+    filtersContext.inputHeight = avFrame->height;
+    filtersContext.inputFormat = frameFormat;
+    filtersContext.inputAspectRatio = avFrame->sample_aspect_ratio;
+    filtersContext.outputWidth = expectedOutputWidth;
+    filtersContext.outputHeight = expectedOutputHeight;
+    filtersContext.outputFormat = AV_PIX_FMT_RGB24;
+    filtersContext.timeBase = timeBase;
+    std::stringstream filters;
+    filters << "scale=" << expectedOutputWidth << ":" << expectedOutputHeight;
+    filters << ":sws_flags=bilinear";
+    filtersContext.filtergraphStr = filters.str();
+    if (!filterGraphContext_ || prevFiltersContext_ != filtersContext) {
+      filterGraphContext_ =
+          std::make_unique<FilterGraph>(filtersContext, videoStreamOptions);
+      prevFiltersContext_ = std::move(filtersContext);
+    }
+    outputTensor = convertAVFrameToTensorUsingFilterGraph(avFrame);
+    // Similarly to above, if this check fails it means the frame wasn't
+    // reshaped to its expected dimensions by filtergraph.
+    auto shape = outputTensor.sizes();
+    TORCH_CHECK(
+        (shape.size() == 3) && (shape[0] == expectedOutputHeight) &&
+            (shape[1] == expectedOutputWidth) && (shape[2] == 3),
+        "Expected output tensor of shape ",
+        expectedOutputHeight,
+        "x",
+        expectedOutputWidth,
+        "x3, got ",
+        shape);
+    if (preAllocatedOutputTensor.has_value()) {
+      // We have already validated that preAllocatedOutputTensor and
+      // outputTensor have the same shape.
+      preAllocatedOutputTensor.value().copy_(outputTensor);
+      frameOutput.data = preAllocatedOutputTensor.value();
+    } else {
+      frameOutput.data = outputTensor;
+    }
+  } else {
+    TORCH_CHECK(
+        false,
+        "Invalid color conversion library: ",
+        static_cast<int>(colorConversionLibrary));
+  }
+}
+// Runs sws_scale() with swsContext_ to write `avFrame` into the memory of
+// `outputTensor`, and returns the value sws_scale() reports.
+// NOTE(review): the destination row size is computed as width * 3, which
+// assumes `outputTensor` is a densely packed HxWx3 uint8 tensor - confirm
+// that callers never pass a strided tensor.
+int CpuDeviceInterface::convertAVFrameToTensorUsingSwsScale(
+    const UniqueAVFrame& avFrame,
+    torch::Tensor& outputTensor) {
+  // Only the first destination plane is used.
+  uint8_t* dstPlanes[4] = {nullptr, nullptr, nullptr, nullptr};
+  dstPlanes[0] = outputTensor.data_ptr<uint8_t>();
+  const int rgbRowBytes = static_cast<int>(outputTensor.sizes()[1]) * 3;
+  int dstLinesizes[4] = {rgbRowBytes, 0, 0, 0};
+  return sws_scale(
+      swsContext_.get(),
+      avFrame->data,
+      avFrame->linesize,
+      0,
+      avFrame->height,
+      dstPlanes,
+      dstLinesizes);
+}
+// Passes `avFrame` through filterGraphContext_ and returns a uint8 tensor of
+// shape {height, width, 3} that views the filtered frame's first data plane
+// without copying it.
+torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
+    const UniqueAVFrame& avFrame) {
+  UniqueAVFrame filteredAVFrame = filterGraphContext_->convert(avFrame);
+  TORCH_CHECK_EQ(filteredAVFrame->format, AV_PIX_FMT_RGB24);
+  auto frameDims = getHeightAndWidthFromResizedAVFrame(*filteredAVFrame.get());
+  int height = frameDims.height;
+  int width = frameDims.width;
+  std::vector<int64_t> shape = {height, width, 3};
+  // The row stride comes from the frame's linesize rather than width * 3.
+  std::vector<int64_t> strides = {filteredAVFrame->linesize[0], 3, 1};
+  // Give up ownership here: from this point on the frame is kept alive by the
+  // deleter below, which is handed to from_blob() together with the data
+  // pointer.
+  AVFrame* filteredAVFramePtr = filteredAVFrame.release();
+  // Re-wrapping the raw pointer in a UniqueAVFrame frees the frame when the
+  // deleter runs.
+  auto deleter = [filteredAVFramePtr](void*) {
+    UniqueAVFrame avFrameToDelete(filteredAVFramePtr);
+  };
+  return torch::from_blob(
+      filteredAVFramePtr->data[0], shape, strides, deleter, {torch::kUInt8});
+}
+// Creates a bilinear swscale context that converts frames described by
+// `swsFrameContext` to RGB24, applies the coefficients of `colorspace` to it,
+// and stores it in swsContext_ (replacing any previous context).
+// Fails a TORCH_CHECK if the context cannot be created or if its colorspace
+// details cannot be read or written; swsContext_ is left untouched in that
+// case.
+void CpuDeviceInterface::createSwsContext(
+    const SwsFrameContext& swsFrameContext,
+    const enum AVColorSpace colorspace) {
+  // Own the new context right away so that it is freed if one of the checks
+  // below throws, instead of leaking the raw pointer.
+  decltype(swsContext_) swsContext(sws_getContext(
+      swsFrameContext.inputWidth,
+      swsFrameContext.inputHeight,
+      swsFrameContext.inputFormat,
+      swsFrameContext.outputWidth,
+      swsFrameContext.outputHeight,
+      AV_PIX_FMT_RGB24,
+      SWS_BILINEAR,
+      nullptr,
+      nullptr,
+      nullptr));
+  TORCH_CHECK(swsContext != nullptr, "sws_getContext() returned nullptr");
+  // Read the current details so that everything except the coefficient
+  // tables is written back unchanged below.
+  int* invTable = nullptr;
+  int* table = nullptr;
+  int srcRange, dstRange, brightness, contrast, saturation;
+  int ret = sws_getColorspaceDetails(
+      swsContext.get(),
+      &invTable,
+      &srcRange,
+      &table,
+      &dstRange,
+      &brightness,
+      &contrast,
+      &saturation);
+  TORCH_CHECK(ret != -1, "sws_getColorspaceDetails returned -1");
+  // The same coefficient table is used for both the source and destination.
+  const int* colorspaceTable = sws_getCoefficients(colorspace);
+  ret = sws_setColorspaceDetails(
+      swsContext.get(),
+      colorspaceTable,
+      srcRange,
+      colorspaceTable,
+      dstRange,
+      brightness,
+      contrast,
+      saturation);
+  TORCH_CHECK(ret != -1, "sws_setColorspaceDetails returned -1");
+  // Publish the fully configured context only after every step succeeded.
+  swsContext_.reset(swsContext.release());
+}
+} // namespace facebook::torchcodec