PyPI - torchcodec - Versions diffs - 0.3.0__cp39-cp39-manylinux_2_28_x86_64.whl - Mend

torchcodec 0.3.0__cp39-cp39-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of torchcodec might be problematic. Click here for more details.

Files changed (57) hide show

torchcodec/__init__.py +16 -0
torchcodec/_core/AVIOBytesContext.cpp +70 -0
torchcodec/_core/AVIOBytesContext.h +32 -0
torchcodec/_core/AVIOContextHolder.cpp +50 -0
torchcodec/_core/AVIOContextHolder.h +65 -0
torchcodec/_core/AVIOFileLikeContext.cpp +80 -0
torchcodec/_core/AVIOFileLikeContext.h +54 -0
torchcodec/_core/CMakeLists.txt +237 -0
torchcodec/_core/CudaDeviceInterface.cpp +289 -0
torchcodec/_core/CudaDeviceInterface.h +34 -0
torchcodec/_core/DeviceInterface.cpp +88 -0
torchcodec/_core/DeviceInterface.h +66 -0
torchcodec/_core/Encoder.cpp +319 -0
torchcodec/_core/Encoder.h +39 -0
torchcodec/_core/FFMPEGCommon.cpp +264 -0
torchcodec/_core/FFMPEGCommon.h +180 -0
torchcodec/_core/Frame.h +47 -0
torchcodec/_core/Metadata.h +70 -0
torchcodec/_core/SingleStreamDecoder.cpp +1947 -0
torchcodec/_core/SingleStreamDecoder.h +462 -0
torchcodec/_core/StreamOptions.h +49 -0
torchcodec/_core/__init__.py +39 -0
torchcodec/_core/_metadata.py +277 -0
torchcodec/_core/custom_ops.cpp +681 -0
torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +226 -0
torchcodec/_core/ops.py +381 -0
torchcodec/_core/pybind_ops.cpp +45 -0
torchcodec/_frame.py +145 -0
torchcodec/_internally_replaced_utils.py +53 -0
torchcodec/_samplers/__init__.py +7 -0
torchcodec/_samplers/video_clip_sampler.py +430 -0
torchcodec/decoders/__init__.py +11 -0
torchcodec/decoders/_audio_decoder.py +168 -0
torchcodec/decoders/_decoder_utils.py +52 -0
torchcodec/decoders/_video_decoder.py +399 -0
torchcodec/libtorchcodec_custom_ops4.so +0 -0
torchcodec/libtorchcodec_custom_ops5.so +0 -0
torchcodec/libtorchcodec_custom_ops6.so +0 -0
torchcodec/libtorchcodec_custom_ops7.so +0 -0
torchcodec/libtorchcodec_decoder4.so +0 -0
torchcodec/libtorchcodec_decoder5.so +0 -0
torchcodec/libtorchcodec_decoder6.so +0 -0
torchcodec/libtorchcodec_decoder7.so +0 -0
torchcodec/libtorchcodec_pybind_ops4.so +0 -0
torchcodec/libtorchcodec_pybind_ops5.so +0 -0
torchcodec/libtorchcodec_pybind_ops6.so +0 -0
torchcodec/libtorchcodec_pybind_ops7.so +0 -0
torchcodec/samplers/__init__.py +2 -0
torchcodec/samplers/_common.py +84 -0
torchcodec/samplers/_index_based.py +285 -0
torchcodec/samplers/_time_based.py +348 -0
torchcodec/version.py +2 -0
torchcodec-0.3.0.dist-info/LICENSE +28 -0
torchcodec-0.3.0.dist-info/METADATA +280 -0
torchcodec-0.3.0.dist-info/RECORD +57 -0
torchcodec-0.3.0.dist-info/WHEEL +5 -0
torchcodec-0.3.0.dist-info/top_level.txt +2 -0

torchcodec/_core/FFMPEGCommon.h ADDED Viewed

@@ -0,0 +1,180 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <memory>
+#include <stdexcept>
+#include <string>
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavfilter/avfilter.h>
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+#include <libavutil/avutil.h>
+#include <libavutil/dict.h>
+#include <libavutil/display.h>
+#include <libavutil/file.h>
+#include <libavutil/opt.h>
+#include <libavutil/pixfmt.h>
+#include <libavutil/version.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+}
+namespace facebook::torchcodec {
+// FFMPEG uses special delete functions for some structures. These functor
+// templates are passed to unique_ptr as custom deleters so that FFMPEG structs
+// can be owned by unique_ptrs for ease of use.
+template <typename T, typename R, R (*Fn)(T**)>
+struct Deleterp { // For free functions that take the pointer's address (T**).
+  inline void operator()(T* p) const {
+    if (p) { // Never hand a null pointer to Fn.
+      Fn(&p); // Fn receives the address of the by-value copy `p`.
+    }
+  }
+};
+template <typename T, typename R, R (*Fn)(T*)>
+struct Deleter { // For free functions that take the pointer itself (T*).
+  inline void operator()(T* p) const {
+    if (p) { // Skip the call entirely for null pointers.
+      Fn(p);
+    }
+  }
+};
+// unique_ptr aliases binding each FFMPEG struct to its free function.
+using UniqueDecodingAVFormatContext = std::unique_ptr<
+    AVFormatContext,
+    Deleterp<AVFormatContext, void, avformat_close_input>>;
+using UniqueEncodingAVFormatContext = std::unique_ptr<
+    AVFormatContext,
+    Deleter<AVFormatContext, void, avformat_free_context>>; // T* -> Deleter
+using UniqueAVCodecContext = std::unique_ptr<
+    AVCodecContext,
+    Deleterp<AVCodecContext, void, avcodec_free_context>>;
+using UniqueAVFrame =
+    std::unique_ptr<AVFrame, Deleterp<AVFrame, void, av_frame_free>>;
+using UniqueAVFilterGraph = std::unique_ptr<
+    AVFilterGraph,
+    Deleterp<AVFilterGraph, void, avfilter_graph_free>>;
+using UniqueAVFilterInOut = std::unique_ptr<
+    AVFilterInOut,
+    Deleterp<AVFilterInOut, void, avfilter_inout_free>>;
+using UniqueAVIOContext = std::
+    unique_ptr<AVIOContext, Deleterp<AVIOContext, void, avio_context_free>>;
+using UniqueSwsContext =
+    std::unique_ptr<SwsContext, Deleter<SwsContext, void, sws_freeContext>>; // T* -> Deleter
+using UniqueSwrContext =
+    std::unique_ptr<SwrContext, Deleterp<SwrContext, void, swr_free>>;
+// These 2 classes share the same underlying AVPacket object. They are meant to
+// be used in tandem, like so:
+//
+// AutoAVPacket autoAVPacket; // <-- malloc for AVPacket happens here
+// while(...){
+//   ReferenceAVPacket packet(autoAVPacket);
+//   av_read_frame(..., packet.get());  <-- av_packet_ref() called by FFmpeg
+// } <-- av_packet_unref() called here
+//
+// This achieves a few desirable things:
+// - Memory allocation of the underlying AVPacket happens only once, when
+//   autoAVPacket is created.
+// - av_packet_free() is called when autoAVPacket goes out of scope.
+// - av_packet_unref() is automatically called when needed, i.e. at the end of
+//   each loop iteration (or when hitting break / continue). This prevents the
+//   risk of us forgetting to call it.
+class AutoAVPacket { // Owner of the AVPacket shared with ReferenceAVPacket.
+  friend class ReferenceAVPacket; // Grants ReferenceAVPacket access to avPacket_.
+ private:
+  AVPacket* avPacket_; // Raw pointer; lifetime is described in the usage notes above.
+ public:
+  AutoAVPacket();
+  AutoAVPacket(const AutoAVPacket& other) = delete; // Non-copyable.
+  AutoAVPacket& operator=(const AutoAVPacket& other) = delete;
+  ~AutoAVPacket();
+};
+class ReferenceAVPacket { // Scoped handle onto an AutoAVPacket's packet.
+ private:
+  AVPacket* avPacket_; // Presumably aliases shared.avPacket_ - confirm in the .cpp.
+ public:
+  explicit ReferenceAVPacket(AutoAVPacket& shared);
+  ReferenceAVPacket(const ReferenceAVPacket& other) = delete; // Non-copyable.
+  ReferenceAVPacket& operator=(const ReferenceAVPacket& other) = delete;
+  ~ReferenceAVPacket(); // Per the usage notes above, av_packet_unref() runs here.
+  AVPacket* get();
+  AVPacket* operator->();
+};
+// av_find_best_stream is not const-correct before commit:
+// https://github.com/FFmpeg/FFmpeg/commit/46dac8cf3d250184ab4247809bc03f60e14f4c0c
+// which was released in FFMPEG version=5.0.3
+// with libavcodec's version=59.18.100
+// (https://www.ffmpeg.org/olddownload.html).
+// Note that the alias is so-named so that it is only used when interacting with
+// av_find_best_stream(). It is not needed elsewhere.
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 18, 100) // before the fix
+using AVCodecOnlyUseForCallingAVFindBestStream = AVCodec*;
+#else // const-correct av_find_best_stream()
+using AVCodecOnlyUseForCallingAVFindBestStream = const AVCodec*;
+#endif // LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 18, 100)
+AVCodecOnlyUseForCallingAVFindBestStream
+makeAVCodecOnlyUseForCallingAVFindBestStream(const AVCodec* codec); // Adapts `codec` to the alias type.
+// Success code from FFMPEG is just a 0. We define it to make the code more
+// readable.
+const int AVSUCCESS = 0;
+// Returns the FFMPEG error as a string using the provided `errorCode`.
+std::string getFFMPEGErrorStringFromErrorCode(int errorCode);
+// Returns duration from the frame. Abstracted into a function because the
+// struct member representing duration has changed across the versions we
+// support.
+int64_t getDuration(const UniqueAVFrame& frame);
+int getNumChannels(const UniqueAVFrame& avFrame); // NOTE(review): presumably a version shim like getDuration - confirm.
+int getNumChannels(const UniqueAVCodecContext& avCodecContext);
+void setDefaultChannelLayout(
+    UniqueAVCodecContext& avCodecContext, // Non-const reference: modified in place.
+    int numChannels);
+void setChannelLayout(
+    UniqueAVFrame& dstAVFrame, // Destination; source is the codec context.
+    const UniqueAVCodecContext& avCodecContext);
+void setChannelLayout(
+    UniqueAVFrame& dstAVFrame, // Destination; source is another frame.
+    const UniqueAVFrame& srcAVFrame);
+SwrContext* createSwrContext( // NOTE(review): raw pointer; caller presumably wraps it in UniqueSwrContext - confirm.
+    UniqueAVCodecContext& avCodecContext,
+    AVSampleFormat sourceSampleFormat,
+    AVSampleFormat desiredSampleFormat,
+    int sourceSampleRate,
+    int desiredSampleRate);
+UniqueAVFrame convertAudioAVFrameSampleFormatAndSampleRate( // Result is an owning UniqueAVFrame.
+    const UniqueSwrContext& swrContext,
+    const UniqueAVFrame& srcAVFrame,
+    AVSampleFormat desiredSampleFormat,
+    int sourceSampleRate,
+    int desiredSampleRate);
+// Returns true if sws_scale can handle unaligned data.
+bool canSwsScaleHandleUnalignedData();
+void setFFmpegLogLevel(); // NOTE(review): takes no argument; the level's source is not visible here.
+} // namespace facebook::torchcodec
+} // namespace facebook::torchcodec

torchcodec/_core/Frame.h ADDED Viewed

@@ -0,0 +1,47 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <torch/types.h>
+#include "src/torchcodec/_core/Metadata.h"
+#include "src/torchcodec/_core/StreamOptions.h"
+namespace facebook::torchcodec {
+// All public video decoding entry points return either a FrameOutput or a
+// FrameBatchOutput.
+// They are the equivalent of the user-facing Frame and FrameBatch classes in
+// Python. They contain RGB decoded frames along with some associated data
+// like PTS and duration.
+// FrameOutput is also relevant for audio decoding, typically as the output of
+// getNextFrame(), or as a temporary output variable.
+// A single decoded frame (video or audio) together with its timing.
+struct FrameOutput {
+  // data shape is:
+  // - 3D (C, H, W) or (H, W, C) for videos
+  // - 2D (numChannels, numSamples) for audio
+  torch::Tensor data;
+  // Presentation timestamp and duration of the frame. Both are
+  // zero-initialized so that a default-initialized FrameOutput never exposes
+  // indeterminate values; the struct remains an aggregate (C++14 and later).
+  double ptsSeconds = 0.0;
+  double durationSeconds = 0.0;
+};
+struct FrameBatchOutput { // A batch of decoded frames plus per-frame timing.
+  torch::Tensor data; // 4D: of shape NCHW or NHWC.
+  torch::Tensor ptsSeconds; // 1D of shape (N,)
+  torch::Tensor durationSeconds; // 1D of shape (N,)
+  explicit FrameBatchOutput(
+      int64_t numFrames, // NOTE(review): presumably the N in the shapes above - confirm.
+      const VideoStreamOptions& videoStreamOptions,
+      const StreamMetadata& streamMetadata);
+};
+// Decoded audio samples together with a single presentation timestamp.
+struct AudioFramesOutput {
+  torch::Tensor data; // shape is (numChannels, numSamples)
+  // Zero-initialized so that a default-initialized AudioFramesOutput never
+  // exposes an indeterminate value; the struct remains an aggregate.
+  double ptsSeconds = 0.0;
+};
+} // namespace facebook::torchcodec

torchcodec/_core/Metadata.h ADDED Viewed

@@ -0,0 +1,70 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+#pragma once
+#include <optional>
+#include <string>
+#include <vector>
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavutil/avutil.h>
+}
+namespace facebook::torchcodec {
+struct StreamMetadata { // Per-stream metadata; most fields are optional.
+  // Common (video and audio) fields derived from the AVStream.
+  int streamIndex; // NOTE(review): no default initializer; must be set by whoever fills this in.
+  // See this link for what various values are available:
+  // https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48
+  AVMediaType mediaType; // NOTE(review): no default initializer either.
+  std::optional<AVCodecID> codecId;
+  std::optional<std::string> codecName;
+  std::optional<double> durationSeconds;
+  std::optional<double> beginStreamFromHeader; // NOTE(review): presumably in seconds - confirm.
+  std::optional<int64_t> numFrames;
+  std::optional<int64_t> numKeyFrames;
+  std::optional<double> averageFps;
+  std::optional<double> bitRate;
+  // More accurate duration, obtained by scanning the file.
+  // These presentation timestamps are in time base.
+  std::optional<int64_t> minPtsFromScan;
+  std::optional<int64_t> maxPtsFromScan;
+  // These presentation timestamps are in seconds.
+  std::optional<double> minPtsSecondsFromScan;
+  std::optional<double> maxPtsSecondsFromScan;
+  // This can be useful for index-based seeking.
+  std::optional<int64_t> numFramesFromScan;
+  // Video-only fields derived from the AVCodecContext.
+  std::optional<int64_t> width;
+  std::optional<int64_t> height;
+  // Audio-only fields
+  std::optional<int64_t> sampleRate;
+  std::optional<int64_t> numChannels;
+  std::optional<std::string> sampleFormat;
+};
+struct ContainerMetadata { // Container-level metadata plus one entry per stream.
+  std::vector<StreamMetadata> allStreamMetadata;
+  int numAudioStreams = 0;
+  int numVideoStreams = 0;
+  // Note that this is the container-level duration, which is usually the max
+  // of all stream durations available in the container.
+  std::optional<double> durationSeconds;
+  // Total BitRate level information at the container level in bit/s
+  std::optional<double> bitRate;
+  // If set, this is the index to the default audio stream.
+  std::optional<int> bestAudioStreamIndex;
+  // If set, this is the index to the default video stream.
+  std::optional<int> bestVideoStreamIndex;
+};
+} // namespace facebook::torchcodec