torchcodec 0.3.0__cp39-cp39-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (57) hide show
  1. torchcodec/__init__.py +16 -0
  2. torchcodec/_core/AVIOBytesContext.cpp +70 -0
  3. torchcodec/_core/AVIOBytesContext.h +32 -0
  4. torchcodec/_core/AVIOContextHolder.cpp +50 -0
  5. torchcodec/_core/AVIOContextHolder.h +65 -0
  6. torchcodec/_core/AVIOFileLikeContext.cpp +80 -0
  7. torchcodec/_core/AVIOFileLikeContext.h +54 -0
  8. torchcodec/_core/CMakeLists.txt +237 -0
  9. torchcodec/_core/CudaDeviceInterface.cpp +289 -0
  10. torchcodec/_core/CudaDeviceInterface.h +34 -0
  11. torchcodec/_core/DeviceInterface.cpp +88 -0
  12. torchcodec/_core/DeviceInterface.h +66 -0
  13. torchcodec/_core/Encoder.cpp +319 -0
  14. torchcodec/_core/Encoder.h +39 -0
  15. torchcodec/_core/FFMPEGCommon.cpp +264 -0
  16. torchcodec/_core/FFMPEGCommon.h +180 -0
  17. torchcodec/_core/Frame.h +47 -0
  18. torchcodec/_core/Metadata.h +70 -0
  19. torchcodec/_core/SingleStreamDecoder.cpp +1947 -0
  20. torchcodec/_core/SingleStreamDecoder.h +462 -0
  21. torchcodec/_core/StreamOptions.h +49 -0
  22. torchcodec/_core/__init__.py +39 -0
  23. torchcodec/_core/_metadata.py +277 -0
  24. torchcodec/_core/custom_ops.cpp +681 -0
  25. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +226 -0
  26. torchcodec/_core/ops.py +381 -0
  27. torchcodec/_core/pybind_ops.cpp +45 -0
  28. torchcodec/_frame.py +145 -0
  29. torchcodec/_internally_replaced_utils.py +53 -0
  30. torchcodec/_samplers/__init__.py +7 -0
  31. torchcodec/_samplers/video_clip_sampler.py +430 -0
  32. torchcodec/decoders/__init__.py +11 -0
  33. torchcodec/decoders/_audio_decoder.py +168 -0
  34. torchcodec/decoders/_decoder_utils.py +52 -0
  35. torchcodec/decoders/_video_decoder.py +399 -0
  36. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  37. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  38. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  39. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  40. torchcodec/libtorchcodec_decoder4.so +0 -0
  41. torchcodec/libtorchcodec_decoder5.so +0 -0
  42. torchcodec/libtorchcodec_decoder6.so +0 -0
  43. torchcodec/libtorchcodec_decoder7.so +0 -0
  44. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  45. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  46. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  47. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  48. torchcodec/samplers/__init__.py +2 -0
  49. torchcodec/samplers/_common.py +84 -0
  50. torchcodec/samplers/_index_based.py +285 -0
  51. torchcodec/samplers/_time_based.py +348 -0
  52. torchcodec/version.py +2 -0
  53. torchcodec-0.3.0.dist-info/LICENSE +28 -0
  54. torchcodec-0.3.0.dist-info/METADATA +280 -0
  55. torchcodec-0.3.0.dist-info/RECORD +57 -0
  56. torchcodec-0.3.0.dist-info/WHEEL +5 -0
  57. torchcodec-0.3.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,180 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <memory>
10
+ #include <stdexcept>
11
+ #include <string>
12
+
13
+ extern "C" {
14
+ #include <libavcodec/avcodec.h>
15
+ #include <libavfilter/avfilter.h>
16
+ #include <libavformat/avformat.h>
17
+ #include <libavformat/avio.h>
18
+ #include <libavutil/avutil.h>
19
+ #include <libavutil/dict.h>
20
+ #include <libavutil/display.h>
21
+ #include <libavutil/file.h>
22
+ #include <libavutil/opt.h>
23
+ #include <libavutil/pixfmt.h>
24
+ #include <libavutil/version.h>
25
+ #include <libswresample/swresample.h>
26
+ #include <libswscale/swscale.h>
27
+ }
28
+
29
+ namespace facebook::torchcodec {
30
+
31
+ // FFMPEG uses special delete functions for some structures. These template
32
+ // functions are used to pass into unique_ptr as custom deleters so we can
33
+ // wrap FFMPEG structs with unique_ptrs for ease of use.
34
+ template <typename T, typename R, R (*Fn)(T**)>
35
+ struct Deleterp {
36
+ inline void operator()(T* p) const {
37
+ if (p) {
38
+ Fn(&p);
39
+ }
40
+ }
41
+ };
42
+
43
+ template <typename T, typename R, R (*Fn)(T*)>
44
+ struct Deleter {
45
+ inline void operator()(T* p) const {
46
+ if (p) {
47
+ Fn(p);
48
+ }
49
+ }
50
+ };
51
+
52
+ // Unique pointers for FFMPEG structures.
53
+ using UniqueDecodingAVFormatContext = std::unique_ptr<
54
+ AVFormatContext,
55
+ Deleterp<AVFormatContext, void, avformat_close_input>>;
56
+ using UniqueEncodingAVFormatContext = std::unique_ptr<
57
+ AVFormatContext,
58
+ Deleter<AVFormatContext, void, avformat_free_context>>;
59
+ using UniqueAVCodecContext = std::unique_ptr<
60
+ AVCodecContext,
61
+ Deleterp<AVCodecContext, void, avcodec_free_context>>;
62
+ using UniqueAVFrame =
63
+ std::unique_ptr<AVFrame, Deleterp<AVFrame, void, av_frame_free>>;
64
+ using UniqueAVFilterGraph = std::unique_ptr<
65
+ AVFilterGraph,
66
+ Deleterp<AVFilterGraph, void, avfilter_graph_free>>;
67
+ using UniqueAVFilterInOut = std::unique_ptr<
68
+ AVFilterInOut,
69
+ Deleterp<AVFilterInOut, void, avfilter_inout_free>>;
70
+ using UniqueAVIOContext = std::
71
+ unique_ptr<AVIOContext, Deleterp<AVIOContext, void, avio_context_free>>;
72
+ using UniqueSwsContext =
73
+ std::unique_ptr<SwsContext, Deleter<SwsContext, void, sws_freeContext>>;
74
+ using UniqueSwrContext =
75
+ std::unique_ptr<SwrContext, Deleterp<SwrContext, void, swr_free>>;
76
+
77
+ // These 2 classes share the same underlying AVPacket object. They are meant to
78
+ // be used in tandem, like so:
79
+ //
80
+ // AutoAVPacket autoAVPacket; // <-- malloc for AVPacket happens here
81
+ // while(...){
82
+ // ReferenceAVPacket packet(autoAVPacket);
83
+ // av_read_frame(..., packet.get()); <-- av_packet_ref() called by FFmpeg
84
+ // } <-- av_packet_unref() called here
85
+ //
86
+ // This achieves a few desirable things:
87
+ // - Memory allocation of the underlying AVPacket happens only once, when
88
+ // autoAVPacket is created.
89
+ // - av_packet_free() is called when autoAVPacket gets out of scope
90
+ // - av_packet_unref() is automatically called when needed, i.e. at the end of
91
+ // each loop iteration (or when hitting break / continue). This prevents the
92
+ // risk of us forgetting to call it.
93
+ class AutoAVPacket {
94
+ friend class ReferenceAVPacket;
95
+
96
+ private:
97
+ AVPacket* avPacket_;
98
+
99
+ public:
100
+ AutoAVPacket();
101
+ AutoAVPacket(const AutoAVPacket& other) = delete;
102
+ AutoAVPacket& operator=(const AutoAVPacket& other) = delete;
103
+ ~AutoAVPacket();
104
+ };
105
+
106
+ class ReferenceAVPacket {
107
+ private:
108
+ AVPacket* avPacket_;
109
+
110
+ public:
111
+ explicit ReferenceAVPacket(AutoAVPacket& shared);
112
+ ReferenceAVPacket(const ReferenceAVPacket& other) = delete;
113
+ ReferenceAVPacket& operator=(const ReferenceAVPacket& other) = delete;
114
+ ~ReferenceAVPacket();
115
+ AVPacket* get();
116
+ AVPacket* operator->();
117
+ };
118
+
119
+ // av_find_best_stream is not const-correct before commit:
120
+ // https://github.com/FFmpeg/FFmpeg/commit/46dac8cf3d250184ab4247809bc03f60e14f4c0c
121
+ // which was released in FFMPEG version=5.0.3
122
+ // with libavcodec's version=59.18.100
123
+ // (https://www.ffmpeg.org/olddownload.html).
124
+ // Note that the alias is so-named so that it is only used when interacting with
125
+ // av_find_best_stream(). It is not needed elsewhere.
126
+ #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 18, 100)
127
+ using AVCodecOnlyUseForCallingAVFindBestStream = AVCodec*;
128
+ #else
129
+ using AVCodecOnlyUseForCallingAVFindBestStream = const AVCodec*;
130
+ #endif
131
+
132
+ AVCodecOnlyUseForCallingAVFindBestStream
133
+ makeAVCodecOnlyUseForCallingAVFindBestStream(const AVCodec* codec);
134
+
135
// FFmpeg signals success with a plain 0; this named constant makes call sites
// easier to read.
// Declared `inline constexpr` so that every translation unit including this
// header shares one definition, instead of each getting its own
// internal-linkage copy.
inline constexpr int AVSUCCESS = 0;
138
+
139
+ // Returns the FFMPEG error as a string using the provided `errorCode`.
140
+ std::string getFFMPEGErrorStringFromErrorCode(int errorCode);
141
+
142
+ // Returns duration from the frame. Abstracted into a function because the
143
+ // struct member representing duration has changed across the versions we
144
+ // support.
145
+ int64_t getDuration(const UniqueAVFrame& frame);
146
+
147
+ int getNumChannels(const UniqueAVFrame& avFrame);
148
+ int getNumChannels(const UniqueAVCodecContext& avCodecContext);
149
+
150
+ void setDefaultChannelLayout(
151
+ UniqueAVCodecContext& avCodecContext,
152
+ int numChannels);
153
+
154
+ void setChannelLayout(
155
+ UniqueAVFrame& dstAVFrame,
156
+ const UniqueAVCodecContext& avCodecContext);
157
+
158
+ void setChannelLayout(
159
+ UniqueAVFrame& dstAVFrame,
160
+ const UniqueAVFrame& srcAVFrame);
161
+ SwrContext* createSwrContext(
162
+ UniqueAVCodecContext& avCodecContext,
163
+ AVSampleFormat sourceSampleFormat,
164
+ AVSampleFormat desiredSampleFormat,
165
+ int sourceSampleRate,
166
+ int desiredSampleRate);
167
+
168
+ UniqueAVFrame convertAudioAVFrameSampleFormatAndSampleRate(
169
+ const UniqueSwrContext& swrContext,
170
+ const UniqueAVFrame& srcAVFrame,
171
+ AVSampleFormat desiredSampleFormat,
172
+ int sourceSampleRate,
173
+ int desiredSampleRate);
174
+
175
// Whether sws_scale() can be handed unaligned data; true when it can.
bool canSwsScaleHandleUnalignedData();

// Configures FFmpeg's log level.
// NOTE(review): the level that gets applied is chosen in the definition,
// which is not visible from this header.
void setFFmpegLogLevel();
179
+
180
+ } // namespace facebook::torchcodec
@@ -0,0 +1,47 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <torch/types.h>
10
+ #include "src/torchcodec/_core/Metadata.h"
11
+ #include "src/torchcodec/_core/StreamOptions.h"
12
+
13
+ namespace facebook::torchcodec {
14
+
15
+ // All public video decoding entry points return either a FrameOutput or a
16
+ // FrameBatchOutput.
17
+ // They are the equivalent of the user-facing Frame and FrameBatch classes in
18
+ // Python. They contain RGB decoded frames along with some associated data
19
+ // like PTS and duration.
20
+ // FrameOutput is also relevant for audio decoding, typically as the output of
21
+ // getNextFrame(), or as a temporary output variable.
22
+ struct FrameOutput {
23
+ // data shape is:
24
+ // - 3D (C, H, W) or (H, W, C) for videos
25
+ // - 2D (numChannels, numSamples) for audio
26
+ torch::Tensor data;
27
+ double ptsSeconds;
28
+ double durationSeconds;
29
+ };
30
+
31
+ struct FrameBatchOutput {
32
+ torch::Tensor data; // 4D: of shape NCHW or NHWC.
33
+ torch::Tensor ptsSeconds; // 1D of shape (N,)
34
+ torch::Tensor durationSeconds; // 1D of shape (N,)
35
+
36
+ explicit FrameBatchOutput(
37
+ int64_t numFrames,
38
+ const VideoStreamOptions& videoStreamOptions,
39
+ const StreamMetadata& streamMetadata);
40
+ };
41
+
42
+ struct AudioFramesOutput {
43
+ torch::Tensor data; // shape is (numChannels, numSamples)
44
+ double ptsSeconds;
45
+ };
46
+
47
+ } // namespace facebook::torchcodec
@@ -0,0 +1,70 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <optional>
10
+ #include <string>
11
+ #include <vector>
12
+
13
+ extern "C" {
14
+ #include <libavcodec/avcodec.h>
15
+ #include <libavutil/avutil.h>
16
+ }
17
+
18
+ namespace facebook::torchcodec {
19
+
20
+ struct StreamMetadata {
21
+ // Common (video and audio) fields derived from the AVStream.
22
+ int streamIndex;
23
+ // See this link for what various values are available:
24
+ // https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48
25
+ AVMediaType mediaType;
26
+ std::optional<AVCodecID> codecId;
27
+ std::optional<std::string> codecName;
28
+ std::optional<double> durationSeconds;
29
+ std::optional<double> beginStreamFromHeader;
30
+ std::optional<int64_t> numFrames;
31
+ std::optional<int64_t> numKeyFrames;
32
+ std::optional<double> averageFps;
33
+ std::optional<double> bitRate;
34
+
35
+ // More accurate duration, obtained by scanning the file.
36
+ // These presentation timestamps are in time base.
37
+ std::optional<int64_t> minPtsFromScan;
38
+ std::optional<int64_t> maxPtsFromScan;
39
+ // These presentation timestamps are in seconds.
40
+ std::optional<double> minPtsSecondsFromScan;
41
+ std::optional<double> maxPtsSecondsFromScan;
42
+ // This can be useful for index-based seeking.
43
+ std::optional<int64_t> numFramesFromScan;
44
+
45
+ // Video-only fields derived from the AVCodecContext.
46
+ std::optional<int64_t> width;
47
+ std::optional<int64_t> height;
48
+
49
+ // Audio-only fields
50
+ std::optional<int64_t> sampleRate;
51
+ std::optional<int64_t> numChannels;
52
+ std::optional<std::string> sampleFormat;
53
+ };
54
+
55
+ struct ContainerMetadata {
56
+ std::vector<StreamMetadata> allStreamMetadata;
57
+ int numAudioStreams = 0;
58
+ int numVideoStreams = 0;
59
+ // Note that this is the container-level duration, which is usually the max
60
+ // of all stream durations available in the container.
61
+ std::optional<double> durationSeconds;
62
+ // Total BitRate level information at the container level in bit/s
63
+ std::optional<double> bitRate;
64
+ // If set, this is the index to the default audio stream.
65
+ std::optional<int> bestAudioStreamIndex;
66
+ // If set, this is the index to the default video stream.
67
+ std::optional<int> bestVideoStreamIndex;
68
+ };
69
+
70
+ } // namespace facebook::torchcodec