torchcodec 0.8.0__cp313-cp313-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of torchcodec might be problematic. Click here for more details.

Files changed (82) hide show
  1. torchcodec/.dylibs/libc++.1.0.dylib +0 -0
  2. torchcodec/.dylibs/libpython3.13.dylib +0 -0
  3. torchcodec/__init__.py +16 -0
  4. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  5. torchcodec/_core/AVIOContextHolder.h +64 -0
  6. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  7. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  8. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  9. torchcodec/_core/AVIOTensorContext.h +43 -0
  10. torchcodec/_core/BetaCudaDeviceInterface.cpp +636 -0
  11. torchcodec/_core/BetaCudaDeviceInterface.h +191 -0
  12. torchcodec/_core/CMakeLists.txt +325 -0
  13. torchcodec/_core/CUDACommon.cpp +315 -0
  14. torchcodec/_core/CUDACommon.h +46 -0
  15. torchcodec/_core/Cache.h +138 -0
  16. torchcodec/_core/CpuDeviceInterface.cpp +347 -0
  17. torchcodec/_core/CpuDeviceInterface.h +132 -0
  18. torchcodec/_core/CudaDeviceInterface.cpp +357 -0
  19. torchcodec/_core/CudaDeviceInterface.h +64 -0
  20. torchcodec/_core/DeviceInterface.cpp +117 -0
  21. torchcodec/_core/DeviceInterface.h +148 -0
  22. torchcodec/_core/Encoder.cpp +807 -0
  23. torchcodec/_core/Encoder.h +173 -0
  24. torchcodec/_core/FFMPEGCommon.cpp +608 -0
  25. torchcodec/_core/FFMPEGCommon.h +245 -0
  26. torchcodec/_core/FilterGraph.cpp +149 -0
  27. torchcodec/_core/FilterGraph.h +59 -0
  28. torchcodec/_core/Frame.cpp +42 -0
  29. torchcodec/_core/Frame.h +72 -0
  30. torchcodec/_core/Metadata.h +72 -0
  31. torchcodec/_core/NVDECCache.cpp +70 -0
  32. torchcodec/_core/NVDECCache.h +104 -0
  33. torchcodec/_core/SingleStreamDecoder.cpp +1719 -0
  34. torchcodec/_core/SingleStreamDecoder.h +405 -0
  35. torchcodec/_core/StreamOptions.h +63 -0
  36. torchcodec/_core/Transform.cpp +60 -0
  37. torchcodec/_core/Transform.h +59 -0
  38. torchcodec/_core/ValidationUtils.cpp +35 -0
  39. torchcodec/_core/ValidationUtils.h +21 -0
  40. torchcodec/_core/__init__.py +41 -0
  41. torchcodec/_core/_metadata.py +317 -0
  42. torchcodec/_core/custom_ops.cpp +875 -0
  43. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +360 -0
  44. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  45. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  46. torchcodec/_core/ops.py +498 -0
  47. torchcodec/_core/pybind_ops.cpp +50 -0
  48. torchcodec/_frame.py +145 -0
  49. torchcodec/_internally_replaced_utils.py +67 -0
  50. torchcodec/_samplers/__init__.py +7 -0
  51. torchcodec/_samplers/video_clip_sampler.py +418 -0
  52. torchcodec/decoders/__init__.py +12 -0
  53. torchcodec/decoders/_audio_decoder.py +177 -0
  54. torchcodec/decoders/_decoder_utils.py +112 -0
  55. torchcodec/decoders/_video_decoder.py +500 -0
  56. torchcodec/encoders/__init__.py +1 -0
  57. torchcodec/encoders/_audio_encoder.py +150 -0
  58. torchcodec/libtorchcodec_core4.dylib +0 -0
  59. torchcodec/libtorchcodec_core5.dylib +0 -0
  60. torchcodec/libtorchcodec_core6.dylib +0 -0
  61. torchcodec/libtorchcodec_core7.dylib +0 -0
  62. torchcodec/libtorchcodec_core8.dylib +0 -0
  63. torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
  64. torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
  65. torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
  66. torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
  67. torchcodec/libtorchcodec_custom_ops8.dylib +0 -0
  68. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  69. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  73. torchcodec/samplers/__init__.py +2 -0
  74. torchcodec/samplers/_common.py +84 -0
  75. torchcodec/samplers/_index_based.py +287 -0
  76. torchcodec/samplers/_time_based.py +358 -0
  77. torchcodec/version.py +2 -0
  78. torchcodec-0.8.0.dist-info/METADATA +253 -0
  79. torchcodec-0.8.0.dist-info/RECORD +82 -0
  80. torchcodec-0.8.0.dist-info/WHEEL +5 -0
  81. torchcodec-0.8.0.dist-info/licenses/LICENSE +28 -0
  82. torchcodec-0.8.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,245 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <memory>
10
+ #include <stdexcept>
11
+ #include <string>
12
+
13
+ extern "C" {
14
+ #include <libavcodec/avcodec.h>
15
+ #include <libavcodec/bsf.h>
16
+ #include <libavfilter/avfilter.h>
17
+ #include <libavfilter/buffersrc.h>
18
+ #include <libavformat/avformat.h>
19
+ #include <libavformat/avio.h>
20
+ #include <libavutil/audio_fifo.h>
21
+ #include <libavutil/avutil.h>
22
+ #include <libavutil/dict.h>
23
+ #include <libavutil/display.h>
24
+ #include <libavutil/file.h>
25
+ #include <libavutil/opt.h>
26
+ #include <libavutil/pixfmt.h>
27
+ #include <libavutil/version.h>
28
+ #include <libswresample/swresample.h>
29
+ #include <libswscale/swscale.h>
30
+ }
31
+
32
+ namespace facebook::torchcodec {
33
+
34
+ // FFMPEG uses special delete functions for some structures. These template
35
+ // functions are used to pass into unique_ptr as custom deleters so we can
36
+ // wrap FFMPEG structs with unique_ptrs for ease of use.
37
+ template <typename T, typename R, R (*Fn)(T**)>
38
+ struct Deleterp {
39
+ inline void operator()(T* p) const {
40
+ if (p) {
41
+ Fn(&p);
42
+ }
43
+ }
44
+ };
45
+
46
+ template <typename T, typename R, R (*Fn)(void*)>
47
+ struct Deleterv {
48
+ inline void operator()(T* p) const {
49
+ if (p) {
50
+ Fn(&p);
51
+ }
52
+ }
53
+ };
54
+
55
+ template <typename T, typename R, R (*Fn)(T*)>
56
+ struct Deleter {
57
+ inline void operator()(T* p) const {
58
+ if (p) {
59
+ Fn(p);
60
+ }
61
+ }
62
+ };
63
+
64
+ // Unique pointers for FFMPEG structures.
65
+ using UniqueDecodingAVFormatContext = std::unique_ptr<
66
+ AVFormatContext,
67
+ Deleterp<AVFormatContext, void, avformat_close_input>>;
68
+ using UniqueEncodingAVFormatContext = std::unique_ptr<
69
+ AVFormatContext,
70
+ Deleter<AVFormatContext, void, avformat_free_context>>;
71
+ using UniqueAVCodecContext = std::unique_ptr<
72
+ AVCodecContext,
73
+ Deleterp<AVCodecContext, void, avcodec_free_context>>;
74
+ using UniqueAVFrame =
75
+ std::unique_ptr<AVFrame, Deleterp<AVFrame, void, av_frame_free>>;
76
+ using UniqueAVFilterGraph = std::unique_ptr<
77
+ AVFilterGraph,
78
+ Deleterp<AVFilterGraph, void, avfilter_graph_free>>;
79
+ using UniqueAVFilterInOut = std::unique_ptr<
80
+ AVFilterInOut,
81
+ Deleterp<AVFilterInOut, void, avfilter_inout_free>>;
82
+ using UniqueAVIOContext = std::
83
+ unique_ptr<AVIOContext, Deleterp<AVIOContext, void, avio_context_free>>;
84
+ using UniqueSwsContext =
85
+ std::unique_ptr<SwsContext, Deleter<SwsContext, void, sws_freeContext>>;
86
+ using UniqueSwrContext =
87
+ std::unique_ptr<SwrContext, Deleterp<SwrContext, void, swr_free>>;
88
+ using UniqueAVAudioFifo = std::
89
+ unique_ptr<AVAudioFifo, Deleter<AVAudioFifo, void, av_audio_fifo_free>>;
90
+ using UniqueAVBSFContext =
91
+ std::unique_ptr<AVBSFContext, Deleterp<AVBSFContext, void, av_bsf_free>>;
92
+ using UniqueAVBufferRef =
93
+ std::unique_ptr<AVBufferRef, Deleterp<AVBufferRef, void, av_buffer_unref>>;
94
+ using UniqueAVBufferSrcParameters = std::unique_ptr<
95
+ AVBufferSrcParameters,
96
+ Deleterv<AVBufferSrcParameters, void, av_freep>>;
97
+
98
+ // These 2 classes share the same underlying AVPacket object. They are meant to
99
+ // be used in tandem, like so:
100
+ //
101
+ // AutoAVPacket autoAVPacket; // <-- malloc for AVPacket happens here
102
+ // while(...){
103
+ // ReferenceAVPacket packet(autoAVPacket);
104
+ // av_read_frame(..., packet.get()); <-- av_packet_ref() called by FFmpeg
105
+ // } <-- av_packet_unref() called here
106
+ //
107
+ // This achieves a few desirable things:
108
+ // - Memory allocation of the underlying AVPacket happens only once, when
109
+ // autoAVPacket is created.
110
+ // - av_packet_free() is called when autoAVPacket gets out of scope
111
+ // - av_packet_unref() is automatically called when needed, i.e. at the end of
112
+ // each loop iteration (or when hitting break / continue). This prevents the
113
+ // risk of us forgetting to call it.
114
+ class AutoAVPacket {
115
+ friend class ReferenceAVPacket;
116
+
117
+ private:
118
+ AVPacket* avPacket_;
119
+
120
+ public:
121
+ AutoAVPacket();
122
+ AutoAVPacket(const AutoAVPacket& other) = delete;
123
+ AutoAVPacket& operator=(const AutoAVPacket& other) = delete;
124
+ ~AutoAVPacket();
125
+ };
126
+
127
+ class ReferenceAVPacket {
128
+ private:
129
+ AVPacket* avPacket_;
130
+
131
+ public:
132
+ explicit ReferenceAVPacket(AutoAVPacket& shared);
133
+ ReferenceAVPacket(const ReferenceAVPacket& other) = delete;
134
+ ReferenceAVPacket& operator=(const ReferenceAVPacket& other) = delete;
135
+ ~ReferenceAVPacket();
136
+ AVPacket* get();
137
+ AVPacket* operator->();
138
+ };
139
+
140
+ // av_find_best_stream is not const-correct before commit:
141
+ // https://github.com/FFmpeg/FFmpeg/commit/46dac8cf3d250184ab4247809bc03f60e14f4c0c
142
+ // which was released in FFMPEG version=5.0.3
143
+ // with libavcodec's version=59.18.100
144
+ // (https://www.ffmpeg.org/olddownload.html).
145
+ // Note that the alias is so-named so that it is only used when interacting with
146
+ // av_find_best_stream(). It is not needed elsewhere.
147
+ #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(59, 18, 100)
148
+ using AVCodecOnlyUseForCallingAVFindBestStream = AVCodec*;
149
+ #else
150
+ using AVCodecOnlyUseForCallingAVFindBestStream = const AVCodec*;
151
+ #endif
152
+
153
+ AVCodecOnlyUseForCallingAVFindBestStream
154
+ makeAVCodecOnlyUseForCallingAVFindBestStream(const AVCodec* codec);
155
+
156
// FFMPEG reports success as a plain 0; this named constant keeps call sites
// readable.
constexpr int AVSUCCESS = 0;
159
+
160
+ // Returns the FFMPEG error as a string using the provided `errorCode`.
161
+ std::string getFFMPEGErrorStringFromErrorCode(int errorCode);
162
+
163
+ // Returns duration from the frame. Abstracted into a function because the
164
+ // struct member representing duration has changed across the versions we
165
+ // support.
166
+ int64_t getDuration(const UniqueAVFrame& frame);
167
+ void setDuration(const UniqueAVFrame& frame, int64_t duration);
168
+
169
+ const int* getSupportedSampleRates(const AVCodec& avCodec);
170
+ const AVSampleFormat* getSupportedOutputSampleFormats(const AVCodec& avCodec);
171
+ const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec);
172
+
173
+ int getNumChannels(const UniqueAVFrame& avFrame);
174
+ int getNumChannels(const UniqueAVCodecContext& avCodecContext);
175
+
176
+ void setDefaultChannelLayout(
177
+ UniqueAVCodecContext& avCodecContext,
178
+ int numChannels);
179
+
180
+ void setDefaultChannelLayout(UniqueAVFrame& avFrame, int numChannels);
181
+
182
+ void validateNumChannels(const AVCodec& avCodec, int numChannels);
183
+
184
+ void setChannelLayout(
185
+ UniqueAVFrame& dstAVFrame,
186
+ const UniqueAVFrame& srcAVFrame,
187
+ int desiredNumChannels);
188
+
189
+ UniqueAVFrame allocateAVFrame(
190
+ int numSamples,
191
+ int sampleRate,
192
+ int numChannels,
193
+ AVSampleFormat sampleFormat);
194
+
195
+ SwrContext* createSwrContext(
196
+ AVSampleFormat srcSampleFormat,
197
+ AVSampleFormat desiredSampleFormat,
198
+ int srcSampleRate,
199
+ int desiredSampleRate,
200
+ const UniqueAVFrame& srcAVFrame,
201
+ int desiredNumChannels);
202
+
203
+ // Converts, if needed:
204
+ // - sample format
205
+ // - sample rate
206
+ // - number of channels.
207
+ // createSwrContext must have been previously called with matching parameters.
208
+ UniqueAVFrame convertAudioAVFrameSamples(
209
+ const UniqueSwrContext& swrContext,
210
+ const UniqueAVFrame& srcAVFrame,
211
+ AVSampleFormat desiredSampleFormat,
212
+ int desiredSampleRate,
213
+ int desiredNumChannels);
214
+
215
+ // Returns true if sws_scale can handle unaligned data.
216
+ bool canSwsScaleHandleUnalignedData();
217
+
218
+ void setFFmpegLogLevel();
219
+
220
+ // These signatures are defined by FFmpeg.
221
+ using AVIOReadFunction = int (*)(void*, uint8_t*, int);
222
+ using AVIOWriteFunction = int (*)(void*, const uint8_t*, int); // FFmpeg >= 7
223
+ using AVIOWriteFunctionOld = int (*)(void*, uint8_t*, int); // FFmpeg < 7
224
+ using AVIOSeekFunction = int64_t (*)(void*, int64_t, int);
225
+
226
+ AVIOContext* avioAllocContext(
227
+ uint8_t* buffer,
228
+ int buffer_size,
229
+ int write_flag,
230
+ void* opaque,
231
+ AVIOReadFunction read_packet,
232
+ AVIOWriteFunction write_packet,
233
+ AVIOSeekFunction seek);
234
+
235
+ double ptsToSeconds(int64_t pts, const AVRational& timeBase);
236
+ int64_t secondsToClosestPts(double seconds, const AVRational& timeBase);
237
+ int64_t computeSafeDuration(
238
+ const AVRational& frameRate,
239
+ const AVRational& timeBase);
240
+
241
+ AVFilterContext* createBuffersinkFilter(
242
+ AVFilterGraph* filterGraph,
243
+ enum AVPixelFormat outputFormat);
244
+
245
+ } // namespace facebook::torchcodec
@@ -0,0 +1,149 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "src/torchcodec/_core/FilterGraph.h"
8
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
9
+
10
+ extern "C" {
11
+ #include <libavfilter/buffersink.h>
12
+ #include <libavfilter/buffersrc.h>
13
+ }
14
+
15
+ namespace facebook::torchcodec {
16
+
17
+ FiltersContext::FiltersContext(
18
+ int inputWidth,
19
+ int inputHeight,
20
+ AVPixelFormat inputFormat,
21
+ AVRational inputAspectRatio,
22
+ int outputWidth,
23
+ int outputHeight,
24
+ AVPixelFormat outputFormat,
25
+ const std::string& filtergraphStr,
26
+ AVRational timeBase,
27
+ AVBufferRef* hwFramesCtx)
28
+ : inputWidth(inputWidth),
29
+ inputHeight(inputHeight),
30
+ inputFormat(inputFormat),
31
+ inputAspectRatio(inputAspectRatio),
32
+ outputWidth(outputWidth),
33
+ outputHeight(outputHeight),
34
+ outputFormat(outputFormat),
35
+ filtergraphStr(filtergraphStr),
36
+ timeBase(timeBase),
37
+ hwFramesCtx(hwFramesCtx) {}
38
+
39
+ bool operator==(const AVRational& lhs, const AVRational& rhs) {
40
+ return lhs.num == rhs.num && lhs.den == rhs.den;
41
+ }
42
+
43
+ bool FiltersContext::operator==(const FiltersContext& other) const {
44
+ return inputWidth == other.inputWidth && inputHeight == other.inputHeight &&
45
+ inputFormat == other.inputFormat && outputWidth == other.outputWidth &&
46
+ outputHeight == other.outputHeight &&
47
+ outputFormat == other.outputFormat &&
48
+ filtergraphStr == other.filtergraphStr && timeBase == other.timeBase &&
49
+ hwFramesCtx.get() == other.hwFramesCtx.get();
50
+ }
51
+
52
+ bool FiltersContext::operator!=(const FiltersContext& other) const {
53
+ return !(*this == other);
54
+ }
55
+
56
+ FilterGraph::FilterGraph(
57
+ const FiltersContext& filtersContext,
58
+ const VideoStreamOptions& videoStreamOptions) {
59
+ filterGraph_.reset(avfilter_graph_alloc());
60
+ TORCH_CHECK(filterGraph_.get() != nullptr);
61
+
62
+ if (videoStreamOptions.ffmpegThreadCount.has_value()) {
63
+ filterGraph_->nb_threads = videoStreamOptions.ffmpegThreadCount.value();
64
+ }
65
+
66
+ const AVFilter* buffersrc = avfilter_get_by_name("buffer");
67
+
68
+ UniqueAVBufferSrcParameters srcParams(av_buffersrc_parameters_alloc());
69
+ TORCH_CHECK(srcParams, "Failed to allocate buffersrc params");
70
+
71
+ srcParams->format = filtersContext.inputFormat;
72
+ srcParams->width = filtersContext.inputWidth;
73
+ srcParams->height = filtersContext.inputHeight;
74
+ srcParams->sample_aspect_ratio = filtersContext.inputAspectRatio;
75
+ srcParams->time_base = filtersContext.timeBase;
76
+ if (filtersContext.hwFramesCtx) {
77
+ srcParams->hw_frames_ctx = av_buffer_ref(filtersContext.hwFramesCtx.get());
78
+ }
79
+
80
+ sourceContext_ =
81
+ avfilter_graph_alloc_filter(filterGraph_.get(), buffersrc, "in");
82
+ TORCH_CHECK(sourceContext_, "Failed to allocate filter graph");
83
+
84
+ int status = av_buffersrc_parameters_set(sourceContext_, srcParams.get());
85
+ TORCH_CHECK(
86
+ status >= 0,
87
+ "Failed to create filter graph: ",
88
+ getFFMPEGErrorStringFromErrorCode(status));
89
+
90
+ status = avfilter_init_str(sourceContext_, nullptr);
91
+ TORCH_CHECK(
92
+ status >= 0,
93
+ "Failed to create filter graph : ",
94
+ getFFMPEGErrorStringFromErrorCode(status));
95
+
96
+ sinkContext_ =
97
+ createBuffersinkFilter(filterGraph_.get(), filtersContext.outputFormat);
98
+ TORCH_CHECK(
99
+ sinkContext_ != nullptr, "Failed to create and configure buffersink");
100
+
101
+ UniqueAVFilterInOut outputs(avfilter_inout_alloc());
102
+ UniqueAVFilterInOut inputs(avfilter_inout_alloc());
103
+
104
+ outputs->name = av_strdup("in");
105
+ outputs->filter_ctx = sourceContext_;
106
+ outputs->pad_idx = 0;
107
+ outputs->next = nullptr;
108
+ inputs->name = av_strdup("out");
109
+ inputs->filter_ctx = sinkContext_;
110
+ inputs->pad_idx = 0;
111
+ inputs->next = nullptr;
112
+
113
+ AVFilterInOut* outputsTmp = outputs.release();
114
+ AVFilterInOut* inputsTmp = inputs.release();
115
+ status = avfilter_graph_parse_ptr(
116
+ filterGraph_.get(),
117
+ filtersContext.filtergraphStr.c_str(),
118
+ &inputsTmp,
119
+ &outputsTmp,
120
+ nullptr);
121
+ outputs.reset(outputsTmp);
122
+ inputs.reset(inputsTmp);
123
+ TORCH_CHECK(
124
+ status >= 0,
125
+ "Failed to parse filter description: ",
126
+ getFFMPEGErrorStringFromErrorCode(status),
127
+ ", provided filters: " + filtersContext.filtergraphStr);
128
+
129
+ status = avfilter_graph_config(filterGraph_.get(), nullptr);
130
+ TORCH_CHECK(
131
+ status >= 0,
132
+ "Failed to configure filter graph: ",
133
+ getFFMPEGErrorStringFromErrorCode(status));
134
+ }
135
+
136
+ UniqueAVFrame FilterGraph::convert(const UniqueAVFrame& avFrame) {
137
+ int status = av_buffersrc_write_frame(sourceContext_, avFrame.get());
138
+ TORCH_CHECK(
139
+ status >= AVSUCCESS, "Failed to add frame to buffer source context");
140
+
141
+ UniqueAVFrame filteredAVFrame(av_frame_alloc());
142
+ status = av_buffersink_get_frame(sinkContext_, filteredAVFrame.get());
143
+ TORCH_CHECK(
144
+ status >= AVSUCCESS, "Failed to get frame from buffer sink context");
145
+
146
+ return filteredAVFrame;
147
+ }
148
+
149
+ } // namespace facebook::torchcodec
@@ -0,0 +1,59 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
10
+ #include "src/torchcodec/_core/StreamOptions.h"
11
+
12
+ namespace facebook::torchcodec {
13
+
14
+ struct FiltersContext {
15
+ int inputWidth = 0;
16
+ int inputHeight = 0;
17
+ AVPixelFormat inputFormat = AV_PIX_FMT_NONE;
18
+ AVRational inputAspectRatio = {0, 0};
19
+ int outputWidth = 0;
20
+ int outputHeight = 0;
21
+ AVPixelFormat outputFormat = AV_PIX_FMT_NONE;
22
+ std::string filtergraphStr;
23
+ AVRational timeBase = {0, 0};
24
+ UniqueAVBufferRef hwFramesCtx;
25
+
26
+ FiltersContext() = default;
27
+ FiltersContext(FiltersContext&&) = default;
28
+ FiltersContext& operator=(FiltersContext&&) = default;
29
+ FiltersContext(
30
+ int inputWidth,
31
+ int inputHeight,
32
+ AVPixelFormat inputFormat,
33
+ AVRational inputAspectRatio,
34
+ int outputWidth,
35
+ int outputHeight,
36
+ AVPixelFormat outputFormat,
37
+ const std::string& filtergraphStr,
38
+ AVRational timeBase,
39
+ AVBufferRef* hwFramesCtx = nullptr);
40
+
41
+ bool operator==(const FiltersContext&) const;
42
+ bool operator!=(const FiltersContext&) const;
43
+ };
44
+
45
+ class FilterGraph {
46
+ public:
47
+ FilterGraph(
48
+ const FiltersContext& filtersContext,
49
+ const VideoStreamOptions& videoStreamOptions);
50
+
51
+ UniqueAVFrame convert(const UniqueAVFrame& avFrame);
52
+
53
+ private:
54
+ UniqueAVFilterGraph filterGraph_;
55
+ AVFilterContext* sourceContext_ = nullptr;
56
+ AVFilterContext* sinkContext_ = nullptr;
57
+ };
58
+
59
+ } // namespace facebook::torchcodec
@@ -0,0 +1,42 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "src/torchcodec/_core/Frame.h"
8
+
9
+ namespace facebook::torchcodec {
10
+
11
+ FrameBatchOutput::FrameBatchOutput(
12
+ int64_t numFrames,
13
+ const FrameDims& outputDims,
14
+ const torch::Device& device)
15
+ : ptsSeconds(torch::empty({numFrames}, {torch::kFloat64})),
16
+ durationSeconds(torch::empty({numFrames}, {torch::kFloat64})) {
17
+ data = allocateEmptyHWCTensor(outputDims, device, numFrames);
18
+ }
19
+
20
+ torch::Tensor allocateEmptyHWCTensor(
21
+ const FrameDims& frameDims,
22
+ const torch::Device& device,
23
+ std::optional<int> numFrames) {
24
+ auto tensorOptions = torch::TensorOptions()
25
+ .dtype(torch::kUInt8)
26
+ .layout(torch::kStrided)
27
+ .device(device);
28
+ TORCH_CHECK(
29
+ frameDims.height > 0, "height must be > 0, got: ", frameDims.height);
30
+ TORCH_CHECK(frameDims.width > 0, "width must be > 0, got: ", frameDims.width);
31
+ if (numFrames.has_value()) {
32
+ auto numFramesValue = numFrames.value();
33
+ TORCH_CHECK(
34
+ numFramesValue >= 0, "numFrames must be >= 0, got: ", numFramesValue);
35
+ return torch::empty(
36
+ {numFramesValue, frameDims.height, frameDims.width, 3}, tensorOptions);
37
+ } else {
38
+ return torch::empty({frameDims.height, frameDims.width, 3}, tensorOptions);
39
+ }
40
+ }
41
+
42
+ } // namespace facebook::torchcodec
@@ -0,0 +1,72 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <torch/types.h>
10
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
11
+ #include "src/torchcodec/_core/Metadata.h"
12
+ #include "src/torchcodec/_core/StreamOptions.h"
13
+
14
+ namespace facebook::torchcodec {
15
+
16
// Height and width of a frame, in that order. Both default to 0.
struct FrameDims {
  int height{0};
  int width{0};

  FrameDims() = default;

  // Note the argument order: height first, then width.
  FrameDims(int h, int w) : height{h}, width{w} {}
};
24
+
25
+ // All public video decoding entry points return either a FrameOutput or a
26
+ // FrameBatchOutput.
27
+ // They are the equivalent of the user-facing Frame and FrameBatch classes in
28
+ // Python. They contain RGB decoded frames along with some associated data
29
+ // like PTS and duration.
30
+ // FrameOutput is also relevant for audio decoding, typically as the output of
31
+ // getNextFrame(), or as a temporary output variable.
32
+ struct FrameOutput {
33
+ // data shape is:
34
+ // - 3D (C, H, W) or (H, W, C) for videos
35
+ // - 2D (numChannels, numSamples) for audio
36
+ torch::Tensor data;
37
+ double ptsSeconds;
38
+ double durationSeconds;
39
+ };
40
+
41
+ struct FrameBatchOutput {
42
+ torch::Tensor data; // 4D: of shape NCHW or NHWC.
43
+ torch::Tensor ptsSeconds; // 1D of shape (N,)
44
+ torch::Tensor durationSeconds; // 1D of shape (N,)
45
+
46
+ FrameBatchOutput(
47
+ int64_t numFrames,
48
+ const FrameDims& outputDims,
49
+ const torch::Device& device);
50
+ };
51
+
52
+ struct AudioFramesOutput {
53
+ torch::Tensor data; // shape is (numChannels, numSamples)
54
+ double ptsSeconds;
55
+ };
56
+
57
+ // --------------------------------------------------------------------------
58
+ // FRAME TENSOR ALLOCATION APIs
59
+ // --------------------------------------------------------------------------
60
+
61
+ // Note [Frame Tensor allocation]
62
+ //
63
+ // We always allocate [N]HWC tensors. The low-level decoding functions all
64
+ // assume HWC tensors, since this is what FFmpeg natively handles. It's up to
65
+ // the high-level decoding entry-points to permute that back to CHW, by calling
66
+ // maybePermuteHWC2CHW().
67
+ torch::Tensor allocateEmptyHWCTensor(
68
+ const FrameDims& frameDims,
69
+ const torch::Device& device,
70
+ std::optional<int> numFrames = std::nullopt);
71
+
72
+ } // namespace facebook::torchcodec
@@ -0,0 +1,72 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <optional>
10
+ #include <string>
11
+ #include <vector>
12
+
13
+ extern "C" {
14
+ #include <libavcodec/avcodec.h>
15
+ #include <libavutil/avutil.h>
16
+ #include <libavutil/rational.h>
17
+ }
18
+
19
+ namespace facebook::torchcodec {
20
+
21
+ struct StreamMetadata {
22
+ // Common (video and audio) fields derived from the AVStream.
23
+ int streamIndex;
24
+ // See this link for what various values are available:
25
+ // https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48
26
+ AVMediaType mediaType;
27
+ std::optional<AVCodecID> codecId;
28
+ std::optional<std::string> codecName;
29
+ std::optional<double> durationSecondsFromHeader;
30
+ std::optional<double> beginStreamSecondsFromHeader;
31
+ std::optional<int64_t> numFramesFromHeader;
32
+ std::optional<int64_t> numKeyFrames;
33
+ std::optional<double> averageFpsFromHeader;
34
+ std::optional<double> bitRate;
35
+
36
+ // More accurate duration, obtained by scanning the file.
37
+ // These presentation timestamps are in time base.
38
+ std::optional<int64_t> beginStreamPtsFromContent;
39
+ std::optional<int64_t> endStreamPtsFromContent;
40
+ // These presentation timestamps are in seconds.
41
+ std::optional<double> beginStreamPtsSecondsFromContent;
42
+ std::optional<double> endStreamPtsSecondsFromContent;
43
+ // This can be useful for index-based seeking.
44
+ std::optional<int64_t> numFramesFromContent;
45
+
46
+ // Video-only fields derived from the AVCodecContext.
47
+ std::optional<int> width;
48
+ std::optional<int> height;
49
+ std::optional<AVRational> sampleAspectRatio;
50
+
51
+ // Audio-only fields
52
+ std::optional<int64_t> sampleRate;
53
+ std::optional<int64_t> numChannels;
54
+ std::optional<std::string> sampleFormat;
55
+ };
56
+
57
+ struct ContainerMetadata {
58
+ std::vector<StreamMetadata> allStreamMetadata;
59
+ int numAudioStreams = 0;
60
+ int numVideoStreams = 0;
61
+ // Note that this is the container-level duration, which is usually the max
62
+ // of all stream durations available in the container.
63
+ std::optional<double> durationSecondsFromHeader;
64
+ // Total BitRate level information at the container level in bit/s
65
+ std::optional<double> bitRate;
66
+ // If set, this is the index to the default audio stream.
67
+ std::optional<int> bestAudioStreamIndex;
68
+ // If set, this is the index to the default video stream.
69
+ std::optional<int> bestVideoStreamIndex;
70
+ };
71
+
72
+ } // namespace facebook::torchcodec