torchcodec-0.8.0-cp313-cp313-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of torchcodec might be problematic.

Files changed (82)
  1. torchcodec/.dylibs/libc++.1.0.dylib +0 -0
  2. torchcodec/.dylibs/libpython3.13.dylib +0 -0
  3. torchcodec/__init__.py +16 -0
  4. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  5. torchcodec/_core/AVIOContextHolder.h +64 -0
  6. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  7. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  8. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  9. torchcodec/_core/AVIOTensorContext.h +43 -0
  10. torchcodec/_core/BetaCudaDeviceInterface.cpp +636 -0
  11. torchcodec/_core/BetaCudaDeviceInterface.h +191 -0
  12. torchcodec/_core/CMakeLists.txt +325 -0
  13. torchcodec/_core/CUDACommon.cpp +315 -0
  14. torchcodec/_core/CUDACommon.h +46 -0
  15. torchcodec/_core/Cache.h +138 -0
  16. torchcodec/_core/CpuDeviceInterface.cpp +347 -0
  17. torchcodec/_core/CpuDeviceInterface.h +132 -0
  18. torchcodec/_core/CudaDeviceInterface.cpp +357 -0
  19. torchcodec/_core/CudaDeviceInterface.h +64 -0
  20. torchcodec/_core/DeviceInterface.cpp +117 -0
  21. torchcodec/_core/DeviceInterface.h +148 -0
  22. torchcodec/_core/Encoder.cpp +807 -0
  23. torchcodec/_core/Encoder.h +173 -0
  24. torchcodec/_core/FFMPEGCommon.cpp +608 -0
  25. torchcodec/_core/FFMPEGCommon.h +245 -0
  26. torchcodec/_core/FilterGraph.cpp +149 -0
  27. torchcodec/_core/FilterGraph.h +59 -0
  28. torchcodec/_core/Frame.cpp +42 -0
  29. torchcodec/_core/Frame.h +72 -0
  30. torchcodec/_core/Metadata.h +72 -0
  31. torchcodec/_core/NVDECCache.cpp +70 -0
  32. torchcodec/_core/NVDECCache.h +104 -0
  33. torchcodec/_core/SingleStreamDecoder.cpp +1719 -0
  34. torchcodec/_core/SingleStreamDecoder.h +405 -0
  35. torchcodec/_core/StreamOptions.h +63 -0
  36. torchcodec/_core/Transform.cpp +60 -0
  37. torchcodec/_core/Transform.h +59 -0
  38. torchcodec/_core/ValidationUtils.cpp +35 -0
  39. torchcodec/_core/ValidationUtils.h +21 -0
  40. torchcodec/_core/__init__.py +41 -0
  41. torchcodec/_core/_metadata.py +317 -0
  42. torchcodec/_core/custom_ops.cpp +875 -0
  43. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +360 -0
  44. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  45. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  46. torchcodec/_core/ops.py +498 -0
  47. torchcodec/_core/pybind_ops.cpp +50 -0
  48. torchcodec/_frame.py +145 -0
  49. torchcodec/_internally_replaced_utils.py +67 -0
  50. torchcodec/_samplers/__init__.py +7 -0
  51. torchcodec/_samplers/video_clip_sampler.py +418 -0
  52. torchcodec/decoders/__init__.py +12 -0
  53. torchcodec/decoders/_audio_decoder.py +177 -0
  54. torchcodec/decoders/_decoder_utils.py +112 -0
  55. torchcodec/decoders/_video_decoder.py +500 -0
  56. torchcodec/encoders/__init__.py +1 -0
  57. torchcodec/encoders/_audio_encoder.py +150 -0
  58. torchcodec/libtorchcodec_core4.dylib +0 -0
  59. torchcodec/libtorchcodec_core5.dylib +0 -0
  60. torchcodec/libtorchcodec_core6.dylib +0 -0
  61. torchcodec/libtorchcodec_core7.dylib +0 -0
  62. torchcodec/libtorchcodec_core8.dylib +0 -0
  63. torchcodec/libtorchcodec_custom_ops4.dylib +0 -0
  64. torchcodec/libtorchcodec_custom_ops5.dylib +0 -0
  65. torchcodec/libtorchcodec_custom_ops6.dylib +0 -0
  66. torchcodec/libtorchcodec_custom_ops7.dylib +0 -0
  67. torchcodec/libtorchcodec_custom_ops8.dylib +0 -0
  68. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  69. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  73. torchcodec/samplers/__init__.py +2 -0
  74. torchcodec/samplers/_common.py +84 -0
  75. torchcodec/samplers/_index_based.py +287 -0
  76. torchcodec/samplers/_time_based.py +358 -0
  77. torchcodec/version.py +2 -0
  78. torchcodec-0.8.0.dist-info/METADATA +253 -0
  79. torchcodec-0.8.0.dist-info/RECORD +82 -0
  80. torchcodec-0.8.0.dist-info/WHEEL +5 -0
  81. torchcodec-0.8.0.dist-info/licenses/LICENSE +28 -0
  82. torchcodec-0.8.0.dist-info/top_level.txt +2 -0
torchcodec/_core/CpuDeviceInterface.cpp
@@ -0,0 +1,347 @@
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
+ // All rights reserved.
+ //
+ // This source code is licensed under the BSD-style license found in the
+ // LICENSE file in the root directory of this source tree.
+
+ #include "src/torchcodec/_core/CpuDeviceInterface.h"
+
+ namespace facebook::torchcodec {
+ namespace {
+
+ static bool g_cpu = registerDeviceInterface(
+     DeviceInterfaceKey(torch::kCPU),
+     [](const torch::Device& device) { return new CpuDeviceInterface(device); });
+
+ } // namespace
+
+ CpuDeviceInterface::SwsFrameContext::SwsFrameContext(
+     int inputWidth,
+     int inputHeight,
+     AVPixelFormat inputFormat,
+     int outputWidth,
+     int outputHeight)
+     : inputWidth(inputWidth),
+       inputHeight(inputHeight),
+       inputFormat(inputFormat),
+       outputWidth(outputWidth),
+       outputHeight(outputHeight) {}
+
+ bool CpuDeviceInterface::SwsFrameContext::operator==(
+     const CpuDeviceInterface::SwsFrameContext& other) const {
+   return inputWidth == other.inputWidth && inputHeight == other.inputHeight &&
+       inputFormat == other.inputFormat && outputWidth == other.outputWidth &&
+       outputHeight == other.outputHeight;
+ }
+
+ bool CpuDeviceInterface::SwsFrameContext::operator!=(
+     const CpuDeviceInterface::SwsFrameContext& other) const {
+   return !(*this == other);
+ }
+
+ CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
+     : DeviceInterface(device) {
+   TORCH_CHECK(g_cpu, "CpuDeviceInterface was not registered!");
+   TORCH_CHECK(
+       device_.type() == torch::kCPU, "Unsupported device: ", device_.str());
+ }
+
+ void CpuDeviceInterface::initialize(
+     const AVStream* avStream,
+     [[maybe_unused]] const UniqueDecodingAVFormatContext& avFormatCtx) {
+   TORCH_CHECK(avStream != nullptr, "avStream is null");
+   timeBase_ = avStream->time_base;
+ }
+
+ void CpuDeviceInterface::initializeVideo(
+     const VideoStreamOptions& videoStreamOptions,
+     const std::vector<std::unique_ptr<Transform>>& transforms,
+     const std::optional<FrameDims>& resizedOutputDims) {
+   videoStreamOptions_ = videoStreamOptions;
+   resizedOutputDims_ = resizedOutputDims;
+
+   // We can only use swscale when we have a single resize transform. Note that
+   // this means swscale will not support the case of having several,
+   // back-to-back resizes. There's no strong reason to even do that, but if
+   // someone does, it's more correct to implement that with filtergraph.
+   //
+   // We calculate this value during initialization but we don't refer to it
+   // until getColorConversionLibrary() is called. Calculating this value
+   // during initialization saves us from having to save all of the transforms.
+   areTransformsSwScaleCompatible_ = transforms.empty() ||
+       (transforms.size() == 1 && transforms[0]->isResize());
+
+   // Note that we do not expose this capability in the public API, only
+   // through the core API.
+   //
+   // Same as above, we calculate this value during initialization and refer to
+   // it in getColorConversionLibrary().
+   userRequestedSwScale_ = videoStreamOptions_.colorConversionLibrary ==
+       ColorConversionLibrary::SWSCALE;
+
+   // We can only use swscale when we have a single resize transform. Note that
+   // we decide whether to actually use swscale at the last possible moment,
+   // when we convert the frame. This is because we need to know the actual
+   // frame dimensions.
+   if (transforms.size() == 1 && transforms[0]->isResize()) {
+     auto resize = dynamic_cast<ResizeTransform*>(transforms[0].get());
+     TORCH_CHECK(resize != nullptr, "ResizeTransform expected but not found!");
+     swsFlags_ = resize->getSwsFlags();
+   }
+
+   // If we have any transforms, replace filters_ with the filter strings from
+   // the transforms. As noted above, we decide between swscale and filtergraph
+   // when we actually decode a frame.
+   std::stringstream filters;
+   bool first = true;
+   for (const auto& transform : transforms) {
+     if (!first) {
+       filters << ",";
+     }
+     filters << transform->getFilterGraphCpu();
+     first = false;
+   }
+   if (!transforms.empty()) {
+     filters_ = filters.str();
+   }
+
+   initialized_ = true;
+ }
+
+ ColorConversionLibrary CpuDeviceInterface::getColorConversionLibrary(
+     const FrameDims& outputDims) const {
+   // swscale requires widths to be multiples of 32:
+   // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
+   bool isWidthSwScaleCompatible = (outputDims.width % 32) == 0;
+
+   // We want to use swscale for color conversion if possible because it is
+   // faster than filtergraph. The following are the conditions we need to meet
+   // to use it.
+   //
+   // Note that we treat the transform limitation differently from the width
+   // limitation. That is, we consider the transforms being compatible with
+   // swscale as a hard requirement. If the transforms are not compatible,
+   // then we will end up not applying the transforms, and that is wrong.
+   //
+   // The width requirement, however, is a soft requirement. Even if we don't
+   // meet it, we let the user override it. We have tests that depend on this
+   // behavior. Since we don't expose the ability to choose swscale or
+   // filtergraph in our public API, this is probably okay. It's also the only
+   // way that we can be certain we are testing one versus the other.
+   if (areTransformsSwScaleCompatible_ &&
+       (userRequestedSwScale_ || isWidthSwScaleCompatible)) {
+     return ColorConversionLibrary::SWSCALE;
+   } else {
+     return ColorConversionLibrary::FILTERGRAPH;
+   }
+ }
+
+ // Note [preAllocatedOutputTensor with swscale and filtergraph]:
+ // Callers may pass a pre-allocated tensor, where the output.data tensor will
+ // be stored. This parameter is honored in any case, but it only leads to a
+ // speed-up when swscale is used. With swscale, we can tell FFmpeg to place
+ // the decoded frame directly into `preAllocatedOutputTensor.data_ptr()`. We
+ // haven't yet found a way to do that with filtergraph.
+ // TODO: Figure out whether that's possible!
+ // Dimension order of the preAllocatedOutputTensor must be HWC, regardless of
+ // `dimension_order` parameter. It's up to callers to re-shape it if needed.
+ void CpuDeviceInterface::convertAVFrameToFrameOutput(
+     UniqueAVFrame& avFrame,
+     FrameOutput& frameOutput,
+     std::optional<torch::Tensor> preAllocatedOutputTensor) {
+   TORCH_CHECK(initialized_, "CpuDeviceInterface was not initialized.");
+
+   // Note that we ignore the dimensions from the metadata; we don't even
+   // bother storing them. The resized dimensions take priority. If we don't
+   // have any, then we use the dimensions of the actual decoded frame. We use
+   // the actual decoded frame and not the metadata for two reasons:
+   //
+   //   1. Metadata may be wrong. If we have access to more accurate
+   //      information, we should use it.
+   //   2. Video streams can have variable resolution. This fact is not
+   //      captured in the stream metadata.
+   //
+   // Both cases cause problems for our batch APIs, as we allocate
+   // FrameBatchOutputs based on the stream metadata. But single-frame APIs
+   // can still work in such situations, so they should.
+   auto outputDims =
+       resizedOutputDims_.value_or(FrameDims(avFrame->height, avFrame->width));
+
+   if (preAllocatedOutputTensor.has_value()) {
+     auto shape = preAllocatedOutputTensor.value().sizes();
+     TORCH_CHECK(
+         (shape.size() == 3) && (shape[0] == outputDims.height) &&
+             (shape[1] == outputDims.width) && (shape[2] == 3),
+         "Expected pre-allocated tensor of shape ",
+         outputDims.height,
+         "x",
+         outputDims.width,
+         "x3, got ",
+         shape);
+   }
+
+   auto colorConversionLibrary = getColorConversionLibrary(outputDims);
+   torch::Tensor outputTensor;
+
+   if (colorConversionLibrary == ColorConversionLibrary::SWSCALE) {
+     outputTensor = preAllocatedOutputTensor.value_or(
+         allocateEmptyHWCTensor(outputDims, torch::kCPU));
+
+     int resultHeight =
+         convertAVFrameToTensorUsingSwScale(avFrame, outputTensor, outputDims);
+
+     // If this check failed, it would mean that the frame wasn't reshaped to
+     // the expected height.
+     // TODO: Can we do the same check for width?
+     TORCH_CHECK(
+         resultHeight == outputDims.height,
+         "resultHeight != outputDims.height: ",
+         resultHeight,
+         " != ",
+         outputDims.height);
+
+     frameOutput.data = outputTensor;
+   } else if (colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) {
+     outputTensor = convertAVFrameToTensorUsingFilterGraph(avFrame, outputDims);
+
+     // Similarly to above, if this check fails it means the frame wasn't
+     // reshaped to its expected dimensions by filtergraph.
+     auto shape = outputTensor.sizes();
+     TORCH_CHECK(
+         (shape.size() == 3) && (shape[0] == outputDims.height) &&
+             (shape[1] == outputDims.width) && (shape[2] == 3),
+         "Expected output tensor of shape ",
+         outputDims.height,
+         "x",
+         outputDims.width,
+         "x3, got ",
+         shape);
+
+     if (preAllocatedOutputTensor.has_value()) {
+       // We have already validated that preAllocatedOutputTensor and
+       // outputTensor have the same shape.
+       preAllocatedOutputTensor.value().copy_(outputTensor);
+       frameOutput.data = preAllocatedOutputTensor.value();
+     } else {
+       frameOutput.data = outputTensor;
+     }
+   } else {
+     TORCH_CHECK(
+         false,
+         "Invalid color conversion library: ",
+         static_cast<int>(colorConversionLibrary));
+   }
+ }
+
+ int CpuDeviceInterface::convertAVFrameToTensorUsingSwScale(
+     const UniqueAVFrame& avFrame,
+     torch::Tensor& outputTensor,
+     const FrameDims& outputDims) {
+   enum AVPixelFormat frameFormat =
+       static_cast<enum AVPixelFormat>(avFrame->format);
+
+   // We need to compare the current frame context with our previous frame
+   // context. If they are different, then we need to re-create our colorspace
+   // conversion objects. We create our colorspace conversion objects late so
+   // that we don't have to depend on the unreliable metadata in the header.
+   // And we sometimes re-create them because it's possible for frame
+   // resolution to change mid-stream. Finally, we want to reuse the colorspace
+   // conversion objects as much as possible for performance reasons.
+   SwsFrameContext swsFrameContext(
+       avFrame->width,
+       avFrame->height,
+       frameFormat,
+       outputDims.width,
+       outputDims.height);
+
+   if (!swsContext_ || prevSwsFrameContext_ != swsFrameContext) {
+     createSwsContext(swsFrameContext, avFrame->colorspace);
+     prevSwsFrameContext_ = swsFrameContext;
+   }
+
+   uint8_t* pointers[4] = {
+       outputTensor.data_ptr<uint8_t>(), nullptr, nullptr, nullptr};
+   int expectedOutputWidth = outputTensor.sizes()[1];
+   int linesizes[4] = {expectedOutputWidth * 3, 0, 0, 0};
+   int resultHeight = sws_scale(
+       swsContext_.get(),
+       avFrame->data,
+       avFrame->linesize,
+       0,
+       avFrame->height,
+       pointers,
+       linesizes);
+   return resultHeight;
+ }
+
+ void CpuDeviceInterface::createSwsContext(
+     const SwsFrameContext& swsFrameContext,
+     const enum AVColorSpace colorspace) {
+   SwsContext* swsContext = sws_getContext(
+       swsFrameContext.inputWidth,
+       swsFrameContext.inputHeight,
+       swsFrameContext.inputFormat,
+       swsFrameContext.outputWidth,
+       swsFrameContext.outputHeight,
+       AV_PIX_FMT_RGB24,
+       swsFlags_,
+       nullptr,
+       nullptr,
+       nullptr);
+   TORCH_CHECK(swsContext, "sws_getContext() returned nullptr");
+
+   int* invTable = nullptr;
+   int* table = nullptr;
+   int srcRange, dstRange, brightness, contrast, saturation;
+   int ret = sws_getColorspaceDetails(
+       swsContext,
+       &invTable,
+       &srcRange,
+       &table,
+       &dstRange,
+       &brightness,
+       &contrast,
+       &saturation);
+   TORCH_CHECK(ret != -1, "sws_getColorspaceDetails returned -1");
+
+   const int* colorspaceTable = sws_getCoefficients(colorspace);
+   ret = sws_setColorspaceDetails(
+       swsContext,
+       colorspaceTable,
+       srcRange,
+       colorspaceTable,
+       dstRange,
+       brightness,
+       contrast,
+       saturation);
+   TORCH_CHECK(ret != -1, "sws_setColorspaceDetails returned -1");
+
+   swsContext_.reset(swsContext);
+ }
+
+ torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
+     const UniqueAVFrame& avFrame,
+     const FrameDims& outputDims) {
+   enum AVPixelFormat frameFormat =
+       static_cast<enum AVPixelFormat>(avFrame->format);
+
+   FiltersContext filtersContext(
+       avFrame->width,
+       avFrame->height,
+       frameFormat,
+       avFrame->sample_aspect_ratio,
+       outputDims.width,
+       outputDims.height,
+       AV_PIX_FMT_RGB24,
+       filters_,
+       timeBase_);
+
+   if (!filterGraph_ || prevFiltersContext_ != filtersContext) {
+     filterGraph_ =
+         std::make_unique<FilterGraph>(filtersContext, videoStreamOptions_);
+     prevFiltersContext_ = std::move(filtersContext);
+   }
+   return rgbAVFrameToTensor(filterGraph_->convert(avFrame));
+ }
+
+ } // namespace facebook::torchcodec
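
The `static bool g_cpu = registerDeviceInterface(...)` initializer at the top of this file is the classic static self-registration idiom: the side effect of initializing a namespace-scope variable inserts a factory into a process-wide registry before main() runs, and the constructor's `TORCH_CHECK(g_cpu, ...)` odr-uses the flag so the registration cannot be silently optimized away. Below is a minimal, self-contained sketch of the idiom; the names (`Interface`, `registerInterface`, `registry`) are illustrative stand-ins, not torchcodec's actual API.

// Sketch of the static self-registration pattern (assumed names, not the
// torchcodec API).
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Interface {
  virtual ~Interface() = default;
  virtual const char* name() const = 0;
};

using Factory = std::function<std::unique_ptr<Interface>()>;

// Meyers singleton: constructed on first use, so registrations running from
// static initializers in other translation units are safe.
std::map<std::string, Factory>& registry() {
  static std::map<std::string, Factory> r;
  return r;
}

bool registerInterface(const std::string& key, Factory f) {
  return registry().emplace(key, std::move(f)).second;
}

struct CpuInterface : Interface {
  const char* name() const override { return "cpu"; }
};

// Runs before main(): initializing g_cpu has the side effect of inserting the
// factory into the registry, mirroring g_cpu in the diff above.
static bool g_cpu =
    registerInterface("cpu", [] { return std::make_unique<CpuInterface>(); });

int main() {
  // Mirrors TORCH_CHECK(g_cpu, ...): odr-using the flag also guarantees the
  // registration isn't dropped.
  if (!g_cpu) {
    return 1;
  }
  std::cout << registry().at("cpu")()->name() << " interface registered\n";
  return 0;
}
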
torchcodec/_core/CpuDeviceInterface.h
@@ -0,0 +1,132 @@
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
+ // All rights reserved.
+ //
+ // This source code is licensed under the BSD-style license found in the
+ // LICENSE file in the root directory of this source tree.
+
+ #pragma once
+
+ #include "src/torchcodec/_core/DeviceInterface.h"
+ #include "src/torchcodec/_core/FFMPEGCommon.h"
+ #include "src/torchcodec/_core/FilterGraph.h"
+
+ namespace facebook::torchcodec {
+
+ class CpuDeviceInterface : public DeviceInterface {
+  public:
+   CpuDeviceInterface(const torch::Device& device);
+
+   virtual ~CpuDeviceInterface() {}
+
+   std::optional<const AVCodec*> findCodec(
+       [[maybe_unused]] const AVCodecID& codecId) override {
+     return std::nullopt;
+   }
+
+   virtual void initialize(
+       const AVStream* avStream,
+       const UniqueDecodingAVFormatContext& avFormatCtx) override;
+
+   virtual void initializeVideo(
+       const VideoStreamOptions& videoStreamOptions,
+       const std::vector<std::unique_ptr<Transform>>& transforms,
+       const std::optional<FrameDims>& resizedOutputDims) override;
+
+   void convertAVFrameToFrameOutput(
+       UniqueAVFrame& avFrame,
+       FrameOutput& frameOutput,
+       std::optional<torch::Tensor> preAllocatedOutputTensor =
+           std::nullopt) override;
+
+  private:
+   int convertAVFrameToTensorUsingSwScale(
+       const UniqueAVFrame& avFrame,
+       torch::Tensor& outputTensor,
+       const FrameDims& outputDims);
+
+   torch::Tensor convertAVFrameToTensorUsingFilterGraph(
+       const UniqueAVFrame& avFrame,
+       const FrameDims& outputDims);
+
+   ColorConversionLibrary getColorConversionLibrary(
+       const FrameDims& inputFrameDims) const;
+
+   struct SwsFrameContext {
+     int inputWidth = 0;
+     int inputHeight = 0;
+     AVPixelFormat inputFormat = AV_PIX_FMT_NONE;
+     int outputWidth = 0;
+     int outputHeight = 0;
+
+     SwsFrameContext() = default;
+     SwsFrameContext(
+         int inputWidth,
+         int inputHeight,
+         AVPixelFormat inputFormat,
+         int outputWidth,
+         int outputHeight);
+     bool operator==(const SwsFrameContext&) const;
+     bool operator!=(const SwsFrameContext&) const;
+   };
+
+   void createSwsContext(
+       const SwsFrameContext& swsFrameContext,
+       const enum AVColorSpace colorspace);
+
+   VideoStreamOptions videoStreamOptions_;
+   AVRational timeBase_;
+
+   // If the resized output dimensions are present, then we always use those as
+   // the output frame's dimensions. If they are not present, then we use the
+   // dimensions of the raw decoded frame. Note that we do not know the
+   // dimensions of the raw decoded frame until very late; we learn them in
+   // convertAVFrameToFrameOutput(). Deciding the final output frame's actual
+   // dimensions late allows us to handle video streams with variable
+   // resolutions.
+   std::optional<FrameDims> resizedOutputDims_;
+
+   // Color-conversion objects. Only one of filterGraph_ and swsContext_ should
+   // be non-null. Which one we use is determined dynamically in
+   // getColorConversionLibrary() each time we decode a frame.
+   //
+   // Creating both filterGraph_ and swsContext_ is relatively expensive, so we
+   // reuse them across frames. However, it is possible that subsequent frames
+   // are different enough (change in dimensions) that we can't reuse the color
+   // conversion object. We store the relevant frame context from the frame
+   // used to create the object last time. We always compare the current
+   // frame's info against the previous one to determine if we need to recreate
+   // the color conversion object.
+   //
+   // TODO: The names of these fields are confusing, as the actual color
+   //       conversion object for Sws has "context" in the name, and we use
+   //       "context" for the structs we store to know if we need to recreate
+   //       a color conversion object. We should clean that up.
+   std::unique_ptr<FilterGraph> filterGraph_;
+   FiltersContext prevFiltersContext_;
+   UniqueSwsContext swsContext_;
+   SwsFrameContext prevSwsFrameContext_;
+
+   // The filters we supply to filterGraph_, if it is used. The default is the
+   // copy filter, which just copies the input to the output. Computationally,
+   // it should be a no-op. If we get no user-provided transforms, we will use
+   // the copy filter. Otherwise, we will construct the string from the
+   // transforms.
+   //
+   // Note that even if we only use the copy filter, we still get the desired
+   // colorspace conversion. We construct the filtergraph with its output sink
+   // set to RGB24.
+   std::string filters_ = "copy";
+
+   // The flags we supply to swsContext_, if it is used. The flags control the
+   // resizing algorithm. We default to bilinear. Users can override this with
+   // a ResizeTransform.
+   int swsFlags_ = SWS_BILINEAR;
+
+   // Values set during initialization and referred to in
+   // getColorConversionLibrary().
+   bool areTransformsSwScaleCompatible_;
+   bool userRequestedSwScale_;
+
+   bool initialized_ = false;
+ };
+
+ } // namespace facebook::torchcodec
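
One detail worth calling out from convertAVFrameToTensorUsingSwScale() in the .cpp above: because the output is packed RGB24 in an HWC tensor, the whole tensor acts as a single destination "plane" for sws_scale, with a stride (linesize) of width * 3 bytes per row. The following is a standalone sketch of that call pattern against plain FFmpeg; the toy dimensions, gray fill, and buffer names are assumptions for illustration, not torchcodec code.

// Minimal swscale sketch: convert a YUV420P frame into a caller-owned packed
// RGB24 (HWC) buffer, the same call shape as in the diff above.
extern "C" {
#include <libavutil/frame.h>
#include <libavutil/pixfmt.h>
#include <libswscale/swscale.h>
}
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
  const int w = 64, h = 48; // toy dimensions

  // Stand-in for a decoded frame; fill with flat gray so pixel data is
  // defined before converting.
  AVFrame* src = av_frame_alloc();
  src->format = AV_PIX_FMT_YUV420P;
  src->width = w;
  src->height = h;
  av_frame_get_buffer(src, 0);
  std::memset(src->data[0], 128, static_cast<size_t>(src->linesize[0]) * h);
  std::memset(src->data[1], 128, static_cast<size_t>(src->linesize[1]) * (h / 2));
  std::memset(src->data[2], 128, static_cast<size_t>(src->linesize[2]) * (h / 2));

  // Pre-allocated HWC destination, analogous to preAllocatedOutputTensor:
  // h rows, each holding w * 3 interleaved RGB bytes.
  std::vector<uint8_t> rgb(static_cast<size_t>(h) * w * 3);

  SwsContext* sws = sws_getContext(
      w, h, AV_PIX_FMT_YUV420P, // input
      w, h, AV_PIX_FMT_RGB24, // output
      SWS_BILINEAR, nullptr, nullptr, nullptr);

  // A packed format has one plane: the buffer itself, stride = width * 3.
  uint8_t* dstPlanes[4] = {rgb.data(), nullptr, nullptr, nullptr};
  int dstStrides[4] = {w * 3, 0, 0, 0};
  int outH = sws_scale(sws, src->data, src->linesize, 0, h, dstPlanes, dstStrides);

  sws_freeContext(sws);
  av_frame_free(&src);
  // Mirrors the resultHeight == outputDims.height check in the diff.
  return outH == h ? 0 : 1;
}
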