torchcodec 0.7.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. torchcodec/__init__.py +16 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +123 -0
  7. torchcodec/_core/AVIOTensorContext.h +43 -0
  8. torchcodec/_core/CMakeLists.txt +292 -0
  9. torchcodec/_core/Cache.h +138 -0
  10. torchcodec/_core/CpuDeviceInterface.cpp +266 -0
  11. torchcodec/_core/CpuDeviceInterface.h +70 -0
  12. torchcodec/_core/CudaDeviceInterface.cpp +514 -0
  13. torchcodec/_core/CudaDeviceInterface.h +37 -0
  14. torchcodec/_core/DeviceInterface.cpp +79 -0
  15. torchcodec/_core/DeviceInterface.h +67 -0
  16. torchcodec/_core/Encoder.cpp +514 -0
  17. torchcodec/_core/Encoder.h +123 -0
  18. torchcodec/_core/FFMPEGCommon.cpp +421 -0
  19. torchcodec/_core/FFMPEGCommon.h +227 -0
  20. torchcodec/_core/FilterGraph.cpp +142 -0
  21. torchcodec/_core/FilterGraph.h +45 -0
  22. torchcodec/_core/Frame.cpp +32 -0
  23. torchcodec/_core/Frame.h +118 -0
  24. torchcodec/_core/Metadata.h +72 -0
  25. torchcodec/_core/SingleStreamDecoder.cpp +1715 -0
  26. torchcodec/_core/SingleStreamDecoder.h +380 -0
  27. torchcodec/_core/StreamOptions.h +53 -0
  28. torchcodec/_core/ValidationUtils.cpp +35 -0
  29. torchcodec/_core/ValidationUtils.h +21 -0
  30. torchcodec/_core/__init__.py +40 -0
  31. torchcodec/_core/_metadata.py +317 -0
  32. torchcodec/_core/custom_ops.cpp +727 -0
  33. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +300 -0
  34. torchcodec/_core/ops.py +455 -0
  35. torchcodec/_core/pybind_ops.cpp +87 -0
  36. torchcodec/_frame.py +145 -0
  37. torchcodec/_internally_replaced_utils.py +67 -0
  38. torchcodec/_samplers/__init__.py +7 -0
  39. torchcodec/_samplers/video_clip_sampler.py +430 -0
  40. torchcodec/decoders/__init__.py +11 -0
  41. torchcodec/decoders/_audio_decoder.py +177 -0
  42. torchcodec/decoders/_decoder_utils.py +52 -0
  43. torchcodec/decoders/_video_decoder.py +464 -0
  44. torchcodec/encoders/__init__.py +1 -0
  45. torchcodec/encoders/_audio_encoder.py +150 -0
  46. torchcodec/libtorchcodec_core4.dll +0 -0
  47. torchcodec/libtorchcodec_core5.dll +0 -0
  48. torchcodec/libtorchcodec_core6.dll +0 -0
  49. torchcodec/libtorchcodec_core7.dll +0 -0
  50. torchcodec/libtorchcodec_custom_ops4.dll +0 -0
  51. torchcodec/libtorchcodec_custom_ops5.dll +0 -0
  52. torchcodec/libtorchcodec_custom_ops6.dll +0 -0
  53. torchcodec/libtorchcodec_custom_ops7.dll +0 -0
  54. torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
  55. torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
  56. torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
  57. torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
  58. torchcodec/samplers/__init__.py +2 -0
  59. torchcodec/samplers/_common.py +84 -0
  60. torchcodec/samplers/_index_based.py +287 -0
  61. torchcodec/samplers/_time_based.py +350 -0
  62. torchcodec/version.py +2 -0
  63. torchcodec-0.7.0.dist-info/METADATA +242 -0
  64. torchcodec-0.7.0.dist-info/RECORD +67 -0
  65. torchcodec-0.7.0.dist-info/WHEEL +5 -0
  66. torchcodec-0.7.0.dist-info/licenses/LICENSE +28 -0
  67. torchcodec-0.7.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,292 @@
1
+ cmake_minimum_required(VERSION 3.18)
2
+ project(TorchCodec)
3
+ set(CMAKE_CXX_STANDARD 17)
4
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
5
+
6
+ set(PYBIND11_FINDPYTHON ON)
7
+ find_package(pybind11 REQUIRED)
8
+ find_package(Torch REQUIRED)
9
+ find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
10
+
11
+ if(DEFINED TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR AND TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR)
12
+ set(TORCHCODEC_WERROR_OPTION "")
13
+ else()
14
+ if (WIN32)
15
+ # TODO set warnings as errors on Windows as well.
16
+ # set(TORCHCODEC_WERROR_OPTION "/WX")
17
+ else()
18
+ set(TORCHCODEC_WERROR_OPTION "-Werror")
19
+ endif()
20
+ endif()
21
+
22
+ if (WIN32)
23
+ # Avoid warnings about non-ASCII characters in source files.
24
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4819")
25
+ # Important for when we add Windows CUDA: exporting all symbols is limited to
26
+ # 65535 symbols, which (apparently) will not work for CUDA.
27
+ # https://github.com/pytorch/pytorch/pull/3650
28
+ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
29
+ endif()
30
+
31
+ if (WIN32)
32
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 ${TORCHCODEC_WERROR_OPTION} ${TORCH_CXX_FLAGS}")
33
+ else()
34
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic ${TORCHCODEC_WERROR_OPTION} ${TORCH_CXX_FLAGS}")
35
+ endif()
36
+
37
+
38
# Creates one TorchCodec library target.
#
#   library_name         - name of the target; it already carries the "lib"
#                          prefix, so the automatic prefix is cleared.
#   type                 - library type handed to add_library().
#   sources              - list of source files for the target.
#   library_dependencies - list of libraries linked PUBLIC-ly into the target.
function(make_torchcodec_sublibrary
    library_name
    type
    sources
    library_dependencies)

    add_library(${library_name} ${type} ${sources})

    # Build as C++17, and avoid adding the "lib" prefix which we already add
    # explicitly in the library name.
    set_target_properties(
        ${library_name}
        PROPERTIES
        CXX_STANDARD 17
        PREFIX ""
    )

    target_include_directories(
        ${library_name}
        PRIVATE
        ./../../../
        "${TORCH_INSTALL_PREFIX}/include"
        ${Python3_INCLUDE_DIRS}
    )

    target_link_libraries(
        ${library_name}
        PUBLIC
        ${library_dependencies}
    )

endfunction()
63
+
64
+ function(make_torchcodec_libraries
65
+ ffmpeg_major_version
66
+ ffmpeg_target)
67
+
68
+ # We create three shared libraries per version of FFmpeg, where the version
69
+ # is denoted by N:
70
+ #
71
+ # 1. libtorchcodec_coreN.{ext}: Base library which contains the
72
+ # implementation of VideoDecoder and everything VideoDecoder needs. On
73
+ # Linux, {ext} is so. On Mac, it is dylib. On Windows it's dll.
74
+ #
75
+ # 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom
76
+ # ops. Depends on libtorchcodec_coreN.{ext}. On Linux, {ext} is so.
77
+ # On Mac, it is dylib. On Windows it's dll.
78
+ #
79
+ # 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We
80
+ # keep these separate from the PyTorch custom ops because we have to
81
+ # load these libraries separately on the Python side. Depends on
82
+ # libtorchcodec_coreN.{ext}. On BOTH Linux and Mac {ext} is so. On
83
+ # Windows, it's pyd.
84
+
85
+ # 1. Create libtorchcodec_coreN.{ext}.
86
+ set(core_library_name "libtorchcodec_core${ffmpeg_major_version}")
87
+ set(core_sources
88
+ AVIOContextHolder.cpp
89
+ AVIOTensorContext.cpp
90
+ FFMPEGCommon.cpp
91
+ FilterGraph.cpp
92
+ Frame.cpp
93
+ DeviceInterface.cpp
94
+ CpuDeviceInterface.cpp
95
+ SingleStreamDecoder.cpp
96
+ Encoder.cpp
97
+ ValidationUtils.cpp
98
+ )
99
+
100
+ if(ENABLE_CUDA)
101
+ list(APPEND core_sources CudaDeviceInterface.cpp)
102
+ endif()
103
+
104
+ set(core_library_dependencies
105
+ ${ffmpeg_target}
106
+ ${TORCH_LIBRARIES}
107
+ )
108
+
109
+ if(ENABLE_CUDA)
110
+ list(APPEND core_library_dependencies
111
+ ${CUDA_nppi_LIBRARY}
112
+ ${CUDA_nppicc_LIBRARY}
113
+ )
114
+ endif()
115
+
116
+ make_torchcodec_sublibrary(
117
+ "${core_library_name}"
118
+ SHARED
119
+ "${core_sources}"
120
+ "${core_library_dependencies}"
121
+ )
122
+
123
+ # 2. Create libtorchcodec_custom_opsN.{ext}.
124
+ set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
125
+ set(custom_ops_sources
126
+ AVIOTensorContext.cpp
127
+ custom_ops.cpp
128
+ )
129
+ set(custom_ops_dependencies
130
+ ${core_library_name}
131
+ ${Python3_LIBRARIES}
132
+ )
133
+ make_torchcodec_sublibrary(
134
+ "${custom_ops_library_name}"
135
+ SHARED
136
+ "${custom_ops_sources}"
137
+ "${custom_ops_dependencies}"
138
+ )
139
+
140
+ # 3. Create libtorchcodec_pybind_opsN.{ext}.
141
+ set(pybind_ops_library_name "libtorchcodec_pybind_ops${ffmpeg_major_version}")
142
+ set(pybind_ops_sources
143
+ AVIOFileLikeContext.cpp
144
+ pybind_ops.cpp
145
+ )
146
+ set(pybind_ops_dependencies
147
+ ${core_library_name}
148
+ pybind11::module # This library dependency makes sure we have the right
149
+ # Python libraries included as well as all of the right
150
+ # settings so that we can successfully load the shared
151
+ # library as a Python module on Mac. If we instead use
152
+ # ${Python3_LIBRARIES}, it works on Linux but not on
153
+ # Mac.
154
+ )
155
+ make_torchcodec_sublibrary(
156
+ "${pybind_ops_library_name}"
157
+ MODULE # Note that this is not SHARED; otherwise we build the wrong kind
158
+ # of library on Mac. On Mac, SHARED becomes .dylib and MODULE becomes
159
+ # a .so. We want pybind11 libraries to become .so. If this is
160
+ # changed to SHARED, we will be able to successfully compile a
161
+ # .dylib, but we will not be able to successfully import that as
162
+ # a Python module on Mac.
163
+ "${pybind_ops_sources}"
164
+ "${pybind_ops_dependencies}"
165
+ )
166
+
167
+ if(WIN32)
168
+ # On Windows, we need to set the suffix to .pyd so that Python can
169
+ # import the shared library as a module. Just setting the MODULE type
170
+ # isn't enough.
171
+ set_target_properties(${pybind_ops_library_name} PROPERTIES SUFFIX ".pyd")
172
+ endif()
173
+
174
+ # pybind11 limits the visibility of symbols in the shared library to prevent
175
+ # stray initialization of py::objects. The rest of the object code must
176
+ # match. See:
177
+ # https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes
178
+ if(NOT WIN32)
179
+ target_compile_options(
180
+ ${pybind_ops_library_name}
181
+ PUBLIC
182
+ "-fvisibility=hidden"
183
+ )
184
+ endif()
185
+
186
+ # The value we use here must match the value we return from
187
+ # _get_pybind_ops_module_name() on the Python side. If the values do not
188
+ # match, then we will be unable to import the C++ shared library as a
189
+ # Python module at runtime.
190
+ target_compile_definitions(
191
+ ${pybind_ops_library_name}
192
+ PRIVATE
193
+ PYBIND_OPS_MODULE_NAME=core_pybind_ops
194
+ )
195
+
196
+ if(APPLE)
197
+ # If we don't make sure this flag is set, we run into segfaults at import
198
+ # time on Mac. See:
199
+ # https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764
200
+ target_link_options(
201
+ ${pybind_ops_library_name}
202
+ PUBLIC
203
+ "LINKER:-undefined,dynamic_lookup"
204
+ )
205
+ endif()
206
+
207
+ # Install all libraries.
208
+ set(
209
+ all_libraries
210
+ ${core_library_name}
211
+ ${custom_ops_library_name}
212
+ ${pybind_ops_library_name}
213
+ )
214
+
215
+ # The install step is invoked within CMakeBuild.build_library() in
216
+ # setup.py and just copies the built files from the temp
217
+ # cmake/setuptools build folder into the CMAKE_INSTALL_PREFIX folder. We
218
+ # still need to manually pass "DESTINATION ..." for cmake to copy those
219
+ # files in CMAKE_INSTALL_PREFIX instead of CMAKE_INSTALL_PREFIX/lib.
220
+ install(
221
+ TARGETS ${all_libraries}
222
+ LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}
223
+ RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX} # For Windows
224
+ )
225
+
226
+ endfunction()
227
+
228
+ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
229
+ message(
230
+ STATUS
231
+ "Building and dynamically linking libtorchcodec against our pre-built
232
+ non-GPL FFmpeg libraries. These libraries are only used at build time,
233
+ you still need a different FFmpeg to be installed for run time!"
234
+ )
235
+
236
+ # This will expose the ffmpeg4, ffmpeg5, ffmpeg6, and ffmpeg7 targets
237
+ include(
238
+ ${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
239
+ )
240
+
241
+ make_torchcodec_libraries(7 ffmpeg7)
242
+ make_torchcodec_libraries(6 ffmpeg6)
243
+ make_torchcodec_libraries(4 ffmpeg4)
244
+ make_torchcodec_libraries(5 ffmpeg5)
245
+ else()
246
+ message(
247
+ STATUS
248
+ "Building and dynamically linking libtorchcodec against the installed
249
+ FFmpeg libraries. This require pkg-config to be installed. If you have
250
+ installed FFmpeg from conda, make sure pkg-config is installed from
251
+ conda as well."
252
+ )
253
+ find_package(PkgConfig REQUIRED)
254
+ pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET
255
+ libavdevice
256
+ libavfilter
257
+ libavformat
258
+ libavcodec
259
+ libavutil
260
+ libswresample
261
+ libswscale
262
+ )
263
+
264
# Derive the FFmpeg major version from the libavcodec version reported by
# pkg-config. Replacing '.' with ';' turns the dot-separated version string
# into a CMake list whose first element is the major version. The version is
# quoted so that it is always passed to string() as a single argument.
string(REPLACE "." ";" libavcodec_version_list "${LIBAV_libavcodec_VERSION}")
list(GET libavcodec_version_list 0 libavcodec_major_version)

# Map the libavcodec major version onto the FFmpeg major version that names
# our libraries. Both operands are quoted so that if() always receives a
# complete "<string> STREQUAL <string>" expression, even if the variable is
# empty, and so that an unsupported value falls through to the FATAL_ERROR
# below.
if("${libavcodec_major_version}" STREQUAL "58")
    set(ffmpeg_major_version "4")
elseif("${libavcodec_major_version}" STREQUAL "59")
    set(ffmpeg_major_version "5")
elseif("${libavcodec_major_version}" STREQUAL "60")
    set(ffmpeg_major_version "6")
elseif("${libavcodec_major_version}" STREQUAL "61")
    set(ffmpeg_major_version "7")
else()
    message(
        FATAL_ERROR
        "Unsupported libavcodec version: ${libavcodec_major_version}"
    )
endif()
283
+
284
+ make_torchcodec_libraries(${ffmpeg_major_version} PkgConfig::LIBAV)
285
+
286
+ # Expose these values upwards so that the test compilation does not need
287
+ # to re-figure it out. FIXME: it's not great that we just copy-paste the
288
+ # library names.
289
+ set(libtorchcodec_library_name "libtorchcodec_core${ffmpeg_major_version}" PARENT_SCOPE)
290
+ set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE)
291
+ set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
292
+ endif()
@@ -0,0 +1,138 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
#include <torch/types.h>
#include <algorithm>
#include <memory>
#include <mutex>
#include <vector>
12
+
13
+ namespace facebook::torchcodec {
14
+
15
+ // This header defines simple cache class primitives to store reusable objects
16
+ // across TorchCodec stream instances. Intended usage is to store hardware
17
+ // contexts creation of which is expensive. The cache mechanism is as follows:
18
+ // 1. 'PerGpuCache' provides a dynamic cache with the specified maximum capacity
19
+ // for the given number of GPUs.
20
+ // 2. When a stream object (e.g. SingleStreamDecoder) is destroyed, its cacheable object
21
+ // must be released to the cache. Cache will accept the object if it is not
22
+ // full.
23
+ // 3. When a stream object (e.g. SingleStreamDecoder) is created, a cacheable object
24
+ // must be first queried from the cache. If the cache is empty then new
25
+ // object must be created.
26
+
27
+ template <typename T, typename D = std::default_delete<T>>
28
+ class Cache {
29
+ public:
30
+ using element_type = std::unique_ptr<T, D>;
31
+
32
+ explicit Cache(int capacity) : capacity_(capacity) {}
33
+
34
+ // Adds an object to the cache if the cache has capacity. Returns true
35
+ // if object was added and false otherwise.
36
+ bool addIfCacheHasCapacity(element_type&& obj);
37
+
38
+ // Returns an object from the cache. Cache does not hold a reference
39
+ // to the object after this call.
40
+ element_type get();
41
+
42
+ private:
43
+ int capacity_;
44
+ std::mutex mutex_;
45
+ std::vector<element_type> cache_;
46
+ };
47
+
48
+ template <typename T, typename D>
49
+ bool Cache<T, D>::addIfCacheHasCapacity(element_type&& obj) {
50
+ std::scoped_lock lock(mutex_);
51
+ if (capacity_ >= 0 && cache_.size() >= static_cast<size_t>(capacity_)) {
52
+ return false;
53
+ }
54
+ cache_.push_back(std::move(obj));
55
+ return true;
56
+ }
57
+
58
+ template <typename T, typename D>
59
+ typename Cache<T, D>::element_type Cache<T, D>::get() {
60
+ std::scoped_lock lock(mutex_);
61
+ if (cache_.empty()) {
62
+ return nullptr;
63
+ }
64
+
65
+ element_type obj = std::move(cache_.back());
66
+ cache_.pop_back();
67
+ return obj;
68
+ }
69
+
70
+ template <typename T, typename D = std::default_delete<T>>
71
+ class PerGpuCache {
72
+ public:
73
+ using element_type = typename Cache<T, D>::element_type;
74
+
75
+ // Initializes 'maxGpus' number of caches. Each cache can hold no
76
+ // more than 'capacity' items. If 'capacity' <0 cache size is unlimited.
77
+ PerGpuCache(int maxGpus, int capacity) {
78
+ TORCH_CHECK(maxGpus > 0, "maxGpus for PerGpuCache must be >0");
79
+ for (int i = 0; i < maxGpus; ++i) {
80
+ cache_.emplace_back(std::make_unique<Cache<T, D>>(capacity));
81
+ }
82
+ }
83
+
84
+ // Adds an object to the specified device cache if the cache has
85
+ // capacity. Returns true if object was added and false otherwise.
86
+ bool addIfCacheHasCapacity(const torch::Device& device, element_type&& obj);
87
+
88
+ // Returns an object from the cache of the specified device. Cache
89
+ // does not hold a reference to the object after this call.
90
+ element_type get(const torch::Device& device);
91
+
92
+ private:
93
+ // 'Cache' class implementation contains mutex which makes it non-movable
94
+ // and non-copyable, so we need to wrap it in std::unique_ptr.
95
+ std::vector<std::unique_ptr<Cache<T, D>>> cache_;
96
+ };
97
+
98
+ // Note: this function is inline for convenience, not performance. Because the
99
+ // rest of this file is template functions, they must all be defined in this
100
+ // header. This function is not a template function, and should, in principle,
101
+ // be defined in a .cpp file to preserve the One Definition Rule. That's
102
+ // annoying for such a small amount of code, so we just inline it. If this file
103
+ // grows, and there are more such functions, we should break them out into a
104
+ // .cpp file.
105
+ inline torch::DeviceIndex getNonNegativeDeviceIndex(
106
+ const torch::Device& device) {
107
+ torch::DeviceIndex deviceIndex = device.index();
108
+ // For single GPU machines libtorch returns -1 for the device index. So for
109
+ // that case we set the device index to 0. That's used in per-gpu cache
110
+ // implementation and during initialization of CUDA and FFmpeg contexts
111
+ // which require non negative indices.
112
+ deviceIndex = std::max<at::DeviceIndex>(deviceIndex, 0);
113
+ TORCH_CHECK(deviceIndex >= 0, "Device index out of range");
114
+ return deviceIndex;
115
+ }
116
+
117
+ template <typename T, typename D>
118
+ bool PerGpuCache<T, D>::addIfCacheHasCapacity(
119
+ const torch::Device& device,
120
+ element_type&& obj) {
121
+ torch::DeviceIndex deviceIndex = getNonNegativeDeviceIndex(device);
122
+ TORCH_CHECK(
123
+ static_cast<size_t>(deviceIndex) < cache_.size(),
124
+ "Device index out of range");
125
+ return cache_[deviceIndex]->addIfCacheHasCapacity(std::move(obj));
126
+ }
127
+
128
+ template <typename T, typename D>
129
+ typename PerGpuCache<T, D>::element_type PerGpuCache<T, D>::get(
130
+ const torch::Device& device) {
131
+ torch::DeviceIndex deviceIndex = getNonNegativeDeviceIndex(device);
132
+ TORCH_CHECK(
133
+ static_cast<size_t>(deviceIndex) < cache_.size(),
134
+ "Device index out of range");
135
+ return cache_[deviceIndex]->get();
136
+ }
137
+
138
+ } // namespace facebook::torchcodec
@@ -0,0 +1,266 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "src/torchcodec/_core/CpuDeviceInterface.h"
8
+
9
+ namespace facebook::torchcodec {
10
+ namespace {
11
+
12
+ static bool g_cpu = registerDeviceInterface(
13
+ torch::kCPU,
14
+ [](const torch::Device& device) { return new CpuDeviceInterface(device); });
15
+
16
+ } // namespace
17
+
18
+ bool CpuDeviceInterface::SwsFrameContext::operator==(
19
+ const CpuDeviceInterface::SwsFrameContext& other) const {
20
+ return inputWidth == other.inputWidth && inputHeight == other.inputHeight &&
21
+ inputFormat == other.inputFormat && outputWidth == other.outputWidth &&
22
+ outputHeight == other.outputHeight;
23
+ }
24
+
25
+ bool CpuDeviceInterface::SwsFrameContext::operator!=(
26
+ const CpuDeviceInterface::SwsFrameContext& other) const {
27
+ return !(*this == other);
28
+ }
29
+
30
+ CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
31
+ : DeviceInterface(device) {
32
+ TORCH_CHECK(g_cpu, "CpuDeviceInterface was not registered!");
33
+ TORCH_CHECK(
34
+ device_.type() == torch::kCPU, "Unsupported device: ", device_.str());
35
+ }
36
+
37
+ // Note [preAllocatedOutputTensor with swscale and filtergraph]:
38
+ // Callers may pass a pre-allocated tensor, where the output.data tensor will
39
+ // be stored. This parameter is honored in any case, but it only leads to a
40
+ // speed-up when swscale is used. With swscale, we can tell ffmpeg to place the
41
+ // decoded frame directly into `preAllocatedtensor.data_ptr()`. We haven't yet
42
+ // found a way to do that with filtegraph.
43
+ // TODO: Figure out whether that's possible!
44
+ // Dimension order of the preAllocatedOutputTensor must be HWC, regardless of
45
+ // `dimension_order` parameter. It's up to callers to re-shape it if needed.
46
+ void CpuDeviceInterface::convertAVFrameToFrameOutput(
47
+ const VideoStreamOptions& videoStreamOptions,
48
+ const AVRational& timeBase,
49
+ UniqueAVFrame& avFrame,
50
+ FrameOutput& frameOutput,
51
+ std::optional<torch::Tensor> preAllocatedOutputTensor) {
52
+ auto frameDims =
53
+ getHeightAndWidthFromOptionsOrAVFrame(videoStreamOptions, avFrame);
54
+ int expectedOutputHeight = frameDims.height;
55
+ int expectedOutputWidth = frameDims.width;
56
+
57
+ if (preAllocatedOutputTensor.has_value()) {
58
+ auto shape = preAllocatedOutputTensor.value().sizes();
59
+ TORCH_CHECK(
60
+ (shape.size() == 3) && (shape[0] == expectedOutputHeight) &&
61
+ (shape[1] == expectedOutputWidth) && (shape[2] == 3),
62
+ "Expected pre-allocated tensor of shape ",
63
+ expectedOutputHeight,
64
+ "x",
65
+ expectedOutputWidth,
66
+ "x3, got ",
67
+ shape);
68
+ }
69
+
70
+ torch::Tensor outputTensor;
71
+ enum AVPixelFormat frameFormat =
72
+ static_cast<enum AVPixelFormat>(avFrame->format);
73
+
74
+ // By default, we want to use swscale for color conversion because it is
75
+ // faster. However, it has width requirements, so we may need to fall back
76
+ // to filtergraph. We also need to respect what was requested from the
77
+ // options; we respect the options unconditionally, so it's possible for
78
+ // swscale's width requirements to be violated. We don't expose the ability to
79
+ // choose color conversion library publicly; we only use this ability
80
+ // internally.
81
+
82
+ // swscale requires widths to be multiples of 32:
83
+ // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
84
+ // so we fall back to filtergraph if the width is not a multiple of 32.
85
+ auto defaultLibrary = (expectedOutputWidth % 32 == 0)
86
+ ? ColorConversionLibrary::SWSCALE
87
+ : ColorConversionLibrary::FILTERGRAPH;
88
+
89
+ ColorConversionLibrary colorConversionLibrary =
90
+ videoStreamOptions.colorConversionLibrary.value_or(defaultLibrary);
91
+
92
+ if (colorConversionLibrary == ColorConversionLibrary::SWSCALE) {
93
+ // We need to compare the current frame context with our previous frame
94
+ // context. If they are different, then we need to re-create our colorspace
95
+ // conversion objects. We create our colorspace conversion objects late so
96
+ // that we don't have to depend on the unreliable metadata in the header.
97
+ // And we sometimes re-create them because it's possible for frame
98
+ // resolution to change mid-stream. Finally, we want to reuse the colorspace
99
+ // conversion objects as much as possible for performance reasons.
100
+ SwsFrameContext swsFrameContext;
101
+
102
+ swsFrameContext.inputWidth = avFrame->width;
103
+ swsFrameContext.inputHeight = avFrame->height;
104
+ swsFrameContext.inputFormat = frameFormat;
105
+ swsFrameContext.outputWidth = expectedOutputWidth;
106
+ swsFrameContext.outputHeight = expectedOutputHeight;
107
+
108
+ outputTensor = preAllocatedOutputTensor.value_or(allocateEmptyHWCTensor(
109
+ expectedOutputHeight, expectedOutputWidth, torch::kCPU));
110
+
111
+ if (!swsContext_ || prevSwsFrameContext_ != swsFrameContext) {
112
+ createSwsContext(swsFrameContext, avFrame->colorspace);
113
+ prevSwsFrameContext_ = swsFrameContext;
114
+ }
115
+ int resultHeight =
116
+ convertAVFrameToTensorUsingSwsScale(avFrame, outputTensor);
117
+ // If this check failed, it would mean that the frame wasn't reshaped to
118
+ // the expected height.
119
+ // TODO: Can we do the same check for width?
120
+ TORCH_CHECK(
121
+ resultHeight == expectedOutputHeight,
122
+ "resultHeight != expectedOutputHeight: ",
123
+ resultHeight,
124
+ " != ",
125
+ expectedOutputHeight);
126
+
127
+ frameOutput.data = outputTensor;
128
+ } else if (colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) {
129
+ // See comment above in swscale branch about the filterGraphContext_
130
+ // creation. creation
131
+ FiltersContext filtersContext;
132
+
133
+ filtersContext.inputWidth = avFrame->width;
134
+ filtersContext.inputHeight = avFrame->height;
135
+ filtersContext.inputFormat = frameFormat;
136
+ filtersContext.inputAspectRatio = avFrame->sample_aspect_ratio;
137
+ filtersContext.outputWidth = expectedOutputWidth;
138
+ filtersContext.outputHeight = expectedOutputHeight;
139
+ filtersContext.outputFormat = AV_PIX_FMT_RGB24;
140
+ filtersContext.timeBase = timeBase;
141
+
142
+ std::stringstream filters;
143
+ filters << "scale=" << expectedOutputWidth << ":" << expectedOutputHeight;
144
+ filters << ":sws_flags=bilinear";
145
+
146
+ filtersContext.filtergraphStr = filters.str();
147
+
148
+ if (!filterGraphContext_ || prevFiltersContext_ != filtersContext) {
149
+ filterGraphContext_ =
150
+ std::make_unique<FilterGraph>(filtersContext, videoStreamOptions);
151
+ prevFiltersContext_ = std::move(filtersContext);
152
+ }
153
+ outputTensor = convertAVFrameToTensorUsingFilterGraph(avFrame);
154
+
155
+ // Similarly to above, if this check fails it means the frame wasn't
156
+ // reshaped to its expected dimensions by filtergraph.
157
+ auto shape = outputTensor.sizes();
158
+ TORCH_CHECK(
159
+ (shape.size() == 3) && (shape[0] == expectedOutputHeight) &&
160
+ (shape[1] == expectedOutputWidth) && (shape[2] == 3),
161
+ "Expected output tensor of shape ",
162
+ expectedOutputHeight,
163
+ "x",
164
+ expectedOutputWidth,
165
+ "x3, got ",
166
+ shape);
167
+
168
+ if (preAllocatedOutputTensor.has_value()) {
169
+ // We have already validated that preAllocatedOutputTensor and
170
+ // outputTensor have the same shape.
171
+ preAllocatedOutputTensor.value().copy_(outputTensor);
172
+ frameOutput.data = preAllocatedOutputTensor.value();
173
+ } else {
174
+ frameOutput.data = outputTensor;
175
+ }
176
+ } else {
177
+ TORCH_CHECK(
178
+ false,
179
+ "Invalid color conversion library: ",
180
+ static_cast<int>(colorConversionLibrary));
181
+ }
182
+ }
183
+
184
+ int CpuDeviceInterface::convertAVFrameToTensorUsingSwsScale(
185
+ const UniqueAVFrame& avFrame,
186
+ torch::Tensor& outputTensor) {
187
+ uint8_t* pointers[4] = {
188
+ outputTensor.data_ptr<uint8_t>(), nullptr, nullptr, nullptr};
189
+ int expectedOutputWidth = outputTensor.sizes()[1];
190
+ int linesizes[4] = {expectedOutputWidth * 3, 0, 0, 0};
191
+ int resultHeight = sws_scale(
192
+ swsContext_.get(),
193
+ avFrame->data,
194
+ avFrame->linesize,
195
+ 0,
196
+ avFrame->height,
197
+ pointers,
198
+ linesizes);
199
+ return resultHeight;
200
+ }
201
+
202
+ torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
203
+ const UniqueAVFrame& avFrame) {
204
+ UniqueAVFrame filteredAVFrame = filterGraphContext_->convert(avFrame);
205
+
206
+ TORCH_CHECK_EQ(filteredAVFrame->format, AV_PIX_FMT_RGB24);
207
+
208
+ auto frameDims = getHeightAndWidthFromResizedAVFrame(*filteredAVFrame.get());
209
+ int height = frameDims.height;
210
+ int width = frameDims.width;
211
+ std::vector<int64_t> shape = {height, width, 3};
212
+ std::vector<int64_t> strides = {filteredAVFrame->linesize[0], 3, 1};
213
+ AVFrame* filteredAVFramePtr = filteredAVFrame.release();
214
+ auto deleter = [filteredAVFramePtr](void*) {
215
+ UniqueAVFrame avFrameToDelete(filteredAVFramePtr);
216
+ };
217
+ return torch::from_blob(
218
+ filteredAVFramePtr->data[0], shape, strides, deleter, {torch::kUInt8});
219
+ }
220
+
221
+ void CpuDeviceInterface::createSwsContext(
222
+ const SwsFrameContext& swsFrameContext,
223
+ const enum AVColorSpace colorspace) {
224
+ SwsContext* swsContext = sws_getContext(
225
+ swsFrameContext.inputWidth,
226
+ swsFrameContext.inputHeight,
227
+ swsFrameContext.inputFormat,
228
+ swsFrameContext.outputWidth,
229
+ swsFrameContext.outputHeight,
230
+ AV_PIX_FMT_RGB24,
231
+ SWS_BILINEAR,
232
+ nullptr,
233
+ nullptr,
234
+ nullptr);
235
+ TORCH_CHECK(swsContext, "sws_getContext() returned nullptr");
236
+
237
+ int* invTable = nullptr;
238
+ int* table = nullptr;
239
+ int srcRange, dstRange, brightness, contrast, saturation;
240
+ int ret = sws_getColorspaceDetails(
241
+ swsContext,
242
+ &invTable,
243
+ &srcRange,
244
+ &table,
245
+ &dstRange,
246
+ &brightness,
247
+ &contrast,
248
+ &saturation);
249
+ TORCH_CHECK(ret != -1, "sws_getColorspaceDetails returned -1");
250
+
251
+ const int* colorspaceTable = sws_getCoefficients(colorspace);
252
+ ret = sws_setColorspaceDetails(
253
+ swsContext,
254
+ colorspaceTable,
255
+ srcRange,
256
+ colorspaceTable,
257
+ dstRange,
258
+ brightness,
259
+ contrast,
260
+ saturation);
261
+ TORCH_CHECK(ret != -1, "sws_setColorspaceDetails returned -1");
262
+
263
+ swsContext_.reset(swsContext);
264
+ }
265
+
266
+ } // namespace facebook::torchcodec