torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. torchcodec/__init__.py +27 -0
  2. torchcodec/_core/AVIOContextHolder.cpp +60 -0
  3. torchcodec/_core/AVIOContextHolder.h +64 -0
  4. torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
  5. torchcodec/_core/AVIOFileLikeContext.h +55 -0
  6. torchcodec/_core/AVIOTensorContext.cpp +130 -0
  7. torchcodec/_core/AVIOTensorContext.h +44 -0
  8. torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
  9. torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
  10. torchcodec/_core/CMakeLists.txt +295 -0
  11. torchcodec/_core/CUDACommon.cpp +330 -0
  12. torchcodec/_core/CUDACommon.h +51 -0
  13. torchcodec/_core/Cache.h +124 -0
  14. torchcodec/_core/CpuDeviceInterface.cpp +509 -0
  15. torchcodec/_core/CpuDeviceInterface.h +141 -0
  16. torchcodec/_core/CudaDeviceInterface.cpp +602 -0
  17. torchcodec/_core/CudaDeviceInterface.h +79 -0
  18. torchcodec/_core/DeviceInterface.cpp +117 -0
  19. torchcodec/_core/DeviceInterface.h +191 -0
  20. torchcodec/_core/Encoder.cpp +1054 -0
  21. torchcodec/_core/Encoder.h +192 -0
  22. torchcodec/_core/FFMPEGCommon.cpp +684 -0
  23. torchcodec/_core/FFMPEGCommon.h +314 -0
  24. torchcodec/_core/FilterGraph.cpp +159 -0
  25. torchcodec/_core/FilterGraph.h +59 -0
  26. torchcodec/_core/Frame.cpp +47 -0
  27. torchcodec/_core/Frame.h +72 -0
  28. torchcodec/_core/Metadata.cpp +124 -0
  29. torchcodec/_core/Metadata.h +92 -0
  30. torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
  31. torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
  32. torchcodec/_core/NVDECCache.cpp +60 -0
  33. torchcodec/_core/NVDECCache.h +102 -0
  34. torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
  35. torchcodec/_core/SingleStreamDecoder.h +391 -0
  36. torchcodec/_core/StreamOptions.h +70 -0
  37. torchcodec/_core/Transform.cpp +128 -0
  38. torchcodec/_core/Transform.h +86 -0
  39. torchcodec/_core/ValidationUtils.cpp +35 -0
  40. torchcodec/_core/ValidationUtils.h +21 -0
  41. torchcodec/_core/__init__.py +46 -0
  42. torchcodec/_core/_metadata.py +262 -0
  43. torchcodec/_core/custom_ops.cpp +1090 -0
  44. torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
  45. torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
  46. torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
  47. torchcodec/_core/ops.py +605 -0
  48. torchcodec/_core/pybind_ops.cpp +50 -0
  49. torchcodec/_frame.py +146 -0
  50. torchcodec/_internally_replaced_utils.py +68 -0
  51. torchcodec/_samplers/__init__.py +7 -0
  52. torchcodec/_samplers/video_clip_sampler.py +419 -0
  53. torchcodec/decoders/__init__.py +12 -0
  54. torchcodec/decoders/_audio_decoder.py +185 -0
  55. torchcodec/decoders/_decoder_utils.py +113 -0
  56. torchcodec/decoders/_video_decoder.py +601 -0
  57. torchcodec/encoders/__init__.py +2 -0
  58. torchcodec/encoders/_audio_encoder.py +149 -0
  59. torchcodec/encoders/_video_encoder.py +196 -0
  60. torchcodec/libtorchcodec_core4.so +0 -0
  61. torchcodec/libtorchcodec_core5.so +0 -0
  62. torchcodec/libtorchcodec_core6.so +0 -0
  63. torchcodec/libtorchcodec_core7.so +0 -0
  64. torchcodec/libtorchcodec_core8.so +0 -0
  65. torchcodec/libtorchcodec_custom_ops4.so +0 -0
  66. torchcodec/libtorchcodec_custom_ops5.so +0 -0
  67. torchcodec/libtorchcodec_custom_ops6.so +0 -0
  68. torchcodec/libtorchcodec_custom_ops7.so +0 -0
  69. torchcodec/libtorchcodec_custom_ops8.so +0 -0
  70. torchcodec/libtorchcodec_pybind_ops4.so +0 -0
  71. torchcodec/libtorchcodec_pybind_ops5.so +0 -0
  72. torchcodec/libtorchcodec_pybind_ops6.so +0 -0
  73. torchcodec/libtorchcodec_pybind_ops7.so +0 -0
  74. torchcodec/libtorchcodec_pybind_ops8.so +0 -0
  75. torchcodec/samplers/__init__.py +2 -0
  76. torchcodec/samplers/_common.py +84 -0
  77. torchcodec/samplers/_index_based.py +287 -0
  78. torchcodec/samplers/_time_based.py +358 -0
  79. torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
  80. torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
  81. torchcodec/transforms/__init__.py +12 -0
  82. torchcodec/transforms/_decoder_transforms.py +375 -0
  83. torchcodec/version.py +2 -0
  84. torchcodec-0.10.0.dist-info/METADATA +286 -0
  85. torchcodec-0.10.0.dist-info/RECORD +88 -0
  86. torchcodec-0.10.0.dist-info/WHEEL +5 -0
  87. torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
  88. torchcodec-0.10.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,117 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #include "DeviceInterface.h"
8
+ #include <map>
9
+ #include <mutex>
10
+
11
+ namespace facebook::torchcodec {
12
+
13
+ namespace {
14
+ using DeviceInterfaceMap =
15
+ std::map<DeviceInterfaceKey, CreateDeviceInterfaceFn>;
16
+ static std::mutex g_interface_mutex;
17
+
18
+ DeviceInterfaceMap& getDeviceMap() {
19
+ static DeviceInterfaceMap deviceMap;
20
+ return deviceMap;
21
+ }
22
+
23
// Extracts the device type from a device string: everything before the first
// ':' (e.g. "cuda:0" -> "cuda"). A string without ':' is returned unchanged.
std::string getDeviceType(const std::string& device) {
  // find() yields npos when there is no ':', and substr(0, npos) is the whole
  // string, so both cases collapse into one expression.
  return device.substr(0, device.find(':'));
}
30
+
31
+ } // namespace
32
+
33
+ bool registerDeviceInterface(
34
+ const DeviceInterfaceKey& key,
35
+ CreateDeviceInterfaceFn createInterface) {
36
+ std::scoped_lock lock(g_interface_mutex);
37
+ DeviceInterfaceMap& deviceMap = getDeviceMap();
38
+
39
+ TORCH_CHECK(
40
+ deviceMap.find(key) == deviceMap.end(),
41
+ "Device interface already registered for device type ",
42
+ key.deviceType,
43
+ " variant '",
44
+ key.variant,
45
+ "'");
46
+ deviceMap.insert({key, createInterface});
47
+
48
+ return true;
49
+ }
50
+
51
+ void validateDeviceInterface(
52
+ const std::string device,
53
+ const std::string variant) {
54
+ std::scoped_lock lock(g_interface_mutex);
55
+ std::string deviceType = getDeviceType(device);
56
+
57
+ DeviceInterfaceMap& deviceMap = getDeviceMap();
58
+
59
+ // Find device interface that matches device type and variant
60
+ torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type();
61
+
62
+ auto deviceInterface = std::find_if(
63
+ deviceMap.begin(),
64
+ deviceMap.end(),
65
+ [&](const std::pair<DeviceInterfaceKey, CreateDeviceInterfaceFn>& arg) {
66
+ return arg.first.deviceType == deviceTypeEnum &&
67
+ arg.first.variant == variant;
68
+ });
69
+
70
+ TORCH_CHECK(
71
+ deviceInterface != deviceMap.end(),
72
+ "Unsupported device: ",
73
+ device,
74
+ " (device type: ",
75
+ deviceType,
76
+ ", variant: ",
77
+ variant,
78
+ ")");
79
+ }
80
+
81
+ std::unique_ptr<DeviceInterface> createDeviceInterface(
82
+ const torch::Device& device,
83
+ const std::string_view variant) {
84
+ DeviceInterfaceKey key(device.type(), variant);
85
+ std::scoped_lock lock(g_interface_mutex);
86
+ DeviceInterfaceMap& deviceMap = getDeviceMap();
87
+
88
+ auto it = deviceMap.find(key);
89
+ if (it != deviceMap.end()) {
90
+ return std::unique_ptr<DeviceInterface>(it->second(device));
91
+ }
92
+
93
+ TORCH_CHECK(
94
+ false,
95
+ "No device interface found for device type: ",
96
+ device.type(),
97
+ " variant: '",
98
+ variant,
99
+ "'");
100
+ }
101
+
102
+ torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) {
103
+ TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
104
+
105
+ int height = avFrame->height;
106
+ int width = avFrame->width;
107
+ std::vector<int64_t> shape = {height, width, 3};
108
+ std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
109
+ AVFrame* avFrameClone = av_frame_clone(avFrame.get());
110
+ auto deleter = [avFrameClone](void*) {
111
+ UniqueAVFrame avFrameToDelete(avFrameClone);
112
+ };
113
+ return torch::from_blob(
114
+ avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
115
+ }
116
+
117
+ } // namespace facebook::torchcodec
@@ -0,0 +1,191 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // All rights reserved.
3
+ //
4
+ // This source code is licensed under the BSD-style license found in the
5
+ // LICENSE file in the root directory of this source tree.
6
+
7
+ #pragma once
8
+
9
+ #include <torch/types.h>
10
+ #include <functional>
11
+ #include <memory>
12
+ #include <stdexcept>
13
+ #include <string>
14
+ #include "FFMPEGCommon.h"
15
+ #include "Frame.h"
16
+ #include "StreamOptions.h"
17
+ #include "Transform.h"
18
+
19
+ namespace facebook::torchcodec {
20
+
21
+ // Key for device interface registration with device type + variant support
22
+ struct DeviceInterfaceKey {
23
+ torch::DeviceType deviceType;
24
+ std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
25
+
26
+ bool operator<(const DeviceInterfaceKey& other) const {
27
+ if (deviceType != other.deviceType) {
28
+ return deviceType < other.deviceType;
29
+ }
30
+ return variant < other.variant;
31
+ }
32
+
33
+ explicit DeviceInterfaceKey(torch::DeviceType type) : deviceType(type) {}
34
+
35
+ DeviceInterfaceKey(torch::DeviceType type, const std::string_view& variant)
36
+ : deviceType(type), variant(variant) {}
37
+ };
38
+
39
+ class DeviceInterface {
40
+ public:
41
+ DeviceInterface(const torch::Device& device) : device_(device) {}
42
+
43
+ virtual ~DeviceInterface(){};
44
+
45
+ torch::Device& device() {
46
+ return device_;
47
+ };
48
+
49
+ virtual std::optional<const AVCodec*> findCodec(
50
+ [[maybe_unused]] const AVCodecID& codecId,
51
+ [[maybe_unused]] bool isDecoder = true) {
52
+ return std::nullopt;
53
+ };
54
+
55
+ // Initialize the device with parameters generic to all kinds of decoding.
56
+ virtual void initialize(
57
+ const AVStream* avStream,
58
+ const UniqueDecodingAVFormatContext& avFormatCtx,
59
+ const SharedAVCodecContext& codecContext) = 0;
60
+
61
+ // Initialize the device with parameters specific to video decoding. There is
62
+ // a default empty implementation.
63
+ virtual void initializeVideo(
64
+ [[maybe_unused]] const VideoStreamOptions& videoStreamOptions,
65
+ [[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
66
+ transforms,
67
+ [[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims) {}
68
+
69
+ // Initialize the device with parameters specific to audio decoding. There is
70
+ // a default empty implementation.
71
+ virtual void initializeAudio(
72
+ [[maybe_unused]] const AudioStreamOptions& audioStreamOptions) {}
73
+
74
+ // Flush any remaining samples from the audio resampler buffer.
75
+ // When sample rate conversion is involved, some samples may be buffered
76
+ // between frames for proper interpolation. This function flushes those
77
+ // buffered samples.
78
+ // Returns an optional tensor containing the flushed samples, or std::nullopt
79
+ // if there are no buffered samples or audio is not supported.
80
+ virtual std::optional<torch::Tensor> maybeFlushAudioBuffers() {
81
+ return std::nullopt;
82
+ }
83
+
84
+ // In order for decoding to actually happen on an FFmpeg managed hardware
85
+ // device, we need to register the DeviceInterface managed
86
+ // AVHardwareDeviceContext with the AVCodecContext. We don't need to do this
87
+ // on the CPU and if FFmpeg is not managing the hardware device.
88
+ virtual void registerHardwareDeviceWithCodec(
89
+ [[maybe_unused]] AVCodecContext* codecContext) {}
90
+
91
+ virtual void convertAVFrameToFrameOutput(
92
+ UniqueAVFrame& avFrame,
93
+ FrameOutput& frameOutput,
94
+ std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
95
+
96
+ // ------------------------------------------
97
+ // Extension points for custom decoding paths
98
+ // ------------------------------------------
99
+
100
+ // Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
101
+ // other AVERROR on failure
102
+ // Default implementation uses FFmpeg directly
103
+ virtual int sendPacket(ReferenceAVPacket& avPacket) {
104
+ TORCH_CHECK(
105
+ codecContext_ != nullptr,
106
+ "Codec context not available for default packet sending");
107
+ return avcodec_send_packet(codecContext_.get(), avPacket.get());
108
+ }
109
+
110
+ // Send an EOF packet to flush the decoder
111
+ // Returns AVSUCCESS on success, or other AVERROR on failure
112
+ // Default implementation uses FFmpeg directly
113
+ virtual int sendEOFPacket() {
114
+ TORCH_CHECK(
115
+ codecContext_ != nullptr,
116
+ "Codec context not available for default EOF packet sending");
117
+ return avcodec_send_packet(codecContext_.get(), nullptr);
118
+ }
119
+
120
+ // Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
121
+ // AVERROR_EOF if end of stream, or other AVERROR on failure
122
+ // Default implementation uses FFmpeg directly
123
+ virtual int receiveFrame(UniqueAVFrame& avFrame) {
124
+ TORCH_CHECK(
125
+ codecContext_ != nullptr,
126
+ "Codec context not available for default frame receiving");
127
+ return avcodec_receive_frame(codecContext_.get(), avFrame.get());
128
+ }
129
+
130
+ // Flush remaining frames from decoder
131
+ virtual void flush() {
132
+ TORCH_CHECK(
133
+ codecContext_ != nullptr,
134
+ "Codec context not available for default flushing");
135
+ avcodec_flush_buffers(codecContext_.get());
136
+ }
137
+
138
+ virtual std::string getDetails() {
139
+ return "";
140
+ }
141
+
142
+ // Pixel format used for encoding on CUDA devices
143
+ static constexpr AVPixelFormat CUDA_ENCODING_PIXEL_FORMAT = AV_PIX_FMT_NV12;
144
+
145
+ // Function used for video encoding, only implemented in CudaDeviceInterface.
146
+ // It is here to isolate CUDA dependencies from CPU builds
147
+ // TODO Video-Encoder: Reconsider using video encoding functions in device
148
+ // interface
149
+ virtual UniqueAVFrame convertCUDATensorToAVFrameForEncoding(
150
+ [[maybe_unused]] const torch::Tensor& tensor,
151
+ [[maybe_unused]] int frameIndex,
152
+ [[maybe_unused]] AVCodecContext* codecContext) {
153
+ TORCH_CHECK(false);
154
+ }
155
+
156
+ // Function used for video encoding, only implemented in CudaDeviceInterface.
157
+ // It is here to isolate CUDA dependencies from CPU builds
158
+ virtual void setupHardwareFrameContextForEncoding(
159
+ [[maybe_unused]] AVCodecContext* codecContext) {
160
+ TORCH_CHECK(false);
161
+ }
162
+
163
+ virtual std::optional<const AVCodec*> findHardwareEncoder(
164
+ [[maybe_unused]] const AVCodecID& codecId) {
165
+ TORCH_CHECK(false);
166
+ }
167
+
168
+ protected:
169
+ torch::Device device_;
170
+ SharedAVCodecContext codecContext_;
171
+ AVMediaType avMediaType_;
172
+ };
173
+
174
// Factory signature stored in the device interface registry: takes the target
// device and returns a raw DeviceInterface pointer. createDeviceInterface()
// wraps that pointer in a std::unique_ptr.
+ using CreateDeviceInterfaceFn =
175
+ std::function<DeviceInterface*(const torch::Device& device)>;
176
+
177
// Registers `createInterface` as the factory for `key`. Fails via TORCH_CHECK
// if a factory is already registered for the same device type and variant;
// returns true otherwise.
+ bool registerDeviceInterface(
178
+ const DeviceInterfaceKey& key,
179
+ const CreateDeviceInterfaceFn createInterface);
180
+
181
// Checks that an interface is registered for the device string `device`
// (only the part before the first ':' determines the device type) and
// `variant`. Fails via TORCH_CHECK with an "Unsupported device" message
// otherwise.
+ void validateDeviceInterface(
182
+ const std::string device,
183
+ const std::string variant);
184
+
185
// Looks up the factory registered for (device.type(), variant), invokes it
// with `device`, and returns the result as a std::unique_ptr. Fails via
// TORCH_CHECK if no such factory is registered.
+ std::unique_ptr<DeviceInterface> createDeviceInterface(
186
+ const torch::Device& device,
187
+ const std::string_view variant = "ffmpeg");
188
+
189
// Returns a uint8 tensor of shape {height, width, 3} built with
// torch::from_blob over the data of a clone of `avFrame`. The frame's format
// must be AV_PIX_FMT_RGB24.
+ torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
190
+
191
+ } // namespace facebook::torchcodec