torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchcodec/__init__.py +27 -0
- torchcodec/_core/AVIOContextHolder.cpp +60 -0
- torchcodec/_core/AVIOContextHolder.h +64 -0
- torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
- torchcodec/_core/AVIOFileLikeContext.h +55 -0
- torchcodec/_core/AVIOTensorContext.cpp +130 -0
- torchcodec/_core/AVIOTensorContext.h +44 -0
- torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
- torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
- torchcodec/_core/CMakeLists.txt +295 -0
- torchcodec/_core/CUDACommon.cpp +330 -0
- torchcodec/_core/CUDACommon.h +51 -0
- torchcodec/_core/Cache.h +124 -0
- torchcodec/_core/CpuDeviceInterface.cpp +509 -0
- torchcodec/_core/CpuDeviceInterface.h +141 -0
- torchcodec/_core/CudaDeviceInterface.cpp +602 -0
- torchcodec/_core/CudaDeviceInterface.h +79 -0
- torchcodec/_core/DeviceInterface.cpp +117 -0
- torchcodec/_core/DeviceInterface.h +191 -0
- torchcodec/_core/Encoder.cpp +1054 -0
- torchcodec/_core/Encoder.h +192 -0
- torchcodec/_core/FFMPEGCommon.cpp +684 -0
- torchcodec/_core/FFMPEGCommon.h +314 -0
- torchcodec/_core/FilterGraph.cpp +159 -0
- torchcodec/_core/FilterGraph.h +59 -0
- torchcodec/_core/Frame.cpp +47 -0
- torchcodec/_core/Frame.h +72 -0
- torchcodec/_core/Metadata.cpp +124 -0
- torchcodec/_core/Metadata.h +92 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
- torchcodec/_core/NVDECCache.cpp +60 -0
- torchcodec/_core/NVDECCache.h +102 -0
- torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
- torchcodec/_core/SingleStreamDecoder.h +391 -0
- torchcodec/_core/StreamOptions.h +70 -0
- torchcodec/_core/Transform.cpp +128 -0
- torchcodec/_core/Transform.h +86 -0
- torchcodec/_core/ValidationUtils.cpp +35 -0
- torchcodec/_core/ValidationUtils.h +21 -0
- torchcodec/_core/__init__.py +46 -0
- torchcodec/_core/_metadata.py +262 -0
- torchcodec/_core/custom_ops.cpp +1090 -0
- torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
- torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
- torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
- torchcodec/_core/ops.py +605 -0
- torchcodec/_core/pybind_ops.cpp +50 -0
- torchcodec/_frame.py +146 -0
- torchcodec/_internally_replaced_utils.py +68 -0
- torchcodec/_samplers/__init__.py +7 -0
- torchcodec/_samplers/video_clip_sampler.py +419 -0
- torchcodec/decoders/__init__.py +12 -0
- torchcodec/decoders/_audio_decoder.py +185 -0
- torchcodec/decoders/_decoder_utils.py +113 -0
- torchcodec/decoders/_video_decoder.py +601 -0
- torchcodec/encoders/__init__.py +2 -0
- torchcodec/encoders/_audio_encoder.py +149 -0
- torchcodec/encoders/_video_encoder.py +196 -0
- torchcodec/libtorchcodec_core4.so +0 -0
- torchcodec/libtorchcodec_core5.so +0 -0
- torchcodec/libtorchcodec_core6.so +0 -0
- torchcodec/libtorchcodec_core7.so +0 -0
- torchcodec/libtorchcodec_core8.so +0 -0
- torchcodec/libtorchcodec_custom_ops4.so +0 -0
- torchcodec/libtorchcodec_custom_ops5.so +0 -0
- torchcodec/libtorchcodec_custom_ops6.so +0 -0
- torchcodec/libtorchcodec_custom_ops7.so +0 -0
- torchcodec/libtorchcodec_custom_ops8.so +0 -0
- torchcodec/libtorchcodec_pybind_ops4.so +0 -0
- torchcodec/libtorchcodec_pybind_ops5.so +0 -0
- torchcodec/libtorchcodec_pybind_ops6.so +0 -0
- torchcodec/libtorchcodec_pybind_ops7.so +0 -0
- torchcodec/libtorchcodec_pybind_ops8.so +0 -0
- torchcodec/samplers/__init__.py +2 -0
- torchcodec/samplers/_common.py +84 -0
- torchcodec/samplers/_index_based.py +287 -0
- torchcodec/samplers/_time_based.py +358 -0
- torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
- torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
- torchcodec/transforms/__init__.py +12 -0
- torchcodec/transforms/_decoder_transforms.py +375 -0
- torchcodec/version.py +2 -0
- torchcodec-0.10.0.dist-info/METADATA +286 -0
- torchcodec-0.10.0.dist-info/RECORD +88 -0
- torchcodec-0.10.0.dist-info/WHEEL +5 -0
- torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
- torchcodec-0.10.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "DeviceInterface.h"
|
|
8
|
+
#include <map>
|
|
9
|
+
#include <mutex>
|
|
10
|
+
|
|
11
|
+
namespace facebook::torchcodec {
|
|
12
|
+
|
|
13
|
+
namespace {
|
|
14
|
+
using DeviceInterfaceMap =
|
|
15
|
+
std::map<DeviceInterfaceKey, CreateDeviceInterfaceFn>;
|
|
16
|
+
static std::mutex g_interface_mutex;
|
|
17
|
+
|
|
18
|
+
DeviceInterfaceMap& getDeviceMap() {
|
|
19
|
+
static DeviceInterfaceMap deviceMap;
|
|
20
|
+
return deviceMap;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Extracts the device-type prefix of a device string: everything before the
// first ':' (e.g. "cuda:0" -> "cuda"). A string without ':' is returned whole.
std::string getDeviceType(const std::string& device) {
  // find() yields npos when there is no ':'; substr(0, npos) then copies the
  // entire string, so the "no index suffix" case needs no separate branch.
  const size_t colonIndex = device.find(':');
  return device.substr(0, colonIndex);
}
|
|
30
|
+
|
|
31
|
+
} // namespace
|
|
32
|
+
|
|
33
|
+
bool registerDeviceInterface(
|
|
34
|
+
const DeviceInterfaceKey& key,
|
|
35
|
+
CreateDeviceInterfaceFn createInterface) {
|
|
36
|
+
std::scoped_lock lock(g_interface_mutex);
|
|
37
|
+
DeviceInterfaceMap& deviceMap = getDeviceMap();
|
|
38
|
+
|
|
39
|
+
TORCH_CHECK(
|
|
40
|
+
deviceMap.find(key) == deviceMap.end(),
|
|
41
|
+
"Device interface already registered for device type ",
|
|
42
|
+
key.deviceType,
|
|
43
|
+
" variant '",
|
|
44
|
+
key.variant,
|
|
45
|
+
"'");
|
|
46
|
+
deviceMap.insert({key, createInterface});
|
|
47
|
+
|
|
48
|
+
return true;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
void validateDeviceInterface(
|
|
52
|
+
const std::string device,
|
|
53
|
+
const std::string variant) {
|
|
54
|
+
std::scoped_lock lock(g_interface_mutex);
|
|
55
|
+
std::string deviceType = getDeviceType(device);
|
|
56
|
+
|
|
57
|
+
DeviceInterfaceMap& deviceMap = getDeviceMap();
|
|
58
|
+
|
|
59
|
+
// Find device interface that matches device type and variant
|
|
60
|
+
torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type();
|
|
61
|
+
|
|
62
|
+
auto deviceInterface = std::find_if(
|
|
63
|
+
deviceMap.begin(),
|
|
64
|
+
deviceMap.end(),
|
|
65
|
+
[&](const std::pair<DeviceInterfaceKey, CreateDeviceInterfaceFn>& arg) {
|
|
66
|
+
return arg.first.deviceType == deviceTypeEnum &&
|
|
67
|
+
arg.first.variant == variant;
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
TORCH_CHECK(
|
|
71
|
+
deviceInterface != deviceMap.end(),
|
|
72
|
+
"Unsupported device: ",
|
|
73
|
+
device,
|
|
74
|
+
" (device type: ",
|
|
75
|
+
deviceType,
|
|
76
|
+
", variant: ",
|
|
77
|
+
variant,
|
|
78
|
+
")");
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
std::unique_ptr<DeviceInterface> createDeviceInterface(
|
|
82
|
+
const torch::Device& device,
|
|
83
|
+
const std::string_view variant) {
|
|
84
|
+
DeviceInterfaceKey key(device.type(), variant);
|
|
85
|
+
std::scoped_lock lock(g_interface_mutex);
|
|
86
|
+
DeviceInterfaceMap& deviceMap = getDeviceMap();
|
|
87
|
+
|
|
88
|
+
auto it = deviceMap.find(key);
|
|
89
|
+
if (it != deviceMap.end()) {
|
|
90
|
+
return std::unique_ptr<DeviceInterface>(it->second(device));
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
TORCH_CHECK(
|
|
94
|
+
false,
|
|
95
|
+
"No device interface found for device type: ",
|
|
96
|
+
device.type(),
|
|
97
|
+
" variant: '",
|
|
98
|
+
variant,
|
|
99
|
+
"'");
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Wraps an RGB24 AVFrame in a uint8 tensor of shape (height, width, 3)
// without copying pixel data.
//
// The frame's format must be AV_PIX_FMT_RGB24. The row stride is taken from
// linesize[0], so rows may be padded. The tensor is backed by a clone of the
// frame; the clone is handed to a UniqueAVFrame (presumably freeing it) when
// the tensor's storage is released.
torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) {
  TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);

  int height = avFrame->height;
  int width = avFrame->width;
  std::vector<int64_t> shape = {height, width, 3};
  std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};

  // av_frame_clone() returns NULL on failure; check before dereferencing.
  AVFrame* avFrameClone = av_frame_clone(avFrame.get());
  TORCH_CHECK(
      avFrameClone != nullptr,
      "Failed to clone AVFrame while converting RGB frame to tensor");

  // The deleter ignores the data pointer it is given and instead releases the
  // whole cloned frame through UniqueAVFrame.
  auto deleter = [avFrameClone](void*) {
    UniqueAVFrame avFrameToDelete(avFrameClone);
  };
  return torch::from_blob(
      avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
}
|
|
116
|
+
|
|
117
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include <torch/types.h>
|
|
10
|
+
#include <functional>
|
|
11
|
+
#include <memory>
|
|
12
|
+
#include <stdexcept>
|
|
13
|
+
#include <string>
|
|
14
|
+
#include "FFMPEGCommon.h"
|
|
15
|
+
#include "Frame.h"
|
|
16
|
+
#include "StreamOptions.h"
|
|
17
|
+
#include "Transform.h"
|
|
18
|
+
|
|
19
|
+
namespace facebook::torchcodec {
|
|
20
|
+
|
|
21
|
+
// Key for device interface registration with device type + variant support
|
|
22
|
+
struct DeviceInterfaceKey {
|
|
23
|
+
torch::DeviceType deviceType;
|
|
24
|
+
std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
|
|
25
|
+
|
|
26
|
+
bool operator<(const DeviceInterfaceKey& other) const {
|
|
27
|
+
if (deviceType != other.deviceType) {
|
|
28
|
+
return deviceType < other.deviceType;
|
|
29
|
+
}
|
|
30
|
+
return variant < other.variant;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
explicit DeviceInterfaceKey(torch::DeviceType type) : deviceType(type) {}
|
|
34
|
+
|
|
35
|
+
DeviceInterfaceKey(torch::DeviceType type, const std::string_view& variant)
|
|
36
|
+
: deviceType(type), variant(variant) {}
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
class DeviceInterface {
|
|
40
|
+
public:
|
|
41
|
+
DeviceInterface(const torch::Device& device) : device_(device) {}
|
|
42
|
+
|
|
43
|
+
virtual ~DeviceInterface(){};
|
|
44
|
+
|
|
45
|
+
torch::Device& device() {
|
|
46
|
+
return device_;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
virtual std::optional<const AVCodec*> findCodec(
|
|
50
|
+
[[maybe_unused]] const AVCodecID& codecId,
|
|
51
|
+
[[maybe_unused]] bool isDecoder = true) {
|
|
52
|
+
return std::nullopt;
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
// Initialize the device with parameters generic to all kinds of decoding.
|
|
56
|
+
virtual void initialize(
|
|
57
|
+
const AVStream* avStream,
|
|
58
|
+
const UniqueDecodingAVFormatContext& avFormatCtx,
|
|
59
|
+
const SharedAVCodecContext& codecContext) = 0;
|
|
60
|
+
|
|
61
|
+
// Initialize the device with parameters specific to video decoding. There is
|
|
62
|
+
// a default empty implementation.
|
|
63
|
+
virtual void initializeVideo(
|
|
64
|
+
[[maybe_unused]] const VideoStreamOptions& videoStreamOptions,
|
|
65
|
+
[[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
|
|
66
|
+
transforms,
|
|
67
|
+
[[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims) {}
|
|
68
|
+
|
|
69
|
+
// Initialize the device with parameters specific to audio decoding. There is
|
|
70
|
+
// a default empty implementation.
|
|
71
|
+
virtual void initializeAudio(
|
|
72
|
+
[[maybe_unused]] const AudioStreamOptions& audioStreamOptions) {}
|
|
73
|
+
|
|
74
|
+
// Flush any remaining samples from the audio resampler buffer.
|
|
75
|
+
// When sample rate conversion is involved, some samples may be buffered
|
|
76
|
+
// between frames for proper interpolation. This function flushes those
|
|
77
|
+
// buffered samples.
|
|
78
|
+
// Returns an optional tensor containing the flushed samples, or std::nullopt
|
|
79
|
+
// if there are no buffered samples or audio is not supported.
|
|
80
|
+
virtual std::optional<torch::Tensor> maybeFlushAudioBuffers() {
|
|
81
|
+
return std::nullopt;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// In order for decoding to actually happen on an FFmpeg managed hardware
|
|
85
|
+
// device, we need to register the DeviceInterface managed
|
|
86
|
+
// AVHardwareDeviceContext with the AVCodecContext. We don't need to do this
|
|
87
|
+
// on the CPU and if FFmpeg is not managing the hardware device.
|
|
88
|
+
virtual void registerHardwareDeviceWithCodec(
|
|
89
|
+
[[maybe_unused]] AVCodecContext* codecContext) {}
|
|
90
|
+
|
|
91
|
+
virtual void convertAVFrameToFrameOutput(
|
|
92
|
+
UniqueAVFrame& avFrame,
|
|
93
|
+
FrameOutput& frameOutput,
|
|
94
|
+
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
|
|
95
|
+
|
|
96
|
+
// ------------------------------------------
|
|
97
|
+
// Extension points for custom decoding paths
|
|
98
|
+
// ------------------------------------------
|
|
99
|
+
|
|
100
|
+
// Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
|
|
101
|
+
// other AVERROR on failure
|
|
102
|
+
// Default implementation uses FFmpeg directly
|
|
103
|
+
virtual int sendPacket(ReferenceAVPacket& avPacket) {
|
|
104
|
+
TORCH_CHECK(
|
|
105
|
+
codecContext_ != nullptr,
|
|
106
|
+
"Codec context not available for default packet sending");
|
|
107
|
+
return avcodec_send_packet(codecContext_.get(), avPacket.get());
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Send an EOF packet to flush the decoder
|
|
111
|
+
// Returns AVSUCCESS on success, or other AVERROR on failure
|
|
112
|
+
// Default implementation uses FFmpeg directly
|
|
113
|
+
virtual int sendEOFPacket() {
|
|
114
|
+
TORCH_CHECK(
|
|
115
|
+
codecContext_ != nullptr,
|
|
116
|
+
"Codec context not available for default EOF packet sending");
|
|
117
|
+
return avcodec_send_packet(codecContext_.get(), nullptr);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
|
|
121
|
+
// AVERROR_EOF if end of stream, or other AVERROR on failure
|
|
122
|
+
// Default implementation uses FFmpeg directly
|
|
123
|
+
virtual int receiveFrame(UniqueAVFrame& avFrame) {
|
|
124
|
+
TORCH_CHECK(
|
|
125
|
+
codecContext_ != nullptr,
|
|
126
|
+
"Codec context not available for default frame receiving");
|
|
127
|
+
return avcodec_receive_frame(codecContext_.get(), avFrame.get());
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Flush remaining frames from decoder
|
|
131
|
+
virtual void flush() {
|
|
132
|
+
TORCH_CHECK(
|
|
133
|
+
codecContext_ != nullptr,
|
|
134
|
+
"Codec context not available for default flushing");
|
|
135
|
+
avcodec_flush_buffers(codecContext_.get());
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
virtual std::string getDetails() {
|
|
139
|
+
return "";
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Pixel format used for encoding on CUDA devices
|
|
143
|
+
static constexpr AVPixelFormat CUDA_ENCODING_PIXEL_FORMAT = AV_PIX_FMT_NV12;
|
|
144
|
+
|
|
145
|
+
// Function used for video encoding, only implemented in CudaDeviceInterface.
|
|
146
|
+
// It is here to isolate CUDA dependencies from CPU builds
|
|
147
|
+
// TODO Video-Encoder: Reconsider using video encoding functions in device
|
|
148
|
+
// interface
|
|
149
|
+
virtual UniqueAVFrame convertCUDATensorToAVFrameForEncoding(
|
|
150
|
+
[[maybe_unused]] const torch::Tensor& tensor,
|
|
151
|
+
[[maybe_unused]] int frameIndex,
|
|
152
|
+
[[maybe_unused]] AVCodecContext* codecContext) {
|
|
153
|
+
TORCH_CHECK(false);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Function used for video encoding, only implemented in CudaDeviceInterface.
|
|
157
|
+
// It is here to isolate CUDA dependencies from CPU builds
|
|
158
|
+
virtual void setupHardwareFrameContextForEncoding(
|
|
159
|
+
[[maybe_unused]] AVCodecContext* codecContext) {
|
|
160
|
+
TORCH_CHECK(false);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
virtual std::optional<const AVCodec*> findHardwareEncoder(
|
|
164
|
+
[[maybe_unused]] const AVCodecID& codecId) {
|
|
165
|
+
TORCH_CHECK(false);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
protected:
|
|
169
|
+
torch::Device device_;
|
|
170
|
+
SharedAVCodecContext codecContext_;
|
|
171
|
+
AVMediaType avMediaType_;
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
using CreateDeviceInterfaceFn =
|
|
175
|
+
std::function<DeviceInterface*(const torch::Device& device)>;
|
|
176
|
+
|
|
177
|
+
bool registerDeviceInterface(
|
|
178
|
+
const DeviceInterfaceKey& key,
|
|
179
|
+
const CreateDeviceInterfaceFn createInterface);
|
|
180
|
+
|
|
181
|
+
void validateDeviceInterface(
|
|
182
|
+
const std::string device,
|
|
183
|
+
const std::string variant);
|
|
184
|
+
|
|
185
|
+
std::unique_ptr<DeviceInterface> createDeviceInterface(
|
|
186
|
+
const torch::Device& device,
|
|
187
|
+
const std::string_view variant = "ffmpeg");
|
|
188
|
+
|
|
189
|
+
torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
|
|
190
|
+
|
|
191
|
+
} // namespace facebook::torchcodec
|