torchcodec 0.7.0__cp313-cp313-win_amd64.whl → 0.8.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of torchcodec might be problematic. Click here for more details.
- torchcodec/_core/AVIOTensorContext.cpp +23 -16
- torchcodec/_core/AVIOTensorContext.h +2 -1
- torchcodec/_core/BetaCudaDeviceInterface.cpp +718 -0
- torchcodec/_core/BetaCudaDeviceInterface.h +193 -0
- torchcodec/_core/CMakeLists.txt +18 -3
- torchcodec/_core/CUDACommon.cpp +330 -0
- torchcodec/_core/CUDACommon.h +51 -0
- torchcodec/_core/Cache.h +6 -20
- torchcodec/_core/CpuDeviceInterface.cpp +195 -108
- torchcodec/_core/CpuDeviceInterface.h +84 -19
- torchcodec/_core/CudaDeviceInterface.cpp +227 -376
- torchcodec/_core/CudaDeviceInterface.h +38 -6
- torchcodec/_core/DeviceInterface.cpp +57 -19
- torchcodec/_core/DeviceInterface.h +97 -16
- torchcodec/_core/Encoder.cpp +346 -9
- torchcodec/_core/Encoder.h +62 -1
- torchcodec/_core/FFMPEGCommon.cpp +190 -3
- torchcodec/_core/FFMPEGCommon.h +27 -1
- torchcodec/_core/FilterGraph.cpp +30 -22
- torchcodec/_core/FilterGraph.h +15 -1
- torchcodec/_core/Frame.cpp +22 -7
- torchcodec/_core/Frame.h +15 -61
- torchcodec/_core/Metadata.h +2 -2
- torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
- torchcodec/_core/NVDECCache.cpp +60 -0
- torchcodec/_core/NVDECCache.h +102 -0
- torchcodec/_core/SingleStreamDecoder.cpp +196 -201
- torchcodec/_core/SingleStreamDecoder.h +42 -15
- torchcodec/_core/StreamOptions.h +16 -6
- torchcodec/_core/Transform.cpp +87 -0
- torchcodec/_core/Transform.h +84 -0
- torchcodec/_core/__init__.py +4 -0
- torchcodec/_core/custom_ops.cpp +257 -32
- torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +61 -1
- torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
- torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
- torchcodec/_core/ops.py +147 -44
- torchcodec/_core/pybind_ops.cpp +22 -59
- torchcodec/_samplers/video_clip_sampler.py +7 -19
- torchcodec/decoders/__init__.py +1 -0
- torchcodec/decoders/_decoder_utils.py +61 -1
- torchcodec/decoders/_video_decoder.py +46 -20
- torchcodec/libtorchcodec_core4.dll +0 -0
- torchcodec/libtorchcodec_core5.dll +0 -0
- torchcodec/libtorchcodec_core6.dll +0 -0
- torchcodec/libtorchcodec_core7.dll +0 -0
- torchcodec/libtorchcodec_core8.dll +0 -0
- torchcodec/libtorchcodec_custom_ops4.dll +0 -0
- torchcodec/libtorchcodec_custom_ops5.dll +0 -0
- torchcodec/libtorchcodec_custom_ops6.dll +0 -0
- torchcodec/libtorchcodec_custom_ops7.dll +0 -0
- torchcodec/libtorchcodec_custom_ops8.dll +0 -0
- torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
- torchcodec/samplers/_time_based.py +8 -0
- torchcodec/version.py +1 -1
- {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +29 -16
- torchcodec-0.8.1.dist-info/RECORD +82 -0
- {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +1 -1
- torchcodec-0.7.0.dist-info/RECORD +0 -67
- {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
- {torchcodec-0.7.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0
|
@@ -6,8 +6,9 @@
|
|
|
6
6
|
|
|
7
7
|
#pragma once
|
|
8
8
|
|
|
9
|
-
#include
|
|
9
|
+
#include "src/torchcodec/_core/CUDACommon.h"
|
|
10
10
|
#include "src/torchcodec/_core/DeviceInterface.h"
|
|
11
|
+
#include "src/torchcodec/_core/FilterGraph.h"
|
|
11
12
|
|
|
12
13
|
namespace facebook::torchcodec {
|
|
13
14
|
|
|
@@ -19,19 +20,50 @@ class CudaDeviceInterface : public DeviceInterface {
|
|
|
19
20
|
|
|
20
21
|
std::optional<const AVCodec*> findCodec(const AVCodecID& codecId) override;
|
|
21
22
|
|
|
22
|
-
void
|
|
23
|
+
void initialize(
|
|
24
|
+
const AVStream* avStream,
|
|
25
|
+
const UniqueDecodingAVFormatContext& avFormatCtx,
|
|
26
|
+
const SharedAVCodecContext& codecContext) override;
|
|
23
27
|
|
|
24
|
-
void
|
|
28
|
+
void initializeVideo(
|
|
25
29
|
const VideoStreamOptions& videoStreamOptions,
|
|
26
|
-
const
|
|
30
|
+
[[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
|
|
31
|
+
transforms,
|
|
32
|
+
[[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims)
|
|
33
|
+
override;
|
|
34
|
+
|
|
35
|
+
void registerHardwareDeviceWithCodec(AVCodecContext* codecContext) override;
|
|
36
|
+
|
|
37
|
+
void convertAVFrameToFrameOutput(
|
|
27
38
|
UniqueAVFrame& avFrame,
|
|
28
39
|
FrameOutput& frameOutput,
|
|
29
40
|
std::optional<torch::Tensor> preAllocatedOutputTensor =
|
|
30
41
|
std::nullopt) override;
|
|
31
42
|
|
|
43
|
+
std::string getDetails() override;
|
|
44
|
+
|
|
32
45
|
private:
|
|
33
|
-
|
|
34
|
-
|
|
46
|
+
// Our CUDA decoding code assumes NV12 format. In order to handle other
|
|
47
|
+
// kinds of input, we need to convert them to NV12. Our current implementation
|
|
48
|
+
// does this using filtergraph.
|
|
49
|
+
UniqueAVFrame maybeConvertAVFrameToNV12OrRGB24(UniqueAVFrame& avFrame);
|
|
50
|
+
|
|
51
|
+
// We sometimes encounter frames that cannot be decoded on the CUDA device.
|
|
52
|
+
// Rather than erroring out, we decode them on the CPU.
|
|
53
|
+
std::unique_ptr<DeviceInterface> cpuInterface_;
|
|
54
|
+
|
|
55
|
+
VideoStreamOptions videoStreamOptions_;
|
|
56
|
+
AVRational timeBase_;
|
|
57
|
+
|
|
58
|
+
UniqueAVBufferRef hardwareDeviceCtx_;
|
|
59
|
+
UniqueNppContext nppCtx_;
|
|
60
|
+
|
|
61
|
+
// This filtergraph instance is only used for NV12 format conversion in
|
|
62
|
+
// maybeConvertAVFrameToNV12OrRGB24().
|
|
63
|
+
std::unique_ptr<FiltersContext> nv12ConversionContext_;
|
|
64
|
+
std::unique_ptr<FilterGraph> nv12Conversion_;
|
|
65
|
+
|
|
66
|
+
bool usingCPUFallback_ = false;
|
|
35
67
|
};
|
|
36
68
|
|
|
37
69
|
} // namespace facebook::torchcodec
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
namespace facebook::torchcodec {
|
|
12
12
|
|
|
13
13
|
namespace {
|
|
14
|
-
using DeviceInterfaceMap =
|
|
14
|
+
using DeviceInterfaceMap =
|
|
15
|
+
std::map<DeviceInterfaceKey, CreateDeviceInterfaceFn>;
|
|
15
16
|
static std::mutex g_interface_mutex;
|
|
16
17
|
|
|
17
18
|
DeviceInterfaceMap& getDeviceMap() {
|
|
@@ -30,50 +31,87 @@ std::string getDeviceType(const std::string& device) {
|
|
|
30
31
|
} // namespace
|
|
31
32
|
|
|
32
33
|
bool registerDeviceInterface(
|
|
33
|
-
|
|
34
|
+
const DeviceInterfaceKey& key,
|
|
34
35
|
CreateDeviceInterfaceFn createInterface) {
|
|
35
36
|
std::scoped_lock lock(g_interface_mutex);
|
|
36
37
|
DeviceInterfaceMap& deviceMap = getDeviceMap();
|
|
37
38
|
|
|
38
39
|
TORCH_CHECK(
|
|
39
|
-
deviceMap.find(
|
|
40
|
-
"Device interface already registered for ",
|
|
41
|
-
deviceType
|
|
42
|
-
|
|
40
|
+
deviceMap.find(key) == deviceMap.end(),
|
|
41
|
+
"Device interface already registered for device type ",
|
|
42
|
+
key.deviceType,
|
|
43
|
+
" variant '",
|
|
44
|
+
key.variant,
|
|
45
|
+
"'");
|
|
46
|
+
deviceMap.insert({key, createInterface});
|
|
43
47
|
|
|
44
48
|
return true;
|
|
45
49
|
}
|
|
46
50
|
|
|
47
|
-
|
|
51
|
+
void validateDeviceInterface(
|
|
52
|
+
const std::string device,
|
|
53
|
+
const std::string variant) {
|
|
48
54
|
std::scoped_lock lock(g_interface_mutex);
|
|
49
55
|
std::string deviceType = getDeviceType(device);
|
|
56
|
+
|
|
50
57
|
DeviceInterfaceMap& deviceMap = getDeviceMap();
|
|
51
58
|
|
|
59
|
+
// Find device interface that matches device type and variant
|
|
60
|
+
torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type();
|
|
61
|
+
|
|
52
62
|
auto deviceInterface = std::find_if(
|
|
53
63
|
deviceMap.begin(),
|
|
54
64
|
deviceMap.end(),
|
|
55
|
-
[&](const std::pair<
|
|
56
|
-
return
|
|
57
|
-
|
|
65
|
+
[&](const std::pair<DeviceInterfaceKey, CreateDeviceInterfaceFn>& arg) {
|
|
66
|
+
return arg.first.deviceType == deviceTypeEnum &&
|
|
67
|
+
arg.first.variant == variant;
|
|
58
68
|
});
|
|
59
|
-
TORCH_CHECK(
|
|
60
|
-
deviceInterface != deviceMap.end(), "Unsupported device: ", device);
|
|
61
69
|
|
|
62
|
-
|
|
70
|
+
TORCH_CHECK(
|
|
71
|
+
deviceInterface != deviceMap.end(),
|
|
72
|
+
"Unsupported device: ",
|
|
73
|
+
device,
|
|
74
|
+
" (device type: ",
|
|
75
|
+
deviceType,
|
|
76
|
+
", variant: ",
|
|
77
|
+
variant,
|
|
78
|
+
")");
|
|
63
79
|
}
|
|
64
80
|
|
|
65
81
|
std::unique_ptr<DeviceInterface> createDeviceInterface(
|
|
66
|
-
const torch::Device& device
|
|
67
|
-
|
|
82
|
+
const torch::Device& device,
|
|
83
|
+
const std::string_view variant) {
|
|
84
|
+
DeviceInterfaceKey key(device.type(), variant);
|
|
68
85
|
std::scoped_lock lock(g_interface_mutex);
|
|
69
86
|
DeviceInterfaceMap& deviceMap = getDeviceMap();
|
|
70
87
|
|
|
88
|
+
auto it = deviceMap.find(key);
|
|
89
|
+
if (it != deviceMap.end()) {
|
|
90
|
+
return std::unique_ptr<DeviceInterface>(it->second(device));
|
|
91
|
+
}
|
|
92
|
+
|
|
71
93
|
TORCH_CHECK(
|
|
72
|
-
|
|
73
|
-
"
|
|
74
|
-
device)
|
|
94
|
+
false,
|
|
95
|
+
"No device interface found for device type: ",
|
|
96
|
+
device.type(),
|
|
97
|
+
" variant: '",
|
|
98
|
+
variant,
|
|
99
|
+
"'");
|
|
100
|
+
}
|
|
75
101
|
|
|
76
|
-
|
|
102
|
+
torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) {
|
|
103
|
+
TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
|
|
104
|
+
|
|
105
|
+
int height = avFrame->height;
|
|
106
|
+
int width = avFrame->width;
|
|
107
|
+
std::vector<int64_t> shape = {height, width, 3};
|
|
108
|
+
std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
|
|
109
|
+
AVFrame* avFrameClone = av_frame_clone(avFrame.get());
|
|
110
|
+
auto deleter = [avFrameClone](void*) {
|
|
111
|
+
UniqueAVFrame avFrameToDelete(avFrameClone);
|
|
112
|
+
};
|
|
113
|
+
return torch::from_blob(
|
|
114
|
+
avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
|
|
77
115
|
}
|
|
78
116
|
|
|
79
117
|
} // namespace facebook::torchcodec
|
|
@@ -14,16 +14,27 @@
|
|
|
14
14
|
#include "FFMPEGCommon.h"
|
|
15
15
|
#include "src/torchcodec/_core/Frame.h"
|
|
16
16
|
#include "src/torchcodec/_core/StreamOptions.h"
|
|
17
|
+
#include "src/torchcodec/_core/Transform.h"
|
|
17
18
|
|
|
18
19
|
namespace facebook::torchcodec {
|
|
19
20
|
|
|
20
|
-
//
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
21
|
+
// Key for device interface registration with device type + variant support
|
|
22
|
+
struct DeviceInterfaceKey {
|
|
23
|
+
torch::DeviceType deviceType;
|
|
24
|
+
std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
|
|
25
|
+
|
|
26
|
+
bool operator<(const DeviceInterfaceKey& other) const {
|
|
27
|
+
if (deviceType != other.deviceType) {
|
|
28
|
+
return deviceType < other.deviceType;
|
|
29
|
+
}
|
|
30
|
+
return variant < other.variant;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
explicit DeviceInterfaceKey(torch::DeviceType type) : deviceType(type) {}
|
|
34
|
+
|
|
35
|
+
DeviceInterfaceKey(torch::DeviceType type, const std::string_view& variant)
|
|
36
|
+
: deviceType(type), variant(variant) {}
|
|
37
|
+
};
|
|
27
38
|
|
|
28
39
|
class DeviceInterface {
|
|
29
40
|
public:
|
|
@@ -35,33 +46,103 @@ class DeviceInterface {
|
|
|
35
46
|
return device_;
|
|
36
47
|
};
|
|
37
48
|
|
|
38
|
-
virtual std::optional<const AVCodec*> findCodec(
|
|
49
|
+
virtual std::optional<const AVCodec*> findCodec(
|
|
50
|
+
[[maybe_unused]] const AVCodecID& codecId) {
|
|
51
|
+
return std::nullopt;
|
|
52
|
+
};
|
|
39
53
|
|
|
40
|
-
// Initialize the
|
|
41
|
-
|
|
42
|
-
|
|
54
|
+
// Initialize the device with parameters generic to all kinds of decoding.
|
|
55
|
+
virtual void initialize(
|
|
56
|
+
const AVStream* avStream,
|
|
57
|
+
const UniqueDecodingAVFormatContext& avFormatCtx,
|
|
58
|
+
const SharedAVCodecContext& codecContext) = 0;
|
|
59
|
+
|
|
60
|
+
// Initialize the device with parameters specific to video decoding. There is
|
|
61
|
+
// a default empty implementation.
|
|
62
|
+
virtual void initializeVideo(
|
|
63
|
+
[[maybe_unused]] const VideoStreamOptions& videoStreamOptions,
|
|
64
|
+
[[maybe_unused]] const std::vector<std::unique_ptr<Transform>>&
|
|
65
|
+
transforms,
|
|
66
|
+
[[maybe_unused]] const std::optional<FrameDims>& resizedOutputDims) {}
|
|
67
|
+
|
|
68
|
+
// In order for decoding to actually happen on an FFmpeg managed hardware
|
|
69
|
+
// device, we need to register the DeviceInterface managed
|
|
70
|
+
// AVHardwareDeviceContext with the AVCodecContext. We don't need to do this
|
|
71
|
+
// on the CPU, or when FFmpeg is not managing the hardware device.
|
|
72
|
+
virtual void registerHardwareDeviceWithCodec(
|
|
73
|
+
[[maybe_unused]] AVCodecContext* codecContext) {}
|
|
43
74
|
|
|
44
75
|
virtual void convertAVFrameToFrameOutput(
|
|
45
|
-
const VideoStreamOptions& videoStreamOptions,
|
|
46
|
-
const AVRational& timeBase,
|
|
47
76
|
UniqueAVFrame& avFrame,
|
|
48
77
|
FrameOutput& frameOutput,
|
|
49
78
|
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
|
|
50
79
|
|
|
80
|
+
// ------------------------------------------
|
|
81
|
+
// Extension points for custom decoding paths
|
|
82
|
+
// ------------------------------------------
|
|
83
|
+
|
|
84
|
+
// Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
|
|
85
|
+
// other AVERROR on failure
|
|
86
|
+
// Default implementation uses FFmpeg directly
|
|
87
|
+
virtual int sendPacket(ReferenceAVPacket& avPacket) {
|
|
88
|
+
TORCH_CHECK(
|
|
89
|
+
codecContext_ != nullptr,
|
|
90
|
+
"Codec context not available for default packet sending");
|
|
91
|
+
return avcodec_send_packet(codecContext_.get(), avPacket.get());
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Send an EOF packet to flush the decoder
|
|
95
|
+
// Returns AVSUCCESS on success, or other AVERROR on failure
|
|
96
|
+
// Default implementation uses FFmpeg directly
|
|
97
|
+
virtual int sendEOFPacket() {
|
|
98
|
+
TORCH_CHECK(
|
|
99
|
+
codecContext_ != nullptr,
|
|
100
|
+
"Codec context not available for default EOF packet sending");
|
|
101
|
+
return avcodec_send_packet(codecContext_.get(), nullptr);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
|
|
105
|
+
// AVERROR_EOF if end of stream, or other AVERROR on failure
|
|
106
|
+
// Default implementation uses FFmpeg directly
|
|
107
|
+
virtual int receiveFrame(UniqueAVFrame& avFrame) {
|
|
108
|
+
TORCH_CHECK(
|
|
109
|
+
codecContext_ != nullptr,
|
|
110
|
+
"Codec context not available for default frame receiving");
|
|
111
|
+
return avcodec_receive_frame(codecContext_.get(), avFrame.get());
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Flush remaining frames from decoder
|
|
115
|
+
virtual void flush() {
|
|
116
|
+
TORCH_CHECK(
|
|
117
|
+
codecContext_ != nullptr,
|
|
118
|
+
"Codec context not available for default flushing");
|
|
119
|
+
avcodec_flush_buffers(codecContext_.get());
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
virtual std::string getDetails() {
|
|
123
|
+
return "";
|
|
124
|
+
}
|
|
125
|
+
|
|
51
126
|
protected:
|
|
52
127
|
torch::Device device_;
|
|
128
|
+
SharedAVCodecContext codecContext_;
|
|
53
129
|
};
|
|
54
130
|
|
|
55
131
|
using CreateDeviceInterfaceFn =
|
|
56
132
|
std::function<DeviceInterface*(const torch::Device& device)>;
|
|
57
133
|
|
|
58
134
|
bool registerDeviceInterface(
|
|
59
|
-
|
|
135
|
+
const DeviceInterfaceKey& key,
|
|
60
136
|
const CreateDeviceInterfaceFn createInterface);
|
|
61
137
|
|
|
62
|
-
|
|
138
|
+
void validateDeviceInterface(
|
|
139
|
+
const std::string device,
|
|
140
|
+
const std::string variant);
|
|
63
141
|
|
|
64
142
|
std::unique_ptr<DeviceInterface> createDeviceInterface(
|
|
65
|
-
const torch::Device& device
|
|
143
|
+
const torch::Device& device,
|
|
144
|
+
const std::string_view variant = "ffmpeg");
|
|
145
|
+
|
|
146
|
+
torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
|
|
66
147
|
|
|
67
148
|
} // namespace facebook::torchcodec
|