torchcodec 0.8.0__cp311-cp311-win_amd64.whl → 0.8.1__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of torchcodec might be problematic.
- torchcodec/_core/AVIOTensorContext.cpp +23 -16
- torchcodec/_core/AVIOTensorContext.h +2 -1
- torchcodec/_core/BetaCudaDeviceInterface.cpp +168 -86
- torchcodec/_core/BetaCudaDeviceInterface.h +7 -5
- torchcodec/_core/CMakeLists.txt +1 -19
- torchcodec/_core/CUDACommon.cpp +21 -6
- torchcodec/_core/CUDACommon.h +6 -1
- torchcodec/_core/Cache.h +6 -20
- torchcodec/_core/CpuDeviceInterface.cpp +7 -1
- torchcodec/_core/CpuDeviceInterface.h +4 -1
- torchcodec/_core/CudaDeviceInterface.cpp +19 -11
- torchcodec/_core/CudaDeviceInterface.h +6 -1
- torchcodec/_core/DeviceInterface.h +27 -27
- torchcodec/_core/Encoder.cpp +51 -7
- torchcodec/_core/Encoder.h +12 -1
- torchcodec/_core/FFMPEGCommon.cpp +1 -1
- torchcodec/_core/FFMPEGCommon.h +9 -1
- torchcodec/_core/FilterGraph.cpp +2 -1
- torchcodec/_core/Frame.cpp +5 -0
- torchcodec/_core/Frame.h +1 -1
- torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
- torchcodec/_core/NVDECCache.cpp +3 -13
- torchcodec/_core/NVDECCache.h +4 -6
- torchcodec/_core/SingleStreamDecoder.cpp +22 -31
- torchcodec/_core/SingleStreamDecoder.h +4 -2
- torchcodec/_core/StreamOptions.h +2 -2
- torchcodec/_core/Transform.cpp +27 -0
- torchcodec/_core/Transform.h +25 -0
- torchcodec/_core/__init__.py +3 -0
- torchcodec/_core/custom_ops.cpp +99 -22
- torchcodec/_core/ops.py +76 -16
- torchcodec/decoders/_video_decoder.py +0 -10
- torchcodec/libtorchcodec_core4.dll +0 -0
- torchcodec/libtorchcodec_core5.dll +0 -0
- torchcodec/libtorchcodec_core6.dll +0 -0
- torchcodec/libtorchcodec_core7.dll +0 -0
- torchcodec/libtorchcodec_core8.dll +0 -0
- torchcodec/libtorchcodec_custom_ops4.dll +0 -0
- torchcodec/libtorchcodec_custom_ops5.dll +0 -0
- torchcodec/libtorchcodec_custom_ops6.dll +0 -0
- torchcodec/libtorchcodec_custom_ops7.dll +0 -0
- torchcodec/libtorchcodec_custom_ops8.dll +0 -0
- torchcodec/libtorchcodec_pybind_ops4.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops5.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops6.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops7.pyd +0 -0
- torchcodec/libtorchcodec_pybind_ops8.pyd +0 -0
- torchcodec/version.py +1 -1
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/METADATA +6 -4
- torchcodec-0.8.1.dist-info/RECORD +82 -0
- torchcodec-0.8.0.dist-info/RECORD +0 -80
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/WHEEL +0 -0
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/licenses/LICENSE +0 -0
- {torchcodec-0.8.0.dist-info → torchcodec-0.8.1.dist-info}/top_level.txt +0 -0
torchcodec/_core/Cache.h
CHANGED
@@ -95,30 +95,16 @@ class PerGpuCache {
   std::vector<std::unique_ptr<Cache<T, D>>> cache_;
 };
 
-//
-//
-//
-
-// annoying for such a small amount of code, so we just inline it. If this file
-// grows, and there are more such functions, we should break them out into a
-// .cpp file.
-inline torch::DeviceIndex getNonNegativeDeviceIndex(
-    const torch::Device& device) {
-  torch::DeviceIndex deviceIndex = device.index();
-  // For single GPU machines libtorch returns -1 for the device index. So for
-  // that case we set the device index to 0. That's used in per-gpu cache
-  // implementation and during initialization of CUDA and FFmpeg contexts
-  // which require non negative indices.
-  deviceIndex = std::max<at::DeviceIndex>(deviceIndex, 0);
-  TORCH_CHECK(deviceIndex >= 0, "Device index out of range");
-  return deviceIndex;
-}
+// Forward declaration of getDeviceIndex which exists in CUDACommon.h
+// This avoids circular dependency between Cache.h and CUDACommon.cpp which also
+// needs to include Cache.h
+int getDeviceIndex(const torch::Device& device);
 
 template <typename T, typename D>
 bool PerGpuCache<T, D>::addIfCacheHasCapacity(
     const torch::Device& device,
     element_type&& obj) {
-
+  int deviceIndex = getDeviceIndex(device);
   TORCH_CHECK(
       static_cast<size_t>(deviceIndex) < cache_.size(),
       "Device index out of range");
@@ -128,7 +114,7 @@ bool PerGpuCache<T, D>::addIfCacheHasCapacity(
 template <typename T, typename D>
 typename PerGpuCache<T, D>::element_type PerGpuCache<T, D>::get(
     const torch::Device& device) {
-
+  int deviceIndex = getDeviceIndex(device);
   TORCH_CHECK(
       static_cast<size_t>(deviceIndex) < cache_.size(),
       "Device index out of range");
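
Note on the Cache.h change above: the inline getNonNegativeDeviceIndex() helper is removed and Cache.h now only forward-declares getDeviceIndex(), breaking the include cycle with the CUDA common header. Below is a minimal sketch of that header-cycle-breaking pattern; all names are hypothetical and it is not torchcodec code.

// Sketch only: a header can call a function through a bare declaration, so it
// never needs to include the header/source that defines it, and the cycle
// common.h -> cache.h -> common.h disappears.

// --- per_device_cache.h (hypothetical) ---
#include <vector>

int getDeviceIndexFor(int rawIndex); // forward declaration; defined elsewhere

template <typename T>
class PerDeviceCache {
 public:
  explicit PerDeviceCache(int numDevices) : slots_(numDevices) {}
  T& slot(int rawIndex) {
    // The declaration above is all this template needs to compile.
    return slots_.at(static_cast<size_t>(getDeviceIndexFor(rawIndex)));
  }

 private:
  std::vector<T> slots_;
};

// --- device_common.cpp (hypothetical) ---
int getDeviceIndexFor(int rawIndex) {
  // Single-GPU setups often report -1 as the device index; clamp to 0,
  // mirroring what the removed helper documented.
  return rawIndex < 0 ? 0 : rawIndex;
}
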
torchcodec/_core/CpuDeviceInterface.cpp
CHANGED
@@ -48,8 +48,10 @@ CpuDeviceInterface::CpuDeviceInterface(const torch::Device& device)
 
 void CpuDeviceInterface::initialize(
     const AVStream* avStream,
-    [[maybe_unused]] const UniqueDecodingAVFormatContext& avFormatCtx
+    [[maybe_unused]] const UniqueDecodingAVFormatContext& avFormatCtx,
+    const SharedAVCodecContext& codecContext) {
   TORCH_CHECK(avStream != nullptr, "avStream is null");
+  codecContext_ = codecContext;
   timeBase_ = avStream->time_base;
 }
 
@@ -344,4 +346,8 @@ torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
   return rgbAVFrameToTensor(filterGraph_->convert(avFrame));
 }
 
+std::string CpuDeviceInterface::getDetails() {
+  return std::string("CPU Device Interface.");
+}
+
 } // namespace facebook::torchcodec
torchcodec/_core/CpuDeviceInterface.h
CHANGED
@@ -25,7 +25,8 @@ class CpuDeviceInterface : public DeviceInterface {
 
   virtual void initialize(
       const AVStream* avStream,
-      const UniqueDecodingAVFormatContext& avFormatCtx
+      const UniqueDecodingAVFormatContext& avFormatCtx,
+      const SharedAVCodecContext& codecContext) override;
 
   virtual void initializeVideo(
       const VideoStreamOptions& videoStreamOptions,
@@ -38,6 +39,8 @@ class CpuDeviceInterface : public DeviceInterface {
       std::optional<torch::Tensor> preAllocatedOutputTensor =
           std::nullopt) override;
 
+  std::string getDetails() override;
+
  private:
   int convertAVFrameToTensorUsingSwScale(
       const UniqueAVFrame& avFrame,
torchcodec/_core/CudaDeviceInterface.cpp
CHANGED
@@ -32,9 +32,6 @@ static bool g_cuda = registerDeviceInterface(
 // from
 // the cache. If the cache is empty we create a new cuda context.
 
-// Pytorch can only handle up to 128 GPUs.
-// https://github.com/pytorch/pytorch/blob/e30c55ee527b40d67555464b9e402b4b7ce03737/c10/cuda/CUDAMacros.h#L44
-const int MAX_CUDA_GPUS = 128;
 // Set to -1 to have an infinitely sized cache. Set it to 0 to disable caching.
 // Set to a positive number to have a cache of that size.
 const int MAX_CONTEXTS_PER_GPU_IN_CACHE = -1;
@@ -54,7 +51,7 @@ int getFlagsAVHardwareDeviceContextCreate() {
 UniqueAVBufferRef getHardwareDeviceContext(const torch::Device& device) {
   enum AVHWDeviceType type = av_hwdevice_find_type_by_name("cuda");
   TORCH_CHECK(type != AV_HWDEVICE_TYPE_NONE, "Failed to find cuda device");
-
+  int deviceIndex = getDeviceIndex(device);
 
   UniqueAVBufferRef hardwareDeviceCtx = g_cached_hw_device_ctxs.get(device);
   if (hardwareDeviceCtx) {
@@ -63,14 +60,12 @@ UniqueAVBufferRef getHardwareDeviceContext(const torch::Device& device) {
 
   // Create hardware device context
   c10::cuda::CUDAGuard deviceGuard(device);
-  // Valid values for the argument to cudaSetDevice are 0 to maxDevices - 1:
-  // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g159587909ffa0791bbe4b40187a4c6bb
-  // So we ensure the deviceIndex is not negative.
   // We set the device because we may be called from a different thread than
   // the one that initialized the cuda context.
-
+  TORCH_CHECK(
+      cudaSetDevice(deviceIndex) == cudaSuccess, "Failed to set CUDA device");
   AVBufferRef* hardwareDeviceCtxRaw = nullptr;
-  std::string deviceOrdinal = std::to_string(
+  std::string deviceOrdinal = std::to_string(deviceIndex);
 
   int err = av_hwdevice_ctx_create(
       &hardwareDeviceCtxRaw,
@@ -117,15 +112,17 @@ CudaDeviceInterface::~CudaDeviceInterface() {
 
 void CudaDeviceInterface::initialize(
     const AVStream* avStream,
-    const UniqueDecodingAVFormatContext& avFormatCtx
+    const UniqueDecodingAVFormatContext& avFormatCtx,
+    const SharedAVCodecContext& codecContext) {
   TORCH_CHECK(avStream != nullptr, "avStream is null");
+  codecContext_ = codecContext;
   timeBase_ = avStream->time_base;
 
   // TODO: Ideally, we should keep all interface implementations independent.
   cpuInterface_ = createDeviceInterface(torch::kCPU);
   TORCH_CHECK(
       cpuInterface_ != nullptr, "Failed to create CPU device interface");
-  cpuInterface_->initialize(avStream, avFormatCtx);
+  cpuInterface_->initialize(avStream, avFormatCtx, codecContext);
   cpuInterface_->initializeVideo(
       VideoStreamOptions(),
       {},
@@ -287,9 +284,12 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
       frameOutput.data = cpuFrameOutput.data.to(device_);
     }
 
+    usingCPUFallback_ = true;
     return;
   }
 
+  usingCPUFallback_ = false;
+
   // Above we checked that the AVFrame was on GPU, but that's not enough, we
   // also need to check that the AVFrame is in AV_PIX_FMT_NV12 format (8 bits),
   // because this is what the NPP color conversion routines expect. This SHOULD
@@ -354,4 +354,12 @@ std::optional<const AVCodec*> CudaDeviceInterface::findCodec(
   return std::nullopt;
 }
 
+std::string CudaDeviceInterface::getDetails() {
+  // Note: for this interface specifically the fallback is only known after a
+  // frame has been decoded, not before: that's when FFmpeg decides to fallback,
+  // so we can't know earlier.
+  return std::string("FFmpeg CUDA Device Interface. Using ") +
+      (usingCPUFallback_ ? "CPU fallback." : "NVDEC.");
+}
+
 } // namespace facebook::torchcodec
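
The usingCPUFallback_ flag above is set per frame, after decoding, because that is when FFmpeg decides whether the CUDA hwaccel actually produced a hardware frame. A hedged sketch of the underlying per-frame check, written against plain FFmpeg API rather than torchcodec code (the function name is made up for illustration):

extern "C" {
#include <libavutil/frame.h>
#include <libavutil/pixfmt.h>
}

// Returns true if the decoded frame lives in CUDA device memory, false if the
// decoder silently fell back to a software (CPU) pixel format.
static bool frameWasDecodedOnGpu(const AVFrame* frame) {
  // Hardware frames produced by the CUDA hwaccel carry AV_PIX_FMT_CUDA;
  // anything else means the pixel data is in system memory.
  return frame != nullptr && frame->format == AV_PIX_FMT_CUDA;
}
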
torchcodec/_core/CudaDeviceInterface.h
CHANGED
@@ -22,7 +22,8 @@ class CudaDeviceInterface : public DeviceInterface {
 
   void initialize(
       const AVStream* avStream,
-      const UniqueDecodingAVFormatContext& avFormatCtx
+      const UniqueDecodingAVFormatContext& avFormatCtx,
+      const SharedAVCodecContext& codecContext) override;
 
   void initializeVideo(
       const VideoStreamOptions& videoStreamOptions,
@@ -39,6 +40,8 @@ class CudaDeviceInterface : public DeviceInterface {
       std::optional<torch::Tensor> preAllocatedOutputTensor =
           std::nullopt) override;
 
+  std::string getDetails() override;
+
  private:
   // Our CUDA decoding code assumes NV12 format. In order to handle other
   // kinds of input, we need to convert them to NV12. Our current implementation
@@ -59,6 +62,8 @@ class CudaDeviceInterface : public DeviceInterface {
   // maybeConvertAVFrameToNV12().
   std::unique_ptr<FiltersContext> nv12ConversionContext_;
   std::unique_ptr<FilterGraph> nv12Conversion_;
+
+  bool usingCPUFallback_ = false;
 };
 
 } // namespace facebook::torchcodec
torchcodec/_core/DeviceInterface.h
CHANGED
@@ -21,7 +21,7 @@ namespace facebook::torchcodec {
 // Key for device interface registration with device type + variant support
 struct DeviceInterfaceKey {
   torch::DeviceType deviceType;
-  std::string_view variant = "
+  std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc.
 
   bool operator<(const DeviceInterfaceKey& other) const {
     if (deviceType != other.deviceType) {
@@ -54,7 +54,8 @@ class DeviceInterface {
   // Initialize the device with parameters generic to all kinds of decoding.
   virtual void initialize(
       const AVStream* avStream,
-      const UniqueDecodingAVFormatContext& avFormatCtx
+      const UniqueDecodingAVFormatContext& avFormatCtx,
+      const SharedAVCodecContext& codecContext) = 0;
 
   // Initialize the device with parameters specific to video decoding. There is
   // a default empty implementation.
@@ -80,52 +81,51 @@ class DeviceInterface {
   // Extension points for custom decoding paths
   // ------------------------------------------
 
-  // Override to return true if this device interface can decode packets
-  // directly. This means that the following two member functions can both
-  // be called:
-  //
-  // 1. sendPacket()
-  // 2. receiveFrame()
-  virtual bool canDecodePacketDirectly() const {
-    return false;
-  }
-
-  // Moral equivalent of avcodec_send_packet()
   // Returns AVSUCCESS on success, AVERROR(EAGAIN) if decoder queue full, or
   // other AVERROR on failure
-
+  // Default implementation uses FFmpeg directly
+  virtual int sendPacket(ReferenceAVPacket& avPacket) {
     TORCH_CHECK(
-
-        "
-    return
+        codecContext_ != nullptr,
+        "Codec context not available for default packet sending");
+    return avcodec_send_packet(codecContext_.get(), avPacket.get());
   }
 
   // Send an EOF packet to flush the decoder
   // Returns AVSUCCESS on success, or other AVERROR on failure
+  // Default implementation uses FFmpeg directly
   virtual int sendEOFPacket() {
     TORCH_CHECK(
-
-
+        codecContext_ != nullptr,
+        "Codec context not available for default EOF packet sending");
+    return avcodec_send_packet(codecContext_.get(), nullptr);
   }
 
-  // Moral equivalent of avcodec_receive_frame()
   // Returns AVSUCCESS on success, AVERROR(EAGAIN) if no frame ready,
   // AVERROR_EOF if end of stream, or other AVERROR on failure
-
+  // Default implementation uses FFmpeg directly
+  virtual int receiveFrame(UniqueAVFrame& avFrame) {
     TORCH_CHECK(
-
-        "
-    return
+        codecContext_ != nullptr,
+        "Codec context not available for default frame receiving");
+    return avcodec_receive_frame(codecContext_.get(), avFrame.get());
   }
 
   // Flush remaining frames from decoder
   virtual void flush() {
-
-
+    TORCH_CHECK(
+        codecContext_ != nullptr,
+        "Codec context not available for default flushing");
+    avcodec_flush_buffers(codecContext_.get());
+  }
+
+  virtual std::string getDetails() {
+    return "";
   }
 
 protected:
  torch::Device device_;
+  SharedAVCodecContext codecContext_;
 };
 
 using CreateDeviceInterfaceFn =
@@ -141,7 +141,7 @@ void validateDeviceInterface(
 
 std::unique_ptr<DeviceInterface> createDeviceInterface(
     const torch::Device& device,
-    const std::string_view variant = "
+    const std::string_view variant = "ffmpeg");
 
 torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame);
 
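
The new default sendPacket()/receiveFrame()/flush() bodies above simply forward to FFmpeg through the stored codecContext_, so they follow the standard avcodec send/receive contract. The sketch below shows that contract with plain FFmpeg calls; it is a hedged illustration, not torchcodec code, and the helper name is hypothetical.

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/error.h>
#include <libavutil/frame.h>
}

// Feed one packet (or nullptr at EOF) and drain every frame it produces.
static int decodePacket(AVCodecContext* ctx, const AVPacket* pkt, AVFrame* frame) {
  int ret = avcodec_send_packet(ctx, pkt); // what sendPacket()/sendEOFPacket() wrap
  if (ret < 0) {
    return ret;
  }
  while (true) {
    ret = avcodec_receive_frame(ctx, frame); // what receiveFrame() wraps
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
      return 0; // need more input, or the stream is fully drained
    }
    if (ret < 0) {
      return ret; // real decoding error
    }
    // ... consume `frame` here ...
    av_frame_unref(frame);
  }
}
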
torchcodec/_core/Encoder.cpp
CHANGED
@@ -4,10 +4,6 @@
 #include "src/torchcodec/_core/Encoder.h"
 #include "torch/types.h"
 
-extern "C" {
-#include <libavutil/pixdesc.h>
-}
-
 namespace facebook::torchcodec {
 
 namespace {
@@ -542,10 +538,17 @@ torch::Tensor validateFrames(const torch::Tensor& frames) {
 } // namespace
 
 VideoEncoder::~VideoEncoder() {
+  // TODO-VideoEncoder: Unify destructor with ~AudioEncoder()
   if (avFormatContext_ && avFormatContext_->pb) {
-
-
-
+    if (avFormatContext_->pb->error == 0) {
+      avio_flush(avFormatContext_->pb);
+    }
+    if (!avioContextHolder_) {
+      if (avFormatContext_->pb->error == 0) {
+        avio_close(avFormatContext_->pb);
+      }
+      avFormatContext_->pb = nullptr;
+    }
   }
 }
 
@@ -581,6 +584,36 @@ VideoEncoder::VideoEncoder(
   initializeEncoder(videoStreamOptions);
 }
 
+VideoEncoder::VideoEncoder(
+    const torch::Tensor& frames,
+    int frameRate,
+    std::string_view formatName,
+    std::unique_ptr<AVIOContextHolder> avioContextHolder,
+    const VideoStreamOptions& videoStreamOptions)
+    : frames_(validateFrames(frames)),
+      inFrameRate_(frameRate),
+      avioContextHolder_(std::move(avioContextHolder)) {
+  setFFmpegLogLevel();
+  // Map mkv -> matroska when used as format name
+  formatName = (formatName == "mkv") ? "matroska" : formatName;
+  AVFormatContext* avFormatContext = nullptr;
+  int status = avformat_alloc_output_context2(
+      &avFormatContext, nullptr, formatName.data(), nullptr);
+
+  TORCH_CHECK(
+      avFormatContext != nullptr,
+      "Couldn't allocate AVFormatContext. ",
+      "Check the desired format? Got format=",
+      formatName,
+      ". ",
+      getFFMPEGErrorStringFromErrorCode(status));
+  avFormatContext_.reset(avFormatContext);
+
+  avFormatContext_->pb = avioContextHolder_->getAVIOContext();
+
+  initializeEncoder(videoStreamOptions);
+}
+
 void VideoEncoder::initializeEncoder(
     const VideoStreamOptions& videoStreamOptions) {
   const AVCodec* avCodec =
@@ -751,6 +784,17 @@ UniqueAVFrame VideoEncoder::convertTensorToAVFrame(
   return avFrame;
 }
 
+torch::Tensor VideoEncoder::encodeToTensor() {
+  TORCH_CHECK(
+      avioContextHolder_ != nullptr,
+      "Cannot encode to tensor, avio tensor context doesn't exist.");
+  encode();
+  auto avioToTensorContext =
+      dynamic_cast<AVIOToTensorContext*>(avioContextHolder_.get());
+  TORCH_CHECK(avioToTensorContext != nullptr, "Invalid AVIO context holder.");
+  return avioToTensorContext->getOutputTensor();
+}
+
 void VideoEncoder::encodeFrame(
     AutoAVPacket& autoAVPacket,
     const UniqueAVFrame& avFrame) {
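
The new VideoEncoder constructor above selects the muxer purely by format name and then attaches the caller-provided AVIO context as avFormatContext_->pb, which is what makes the in-memory encodeToTensor() path possible. A minimal sketch of that allocation step with plain FFmpeg calls (the helper name is hypothetical; this is not the torchcodec implementation):

extern "C" {
#include <libavformat/avformat.h>
}
#include <string>

static AVFormatContext* allocOutputContextByName(const std::string& formatName) {
  // "mkv" is a file extension, not a muxer name; the constructor above maps it
  // to "matroska" before calling into FFmpeg, so the sketch mirrors that.
  const std::string muxer = (formatName == "mkv") ? "matroska" : formatName;
  AVFormatContext* ctx = nullptr;
  int status = avformat_alloc_output_context2(&ctx, nullptr, muxer.c_str(), nullptr);
  if (status < 0 || ctx == nullptr) {
    return nullptr; // unknown or unsupported format
  }
  // The caller is expected to set ctx->pb (e.g. to an in-memory AVIO context)
  // and eventually release the context with avformat_free_context(ctx).
  return ctx;
}
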
torchcodec/_core/Encoder.h
CHANGED
@@ -141,8 +141,17 @@ class VideoEncoder {
       std::string_view fileName,
       const VideoStreamOptions& videoStreamOptions);
 
+  VideoEncoder(
+      const torch::Tensor& frames,
+      int frameRate,
+      std::string_view formatName,
+      std::unique_ptr<AVIOContextHolder> avioContextHolder,
+      const VideoStreamOptions& videoStreamOptions);
+
   void encode();
 
+  torch::Tensor encodeToTensor();
+
  private:
   void initializeEncoder(const VideoStreamOptions& videoStreamOptions);
   UniqueAVFrame convertTensorToAVFrame(
@@ -153,7 +162,7 @@ class VideoEncoder {
 
   UniqueEncodingAVFormatContext avFormatContext_;
   UniqueAVCodecContext avCodecContext_;
-  AVStream* avStream_;
+  AVStream* avStream_ = nullptr;
   UniqueSwsContext swsContext_;
 
   const torch::Tensor frames_;
@@ -167,6 +176,8 @@ class VideoEncoder {
   int outHeight_ = -1;
   AVPixelFormat outPixelFormat_ = AV_PIX_FMT_NONE;
 
+  std::unique_ptr<AVIOContextHolder> avioContextHolder_;
+
   bool encodeWasCalled_ = false;
 };
 
torchcodec/_core/FFMPEGCommon.cpp
CHANGED
@@ -149,7 +149,7 @@ int getNumChannels(const UniqueAVFrame& avFrame) {
 #endif
 }
 
-int getNumChannels(const
+int getNumChannels(const SharedAVCodecContext& avCodecContext) {
 #if LIBAVFILTER_VERSION_MAJOR > 8 || \
     (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
   return avCodecContext->ch_layout.nb_channels;
torchcodec/_core/FFMPEGCommon.h
CHANGED
@@ -71,6 +71,14 @@ using UniqueEncodingAVFormatContext = std::unique_ptr<
 using UniqueAVCodecContext = std::unique_ptr<
     AVCodecContext,
     Deleterp<AVCodecContext, void, avcodec_free_context>>;
+using SharedAVCodecContext = std::shared_ptr<AVCodecContext>;
+
+// create SharedAVCodecContext with custom deleter
+inline SharedAVCodecContext makeSharedAVCodecContext(AVCodecContext* ctx) {
+  return SharedAVCodecContext(
+      ctx, Deleterp<AVCodecContext, void, avcodec_free_context>{});
+}
+
 using UniqueAVFrame =
     std::unique_ptr<AVFrame, Deleterp<AVFrame, void, av_frame_free>>;
 using UniqueAVFilterGraph = std::unique_ptr<
@@ -171,7 +179,7 @@ const AVSampleFormat* getSupportedOutputSampleFormats(const AVCodec& avCodec);
 const AVPixelFormat* getSupportedPixelFormats(const AVCodec& avCodec);
 
 int getNumChannels(const UniqueAVFrame& avFrame);
-int getNumChannels(const
+int getNumChannels(const SharedAVCodecContext& avCodecContext);
 
 void setDefaultChannelLayout(
     UniqueAVCodecContext& avCodecContext,
torchcodec/_core/FilterGraph.cpp
CHANGED
@@ -130,7 +130,8 @@ FilterGraph::FilterGraph(
   TORCH_CHECK(
       status >= 0,
       "Failed to configure filter graph: ",
-      getFFMPEGErrorStringFromErrorCode(status)
+      getFFMPEGErrorStringFromErrorCode(status),
+      ", provided filters: " + filtersContext.filtergraphStr);
 }
 
 UniqueAVFrame FilterGraph::convert(const UniqueAVFrame& avFrame) {
torchcodec/_core/Frame.cpp
CHANGED
@@ -8,6 +8,11 @@
 
 namespace facebook::torchcodec {
 
+FrameDims::FrameDims(int height, int width) : height(height), width(width) {
+  TORCH_CHECK(height > 0, "FrameDims.height must be > 0, got: ", height);
+  TORCH_CHECK(width > 0, "FrameDims.width must be > 0, got: ", width);
+}
+
 FrameBatchOutput::FrameBatchOutput(
     int64_t numFrames,
     const FrameDims& outputDims,