torchcodec 0.10.0__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchcodec/__init__.py +27 -0
- torchcodec/_core/AVIOContextHolder.cpp +60 -0
- torchcodec/_core/AVIOContextHolder.h +64 -0
- torchcodec/_core/AVIOFileLikeContext.cpp +98 -0
- torchcodec/_core/AVIOFileLikeContext.h +55 -0
- torchcodec/_core/AVIOTensorContext.cpp +130 -0
- torchcodec/_core/AVIOTensorContext.h +44 -0
- torchcodec/_core/BetaCudaDeviceInterface.cpp +849 -0
- torchcodec/_core/BetaCudaDeviceInterface.h +196 -0
- torchcodec/_core/CMakeLists.txt +295 -0
- torchcodec/_core/CUDACommon.cpp +330 -0
- torchcodec/_core/CUDACommon.h +51 -0
- torchcodec/_core/Cache.h +124 -0
- torchcodec/_core/CpuDeviceInterface.cpp +509 -0
- torchcodec/_core/CpuDeviceInterface.h +141 -0
- torchcodec/_core/CudaDeviceInterface.cpp +602 -0
- torchcodec/_core/CudaDeviceInterface.h +79 -0
- torchcodec/_core/DeviceInterface.cpp +117 -0
- torchcodec/_core/DeviceInterface.h +191 -0
- torchcodec/_core/Encoder.cpp +1054 -0
- torchcodec/_core/Encoder.h +192 -0
- torchcodec/_core/FFMPEGCommon.cpp +684 -0
- torchcodec/_core/FFMPEGCommon.h +314 -0
- torchcodec/_core/FilterGraph.cpp +159 -0
- torchcodec/_core/FilterGraph.h +59 -0
- torchcodec/_core/Frame.cpp +47 -0
- torchcodec/_core/Frame.h +72 -0
- torchcodec/_core/Metadata.cpp +124 -0
- torchcodec/_core/Metadata.h +92 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.cpp +320 -0
- torchcodec/_core/NVCUVIDRuntimeLoader.h +14 -0
- torchcodec/_core/NVDECCache.cpp +60 -0
- torchcodec/_core/NVDECCache.h +102 -0
- torchcodec/_core/SingleStreamDecoder.cpp +1586 -0
- torchcodec/_core/SingleStreamDecoder.h +391 -0
- torchcodec/_core/StreamOptions.h +70 -0
- torchcodec/_core/Transform.cpp +128 -0
- torchcodec/_core/Transform.h +86 -0
- torchcodec/_core/ValidationUtils.cpp +35 -0
- torchcodec/_core/ValidationUtils.h +21 -0
- torchcodec/_core/__init__.py +46 -0
- torchcodec/_core/_metadata.py +262 -0
- torchcodec/_core/custom_ops.cpp +1090 -0
- torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake +169 -0
- torchcodec/_core/nvcuvid_include/cuviddec.h +1374 -0
- torchcodec/_core/nvcuvid_include/nvcuvid.h +610 -0
- torchcodec/_core/ops.py +605 -0
- torchcodec/_core/pybind_ops.cpp +50 -0
- torchcodec/_frame.py +146 -0
- torchcodec/_internally_replaced_utils.py +68 -0
- torchcodec/_samplers/__init__.py +7 -0
- torchcodec/_samplers/video_clip_sampler.py +419 -0
- torchcodec/decoders/__init__.py +12 -0
- torchcodec/decoders/_audio_decoder.py +185 -0
- torchcodec/decoders/_decoder_utils.py +113 -0
- torchcodec/decoders/_video_decoder.py +601 -0
- torchcodec/encoders/__init__.py +2 -0
- torchcodec/encoders/_audio_encoder.py +149 -0
- torchcodec/encoders/_video_encoder.py +196 -0
- torchcodec/libtorchcodec_core4.so +0 -0
- torchcodec/libtorchcodec_core5.so +0 -0
- torchcodec/libtorchcodec_core6.so +0 -0
- torchcodec/libtorchcodec_core7.so +0 -0
- torchcodec/libtorchcodec_core8.so +0 -0
- torchcodec/libtorchcodec_custom_ops4.so +0 -0
- torchcodec/libtorchcodec_custom_ops5.so +0 -0
- torchcodec/libtorchcodec_custom_ops6.so +0 -0
- torchcodec/libtorchcodec_custom_ops7.so +0 -0
- torchcodec/libtorchcodec_custom_ops8.so +0 -0
- torchcodec/libtorchcodec_pybind_ops4.so +0 -0
- torchcodec/libtorchcodec_pybind_ops5.so +0 -0
- torchcodec/libtorchcodec_pybind_ops6.so +0 -0
- torchcodec/libtorchcodec_pybind_ops7.so +0 -0
- torchcodec/libtorchcodec_pybind_ops8.so +0 -0
- torchcodec/samplers/__init__.py +2 -0
- torchcodec/samplers/_common.py +84 -0
- torchcodec/samplers/_index_based.py +287 -0
- torchcodec/samplers/_time_based.py +358 -0
- torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake +76 -0
- torchcodec/share/cmake/TorchCodec/ffmpeg_versions.cmake +122 -0
- torchcodec/transforms/__init__.py +12 -0
- torchcodec/transforms/_decoder_transforms.py +375 -0
- torchcodec/version.py +2 -0
- torchcodec-0.10.0.dist-info/METADATA +286 -0
- torchcodec-0.10.0.dist-info/RECORD +88 -0
- torchcodec-0.10.0.dist-info/WHEEL +5 -0
- torchcodec-0.10.0.dist-info/licenses/LICENSE +28 -0
- torchcodec-0.10.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include "Metadata.h"
|
|
8
|
+
#include "torch/types.h"
|
|
9
|
+
|
|
10
|
+
namespace facebook::torchcodec {
|
|
11
|
+
|
|
12
|
+
// Returns the stream duration in seconds for the requested seek mode.
//
// - exact / custom_frame_mappings: end minus begin of the content-derived
//   timestamps. Raises through TORCH_CHECK if either timestamp is missing.
// - approximate: the first available of
//     1. the duration from the stream header,
//     2. numFramesFromHeader / averageFpsFromHeader (only when fps != 0),
//     3. the container-level duration,
//   and std::nullopt when none of them is set.
// Any other SeekMode value raises through TORCH_CHECK.
std::optional<double> StreamMetadata::getDurationSeconds(
    SeekMode seekMode) const {
  if (seekMode == SeekMode::exact ||
      seekMode == SeekMode::custom_frame_mappings) {
    TORCH_CHECK(
        endStreamPtsSecondsFromContent.has_value() &&
            beginStreamPtsSecondsFromContent.has_value(),
        "Missing beginStreamPtsSecondsFromContent or endStreamPtsSecondsFromContent");
    return *endStreamPtsSecondsFromContent - *beginStreamPtsSecondsFromContent;
  }

  if (seekMode == SeekMode::approximate) {
    if (durationSecondsFromHeader.has_value()) {
      return durationSecondsFromHeader;
    }

    // Derive the duration from frame count and fps; a zero fps is skipped to
    // avoid dividing by zero.
    const bool canDeriveFromFrames = numFramesFromHeader.has_value() &&
        averageFpsFromHeader.has_value() && *averageFpsFromHeader != 0.0;
    if (canDeriveFromFrames) {
      return static_cast<double>(*numFramesFromHeader) / *averageFpsFromHeader;
    }

    // Last resort: the container duration, which may itself be unset.
    return durationSecondsFromContainer;
  }

  TORCH_CHECK(false, "Unknown SeekMode");
}
|
|
40
|
+
|
|
41
|
+
// Returns the timestamp, in seconds, at which the stream begins.
//
// - exact / custom_frame_mappings: the content-derived begin timestamp.
//   Raises through TORCH_CHECK if it is missing.
// - approximate: the content-derived begin timestamp when available,
//   otherwise 0.0.
// Any other SeekMode value raises through TORCH_CHECK.
double StreamMetadata::getBeginStreamSeconds(SeekMode seekMode) const {
  if (seekMode == SeekMode::approximate) {
    return beginStreamPtsSecondsFromContent.value_or(0.0);
  }

  if (seekMode == SeekMode::exact ||
      seekMode == SeekMode::custom_frame_mappings) {
    TORCH_CHECK(
        beginStreamPtsSecondsFromContent.has_value(),
        "Missing beginStreamPtsSecondsFromContent");
    return *beginStreamPtsSecondsFromContent;
  }

  TORCH_CHECK(false, "Unknown SeekMode");
}
|
|
58
|
+
|
|
59
|
+
// Returns the timestamp, in seconds, at which the stream ends.
//
// - exact / custom_frame_mappings: the content-derived end timestamp.
//   Raises through TORCH_CHECK if it is missing.
// - approximate: the content-derived end timestamp when available, otherwise
//   whatever getDurationSeconds() yields for this mode (possibly nullopt).
// Any other SeekMode value raises through TORCH_CHECK.
std::optional<double> StreamMetadata::getEndStreamSeconds(
    SeekMode seekMode) const {
  if (seekMode == SeekMode::approximate) {
    if (!endStreamPtsSecondsFromContent.has_value()) {
      return getDurationSeconds(seekMode);
    }
    return endStreamPtsSecondsFromContent;
  }

  if (seekMode == SeekMode::exact ||
      seekMode == SeekMode::custom_frame_mappings) {
    TORCH_CHECK(
        endStreamPtsSecondsFromContent.has_value(),
        "Missing endStreamPtsSecondsFromContent");
    return *endStreamPtsSecondsFromContent;
  }

  TORCH_CHECK(false, "Unknown SeekMode");
}
|
|
77
|
+
|
|
78
|
+
// Returns the number of frames in the stream for the requested seek mode.
//
// - exact / custom_frame_mappings: the content-derived frame count. Raises
//   through TORCH_CHECK if it is missing.
// - approximate: the frame count from the header when present; otherwise
//   averageFpsFromHeader * duration, truncated towards zero, when both are
//   known; otherwise std::nullopt.
// Any other SeekMode value raises through TORCH_CHECK.
std::optional<int64_t> StreamMetadata::getNumFrames(SeekMode seekMode) const {
  switch (seekMode) {
    case SeekMode::custom_frame_mappings:
    case SeekMode::exact:
      TORCH_CHECK(
          numFramesFromContent.has_value(), "Missing numFramesFromContent");
      return numFramesFromContent.value();
    case SeekMode::approximate: {
      if (numFramesFromHeader.has_value()) {
        return numFramesFromHeader.value();
      }
      if (!averageFpsFromHeader.has_value()) {
        return std::nullopt;
      }
      // The duration is only needed for the fps-based estimate, so it is
      // computed here rather than unconditionally at the top of the case.
      const auto durationSeconds = getDurationSeconds(seekMode);
      if (!durationSeconds.has_value()) {
        return std::nullopt;
      }
      return static_cast<int64_t>(
          averageFpsFromHeader.value() * durationSeconds.value());
    }
    default:
      TORCH_CHECK(false, "Unknown SeekMode");
  }
}
|
|
100
|
+
|
|
101
|
+
// Returns the average frames-per-second of the stream.
//
// - approximate: the value reported by the header (possibly nullopt).
// - exact / custom_frame_mappings: frame count divided by the content-derived
//   duration (end - begin). Falls back to the header value when a timestamp
//   is missing or the duration is exactly zero. getNumFrames() raises in
//   these modes if the content-derived frame count is missing.
// Any other SeekMode value raises through TORCH_CHECK.
std::optional<double> StreamMetadata::getAverageFps(SeekMode seekMode) const {
  if (seekMode == SeekMode::approximate) {
    return averageFpsFromHeader;
  }

  if (seekMode == SeekMode::exact ||
      seekMode == SeekMode::custom_frame_mappings) {
    const auto frameCount = getNumFrames(seekMode);
    const bool haveContentSpan = frameCount.has_value() &&
        beginStreamPtsSecondsFromContent.has_value() &&
        endStreamPtsSecondsFromContent.has_value();
    if (!haveContentSpan) {
      return averageFpsFromHeader;
    }

    const double spanSeconds =
        *endStreamPtsSecondsFromContent - *beginStreamPtsSecondsFromContent;
    if (spanSeconds == 0.0) {
      // Avoid dividing by zero; use the header value instead.
      return averageFpsFromHeader;
    }
    return static_cast<double>(*frameCount) / spanSeconds;
  }

  TORCH_CHECK(false, "Unknown SeekMode");
}
|
|
123
|
+
|
|
124
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include <optional>
|
|
10
|
+
#include <string>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
extern "C" {
|
|
14
|
+
#include <libavcodec/avcodec.h>
|
|
15
|
+
#include <libavutil/avutil.h>
|
|
16
|
+
#include <libavutil/rational.h>
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
namespace facebook::torchcodec {
|
|
20
|
+
|
|
21
|
+
// Selects which metadata the computed StreamMetadata getters rely on.
enum class SeekMode {
  // Getters require the content-derived ("FromContent") fields.
  exact,
  // Getters fall back to header/container-derived fields.
  approximate,
  // Handled by the getters exactly like `exact`.
  custom_frame_mappings,
};
|
|
22
|
+
|
|
23
|
+
struct StreamMetadata {
|
|
24
|
+
// Common (video and audio) fields derived from the AVStream.
|
|
25
|
+
int streamIndex = -1;
|
|
26
|
+
|
|
27
|
+
// See this link for what various values are available:
|
|
28
|
+
// https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48
|
|
29
|
+
AVMediaType mediaType = AVMEDIA_TYPE_UNKNOWN;
|
|
30
|
+
|
|
31
|
+
std::optional<AVCodecID> codecId;
|
|
32
|
+
std::optional<std::string> codecName;
|
|
33
|
+
std::optional<double> durationSecondsFromHeader;
|
|
34
|
+
std::optional<double> beginStreamSecondsFromHeader;
|
|
35
|
+
std::optional<int64_t> numFramesFromHeader;
|
|
36
|
+
std::optional<int64_t> numKeyFrames;
|
|
37
|
+
std::optional<double> averageFpsFromHeader;
|
|
38
|
+
std::optional<double> bitRate;
|
|
39
|
+
|
|
40
|
+
// Used as fallback in approximate mode when stream duration is unavailable.
|
|
41
|
+
std::optional<double> durationSecondsFromContainer;
|
|
42
|
+
|
|
43
|
+
// More accurate duration, obtained by scanning the file.
|
|
44
|
+
// These presentation timestamps are in time base.
|
|
45
|
+
std::optional<int64_t> beginStreamPtsFromContent;
|
|
46
|
+
std::optional<int64_t> endStreamPtsFromContent;
|
|
47
|
+
|
|
48
|
+
// These presentation timestamps are in seconds.
|
|
49
|
+
std::optional<double> beginStreamPtsSecondsFromContent;
|
|
50
|
+
std::optional<double> endStreamPtsSecondsFromContent;
|
|
51
|
+
|
|
52
|
+
// This can be useful for index-based seeking.
|
|
53
|
+
std::optional<int64_t> numFramesFromContent;
|
|
54
|
+
|
|
55
|
+
// Video-only fields
|
|
56
|
+
std::optional<int> width;
|
|
57
|
+
std::optional<int> height;
|
|
58
|
+
std::optional<AVRational> sampleAspectRatio;
|
|
59
|
+
|
|
60
|
+
// Audio-only fields
|
|
61
|
+
std::optional<int64_t> sampleRate;
|
|
62
|
+
std::optional<int64_t> numChannels;
|
|
63
|
+
std::optional<std::string> sampleFormat;
|
|
64
|
+
|
|
65
|
+
// Computed methods with fallback logic
|
|
66
|
+
std::optional<double> getDurationSeconds(SeekMode seekMode) const;
|
|
67
|
+
double getBeginStreamSeconds(SeekMode seekMode) const;
|
|
68
|
+
std::optional<double> getEndStreamSeconds(SeekMode seekMode) const;
|
|
69
|
+
std::optional<int64_t> getNumFrames(SeekMode seekMode) const;
|
|
70
|
+
std::optional<double> getAverageFps(SeekMode seekMode) const;
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
struct ContainerMetadata {
|
|
74
|
+
std::vector<StreamMetadata> allStreamMetadata;
|
|
75
|
+
int numAudioStreams = 0;
|
|
76
|
+
int numVideoStreams = 0;
|
|
77
|
+
|
|
78
|
+
// Note that this is the container-level duration, which is usually the max
|
|
79
|
+
// of all stream durations available in the container.
|
|
80
|
+
std::optional<double> durationSecondsFromHeader;
|
|
81
|
+
|
|
82
|
+
// Total BitRate level information at the container level in bit/s
|
|
83
|
+
std::optional<double> bitRate;
|
|
84
|
+
|
|
85
|
+
// If set, this is the index to the default audio stream.
|
|
86
|
+
std::optional<int> bestAudioStreamIndex;
|
|
87
|
+
|
|
88
|
+
// If set, this is the index to the default video stream.
|
|
89
|
+
std::optional<int> bestVideoStreamIndex;
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#ifdef FBCODE_CAFFE2
|
|
8
|
+
// No need to do anything on fbcode. NVCUVID is available there, we can take a
|
|
9
|
+
// hard dependency on it.
|
|
10
|
+
// The FBCODE_CAFFE2 macro is defined in the upstream fbcode build of torch, so
|
|
11
|
+
// we can rely on it, that's what torch does too.
|
|
12
|
+
|
|
13
|
+
namespace facebook::torchcodec {
|
|
14
|
+
// fbcode build: NVCUVID is a hard dependency there, so there is nothing to
// load at runtime and this always reports success.
bool loadNVCUVIDLibrary() {
  return true;
}
|
|
17
|
+
} // namespace facebook::torchcodec
|
|
18
|
+
#else
|
|
19
|
+
|
|
20
|
+
#include "NVCUVIDRuntimeLoader.h"
|
|
21
|
+
|
|
22
|
+
#include "nvcuvid_include/cuviddec.h"
|
|
23
|
+
#include "nvcuvid_include/nvcuvid.h"
|
|
24
|
+
|
|
25
|
+
#include <torch/types.h>
|
|
26
|
+
#include <cstdio>
|
|
27
|
+
#include <mutex>
|
|
28
|
+
|
|
29
|
+
#if defined(WIN64) || defined(_WIN64)
|
|
30
|
+
#include <windows.h>
|
|
31
|
+
typedef HMODULE tHandle;
|
|
32
|
+
#else
|
|
33
|
+
#include <dlfcn.h>
|
|
34
|
+
typedef void* tHandle;
|
|
35
|
+
#endif
|
|
36
|
+
|
|
37
|
+
namespace facebook::torchcodec {
|
|
38
|
+
|
|
39
|
+
/* clang-format off */
|
|
40
|
+
// This file defines the logic to load the NVCUVID library **at runtime**,
|
|
41
|
+
// along with the corresponding NVCUVID functions that we'll need.
|
|
42
|
+
//
|
|
43
|
+
// We do this because we *do not want* to link (statically or dynamically)
|
|
44
|
+
// against libnvcuvid.so: it is not always available on the user's machine! If we
|
|
45
|
+
// were to link against libnvcuvid.so, that would mean that our
|
|
46
|
+
// libtorchcodec_coreN.so would try to look for it when loaded at import time.
|
|
47
|
+
// And if it's not on the user's machine, that causes `import torchcodec` to
|
|
48
|
+
// fail. Source: that's what we did, and we got user reports.
|
|
49
|
+
//
|
|
50
|
+
// So, we don't link against libnvcuvid.so. But we still want to call its
|
|
51
|
+
// functions. So here's how it's done, we'll use cuvidCreateVideoParser as an
|
|
52
|
+
// example, but it works the same for all. We are largely following the
|
|
53
|
+
// instructions from the NVCUVID docs:
|
|
54
|
+
// https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvdec-video-decoder-api-prog-guide/index.html#dynamic-loading-nvidia-components
|
|
55
|
+
//
|
|
56
|
+
// This:
|
|
57
|
+
// typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser*, CUVIDPARSERPARAMS*);
|
|
58
|
+
// defines tcuvidCreateVideoParser, which is the *type* of a *function*.
|
|
59
|
+
// We declare a pointer to a function of that type just below with:
|
|
60
|
+
// static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr;
|
|
61
|
+
// "dl" is for "dynamically loaded". For now dl_cuvidCreateVideoParser is
|
|
62
|
+
// nullptr, but later it will be a proper function [pointer] that can be called
|
|
63
|
+
// with dl_cuvidCreateVideoParser(...);
|
|
64
|
+
//
|
|
65
|
+
// For that to happen we need to call loadNVCUVIDLibrary(): in there, we first
|
|
66
|
+
// dlopen(libnvcuvid.so) which loads the .so somewhere in memory. Then we call
|
|
67
|
+
// dlsym(...), which binds dl_cuvidCreateVideoParser to its actual address: it
|
|
68
|
+
// literally sets the value of the dl_cuvidCreateVideoParser pointer to the
|
|
69
|
+
// address of the actual code section. If all went well, by now, we can safely
|
|
70
|
+
// call dl_cuvidCreateVideoParser(...);
|
|
71
|
+
// All of that happens at runtime *after* import time, when the first instance
|
|
72
|
+
// of the Beta CUDA interface is created, i.e. only when the user explicitly
|
|
73
|
+
// requests it.
|
|
74
|
+
//
|
|
75
|
+
// At the bottom of this file we have an `extern "C"` section with function
|
|
76
|
+
// definitions like:
|
|
77
|
+
//
|
|
78
|
+
// CUresult CUDAAPI cuvidCreateVideoParser(
|
|
79
|
+
// CUvideoparser* videoParser,
|
|
80
|
+
// CUVIDPARSERPARAMS* parserParams) {...}
|
|
81
|
+
//
|
|
82
|
+
// These are the actual functions that are compiled against and called by the
|
|
83
|
+
// Beta CUDA interface code. Crucially, these functions' signatures match exactly
|
|
84
|
+
// the NVCUVID functions (as defined in cuviddec.h). Inside of
|
|
85
|
+
// cuvidCreateVideoParser(...) we simply call the dl_cuvidCreateVideoParser
|
|
86
|
+
// function [pointer] that we dynamically loaded earlier.
|
|
87
|
+
//
|
|
88
|
+
// At runtime, within the Beta CUDA interface code we have a fallback mechanism
|
|
89
|
+
// to switch back to the CPU backend if any of the NVCUVID functions are not
|
|
90
|
+
// available, or if libnvcuvid.so itself couldn't be found. This is what FFmpeg
|
|
91
|
+
// does too.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
// Function types (pointers to these types are declared further below)
|
|
95
|
+
// One alias per NVCUVID entry point that is resolved at runtime. Each alias
// names a *function type*; the dl_* globals are pointers to these types.
using tcuvidCreateVideoParser = CUresult CUDAAPI(CUvideoparser*, CUVIDPARSERPARAMS*);
using tcuvidParseVideoData = CUresult CUDAAPI(CUvideoparser, CUVIDSOURCEDATAPACKET*);
using tcuvidDestroyVideoParser = CUresult CUDAAPI(CUvideoparser);
using tcuvidGetDecoderCaps = CUresult CUDAAPI(CUVIDDECODECAPS*);
using tcuvidCreateDecoder = CUresult CUDAAPI(CUvideodecoder*, CUVIDDECODECREATEINFO*);
using tcuvidDestroyDecoder = CUresult CUDAAPI(CUvideodecoder);
using tcuvidDecodePicture = CUresult CUDAAPI(CUvideodecoder, CUVIDPICPARAMS*);
using tcuvidMapVideoFrame = CUresult CUDAAPI(CUvideodecoder, int, unsigned int*, unsigned int*, CUVIDPROCPARAMS*);
using tcuvidUnmapVideoFrame = CUresult CUDAAPI(CUvideodecoder, unsigned int);
using tcuvidMapVideoFrame64 = CUresult CUDAAPI(CUvideodecoder, int, unsigned long long*, unsigned int*, CUVIDPROCPARAMS*);
using tcuvidUnmapVideoFrame64 = CUresult CUDAAPI(CUvideodecoder, unsigned long long);
|
|
106
|
+
/* clang-format on */
|
|
107
|
+
|
|
108
|
+
// Global function pointers - will be dynamically loaded
|
|
109
|
+
static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr;
|
|
110
|
+
static tcuvidParseVideoData* dl_cuvidParseVideoData = nullptr;
|
|
111
|
+
static tcuvidDestroyVideoParser* dl_cuvidDestroyVideoParser = nullptr;
|
|
112
|
+
static tcuvidGetDecoderCaps* dl_cuvidGetDecoderCaps = nullptr;
|
|
113
|
+
static tcuvidCreateDecoder* dl_cuvidCreateDecoder = nullptr;
|
|
114
|
+
static tcuvidDestroyDecoder* dl_cuvidDestroyDecoder = nullptr;
|
|
115
|
+
static tcuvidDecodePicture* dl_cuvidDecodePicture = nullptr;
|
|
116
|
+
static tcuvidMapVideoFrame* dl_cuvidMapVideoFrame = nullptr;
|
|
117
|
+
static tcuvidUnmapVideoFrame* dl_cuvidUnmapVideoFrame = nullptr;
|
|
118
|
+
static tcuvidMapVideoFrame64* dl_cuvidMapVideoFrame64 = nullptr;
|
|
119
|
+
static tcuvidUnmapVideoFrame64* dl_cuvidUnmapVideoFrame64 = nullptr;
|
|
120
|
+
|
|
121
|
+
static tHandle g_nvcuvid_handle = nullptr;
|
|
122
|
+
static std::mutex g_nvcuvid_mutex;
|
|
123
|
+
|
|
124
|
+
bool isLoaded() {
|
|
125
|
+
return (
|
|
126
|
+
g_nvcuvid_handle && dl_cuvidCreateVideoParser && dl_cuvidParseVideoData &&
|
|
127
|
+
dl_cuvidDestroyVideoParser && dl_cuvidGetDecoderCaps &&
|
|
128
|
+
dl_cuvidCreateDecoder && dl_cuvidDestroyDecoder &&
|
|
129
|
+
dl_cuvidDecodePicture && dl_cuvidMapVideoFrame &&
|
|
130
|
+
dl_cuvidUnmapVideoFrame && dl_cuvidMapVideoFrame64 &&
|
|
131
|
+
dl_cuvidUnmapVideoFrame64);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
template <typename T>
|
|
135
|
+
T* bindFunction(const char* functionName) {
|
|
136
|
+
#if defined(WIN64) || defined(_WIN64)
|
|
137
|
+
return reinterpret_cast<T*>(GetProcAddress(g_nvcuvid_handle, functionName));
|
|
138
|
+
#else
|
|
139
|
+
return reinterpret_cast<T*>(dlsym(g_nvcuvid_handle, functionName));
|
|
140
|
+
#endif
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Opens the NVCUVID shared library and stores its handle in
// g_nvcuvid_handle. Returns true when a handle was obtained.
//
// Kept separate from loadNVCUVIDLibrary() so that the platform-specific
// #ifdef ugliness stays in one place.
bool _loadLibrary() {
#if defined(WIN64) || defined(_WIN64)
#ifdef UNICODE
  static LPCWSTR nvcuvidDll = L"nvcuvid.dll";
#else
  static LPCSTR nvcuvidDll = "nvcuvid.dll";
#endif
  g_nvcuvid_handle = LoadLibrary(nvcuvidDll);
#else
  // Try the unversioned name first, then the versioned soname.
  const char* const candidateNames[] = {"libnvcuvid.so", "libnvcuvid.so.1"};
  for (const char* soName : candidateNames) {
    g_nvcuvid_handle = dlopen(soName, RTLD_NOW);
    if (g_nvcuvid_handle != nullptr) {
      break;
    }
  }
#endif

  return g_nvcuvid_handle != nullptr;
}
|
|
168
|
+
|
|
169
|
+
bool loadNVCUVIDLibrary() {
|
|
170
|
+
// Loads NVCUVID library and all required function pointers.
|
|
171
|
+
// Returns true on success, false on failure.
|
|
172
|
+
std::lock_guard<std::mutex> lock(g_nvcuvid_mutex);
|
|
173
|
+
|
|
174
|
+
if (isLoaded()) {
|
|
175
|
+
return true;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
if (!_loadLibrary()) {
|
|
179
|
+
return false;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Load all function pointers. They'll be set to nullptr if not found.
|
|
183
|
+
dl_cuvidCreateVideoParser =
|
|
184
|
+
bindFunction<tcuvidCreateVideoParser>("cuvidCreateVideoParser");
|
|
185
|
+
dl_cuvidParseVideoData =
|
|
186
|
+
bindFunction<tcuvidParseVideoData>("cuvidParseVideoData");
|
|
187
|
+
dl_cuvidDestroyVideoParser =
|
|
188
|
+
bindFunction<tcuvidDestroyVideoParser>("cuvidDestroyVideoParser");
|
|
189
|
+
dl_cuvidGetDecoderCaps =
|
|
190
|
+
bindFunction<tcuvidGetDecoderCaps>("cuvidGetDecoderCaps");
|
|
191
|
+
dl_cuvidCreateDecoder =
|
|
192
|
+
bindFunction<tcuvidCreateDecoder>("cuvidCreateDecoder");
|
|
193
|
+
dl_cuvidDestroyDecoder =
|
|
194
|
+
bindFunction<tcuvidDestroyDecoder>("cuvidDestroyDecoder");
|
|
195
|
+
dl_cuvidDecodePicture =
|
|
196
|
+
bindFunction<tcuvidDecodePicture>("cuvidDecodePicture");
|
|
197
|
+
dl_cuvidMapVideoFrame =
|
|
198
|
+
bindFunction<tcuvidMapVideoFrame>("cuvidMapVideoFrame");
|
|
199
|
+
dl_cuvidUnmapVideoFrame =
|
|
200
|
+
bindFunction<tcuvidUnmapVideoFrame>("cuvidUnmapVideoFrame");
|
|
201
|
+
dl_cuvidMapVideoFrame64 =
|
|
202
|
+
bindFunction<tcuvidMapVideoFrame64>("cuvidMapVideoFrame64");
|
|
203
|
+
dl_cuvidUnmapVideoFrame64 =
|
|
204
|
+
bindFunction<tcuvidUnmapVideoFrame64>("cuvidUnmapVideoFrame64");
|
|
205
|
+
|
|
206
|
+
return isLoaded();
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
} // namespace facebook::torchcodec
|
|
210
|
+
|
|
211
|
+
extern "C" {
|
|
212
|
+
|
|
213
|
+
// Forwards to the runtime-bound cuvidCreateVideoParser; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidCreateVideoParser(
    CUvideoparser* videoParser,
    CUVIDPARSERPARAMS* parserParams) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidCreateVideoParser;
  TORCH_CHECK(
      boundFn != nullptr,
      "cuvidCreateVideoParser called but NVCUVID not loaded!");
  return boundFn(videoParser, parserParams);
}
|
|
222
|
+
|
|
223
|
+
// Forwards to the runtime-bound cuvidParseVideoData; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidParseVideoData(
    CUvideoparser videoParser,
    CUVIDSOURCEDATAPACKET* cuvidPacket) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidParseVideoData;
  TORCH_CHECK(
      boundFn != nullptr, "cuvidParseVideoData called but NVCUVID not loaded!");
  return boundFn(videoParser, cuvidPacket);
}
|
|
231
|
+
|
|
232
|
+
// Forwards to the runtime-bound cuvidDestroyVideoParser; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser videoParser) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidDestroyVideoParser;
  TORCH_CHECK(
      boundFn != nullptr,
      "cuvidDestroyVideoParser called but NVCUVID not loaded!");
  return boundFn(videoParser);
}
|
|
238
|
+
|
|
239
|
+
// Forwards to the runtime-bound cuvidGetDecoderCaps; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS* caps) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidGetDecoderCaps;
  TORCH_CHECK(
      boundFn != nullptr, "cuvidGetDecoderCaps called but NVCUVID not loaded!");
  return boundFn(caps);
}
|
|
245
|
+
|
|
246
|
+
// Forwards to the runtime-bound cuvidCreateDecoder; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidCreateDecoder(
    CUvideodecoder* decoder,
    CUVIDDECODECREATEINFO* decoderParams) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidCreateDecoder;
  TORCH_CHECK(
      boundFn != nullptr, "cuvidCreateDecoder called but NVCUVID not loaded!");
  return boundFn(decoder, decoderParams);
}
|
|
254
|
+
|
|
255
|
+
// Forwards to the runtime-bound cuvidDestroyDecoder; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder decoder) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidDestroyDecoder;
  TORCH_CHECK(
      boundFn != nullptr, "cuvidDestroyDecoder called but NVCUVID not loaded!");
  return boundFn(decoder);
}
|
|
261
|
+
|
|
262
|
+
// Forwards to the runtime-bound cuvidDecodePicture; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI
cuvidDecodePicture(CUvideodecoder decoder, CUVIDPICPARAMS* picParams) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidDecodePicture;
  TORCH_CHECK(
      boundFn != nullptr, "cuvidDecodePicture called but NVCUVID not loaded!");
  return boundFn(decoder, picParams);
}
|
|
269
|
+
|
|
270
|
+
#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
|
|
271
|
+
// We need to protect the definition of the 32bit versions under the above
|
|
272
|
+
// conditions (see cuviddec.h). Defining them unconditionally would cause
|
|
273
|
+
// conflict compilation errors when cuviddec.h redefines those to the 64bit
|
|
274
|
+
// versions.
|
|
275
|
+
// Forwards to the runtime-bound 32-bit cuvidMapVideoFrame; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidMapVideoFrame(
    CUvideodecoder decoder,
    int pixIndex,
    unsigned int* framePtr,
    unsigned int* pitch,
    CUVIDPROCPARAMS* procParams) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidMapVideoFrame;
  TORCH_CHECK(
      boundFn != nullptr, "cuvidMapVideoFrame called but NVCUVID not loaded!");
  return boundFn(decoder, pixIndex, framePtr, pitch, procParams);
}
|
|
287
|
+
|
|
288
|
+
// Forwards to the runtime-bound 32-bit cuvidUnmapVideoFrame; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI
cuvidUnmapVideoFrame(CUvideodecoder decoder, unsigned int framePtr) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidUnmapVideoFrame;
  TORCH_CHECK(
      boundFn != nullptr,
      "cuvidUnmapVideoFrame called but NVCUVID not loaded!");
  return boundFn(decoder, framePtr);
}
|
|
295
|
+
#endif
|
|
296
|
+
|
|
297
|
+
// Forwards to the runtime-bound cuvidMapVideoFrame64; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI cuvidMapVideoFrame64(
    CUvideodecoder decoder,
    int pixIndex,
    unsigned long long* framePtr,
    unsigned int* pitch,
    CUVIDPROCPARAMS* procParams) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidMapVideoFrame64;
  TORCH_CHECK(
      boundFn != nullptr,
      "cuvidMapVideoFrame64 called but NVCUVID not loaded!");
  return boundFn(decoder, pixIndex, framePtr, pitch, procParams);
}
|
|
309
|
+
|
|
310
|
+
// Forwards to the runtime-bound cuvidUnmapVideoFrame64; raises through
// TORCH_CHECK if that pointer has not been bound.
CUresult CUDAAPI
cuvidUnmapVideoFrame64(CUvideodecoder decoder, unsigned long long framePtr) {
  auto* const boundFn = facebook::torchcodec::dl_cuvidUnmapVideoFrame64;
  TORCH_CHECK(
      boundFn != nullptr,
      "cuvidUnmapVideoFrame64 called but NVCUVID not loaded!");
  return boundFn(decoder, framePtr);
}
|
|
317
|
+
|
|
318
|
+
} // extern "C"
|
|
319
|
+
|
|
320
|
+
#endif // FBCODE_CAFFE2
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
namespace facebook::torchcodec {
|
|
10
|
+
|
|
11
|
+
// See note in corresponding cpp file
|
|
12
|
+
bool loadNVCUVIDLibrary();
|
|
13
|
+
|
|
14
|
+
} // namespace facebook::torchcodec
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// This source code is licensed under the BSD-style license found in the
|
|
5
|
+
// LICENSE file in the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
#include <torch/types.h>
|
|
8
|
+
#include <mutex>
|
|
9
|
+
|
|
10
|
+
#include "CUDACommon.h"
|
|
11
|
+
#include "FFMPEGCommon.h"
|
|
12
|
+
#include "NVDECCache.h"
|
|
13
|
+
|
|
14
|
+
#include <cuda_runtime.h> // For cudaGetDevice
|
|
15
|
+
|
|
16
|
+
extern "C" {
|
|
17
|
+
#include <libavutil/hwcontext_cuda.h>
|
|
18
|
+
#include <libavutil/pixdesc.h>
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
namespace facebook::torchcodec {
|
|
22
|
+
|
|
23
|
+
// Returns the cache instance associated with `device`. One instance exists
// per slot of a function-local static array of MAX_CUDA_GPUS entries.
NVDECCache& NVDECCache::getCache(const torch::Device& device) {
  static NVDECCache cacheInstances[MAX_CUDA_GPUS];
  // NOTE(review): presumably getDeviceIndex() guarantees a result in
  // [0, MAX_CUDA_GPUS) - confirm against its definition.
  const auto deviceSlot = getDeviceIndex(device);
  return cacheInstances[deviceSlot];
}
|
|
27
|
+
|
|
28
|
+
// Removes and returns the cached decoder whose key matches `videoFormat`, or
// returns nullptr when no such entry exists. The lookup and removal happen
// under cacheLock_.
UniqueCUvideodecoder NVDECCache::getDecoder(CUVIDEOFORMAT* videoFormat) {
  CacheKey key(videoFormat);
  std::lock_guard<std::mutex> lock(cacheLock_);

  auto cachedEntry = cache_.find(key);
  if (cachedEntry == cache_.end()) {
    return nullptr;
  }

  // Take ownership out of the map before dropping the now-empty entry.
  auto decoder = std::move(cachedEntry->second);
  cache_.erase(cachedEntry);
  return decoder;
}
|
|
41
|
+
|
|
42
|
+
// Stores `decoder` in the cache under the key derived from `videoFormat`.
// Returns false, without storing anything, when `decoder` is null or the
// cache already holds MAX_CACHE_SIZE entries; returns true otherwise.
bool NVDECCache::returnDecoder(
    CUVIDEOFORMAT* videoFormat,
    UniqueCUvideodecoder decoder) {
  if (decoder == nullptr) {
    return false;
  }

  CacheKey key(videoFormat);
  std::lock_guard<std::mutex> lock(cacheLock_);

  const bool hasRoom = cache_.size() < MAX_CACHE_SIZE;
  if (hasRoom) {
    cache_[key] = std::move(decoder);
  }
  return hasRoom;
}
|
|
59
|
+
|
|
60
|
+
} // namespace facebook::torchcodec
|